1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "hash-set.h"
26 #include "machmode.h"
27 #include "vec.h"
28 #include "double-int.h"
29 #include "input.h"
30 #include "alias.h"
31 #include "symtab.h"
32 #include "wide-int.h"
33 #include "inchash.h"
34 #include "tree.h"
35 #include "fold-const.h"
36 #include "stringpool.h"
37 #include "attribs.h"
38 #include "calls.h"
39 #include "stor-layout.h"
40 #include "varasm.h"
41 #include "tm_p.h"
42 #include "regs.h"
43 #include "hard-reg-set.h"
44 #include "insn-config.h"
45 #include "conditions.h"
46 #include "output.h"
47 #include "insn-codes.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "except.h"
51 #include "function.h"
52 #include "recog.h"
53 #include "hashtab.h"
54 #include "statistics.h"
55 #include "real.h"
56 #include "fixed-value.h"
57 #include "expmed.h"
58 #include "dojump.h"
59 #include "explow.h"
60 #include "emit-rtl.h"
61 #include "stmt.h"
62 #include "expr.h"
63 #include "optabs.h"
64 #include "diagnostic-core.h"
65 #include "toplev.h"
66 #include "predict.h"
67 #include "dominance.h"
68 #include "cfg.h"
69 #include "cfgrtl.h"
70 #include "cfganal.h"
71 #include "lcm.h"
72 #include "cfgbuild.h"
73 #include "cfgcleanup.h"
74 #include "basic-block.h"
75 #include "ggc.h"
76 #include "target.h"
77 #include "target-def.h"
78 #include "common/common-target.h"
79 #include "langhooks.h"
80 #include "reload.h"
81 #include "hash-map.h"
82 #include "is-a.h"
83 #include "plugin-api.h"
84 #include "ipa-ref.h"
85 #include "cgraph.h"
86 #include "hash-table.h"
87 #include "tree-ssa-alias.h"
88 #include "internal-fn.h"
89 #include "gimple-fold.h"
90 #include "tree-eh.h"
91 #include "gimple-expr.h"
92 #include "gimple.h"
93 #include "gimplify.h"
94 #include "cfgloop.h"
95 #include "dwarf2.h"
96 #include "df.h"
97 #include "tm-constrs.h"
98 #include "params.h"
99 #include "cselib.h"
100 #include "debug.h"
101 #include "sched-int.h"
102 #include "sbitmap.h"
103 #include "fibheap.h"
104 #include "opts.h"
105 #include "diagnostic.h"
106 #include "dumpfile.h"
107 #include "tree-pass.h"
108 #include "context.h"
109 #include "pass_manager.h"
110 #include "target-globals.h"
111 #include "tree-vectorizer.h"
112 #include "shrink-wrap.h"
113 #include "builtins.h"
114 #include "rtl-iter.h"
115 #include "tree-iterator.h"
116 #include "tree-chkp.h"
117 #include "rtl-chkp.h"
118
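/* Forward declarations for the PE/COFF symbol legitimization helpers
   (dllimport and extern-decl handling on Windows targets); their
   definitions appear later in this file.  */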
119 static rtx legitimize_dllimport_symbol (rtx, bool);
120 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
121 static rtx legitimize_pe_coff_symbol (rtx, bool);
122
123 #ifndef CHECK_STACK_LIMIT
124 #define CHECK_STACK_LIMIT (-1)
125 #endif
126
127 /* Return index of given mode in mult and division cost tables. */
128 #define MODE_INDEX(mode) \
129 ((mode) == QImode ? 0 \
130 : (mode) == HImode ? 1 \
131 : (mode) == SImode ? 2 \
132 : (mode) == DImode ? 3 \
133 : 4)
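/* For example, MODE_INDEX (SImode) is 2, selecting the SI entry of the
   per-mode multiply and divide cost arrays below; any other mode falls
   into the final "other" slot (index 4).  */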
134
135 /* Processor costs (relative to an add) */
136 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
137 #define COSTS_N_BYTES(N) ((N) * 2)
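/* Under that assumption the two scales agree on the baseline: a two-byte
   add costs COSTS_N_BYTES (2) == 4, the same value as COSTS_N_INSNS (1),
   so the size-based costs below stay comparable with the speed-based
   costs used elsewhere.  */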
138
139 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
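/* Placeholder strategy that simply calls the library routine for any
   block size; it fills the table entry (presumably the 64-bit variant)
   on processors for which that variant is never tuned.  */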
140
141 static stringop_algs ix86_size_memcpy[2] = {
142 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
143 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
144 static stringop_algs ix86_size_memset[2] = {
145 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
146 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
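/* Each stringop_algs entry gives the algorithm used when the block size
   is unknown, followed by {max_size, algorithm, noalign} triples tried in
   order, with -1 standing for "all remaining sizes".  When optimizing for
   size, both memcpy and memset simply use the one-byte rep prefix
   (rep movsb / rep stosb) for every length, as that is the shortest
   encoding.  */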
147
148 const
149 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
150 COSTS_N_BYTES (2), /* cost of an add instruction */
151 COSTS_N_BYTES (3), /* cost of a lea instruction */
152 COSTS_N_BYTES (2), /* variable shift costs */
153 COSTS_N_BYTES (3), /* constant shift costs */
154 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
155 COSTS_N_BYTES (3), /* HI */
156 COSTS_N_BYTES (3), /* SI */
157 COSTS_N_BYTES (3), /* DI */
158 COSTS_N_BYTES (5)}, /* other */
159 0, /* cost of multiply per each bit set */
160 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
161 COSTS_N_BYTES (3), /* HI */
162 COSTS_N_BYTES (3), /* SI */
163 COSTS_N_BYTES (3), /* DI */
164 COSTS_N_BYTES (5)}, /* other */
165 COSTS_N_BYTES (3), /* cost of movsx */
166 COSTS_N_BYTES (3), /* cost of movzx */
167 0, /* "large" insn */
168 2, /* MOVE_RATIO */
169 2, /* cost for loading QImode using movzbl */
170 {2, 2, 2}, /* cost of loading integer registers
171 in QImode, HImode and SImode.
172 Relative to reg-reg move (2). */
173 {2, 2, 2}, /* cost of storing integer registers */
174 2, /* cost of reg,reg fld/fst */
175 {2, 2, 2}, /* cost of loading fp registers
176 in SFmode, DFmode and XFmode */
177 {2, 2, 2}, /* cost of storing fp registers
178 in SFmode, DFmode and XFmode */
179 3, /* cost of moving MMX register */
180 {3, 3}, /* cost of loading MMX registers
181 in SImode and DImode */
182 {3, 3}, /* cost of storing MMX registers
183 in SImode and DImode */
184 3, /* cost of moving SSE register */
185 {3, 3, 3}, /* cost of loading SSE registers
186 in SImode, DImode and TImode */
187 {3, 3, 3}, /* cost of storing SSE registers
188 in SImode, DImode and TImode */
189 3, /* MMX or SSE register to integer */
190 0, /* size of l1 cache */
191 0, /* size of l2 cache */
192 0, /* size of prefetch block */
193 0, /* number of parallel prefetches */
194 2, /* Branch cost */
195 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
196 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
197 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
198 COSTS_N_BYTES (2), /* cost of FABS instruction. */
199 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
200 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
201 ix86_size_memcpy,
202 ix86_size_memset,
203 1, /* scalar_stmt_cost. */
204 1, /* scalar load_cost. */
205 1, /* scalar_store_cost. */
206 1, /* vec_stmt_cost. */
207 1, /* vec_to_scalar_cost. */
208 1, /* scalar_to_vec_cost. */
209 1, /* vec_align_load_cost. */
210 1, /* vec_unalign_load_cost. */
211 1, /* vec_store_cost. */
212 1, /* cond_taken_branch_cost. */
213 1, /* cond_not_taken_branch_cost. */
214 };
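/* The per-processor cost tables that follow use exactly the same field
   order as ix86_size_cost above; only the constants differ.  */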
215
216 /* Processor costs (relative to an add) */
217 static stringop_algs i386_memcpy[2] = {
218 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
219 DUMMY_STRINGOP_ALGS};
220 static stringop_algs i386_memset[2] = {
221 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
222 DUMMY_STRINGOP_ALGS};
223
224 static const
225 struct processor_costs i386_cost = { /* 386 specific costs */
226 COSTS_N_INSNS (1), /* cost of an add instruction */
227 COSTS_N_INSNS (1), /* cost of a lea instruction */
228 COSTS_N_INSNS (3), /* variable shift costs */
229 COSTS_N_INSNS (2), /* constant shift costs */
230 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
231 COSTS_N_INSNS (6), /* HI */
232 COSTS_N_INSNS (6), /* SI */
233 COSTS_N_INSNS (6), /* DI */
234 COSTS_N_INSNS (6)}, /* other */
235 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
236 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
237 COSTS_N_INSNS (23), /* HI */
238 COSTS_N_INSNS (23), /* SI */
239 COSTS_N_INSNS (23), /* DI */
240 COSTS_N_INSNS (23)}, /* other */
241 COSTS_N_INSNS (3), /* cost of movsx */
242 COSTS_N_INSNS (2), /* cost of movzx */
243 15, /* "large" insn */
244 3, /* MOVE_RATIO */
245 4, /* cost for loading QImode using movzbl */
246 {2, 4, 2}, /* cost of loading integer registers
247 in QImode, HImode and SImode.
248 Relative to reg-reg move (2). */
249 {2, 4, 2}, /* cost of storing integer registers */
250 2, /* cost of reg,reg fld/fst */
251 {8, 8, 8}, /* cost of loading fp registers
252 in SFmode, DFmode and XFmode */
253 {8, 8, 8}, /* cost of storing fp registers
254 in SFmode, DFmode and XFmode */
255 2, /* cost of moving MMX register */
256 {4, 8}, /* cost of loading MMX registers
257 in SImode and DImode */
258 {4, 8}, /* cost of storing MMX registers
259 in SImode and DImode */
260 2, /* cost of moving SSE register */
261 {4, 8, 16}, /* cost of loading SSE registers
262 in SImode, DImode and TImode */
263 {4, 8, 16}, /* cost of storing SSE registers
264 in SImode, DImode and TImode */
265 3, /* MMX or SSE register to integer */
266 0, /* size of l1 cache */
267 0, /* size of l2 cache */
268 0, /* size of prefetch block */
269 0, /* number of parallel prefetches */
270 1, /* Branch cost */
271 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
272 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
273 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
274 COSTS_N_INSNS (22), /* cost of FABS instruction. */
275 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
276 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
277 i386_memcpy,
278 i386_memset,
279 1, /* scalar_stmt_cost. */
280 1, /* scalar load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
290 };
291
292 static stringop_algs i486_memcpy[2] = {
293 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
294 DUMMY_STRINGOP_ALGS};
295 static stringop_algs i486_memset[2] = {
296 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
297 DUMMY_STRINGOP_ALGS};
298
299 static const
300 struct processor_costs i486_cost = { /* 486 specific costs */
301 COSTS_N_INSNS (1), /* cost of an add instruction */
302 COSTS_N_INSNS (1), /* cost of a lea instruction */
303 COSTS_N_INSNS (3), /* variable shift costs */
304 COSTS_N_INSNS (2), /* constant shift costs */
305 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
306 COSTS_N_INSNS (12), /* HI */
307 COSTS_N_INSNS (12), /* SI */
308 COSTS_N_INSNS (12), /* DI */
309 COSTS_N_INSNS (12)}, /* other */
310 1, /* cost of multiply per each bit set */
311 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
312 COSTS_N_INSNS (40), /* HI */
313 COSTS_N_INSNS (40), /* SI */
314 COSTS_N_INSNS (40), /* DI */
315 COSTS_N_INSNS (40)}, /* other */
316 COSTS_N_INSNS (3), /* cost of movsx */
317 COSTS_N_INSNS (2), /* cost of movzx */
318 15, /* "large" insn */
319 3, /* MOVE_RATIO */
320 4, /* cost for loading QImode using movzbl */
321 {2, 4, 2}, /* cost of loading integer registers
322 in QImode, HImode and SImode.
323 Relative to reg-reg move (2). */
324 {2, 4, 2}, /* cost of storing integer registers */
325 2, /* cost of reg,reg fld/fst */
326 {8, 8, 8}, /* cost of loading fp registers
327 in SFmode, DFmode and XFmode */
328 {8, 8, 8}, /* cost of storing fp registers
329 in SFmode, DFmode and XFmode */
330 2, /* cost of moving MMX register */
331 {4, 8}, /* cost of loading MMX registers
332 in SImode and DImode */
333 {4, 8}, /* cost of storing MMX registers
334 in SImode and DImode */
335 2, /* cost of moving SSE register */
336 {4, 8, 16}, /* cost of loading SSE registers
337 in SImode, DImode and TImode */
338 {4, 8, 16}, /* cost of storing SSE registers
339 in SImode, DImode and TImode */
340 3, /* MMX or SSE register to integer */
341 4, /* size of l1 cache. 486 has 8kB cache
342 shared for code and data, so 4kB is
343 not really precise. */
344 4, /* size of l2 cache */
345 0, /* size of prefetch block */
346 0, /* number of parallel prefetches */
347 1, /* Branch cost */
348 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
349 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
350 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
351 COSTS_N_INSNS (3), /* cost of FABS instruction. */
352 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
353 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
354 i486_memcpy,
355 i486_memset,
356 1, /* scalar_stmt_cost. */
357 1, /* scalar load_cost. */
358 1, /* scalar_store_cost. */
359 1, /* vec_stmt_cost. */
360 1, /* vec_to_scalar_cost. */
361 1, /* scalar_to_vec_cost. */
362 1, /* vec_align_load_cost. */
363 2, /* vec_unalign_load_cost. */
364 1, /* vec_store_cost. */
365 3, /* cond_taken_branch_cost. */
366 1, /* cond_not_taken_branch_cost. */
367 };
368
369 static stringop_algs pentium_memcpy[2] = {
370 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
371 DUMMY_STRINGOP_ALGS};
372 static stringop_algs pentium_memset[2] = {
373 {libcall, {{-1, rep_prefix_4_byte, false}}},
374 DUMMY_STRINGOP_ALGS};
375
376 static const
377 struct processor_costs pentium_cost = {
378 COSTS_N_INSNS (1), /* cost of an add instruction */
379 COSTS_N_INSNS (1), /* cost of a lea instruction */
380 COSTS_N_INSNS (4), /* variable shift costs */
381 COSTS_N_INSNS (1), /* constant shift costs */
382 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
383 COSTS_N_INSNS (11), /* HI */
384 COSTS_N_INSNS (11), /* SI */
385 COSTS_N_INSNS (11), /* DI */
386 COSTS_N_INSNS (11)}, /* other */
387 0, /* cost of multiply per each bit set */
388 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
389 COSTS_N_INSNS (25), /* HI */
390 COSTS_N_INSNS (25), /* SI */
391 COSTS_N_INSNS (25), /* DI */
392 COSTS_N_INSNS (25)}, /* other */
393 COSTS_N_INSNS (3), /* cost of movsx */
394 COSTS_N_INSNS (2), /* cost of movzx */
395 8, /* "large" insn */
396 6, /* MOVE_RATIO */
397 6, /* cost for loading QImode using movzbl */
398 {2, 4, 2}, /* cost of loading integer registers
399 in QImode, HImode and SImode.
400 Relative to reg-reg move (2). */
401 {2, 4, 2}, /* cost of storing integer registers */
402 2, /* cost of reg,reg fld/fst */
403 {2, 2, 6}, /* cost of loading fp registers
404 in SFmode, DFmode and XFmode */
405 {4, 4, 6}, /* cost of storing fp registers
406 in SFmode, DFmode and XFmode */
407 8, /* cost of moving MMX register */
408 {8, 8}, /* cost of loading MMX registers
409 in SImode and DImode */
410 {8, 8}, /* cost of storing MMX registers
411 in SImode and DImode */
412 2, /* cost of moving SSE register */
413 {4, 8, 16}, /* cost of loading SSE registers
414 in SImode, DImode and TImode */
415 {4, 8, 16}, /* cost of storing SSE registers
416 in SImode, DImode and TImode */
417 3, /* MMX or SSE register to integer */
418 8, /* size of l1 cache. */
419 8, /* size of l2 cache */
420 0, /* size of prefetch block */
421 0, /* number of parallel prefetches */
422 2, /* Branch cost */
423 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
424 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
425 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
426 COSTS_N_INSNS (1), /* cost of FABS instruction. */
427 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
428 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
429 pentium_memcpy,
430 pentium_memset,
431 1, /* scalar_stmt_cost. */
432 1, /* scalar load_cost. */
433 1, /* scalar_store_cost. */
434 1, /* vec_stmt_cost. */
435 1, /* vec_to_scalar_cost. */
436 1, /* scalar_to_vec_cost. */
437 1, /* vec_align_load_cost. */
438 2, /* vec_unalign_load_cost. */
439 1, /* vec_store_cost. */
440 3, /* cond_taken_branch_cost. */
441 1, /* cond_not_taken_branch_cost. */
442 };
443
 444 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
 445    (we ensure the alignment).  For small blocks an inline loop is still a
 446    noticeable win; for bigger blocks either rep movsl or rep movsb is the
 447    way to go.  Rep movsb apparently has a more expensive startup time in the
 448    CPU, but after 4K the difference is down in the noise.  */
449 static stringop_algs pentiumpro_memcpy[2] = {
450 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
451 {8192, rep_prefix_4_byte, false},
452 {-1, rep_prefix_1_byte, false}}},
453 DUMMY_STRINGOP_ALGS};
454 static stringop_algs pentiumpro_memset[2] = {
455 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
456 {8192, rep_prefix_4_byte, false},
457 {-1, libcall, false}}},
458 DUMMY_STRINGOP_ALGS};
459 static const
460 struct processor_costs pentiumpro_cost = {
461 COSTS_N_INSNS (1), /* cost of an add instruction */
462 COSTS_N_INSNS (1), /* cost of a lea instruction */
463 COSTS_N_INSNS (1), /* variable shift costs */
464 COSTS_N_INSNS (1), /* constant shift costs */
465 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
466 COSTS_N_INSNS (4), /* HI */
467 COSTS_N_INSNS (4), /* SI */
468 COSTS_N_INSNS (4), /* DI */
469 COSTS_N_INSNS (4)}, /* other */
470 0, /* cost of multiply per each bit set */
471 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
472 COSTS_N_INSNS (17), /* HI */
473 COSTS_N_INSNS (17), /* SI */
474 COSTS_N_INSNS (17), /* DI */
475 COSTS_N_INSNS (17)}, /* other */
476 COSTS_N_INSNS (1), /* cost of movsx */
477 COSTS_N_INSNS (1), /* cost of movzx */
478 8, /* "large" insn */
479 6, /* MOVE_RATIO */
480 2, /* cost for loading QImode using movzbl */
481 {4, 4, 4}, /* cost of loading integer registers
482 in QImode, HImode and SImode.
483 Relative to reg-reg move (2). */
484 {2, 2, 2}, /* cost of storing integer registers */
485 2, /* cost of reg,reg fld/fst */
486 {2, 2, 6}, /* cost of loading fp registers
487 in SFmode, DFmode and XFmode */
488 {4, 4, 6}, /* cost of storing fp registers
489 in SFmode, DFmode and XFmode */
490 2, /* cost of moving MMX register */
491 {2, 2}, /* cost of loading MMX registers
492 in SImode and DImode */
493 {2, 2}, /* cost of storing MMX registers
494 in SImode and DImode */
495 2, /* cost of moving SSE register */
496 {2, 2, 8}, /* cost of loading SSE registers
497 in SImode, DImode and TImode */
498 {2, 2, 8}, /* cost of storing SSE registers
499 in SImode, DImode and TImode */
500 3, /* MMX or SSE register to integer */
501 8, /* size of l1 cache. */
502 256, /* size of l2 cache */
503 32, /* size of prefetch block */
504 6, /* number of parallel prefetches */
505 2, /* Branch cost */
506 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
507 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
508 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
509 COSTS_N_INSNS (2), /* cost of FABS instruction. */
510 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
511 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
512 pentiumpro_memcpy,
513 pentiumpro_memset,
514 1, /* scalar_stmt_cost. */
515 1, /* scalar load_cost. */
516 1, /* scalar_store_cost. */
517 1, /* vec_stmt_cost. */
518 1, /* vec_to_scalar_cost. */
519 1, /* scalar_to_vec_cost. */
520 1, /* vec_align_load_cost. */
521 2, /* vec_unalign_load_cost. */
522 1, /* vec_store_cost. */
523 3, /* cond_taken_branch_cost. */
524 1, /* cond_not_taken_branch_cost. */
525 };
526
527 static stringop_algs geode_memcpy[2] = {
528 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
529 DUMMY_STRINGOP_ALGS};
530 static stringop_algs geode_memset[2] = {
531 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
532 DUMMY_STRINGOP_ALGS};
533 static const
534 struct processor_costs geode_cost = {
535 COSTS_N_INSNS (1), /* cost of an add instruction */
536 COSTS_N_INSNS (1), /* cost of a lea instruction */
537 COSTS_N_INSNS (2), /* variable shift costs */
538 COSTS_N_INSNS (1), /* constant shift costs */
539 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
540 COSTS_N_INSNS (4), /* HI */
541 COSTS_N_INSNS (7), /* SI */
542 COSTS_N_INSNS (7), /* DI */
543 COSTS_N_INSNS (7)}, /* other */
544 0, /* cost of multiply per each bit set */
545 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
546 COSTS_N_INSNS (23), /* HI */
547 COSTS_N_INSNS (39), /* SI */
548 COSTS_N_INSNS (39), /* DI */
549 COSTS_N_INSNS (39)}, /* other */
550 COSTS_N_INSNS (1), /* cost of movsx */
551 COSTS_N_INSNS (1), /* cost of movzx */
552 8, /* "large" insn */
553 4, /* MOVE_RATIO */
554 1, /* cost for loading QImode using movzbl */
555 {1, 1, 1}, /* cost of loading integer registers
556 in QImode, HImode and SImode.
557 Relative to reg-reg move (2). */
558 {1, 1, 1}, /* cost of storing integer registers */
559 1, /* cost of reg,reg fld/fst */
560 {1, 1, 1}, /* cost of loading fp registers
561 in SFmode, DFmode and XFmode */
562 {4, 6, 6}, /* cost of storing fp registers
563 in SFmode, DFmode and XFmode */
564
565 1, /* cost of moving MMX register */
566 {1, 1}, /* cost of loading MMX registers
567 in SImode and DImode */
568 {1, 1}, /* cost of storing MMX registers
569 in SImode and DImode */
570 1, /* cost of moving SSE register */
571 {1, 1, 1}, /* cost of loading SSE registers
572 in SImode, DImode and TImode */
573 {1, 1, 1}, /* cost of storing SSE registers
574 in SImode, DImode and TImode */
575 1, /* MMX or SSE register to integer */
576 64, /* size of l1 cache. */
577 128, /* size of l2 cache. */
578 32, /* size of prefetch block */
579 1, /* number of parallel prefetches */
580 1, /* Branch cost */
581 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
582 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
583 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
584 COSTS_N_INSNS (1), /* cost of FABS instruction. */
585 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
586 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
587 geode_memcpy,
588 geode_memset,
589 1, /* scalar_stmt_cost. */
590 1, /* scalar load_cost. */
591 1, /* scalar_store_cost. */
592 1, /* vec_stmt_cost. */
593 1, /* vec_to_scalar_cost. */
594 1, /* scalar_to_vec_cost. */
595 1, /* vec_align_load_cost. */
596 2, /* vec_unalign_load_cost. */
597 1, /* vec_store_cost. */
598 3, /* cond_taken_branch_cost. */
599 1, /* cond_not_taken_branch_cost. */
600 };
601
602 static stringop_algs k6_memcpy[2] = {
603 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
604 DUMMY_STRINGOP_ALGS};
605 static stringop_algs k6_memset[2] = {
606 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
607 DUMMY_STRINGOP_ALGS};
608 static const
609 struct processor_costs k6_cost = {
610 COSTS_N_INSNS (1), /* cost of an add instruction */
611 COSTS_N_INSNS (2), /* cost of a lea instruction */
612 COSTS_N_INSNS (1), /* variable shift costs */
613 COSTS_N_INSNS (1), /* constant shift costs */
614 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
615 COSTS_N_INSNS (3), /* HI */
616 COSTS_N_INSNS (3), /* SI */
617 COSTS_N_INSNS (3), /* DI */
618 COSTS_N_INSNS (3)}, /* other */
619 0, /* cost of multiply per each bit set */
620 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
621 COSTS_N_INSNS (18), /* HI */
622 COSTS_N_INSNS (18), /* SI */
623 COSTS_N_INSNS (18), /* DI */
624 COSTS_N_INSNS (18)}, /* other */
625 COSTS_N_INSNS (2), /* cost of movsx */
626 COSTS_N_INSNS (2), /* cost of movzx */
627 8, /* "large" insn */
628 4, /* MOVE_RATIO */
629 3, /* cost for loading QImode using movzbl */
630 {4, 5, 4}, /* cost of loading integer registers
631 in QImode, HImode and SImode.
632 Relative to reg-reg move (2). */
633 {2, 3, 2}, /* cost of storing integer registers */
634 4, /* cost of reg,reg fld/fst */
635 {6, 6, 6}, /* cost of loading fp registers
636 in SFmode, DFmode and XFmode */
637 {4, 4, 4}, /* cost of storing fp registers
638 in SFmode, DFmode and XFmode */
639 2, /* cost of moving MMX register */
640 {2, 2}, /* cost of loading MMX registers
641 in SImode and DImode */
642 {2, 2}, /* cost of storing MMX registers
643 in SImode and DImode */
644 2, /* cost of moving SSE register */
645 {2, 2, 8}, /* cost of loading SSE registers
646 in SImode, DImode and TImode */
647 {2, 2, 8}, /* cost of storing SSE registers
648 in SImode, DImode and TImode */
649 6, /* MMX or SSE register to integer */
650 32, /* size of l1 cache. */
651 32, /* size of l2 cache. Some models
652 have integrated l2 cache, but
653 optimizing for k6 is not important
654 enough to worry about that. */
655 32, /* size of prefetch block */
656 1, /* number of parallel prefetches */
657 1, /* Branch cost */
658 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
659 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
660 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
661 COSTS_N_INSNS (2), /* cost of FABS instruction. */
662 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
663 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
664 k6_memcpy,
665 k6_memset,
666 1, /* scalar_stmt_cost. */
667 1, /* scalar load_cost. */
668 1, /* scalar_store_cost. */
669 1, /* vec_stmt_cost. */
670 1, /* vec_to_scalar_cost. */
671 1, /* scalar_to_vec_cost. */
672 1, /* vec_align_load_cost. */
673 2, /* vec_unalign_load_cost. */
674 1, /* vec_store_cost. */
675 3, /* cond_taken_branch_cost. */
676 1, /* cond_not_taken_branch_cost. */
677 };
678
 679 /* For some reason, Athlon deals better with the REP prefix (relative to
 680    loops) than K8 does.  Alignment becomes important after 8 bytes for
 681    memcpy and 128 bytes for memset.  */
682 static stringop_algs athlon_memcpy[2] = {
683 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
684 DUMMY_STRINGOP_ALGS};
685 static stringop_algs athlon_memset[2] = {
686 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
687 DUMMY_STRINGOP_ALGS};
688 static const
689 struct processor_costs athlon_cost = {
690 COSTS_N_INSNS (1), /* cost of an add instruction */
691 COSTS_N_INSNS (2), /* cost of a lea instruction */
692 COSTS_N_INSNS (1), /* variable shift costs */
693 COSTS_N_INSNS (1), /* constant shift costs */
694 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
695 COSTS_N_INSNS (5), /* HI */
696 COSTS_N_INSNS (5), /* SI */
697 COSTS_N_INSNS (5), /* DI */
698 COSTS_N_INSNS (5)}, /* other */
699 0, /* cost of multiply per each bit set */
700 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
701 COSTS_N_INSNS (26), /* HI */
702 COSTS_N_INSNS (42), /* SI */
703 COSTS_N_INSNS (74), /* DI */
704 COSTS_N_INSNS (74)}, /* other */
705 COSTS_N_INSNS (1), /* cost of movsx */
706 COSTS_N_INSNS (1), /* cost of movzx */
707 8, /* "large" insn */
708 9, /* MOVE_RATIO */
709 4, /* cost for loading QImode using movzbl */
710 {3, 4, 3}, /* cost of loading integer registers
711 in QImode, HImode and SImode.
712 Relative to reg-reg move (2). */
713 {3, 4, 3}, /* cost of storing integer registers */
714 4, /* cost of reg,reg fld/fst */
715 {4, 4, 12}, /* cost of loading fp registers
716 in SFmode, DFmode and XFmode */
717 {6, 6, 8}, /* cost of storing fp registers
718 in SFmode, DFmode and XFmode */
719 2, /* cost of moving MMX register */
720 {4, 4}, /* cost of loading MMX registers
721 in SImode and DImode */
722 {4, 4}, /* cost of storing MMX registers
723 in SImode and DImode */
724 2, /* cost of moving SSE register */
725 {4, 4, 6}, /* cost of loading SSE registers
726 in SImode, DImode and TImode */
727 {4, 4, 5}, /* cost of storing SSE registers
728 in SImode, DImode and TImode */
729 5, /* MMX or SSE register to integer */
730 64, /* size of l1 cache. */
731 256, /* size of l2 cache. */
732 64, /* size of prefetch block */
733 6, /* number of parallel prefetches */
734 5, /* Branch cost */
735 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
736 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
737 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
738 COSTS_N_INSNS (2), /* cost of FABS instruction. */
739 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
740 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
741 athlon_memcpy,
742 athlon_memset,
743 1, /* scalar_stmt_cost. */
744 1, /* scalar load_cost. */
745 1, /* scalar_store_cost. */
746 1, /* vec_stmt_cost. */
747 1, /* vec_to_scalar_cost. */
748 1, /* scalar_to_vec_cost. */
749 1, /* vec_align_load_cost. */
750 2, /* vec_unalign_load_cost. */
751 1, /* vec_store_cost. */
752 3, /* cond_taken_branch_cost. */
753 1, /* cond_not_taken_branch_cost. */
754 };
755
 756 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
 757    small blocks it is better to use a loop.  For large blocks, a libcall can
 758    do nontemporal accesses and beat inline code considerably.  */
759 static stringop_algs k8_memcpy[2] = {
760 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
761 {-1, rep_prefix_4_byte, false}}},
762 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
763 {-1, libcall, false}}}};
764 static stringop_algs k8_memset[2] = {
765 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
766 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
767 {libcall, {{48, unrolled_loop, false},
768 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
769 static const
770 struct processor_costs k8_cost = {
771 COSTS_N_INSNS (1), /* cost of an add instruction */
772 COSTS_N_INSNS (2), /* cost of a lea instruction */
773 COSTS_N_INSNS (1), /* variable shift costs */
774 COSTS_N_INSNS (1), /* constant shift costs */
775 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
776 COSTS_N_INSNS (4), /* HI */
777 COSTS_N_INSNS (3), /* SI */
778 COSTS_N_INSNS (4), /* DI */
779 COSTS_N_INSNS (5)}, /* other */
780 0, /* cost of multiply per each bit set */
781 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
782 COSTS_N_INSNS (26), /* HI */
783 COSTS_N_INSNS (42), /* SI */
784 COSTS_N_INSNS (74), /* DI */
785 COSTS_N_INSNS (74)}, /* other */
786 COSTS_N_INSNS (1), /* cost of movsx */
787 COSTS_N_INSNS (1), /* cost of movzx */
788 8, /* "large" insn */
789 9, /* MOVE_RATIO */
790 4, /* cost for loading QImode using movzbl */
791 {3, 4, 3}, /* cost of loading integer registers
792 in QImode, HImode and SImode.
793 Relative to reg-reg move (2). */
794 {3, 4, 3}, /* cost of storing integer registers */
795 4, /* cost of reg,reg fld/fst */
796 {4, 4, 12}, /* cost of loading fp registers
797 in SFmode, DFmode and XFmode */
798 {6, 6, 8}, /* cost of storing fp registers
799 in SFmode, DFmode and XFmode */
800 2, /* cost of moving MMX register */
801 {3, 3}, /* cost of loading MMX registers
802 in SImode and DImode */
803 {4, 4}, /* cost of storing MMX registers
804 in SImode and DImode */
805 2, /* cost of moving SSE register */
806 {4, 3, 6}, /* cost of loading SSE registers
807 in SImode, DImode and TImode */
808 {4, 4, 5}, /* cost of storing SSE registers
809 in SImode, DImode and TImode */
810 5, /* MMX or SSE register to integer */
811 64, /* size of l1 cache. */
812 512, /* size of l2 cache. */
813 64, /* size of prefetch block */
 814   /* New AMD processors never drop prefetches; if they cannot be performed
 815      immediately, they are queued.  We set the number of simultaneous prefetches
 816      to a large constant to reflect this (it is probably not a good idea not
 817      to limit the number of prefetches at all, as their execution also takes
 818      some time).  */
819 100, /* number of parallel prefetches */
820 3, /* Branch cost */
821 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
822 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
823 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
824 COSTS_N_INSNS (2), /* cost of FABS instruction. */
825 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
826 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
827
828 k8_memcpy,
829 k8_memset,
830 4, /* scalar_stmt_cost. */
831 2, /* scalar load_cost. */
832 2, /* scalar_store_cost. */
833 5, /* vec_stmt_cost. */
834 0, /* vec_to_scalar_cost. */
835 2, /* scalar_to_vec_cost. */
836 2, /* vec_align_load_cost. */
837 3, /* vec_unalign_load_cost. */
838 3, /* vec_store_cost. */
839 3, /* cond_taken_branch_cost. */
840 2, /* cond_not_taken_branch_cost. */
841 };
842
 843 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
 844    very small blocks it is better to use a loop.  For large blocks, a libcall
 845    can do nontemporal accesses and beat inline code considerably.  */
846 static stringop_algs amdfam10_memcpy[2] = {
847 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
848 {-1, rep_prefix_4_byte, false}}},
849 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
850 {-1, libcall, false}}}};
851 static stringop_algs amdfam10_memset[2] = {
852 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
853 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
854 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
855 {-1, libcall, false}}}};
856 struct processor_costs amdfam10_cost = {
857 COSTS_N_INSNS (1), /* cost of an add instruction */
858 COSTS_N_INSNS (2), /* cost of a lea instruction */
859 COSTS_N_INSNS (1), /* variable shift costs */
860 COSTS_N_INSNS (1), /* constant shift costs */
861 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
862 COSTS_N_INSNS (4), /* HI */
863 COSTS_N_INSNS (3), /* SI */
864 COSTS_N_INSNS (4), /* DI */
865 COSTS_N_INSNS (5)}, /* other */
866 0, /* cost of multiply per each bit set */
867 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
868 COSTS_N_INSNS (35), /* HI */
869 COSTS_N_INSNS (51), /* SI */
870 COSTS_N_INSNS (83), /* DI */
871 COSTS_N_INSNS (83)}, /* other */
872 COSTS_N_INSNS (1), /* cost of movsx */
873 COSTS_N_INSNS (1), /* cost of movzx */
874 8, /* "large" insn */
875 9, /* MOVE_RATIO */
876 4, /* cost for loading QImode using movzbl */
877 {3, 4, 3}, /* cost of loading integer registers
878 in QImode, HImode and SImode.
879 Relative to reg-reg move (2). */
880 {3, 4, 3}, /* cost of storing integer registers */
881 4, /* cost of reg,reg fld/fst */
882 {4, 4, 12}, /* cost of loading fp registers
883 in SFmode, DFmode and XFmode */
884 {6, 6, 8}, /* cost of storing fp registers
885 in SFmode, DFmode and XFmode */
886 2, /* cost of moving MMX register */
887 {3, 3}, /* cost of loading MMX registers
888 in SImode and DImode */
889 {4, 4}, /* cost of storing MMX registers
890 in SImode and DImode */
891 2, /* cost of moving SSE register */
892 {4, 4, 3}, /* cost of loading SSE registers
893 in SImode, DImode and TImode */
894 {4, 4, 5}, /* cost of storing SSE registers
895 in SImode, DImode and TImode */
896 3, /* MMX or SSE register to integer */
897 /* On K8:
898 MOVD reg64, xmmreg Double FSTORE 4
899 MOVD reg32, xmmreg Double FSTORE 4
900 On AMDFAM10:
901 MOVD reg64, xmmreg Double FADD 3
902 1/1 1/1
903 MOVD reg32, xmmreg Double FADD 3
904 1/1 1/1 */
905 64, /* size of l1 cache. */
906 512, /* size of l2 cache. */
907 64, /* size of prefetch block */
 908   /* New AMD processors never drop prefetches; if they cannot be performed
 909      immediately, they are queued.  We set the number of simultaneous prefetches
 910      to a large constant to reflect this (it is probably not a good idea not
 911      to limit the number of prefetches at all, as their execution also takes
 912      some time).  */
913 100, /* number of parallel prefetches */
914 2, /* Branch cost */
915 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
916 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
917 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
918 COSTS_N_INSNS (2), /* cost of FABS instruction. */
919 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
920 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
921
922 amdfam10_memcpy,
923 amdfam10_memset,
924 4, /* scalar_stmt_cost. */
925 2, /* scalar load_cost. */
926 2, /* scalar_store_cost. */
927 6, /* vec_stmt_cost. */
928 0, /* vec_to_scalar_cost. */
929 2, /* scalar_to_vec_cost. */
930 2, /* vec_align_load_cost. */
931 2, /* vec_unalign_load_cost. */
932 2, /* vec_store_cost. */
933 2, /* cond_taken_branch_cost. */
934 1, /* cond_not_taken_branch_cost. */
935 };
936
 937 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
 938    very small blocks it is better to use a loop.  For large blocks, a libcall
 939    can do nontemporal accesses and beat inline code considerably.  */
940 static stringop_algs bdver1_memcpy[2] = {
941 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
942 {-1, rep_prefix_4_byte, false}}},
943 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
944 {-1, libcall, false}}}};
945 static stringop_algs bdver1_memset[2] = {
946 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
947 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
948 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
949 {-1, libcall, false}}}};
950
951 const struct processor_costs bdver1_cost = {
952 COSTS_N_INSNS (1), /* cost of an add instruction */
953 COSTS_N_INSNS (1), /* cost of a lea instruction */
954 COSTS_N_INSNS (1), /* variable shift costs */
955 COSTS_N_INSNS (1), /* constant shift costs */
956 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
957 COSTS_N_INSNS (4), /* HI */
958 COSTS_N_INSNS (4), /* SI */
959 COSTS_N_INSNS (6), /* DI */
960 COSTS_N_INSNS (6)}, /* other */
961 0, /* cost of multiply per each bit set */
962 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
963 COSTS_N_INSNS (35), /* HI */
964 COSTS_N_INSNS (51), /* SI */
965 COSTS_N_INSNS (83), /* DI */
966 COSTS_N_INSNS (83)}, /* other */
967 COSTS_N_INSNS (1), /* cost of movsx */
968 COSTS_N_INSNS (1), /* cost of movzx */
969 8, /* "large" insn */
970 9, /* MOVE_RATIO */
971 4, /* cost for loading QImode using movzbl */
972 {5, 5, 4}, /* cost of loading integer registers
973 in QImode, HImode and SImode.
974 Relative to reg-reg move (2). */
975 {4, 4, 4}, /* cost of storing integer registers */
976 2, /* cost of reg,reg fld/fst */
977 {5, 5, 12}, /* cost of loading fp registers
978 in SFmode, DFmode and XFmode */
979 {4, 4, 8}, /* cost of storing fp registers
980 in SFmode, DFmode and XFmode */
981 2, /* cost of moving MMX register */
982 {4, 4}, /* cost of loading MMX registers
983 in SImode and DImode */
984 {4, 4}, /* cost of storing MMX registers
985 in SImode and DImode */
986 2, /* cost of moving SSE register */
987 {4, 4, 4}, /* cost of loading SSE registers
988 in SImode, DImode and TImode */
989 {4, 4, 4}, /* cost of storing SSE registers
990 in SImode, DImode and TImode */
991 2, /* MMX or SSE register to integer */
992 /* On K8:
993 MOVD reg64, xmmreg Double FSTORE 4
994 MOVD reg32, xmmreg Double FSTORE 4
995 On AMDFAM10:
996 MOVD reg64, xmmreg Double FADD 3
997 1/1 1/1
998 MOVD reg32, xmmreg Double FADD 3
999 1/1 1/1 */
1000 16, /* size of l1 cache. */
1001 2048, /* size of l2 cache. */
1002 64, /* size of prefetch block */
 1003   /* New AMD processors never drop prefetches; if they cannot be performed
 1004      immediately, they are queued.  We set the number of simultaneous prefetches
 1005      to a large constant to reflect this (it is probably not a good idea not
 1006      to limit the number of prefetches at all, as their execution also takes
 1007      some time).  */
1008 100, /* number of parallel prefetches */
1009 2, /* Branch cost */
1010 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1011 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1012 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1013 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1014 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1015 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1016
1017 bdver1_memcpy,
1018 bdver1_memset,
1019 6, /* scalar_stmt_cost. */
1020 4, /* scalar load_cost. */
1021 4, /* scalar_store_cost. */
1022 6, /* vec_stmt_cost. */
1023 0, /* vec_to_scalar_cost. */
1024 2, /* scalar_to_vec_cost. */
1025 4, /* vec_align_load_cost. */
1026 4, /* vec_unalign_load_cost. */
1027 4, /* vec_store_cost. */
1028 4, /* cond_taken_branch_cost. */
1029 2, /* cond_not_taken_branch_cost. */
1030 };
1031
 1032 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
 1033    very small blocks it is better to use a loop.  For large blocks, a libcall
 1034    can do nontemporal accesses and beat inline code considerably.  */
1035
1036 static stringop_algs bdver2_memcpy[2] = {
1037 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1038 {-1, rep_prefix_4_byte, false}}},
1039 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1040 {-1, libcall, false}}}};
1041 static stringop_algs bdver2_memset[2] = {
1042 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1043 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1044 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1045 {-1, libcall, false}}}};
1046
1047 const struct processor_costs bdver2_cost = {
1048 COSTS_N_INSNS (1), /* cost of an add instruction */
1049 COSTS_N_INSNS (1), /* cost of a lea instruction */
1050 COSTS_N_INSNS (1), /* variable shift costs */
1051 COSTS_N_INSNS (1), /* constant shift costs */
1052 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1053 COSTS_N_INSNS (4), /* HI */
1054 COSTS_N_INSNS (4), /* SI */
1055 COSTS_N_INSNS (6), /* DI */
1056 COSTS_N_INSNS (6)}, /* other */
1057 0, /* cost of multiply per each bit set */
1058 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1059 COSTS_N_INSNS (35), /* HI */
1060 COSTS_N_INSNS (51), /* SI */
1061 COSTS_N_INSNS (83), /* DI */
1062 COSTS_N_INSNS (83)}, /* other */
1063 COSTS_N_INSNS (1), /* cost of movsx */
1064 COSTS_N_INSNS (1), /* cost of movzx */
1065 8, /* "large" insn */
1066 9, /* MOVE_RATIO */
1067 4, /* cost for loading QImode using movzbl */
1068 {5, 5, 4}, /* cost of loading integer registers
1069 in QImode, HImode and SImode.
1070 Relative to reg-reg move (2). */
1071 {4, 4, 4}, /* cost of storing integer registers */
1072 2, /* cost of reg,reg fld/fst */
1073 {5, 5, 12}, /* cost of loading fp registers
1074 in SFmode, DFmode and XFmode */
1075 {4, 4, 8}, /* cost of storing fp registers
1076 in SFmode, DFmode and XFmode */
1077 2, /* cost of moving MMX register */
1078 {4, 4}, /* cost of loading MMX registers
1079 in SImode and DImode */
1080 {4, 4}, /* cost of storing MMX registers
1081 in SImode and DImode */
1082 2, /* cost of moving SSE register */
1083 {4, 4, 4}, /* cost of loading SSE registers
1084 in SImode, DImode and TImode */
1085 {4, 4, 4}, /* cost of storing SSE registers
1086 in SImode, DImode and TImode */
1087 2, /* MMX or SSE register to integer */
1088 /* On K8:
1089 MOVD reg64, xmmreg Double FSTORE 4
1090 MOVD reg32, xmmreg Double FSTORE 4
1091 On AMDFAM10:
1092 MOVD reg64, xmmreg Double FADD 3
1093 1/1 1/1
1094 MOVD reg32, xmmreg Double FADD 3
1095 1/1 1/1 */
1096 16, /* size of l1 cache. */
1097 2048, /* size of l2 cache. */
1098 64, /* size of prefetch block */
 1099   /* New AMD processors never drop prefetches; if they cannot be performed
 1100      immediately, they are queued.  We set the number of simultaneous prefetches
 1101      to a large constant to reflect this (it is probably not a good idea not
 1102      to limit the number of prefetches at all, as their execution also takes
 1103      some time).  */
1104 100, /* number of parallel prefetches */
1105 2, /* Branch cost */
1106 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1107 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1108 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1109 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1110 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1111 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1112
1113 bdver2_memcpy,
1114 bdver2_memset,
1115 6, /* scalar_stmt_cost. */
1116 4, /* scalar load_cost. */
1117 4, /* scalar_store_cost. */
1118 6, /* vec_stmt_cost. */
1119 0, /* vec_to_scalar_cost. */
1120 2, /* scalar_to_vec_cost. */
1121 4, /* vec_align_load_cost. */
1122 4, /* vec_unalign_load_cost. */
1123 4, /* vec_store_cost. */
1124 4, /* cond_taken_branch_cost. */
1125 2, /* cond_not_taken_branch_cost. */
1126 };
1127
1128
 1129 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
 1130    very small blocks it is better to use a loop.  For large blocks, a libcall
 1131    can do nontemporal accesses and beat inline code considerably.  */
1132 static stringop_algs bdver3_memcpy[2] = {
1133 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1134 {-1, rep_prefix_4_byte, false}}},
1135 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1136 {-1, libcall, false}}}};
1137 static stringop_algs bdver3_memset[2] = {
1138 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1139 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1140 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1141 {-1, libcall, false}}}};
1142 struct processor_costs bdver3_cost = {
1143 COSTS_N_INSNS (1), /* cost of an add instruction */
1144 COSTS_N_INSNS (1), /* cost of a lea instruction */
1145 COSTS_N_INSNS (1), /* variable shift costs */
1146 COSTS_N_INSNS (1), /* constant shift costs */
1147 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1148 COSTS_N_INSNS (4), /* HI */
1149 COSTS_N_INSNS (4), /* SI */
1150 COSTS_N_INSNS (6), /* DI */
1151 COSTS_N_INSNS (6)}, /* other */
1152 0, /* cost of multiply per each bit set */
1153 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1154 COSTS_N_INSNS (35), /* HI */
1155 COSTS_N_INSNS (51), /* SI */
1156 COSTS_N_INSNS (83), /* DI */
1157 COSTS_N_INSNS (83)}, /* other */
1158 COSTS_N_INSNS (1), /* cost of movsx */
1159 COSTS_N_INSNS (1), /* cost of movzx */
1160 8, /* "large" insn */
1161 9, /* MOVE_RATIO */
1162 4, /* cost for loading QImode using movzbl */
1163 {5, 5, 4}, /* cost of loading integer registers
1164 in QImode, HImode and SImode.
1165 Relative to reg-reg move (2). */
1166 {4, 4, 4}, /* cost of storing integer registers */
1167 2, /* cost of reg,reg fld/fst */
1168 {5, 5, 12}, /* cost of loading fp registers
1169 in SFmode, DFmode and XFmode */
1170 {4, 4, 8}, /* cost of storing fp registers
1171 in SFmode, DFmode and XFmode */
1172 2, /* cost of moving MMX register */
1173 {4, 4}, /* cost of loading MMX registers
1174 in SImode and DImode */
1175 {4, 4}, /* cost of storing MMX registers
1176 in SImode and DImode */
1177 2, /* cost of moving SSE register */
1178 {4, 4, 4}, /* cost of loading SSE registers
1179 in SImode, DImode and TImode */
1180 {4, 4, 4}, /* cost of storing SSE registers
1181 in SImode, DImode and TImode */
1182 2, /* MMX or SSE register to integer */
1183 16, /* size of l1 cache. */
1184 2048, /* size of l2 cache. */
1185 64, /* size of prefetch block */
 1186   /* New AMD processors never drop prefetches; if they cannot be performed
 1187      immediately, they are queued.  We set the number of simultaneous prefetches
 1188      to a large constant to reflect this (it is probably not a good idea not
 1189      to limit the number of prefetches at all, as their execution also takes
 1190      some time).  */
1191 100, /* number of parallel prefetches */
1192 2, /* Branch cost */
1193 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1194 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1195 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1196 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1197 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1198 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1199
1200 bdver3_memcpy,
1201 bdver3_memset,
1202 6, /* scalar_stmt_cost. */
1203 4, /* scalar load_cost. */
1204 4, /* scalar_store_cost. */
1205 6, /* vec_stmt_cost. */
1206 0, /* vec_to_scalar_cost. */
1207 2, /* scalar_to_vec_cost. */
1208 4, /* vec_align_load_cost. */
1209 4, /* vec_unalign_load_cost. */
1210 4, /* vec_store_cost. */
1211 4, /* cond_taken_branch_cost. */
1212 2, /* cond_not_taken_branch_cost. */
1213 };
1214
 1215 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
 1216    very small blocks it is better to use a loop.  For large blocks, a libcall
 1217    can do nontemporal accesses and beat inline code considerably.  */
1218 static stringop_algs bdver4_memcpy[2] = {
1219 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1220 {-1, rep_prefix_4_byte, false}}},
1221 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1222 {-1, libcall, false}}}};
1223 static stringop_algs bdver4_memset[2] = {
1224 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1225 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1226 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1227 {-1, libcall, false}}}};
1228 struct processor_costs bdver4_cost = {
1229 COSTS_N_INSNS (1), /* cost of an add instruction */
1230 COSTS_N_INSNS (1), /* cost of a lea instruction */
1231 COSTS_N_INSNS (1), /* variable shift costs */
1232 COSTS_N_INSNS (1), /* constant shift costs */
1233 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1234 COSTS_N_INSNS (4), /* HI */
1235 COSTS_N_INSNS (4), /* SI */
1236 COSTS_N_INSNS (6), /* DI */
1237 COSTS_N_INSNS (6)}, /* other */
1238 0, /* cost of multiply per each bit set */
1239 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1240 COSTS_N_INSNS (35), /* HI */
1241 COSTS_N_INSNS (51), /* SI */
1242 COSTS_N_INSNS (83), /* DI */
1243 COSTS_N_INSNS (83)}, /* other */
1244 COSTS_N_INSNS (1), /* cost of movsx */
1245 COSTS_N_INSNS (1), /* cost of movzx */
1246 8, /* "large" insn */
1247 9, /* MOVE_RATIO */
1248 4, /* cost for loading QImode using movzbl */
1249 {5, 5, 4}, /* cost of loading integer registers
1250 in QImode, HImode and SImode.
1251 Relative to reg-reg move (2). */
1252 {4, 4, 4}, /* cost of storing integer registers */
1253 2, /* cost of reg,reg fld/fst */
1254 {5, 5, 12}, /* cost of loading fp registers
1255 in SFmode, DFmode and XFmode */
1256 {4, 4, 8}, /* cost of storing fp registers
1257 in SFmode, DFmode and XFmode */
1258 2, /* cost of moving MMX register */
1259 {4, 4}, /* cost of loading MMX registers
1260 in SImode and DImode */
1261 {4, 4}, /* cost of storing MMX registers
1262 in SImode and DImode */
1263 2, /* cost of moving SSE register */
1264 {4, 4, 4}, /* cost of loading SSE registers
1265 in SImode, DImode and TImode */
1266 {4, 4, 4}, /* cost of storing SSE registers
1267 in SImode, DImode and TImode */
1268 2, /* MMX or SSE register to integer */
1269 16, /* size of l1 cache. */
1270 2048, /* size of l2 cache. */
1271 64, /* size of prefetch block */
 1272   /* New AMD processors never drop prefetches; if they cannot be performed
 1273      immediately, they are queued.  We set the number of simultaneous prefetches
 1274      to a large constant to reflect this (it is probably not a good idea not
 1275      to limit the number of prefetches at all, as their execution also takes
 1276      some time).  */
1277 100, /* number of parallel prefetches */
1278 2, /* Branch cost */
1279 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1280 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1281 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1282 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1283 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1284 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1285
1286 bdver4_memcpy,
1287 bdver4_memset,
1288 6, /* scalar_stmt_cost. */
1289 4, /* scalar load_cost. */
1290 4, /* scalar_store_cost. */
1291 6, /* vec_stmt_cost. */
1292 0, /* vec_to_scalar_cost. */
1293 2, /* scalar_to_vec_cost. */
1294 4, /* vec_align_load_cost. */
1295 4, /* vec_unalign_load_cost. */
1296 4, /* vec_store_cost. */
1297 4, /* cond_taken_branch_cost. */
1298 2, /* cond_not_taken_branch_cost. */
1299 };
1300
 1301 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
 1302    very small blocks it is better to use a loop.  For large blocks, a libcall
 1303    can do nontemporal accesses and beat inline code considerably.  */
1304 static stringop_algs btver1_memcpy[2] = {
1305 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1306 {-1, rep_prefix_4_byte, false}}},
1307 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1308 {-1, libcall, false}}}};
1309 static stringop_algs btver1_memset[2] = {
1310 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1311 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1312 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1313 {-1, libcall, false}}}};
1314 const struct processor_costs btver1_cost = {
1315 COSTS_N_INSNS (1), /* cost of an add instruction */
1316 COSTS_N_INSNS (2), /* cost of a lea instruction */
1317 COSTS_N_INSNS (1), /* variable shift costs */
1318 COSTS_N_INSNS (1), /* constant shift costs */
1319 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1320 COSTS_N_INSNS (4), /* HI */
1321 COSTS_N_INSNS (3), /* SI */
1322 COSTS_N_INSNS (4), /* DI */
1323 COSTS_N_INSNS (5)}, /* other */
1324 0, /* cost of multiply per each bit set */
1325 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1326 COSTS_N_INSNS (35), /* HI */
1327 COSTS_N_INSNS (51), /* SI */
1328 COSTS_N_INSNS (83), /* DI */
1329 COSTS_N_INSNS (83)}, /* other */
1330 COSTS_N_INSNS (1), /* cost of movsx */
1331 COSTS_N_INSNS (1), /* cost of movzx */
1332 8, /* "large" insn */
1333 9, /* MOVE_RATIO */
1334 4, /* cost for loading QImode using movzbl */
1335 {3, 4, 3}, /* cost of loading integer registers
1336 in QImode, HImode and SImode.
1337 Relative to reg-reg move (2). */
1338 {3, 4, 3}, /* cost of storing integer registers */
1339 4, /* cost of reg,reg fld/fst */
1340 {4, 4, 12}, /* cost of loading fp registers
1341 in SFmode, DFmode and XFmode */
1342 {6, 6, 8}, /* cost of storing fp registers
1343 in SFmode, DFmode and XFmode */
1344 2, /* cost of moving MMX register */
1345 {3, 3}, /* cost of loading MMX registers
1346 in SImode and DImode */
1347 {4, 4}, /* cost of storing MMX registers
1348 in SImode and DImode */
1349 2, /* cost of moving SSE register */
1350 {4, 4, 3}, /* cost of loading SSE registers
1351 in SImode, DImode and TImode */
1352 {4, 4, 5}, /* cost of storing SSE registers
1353 in SImode, DImode and TImode */
1354 3, /* MMX or SSE register to integer */
1355 /* On K8:
1356 MOVD reg64, xmmreg Double FSTORE 4
1357 MOVD reg32, xmmreg Double FSTORE 4
1358 On AMDFAM10:
1359 MOVD reg64, xmmreg Double FADD 3
1360 1/1 1/1
1361 MOVD reg32, xmmreg Double FADD 3
1362 1/1 1/1 */
1363 32, /* size of l1 cache. */
1364 512, /* size of l2 cache. */
1365 64, /* size of prefetch block */
1366 100, /* number of parallel prefetches */
1367 2, /* Branch cost */
1368 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1369 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1370 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1371 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1372 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1373 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1374
1375 btver1_memcpy,
1376 btver1_memset,
1377 4, /* scalar_stmt_cost. */
1378 2, /* scalar load_cost. */
1379 2, /* scalar_store_cost. */
1380 6, /* vec_stmt_cost. */
1381 0, /* vec_to_scalar_cost. */
1382 2, /* scalar_to_vec_cost. */
1383 2, /* vec_align_load_cost. */
1384 2, /* vec_unalign_load_cost. */
1385 2, /* vec_store_cost. */
1386 2, /* cond_taken_branch_cost. */
1387 1, /* cond_not_taken_branch_cost. */
1388 };
1389
1390 static stringop_algs btver2_memcpy[2] = {
1391 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1392 {-1, rep_prefix_4_byte, false}}},
1393 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1394 {-1, libcall, false}}}};
1395 static stringop_algs btver2_memset[2] = {
1396 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1397 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1398 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1399 {-1, libcall, false}}}};
1400 const struct processor_costs btver2_cost = {
1401 COSTS_N_INSNS (1), /* cost of an add instruction */
1402 COSTS_N_INSNS (2), /* cost of a lea instruction */
1403 COSTS_N_INSNS (1), /* variable shift costs */
1404 COSTS_N_INSNS (1), /* constant shift costs */
1405 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1406 COSTS_N_INSNS (4), /* HI */
1407 COSTS_N_INSNS (3), /* SI */
1408 COSTS_N_INSNS (4), /* DI */
1409 COSTS_N_INSNS (5)}, /* other */
1410 0, /* cost of multiply per each bit set */
1411 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1412 COSTS_N_INSNS (35), /* HI */
1413 COSTS_N_INSNS (51), /* SI */
1414 COSTS_N_INSNS (83), /* DI */
1415 COSTS_N_INSNS (83)}, /* other */
1416 COSTS_N_INSNS (1), /* cost of movsx */
1417 COSTS_N_INSNS (1), /* cost of movzx */
1418 8, /* "large" insn */
1419 9, /* MOVE_RATIO */
1420 4, /* cost for loading QImode using movzbl */
1421 {3, 4, 3}, /* cost of loading integer registers
1422 in QImode, HImode and SImode.
1423 Relative to reg-reg move (2). */
1424 {3, 4, 3}, /* cost of storing integer registers */
1425 4, /* cost of reg,reg fld/fst */
1426 {4, 4, 12}, /* cost of loading fp registers
1427 in SFmode, DFmode and XFmode */
1428 {6, 6, 8}, /* cost of storing fp registers
1429 in SFmode, DFmode and XFmode */
1430 2, /* cost of moving MMX register */
1431 {3, 3}, /* cost of loading MMX registers
1432 in SImode and DImode */
1433 {4, 4}, /* cost of storing MMX registers
1434 in SImode and DImode */
1435 2, /* cost of moving SSE register */
1436 {4, 4, 3}, /* cost of loading SSE registers
1437 in SImode, DImode and TImode */
1438 {4, 4, 5}, /* cost of storing SSE registers
1439 in SImode, DImode and TImode */
1440 3, /* MMX or SSE register to integer */
1441 /* On K8:
1442 MOVD reg64, xmmreg Double FSTORE 4
1443 MOVD reg32, xmmreg Double FSTORE 4
1444 On AMDFAM10:
1445 MOVD reg64, xmmreg Double FADD 3
1446 1/1 1/1
1447 MOVD reg32, xmmreg Double FADD 3
1448 1/1 1/1 */
1449 32, /* size of l1 cache. */
1450 2048, /* size of l2 cache. */
1451 64, /* size of prefetch block */
1452 100, /* number of parallel prefetches */
1453 2, /* Branch cost */
1454 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1455 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1456 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1457 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1458 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1459 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1460 btver2_memcpy,
1461 btver2_memset,
1462 4, /* scalar_stmt_cost. */
1463 2, /* scalar load_cost. */
1464 2, /* scalar_store_cost. */
1465 6, /* vec_stmt_cost. */
1466 0, /* vec_to_scalar_cost. */
1467 2, /* scalar_to_vec_cost. */
1468 2, /* vec_align_load_cost. */
1469 2, /* vec_unalign_load_cost. */
1470 2, /* vec_store_cost. */
1471 2, /* cond_taken_branch_cost. */
1472 1, /* cond_not_taken_branch_cost. */
1473 };
1474
1475 static stringop_algs pentium4_memcpy[2] = {
1476 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1477 DUMMY_STRINGOP_ALGS};
1478 static stringop_algs pentium4_memset[2] = {
1479 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1480 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1481 DUMMY_STRINGOP_ALGS};
1482
1483 static const
1484 struct processor_costs pentium4_cost = {
1485 COSTS_N_INSNS (1), /* cost of an add instruction */
1486 COSTS_N_INSNS (3), /* cost of a lea instruction */
1487 COSTS_N_INSNS (4), /* variable shift costs */
1488 COSTS_N_INSNS (4), /* constant shift costs */
1489 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1490 COSTS_N_INSNS (15), /* HI */
1491 COSTS_N_INSNS (15), /* SI */
1492 COSTS_N_INSNS (15), /* DI */
1493 COSTS_N_INSNS (15)}, /* other */
1494 0, /* cost of multiply per each bit set */
1495 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1496 COSTS_N_INSNS (56), /* HI */
1497 COSTS_N_INSNS (56), /* SI */
1498 COSTS_N_INSNS (56), /* DI */
1499 COSTS_N_INSNS (56)}, /* other */
1500 COSTS_N_INSNS (1), /* cost of movsx */
1501 COSTS_N_INSNS (1), /* cost of movzx */
1502 16, /* "large" insn */
1503 6, /* MOVE_RATIO */
1504 2, /* cost for loading QImode using movzbl */
1505 {4, 5, 4}, /* cost of loading integer registers
1506 in QImode, HImode and SImode.
1507 Relative to reg-reg move (2). */
1508 {2, 3, 2}, /* cost of storing integer registers */
1509 2, /* cost of reg,reg fld/fst */
1510 {2, 2, 6}, /* cost of loading fp registers
1511 in SFmode, DFmode and XFmode */
1512 {4, 4, 6}, /* cost of storing fp registers
1513 in SFmode, DFmode and XFmode */
1514 2, /* cost of moving MMX register */
1515 {2, 2}, /* cost of loading MMX registers
1516 in SImode and DImode */
1517 {2, 2}, /* cost of storing MMX registers
1518 in SImode and DImode */
1519 12, /* cost of moving SSE register */
1520 {12, 12, 12}, /* cost of loading SSE registers
1521 in SImode, DImode and TImode */
1522 {2, 2, 8}, /* cost of storing SSE registers
1523 in SImode, DImode and TImode */
1524 10, /* MMX or SSE register to integer */
1525 8, /* size of l1 cache. */
1526 256, /* size of l2 cache. */
1527 64, /* size of prefetch block */
1528 6, /* number of parallel prefetches */
1529 2, /* Branch cost */
1530 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1531 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1532 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1533 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1534 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1535 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1536 pentium4_memcpy,
1537 pentium4_memset,
1538 1, /* scalar_stmt_cost. */
1539 1, /* scalar load_cost. */
1540 1, /* scalar_store_cost. */
1541 1, /* vec_stmt_cost. */
1542 1, /* vec_to_scalar_cost. */
1543 1, /* scalar_to_vec_cost. */
1544 1, /* vec_align_load_cost. */
1545 2, /* vec_unalign_load_cost. */
1546 1, /* vec_store_cost. */
1547 3, /* cond_taken_branch_cost. */
1548 1, /* cond_not_taken_branch_cost. */
1549 };
1550
1551 static stringop_algs nocona_memcpy[2] = {
1552 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1553 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1554 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1555
1556 static stringop_algs nocona_memset[2] = {
1557 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1558 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1559 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1560 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1561
1562 static const
1563 struct processor_costs nocona_cost = {
1564 COSTS_N_INSNS (1), /* cost of an add instruction */
1565 COSTS_N_INSNS (1), /* cost of a lea instruction */
1566 COSTS_N_INSNS (1), /* variable shift costs */
1567 COSTS_N_INSNS (1), /* constant shift costs */
1568 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1569 COSTS_N_INSNS (10), /* HI */
1570 COSTS_N_INSNS (10), /* SI */
1571 COSTS_N_INSNS (10), /* DI */
1572 COSTS_N_INSNS (10)}, /* other */
1573 0, /* cost of multiply per each bit set */
1574 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1575 COSTS_N_INSNS (66), /* HI */
1576 COSTS_N_INSNS (66), /* SI */
1577 COSTS_N_INSNS (66), /* DI */
1578 COSTS_N_INSNS (66)}, /* other */
1579 COSTS_N_INSNS (1), /* cost of movsx */
1580 COSTS_N_INSNS (1), /* cost of movzx */
1581 16, /* "large" insn */
1582 17, /* MOVE_RATIO */
1583 4, /* cost for loading QImode using movzbl */
1584 {4, 4, 4}, /* cost of loading integer registers
1585 in QImode, HImode and SImode.
1586 Relative to reg-reg move (2). */
1587 {4, 4, 4}, /* cost of storing integer registers */
1588 3, /* cost of reg,reg fld/fst */
1589 {12, 12, 12}, /* cost of loading fp registers
1590 in SFmode, DFmode and XFmode */
1591 {4, 4, 4}, /* cost of storing fp registers
1592 in SFmode, DFmode and XFmode */
1593 6, /* cost of moving MMX register */
1594 {12, 12}, /* cost of loading MMX registers
1595 in SImode and DImode */
1596 {12, 12}, /* cost of storing MMX registers
1597 in SImode and DImode */
1598 6, /* cost of moving SSE register */
1599 {12, 12, 12}, /* cost of loading SSE registers
1600 in SImode, DImode and TImode */
1601 {12, 12, 12}, /* cost of storing SSE registers
1602 in SImode, DImode and TImode */
1603 8, /* MMX or SSE register to integer */
1604 8, /* size of l1 cache. */
1605 1024, /* size of l2 cache. */
1606 64, /* size of prefetch block */
1607 8, /* number of parallel prefetches */
1608 1, /* Branch cost */
1609 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1610 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1611 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1612 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1613 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1614 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1615 nocona_memcpy,
1616 nocona_memset,
1617 1, /* scalar_stmt_cost. */
1618 1, /* scalar load_cost. */
1619 1, /* scalar_store_cost. */
1620 1, /* vec_stmt_cost. */
1621 1, /* vec_to_scalar_cost. */
1622 1, /* scalar_to_vec_cost. */
1623 1, /* vec_align_load_cost. */
1624 2, /* vec_unalign_load_cost. */
1625 1, /* vec_store_cost. */
1626 3, /* cond_taken_branch_cost. */
1627 1, /* cond_not_taken_branch_cost. */
1628 };
1629
1630 static stringop_algs atom_memcpy[2] = {
1631 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1632 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1633 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1634 static stringop_algs atom_memset[2] = {
1635 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1636 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1637 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1638 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1639 static const
1640 struct processor_costs atom_cost = {
1641 COSTS_N_INSNS (1), /* cost of an add instruction */
1642 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1643 COSTS_N_INSNS (1), /* variable shift costs */
1644 COSTS_N_INSNS (1), /* constant shift costs */
1645 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1646 COSTS_N_INSNS (4), /* HI */
1647 COSTS_N_INSNS (3), /* SI */
1648 COSTS_N_INSNS (4), /* DI */
1649 COSTS_N_INSNS (2)}, /* other */
1650 0, /* cost of multiply per each bit set */
1651 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1652 COSTS_N_INSNS (26), /* HI */
1653 COSTS_N_INSNS (42), /* SI */
1654 COSTS_N_INSNS (74), /* DI */
1655 COSTS_N_INSNS (74)}, /* other */
1656 COSTS_N_INSNS (1), /* cost of movsx */
1657 COSTS_N_INSNS (1), /* cost of movzx */
1658 8, /* "large" insn */
1659 17, /* MOVE_RATIO */
1660 4, /* cost for loading QImode using movzbl */
1661 {4, 4, 4}, /* cost of loading integer registers
1662 in QImode, HImode and SImode.
1663 Relative to reg-reg move (2). */
1664 {4, 4, 4}, /* cost of storing integer registers */
1665 4, /* cost of reg,reg fld/fst */
1666 {12, 12, 12}, /* cost of loading fp registers
1667 in SFmode, DFmode and XFmode */
1668 {6, 6, 8}, /* cost of storing fp registers
1669 in SFmode, DFmode and XFmode */
1670 2, /* cost of moving MMX register */
1671 {8, 8}, /* cost of loading MMX registers
1672 in SImode and DImode */
1673 {8, 8}, /* cost of storing MMX registers
1674 in SImode and DImode */
1675 2, /* cost of moving SSE register */
1676 {8, 8, 8}, /* cost of loading SSE registers
1677 in SImode, DImode and TImode */
1678 {8, 8, 8}, /* cost of storing SSE registers
1679 in SImode, DImode and TImode */
1680 5, /* MMX or SSE register to integer */
1681 32, /* size of l1 cache. */
1682 256, /* size of l2 cache. */
1683 64, /* size of prefetch block */
1684 6, /* number of parallel prefetches */
1685 3, /* Branch cost */
1686 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1687 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1688 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1689 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1690 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1691 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1692 atom_memcpy,
1693 atom_memset,
1694 1, /* scalar_stmt_cost. */
1695 1, /* scalar load_cost. */
1696 1, /* scalar_store_cost. */
1697 1, /* vec_stmt_cost. */
1698 1, /* vec_to_scalar_cost. */
1699 1, /* scalar_to_vec_cost. */
1700 1, /* vec_align_load_cost. */
1701 2, /* vec_unalign_load_cost. */
1702 1, /* vec_store_cost. */
1703 3, /* cond_taken_branch_cost. */
1704 1, /* cond_not_taken_branch_cost. */
1705 };
1706
1707 static stringop_algs slm_memcpy[2] = {
1708 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1709 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1710 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1711 static stringop_algs slm_memset[2] = {
1712 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1713 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1714 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1715 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1716 static const
1717 struct processor_costs slm_cost = {
1718 COSTS_N_INSNS (1), /* cost of an add instruction */
1719 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1720 COSTS_N_INSNS (1), /* variable shift costs */
1721 COSTS_N_INSNS (1), /* constant shift costs */
1722 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1723 COSTS_N_INSNS (3), /* HI */
1724 COSTS_N_INSNS (3), /* SI */
1725 COSTS_N_INSNS (4), /* DI */
1726 COSTS_N_INSNS (2)}, /* other */
1727 0, /* cost of multiply per each bit set */
1728 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1729 COSTS_N_INSNS (26), /* HI */
1730 COSTS_N_INSNS (42), /* SI */
1731 COSTS_N_INSNS (74), /* DI */
1732 COSTS_N_INSNS (74)}, /* other */
1733 COSTS_N_INSNS (1), /* cost of movsx */
1734 COSTS_N_INSNS (1), /* cost of movzx */
1735 8, /* "large" insn */
1736 17, /* MOVE_RATIO */
1737 4, /* cost for loading QImode using movzbl */
1738 {4, 4, 4}, /* cost of loading integer registers
1739 in QImode, HImode and SImode.
1740 Relative to reg-reg move (2). */
1741 {4, 4, 4}, /* cost of storing integer registers */
1742 4, /* cost of reg,reg fld/fst */
1743 {12, 12, 12}, /* cost of loading fp registers
1744 in SFmode, DFmode and XFmode */
1745 {6, 6, 8}, /* cost of storing fp registers
1746 in SFmode, DFmode and XFmode */
1747 2, /* cost of moving MMX register */
1748 {8, 8}, /* cost of loading MMX registers
1749 in SImode and DImode */
1750 {8, 8}, /* cost of storing MMX registers
1751 in SImode and DImode */
1752 2, /* cost of moving SSE register */
1753 {8, 8, 8}, /* cost of loading SSE registers
1754 in SImode, DImode and TImode */
1755 {8, 8, 8}, /* cost of storing SSE registers
1756 in SImode, DImode and TImode */
1757 5, /* MMX or SSE register to integer */
1758 32, /* size of l1 cache. */
1759 256, /* size of l2 cache. */
1760 64, /* size of prefetch block */
1761 6, /* number of parallel prefetches */
1762 3, /* Branch cost */
1763 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1764 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1765 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1766 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1767 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1768 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1769 slm_memcpy,
1770 slm_memset,
1771 1, /* scalar_stmt_cost. */
1772 1, /* scalar load_cost. */
1773 1, /* scalar_store_cost. */
1774 1, /* vec_stmt_cost. */
1775 4, /* vec_to_scalar_cost. */
1776 1, /* scalar_to_vec_cost. */
1777 1, /* vec_align_load_cost. */
1778 2, /* vec_unalign_load_cost. */
1779 1, /* vec_store_cost. */
1780 3, /* cond_taken_branch_cost. */
1781 1, /* cond_not_taken_branch_cost. */
1782 };
1783
1784 static stringop_algs intel_memcpy[2] = {
1785 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1786 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1787 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1788 static stringop_algs intel_memset[2] = {
1789 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1790 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1791 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1792 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1793 static const
1794 struct processor_costs intel_cost = {
1795 COSTS_N_INSNS (1), /* cost of an add instruction */
1796 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1797 COSTS_N_INSNS (1), /* variable shift costs */
1798 COSTS_N_INSNS (1), /* constant shift costs */
1799 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1800 COSTS_N_INSNS (3), /* HI */
1801 COSTS_N_INSNS (3), /* SI */
1802 COSTS_N_INSNS (4), /* DI */
1803 COSTS_N_INSNS (2)}, /* other */
1804 0, /* cost of multiply per each bit set */
1805 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1806 COSTS_N_INSNS (26), /* HI */
1807 COSTS_N_INSNS (42), /* SI */
1808 COSTS_N_INSNS (74), /* DI */
1809 COSTS_N_INSNS (74)}, /* other */
1810 COSTS_N_INSNS (1), /* cost of movsx */
1811 COSTS_N_INSNS (1), /* cost of movzx */
1812 8, /* "large" insn */
1813 17, /* MOVE_RATIO */
1814 4, /* cost for loading QImode using movzbl */
1815 {4, 4, 4}, /* cost of loading integer registers
1816 in QImode, HImode and SImode.
1817 Relative to reg-reg move (2). */
1818 {4, 4, 4}, /* cost of storing integer registers */
1819 4, /* cost of reg,reg fld/fst */
1820 {12, 12, 12}, /* cost of loading fp registers
1821 in SFmode, DFmode and XFmode */
1822 {6, 6, 8}, /* cost of storing fp registers
1823 in SFmode, DFmode and XFmode */
1824 2, /* cost of moving MMX register */
1825 {8, 8}, /* cost of loading MMX registers
1826 in SImode and DImode */
1827 {8, 8}, /* cost of storing MMX registers
1828 in SImode and DImode */
1829 2, /* cost of moving SSE register */
1830 {8, 8, 8}, /* cost of loading SSE registers
1831 in SImode, DImode and TImode */
1832 {8, 8, 8}, /* cost of storing SSE registers
1833 in SImode, DImode and TImode */
1834 5, /* MMX or SSE register to integer */
1835 32, /* size of l1 cache. */
1836 256, /* size of l2 cache. */
1837 64, /* size of prefetch block */
1838 6, /* number of parallel prefetches */
1839 3, /* Branch cost */
1840 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1841 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1842 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1843 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1844 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1845 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1846 intel_memcpy,
1847 intel_memset,
1848 1, /* scalar_stmt_cost. */
1849 1, /* scalar load_cost. */
1850 1, /* scalar_store_cost. */
1851 1, /* vec_stmt_cost. */
1852 4, /* vec_to_scalar_cost. */
1853 1, /* scalar_to_vec_cost. */
1854 1, /* vec_align_load_cost. */
1855 2, /* vec_unalign_load_cost. */
1856 1, /* vec_store_cost. */
1857 3, /* cond_taken_branch_cost. */
1858 1, /* cond_not_taken_branch_cost. */
1859 };
1860
1861 /* Generic should produce code tuned for Core-i7 (and newer chips)
1862 and btver1 (and newer chips). */
1863
1864 static stringop_algs generic_memcpy[2] = {
1865 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1866 {-1, libcall, false}}},
1867 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1868 {-1, libcall, false}}}};
1869 static stringop_algs generic_memset[2] = {
1870 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1871 {-1, libcall, false}}},
1872 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1873 {-1, libcall, false}}}};
1874 static const
1875 struct processor_costs generic_cost = {
1876 COSTS_N_INSNS (1), /* cost of an add instruction */
 1877   /* On all chips taken into consideration, lea is 2 cycles or more.  With
 1878      this cost, however, our current implementation of synth_mult uses
 1879      unnecessary temporary registers, causing regressions on several
 1880      SPECfp benchmarks.  */
1881 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1882 COSTS_N_INSNS (1), /* variable shift costs */
1883 COSTS_N_INSNS (1), /* constant shift costs */
1884 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1885 COSTS_N_INSNS (4), /* HI */
1886 COSTS_N_INSNS (3), /* SI */
1887 COSTS_N_INSNS (4), /* DI */
1888 COSTS_N_INSNS (2)}, /* other */
1889 0, /* cost of multiply per each bit set */
1890 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1891 COSTS_N_INSNS (26), /* HI */
1892 COSTS_N_INSNS (42), /* SI */
1893 COSTS_N_INSNS (74), /* DI */
1894 COSTS_N_INSNS (74)}, /* other */
1895 COSTS_N_INSNS (1), /* cost of movsx */
1896 COSTS_N_INSNS (1), /* cost of movzx */
1897 8, /* "large" insn */
1898 17, /* MOVE_RATIO */
1899 4, /* cost for loading QImode using movzbl */
1900 {4, 4, 4}, /* cost of loading integer registers
1901 in QImode, HImode and SImode.
1902 Relative to reg-reg move (2). */
1903 {4, 4, 4}, /* cost of storing integer registers */
1904 4, /* cost of reg,reg fld/fst */
1905 {12, 12, 12}, /* cost of loading fp registers
1906 in SFmode, DFmode and XFmode */
1907 {6, 6, 8}, /* cost of storing fp registers
1908 in SFmode, DFmode and XFmode */
1909 2, /* cost of moving MMX register */
1910 {8, 8}, /* cost of loading MMX registers
1911 in SImode and DImode */
1912 {8, 8}, /* cost of storing MMX registers
1913 in SImode and DImode */
1914 2, /* cost of moving SSE register */
1915 {8, 8, 8}, /* cost of loading SSE registers
1916 in SImode, DImode and TImode */
1917 {8, 8, 8}, /* cost of storing SSE registers
1918 in SImode, DImode and TImode */
1919 5, /* MMX or SSE register to integer */
1920 32, /* size of l1 cache. */
1921 512, /* size of l2 cache. */
1922 64, /* size of prefetch block */
1923 6, /* number of parallel prefetches */
 1924   /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
 1925      value is increased to the perhaps more appropriate value of 5.  */
1926 3, /* Branch cost */
1927 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1928 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1929 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1930 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1931 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1932 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1933 generic_memcpy,
1934 generic_memset,
1935 1, /* scalar_stmt_cost. */
1936 1, /* scalar load_cost. */
1937 1, /* scalar_store_cost. */
1938 1, /* vec_stmt_cost. */
1939 1, /* vec_to_scalar_cost. */
1940 1, /* scalar_to_vec_cost. */
1941 1, /* vec_align_load_cost. */
1942 2, /* vec_unalign_load_cost. */
1943 1, /* vec_store_cost. */
1944 3, /* cond_taken_branch_cost. */
1945 1, /* cond_not_taken_branch_cost. */
1946 };
1947
 1948 /* core_cost should produce code tuned for the Core family of CPUs.  */
1949 static stringop_algs core_memcpy[2] = {
1950 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1951 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1952 {-1, libcall, false}}}};
1953 static stringop_algs core_memset[2] = {
1954 {libcall, {{6, loop_1_byte, true},
1955 {24, loop, true},
1956 {8192, rep_prefix_4_byte, true},
1957 {-1, libcall, false}}},
1958 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1959 {-1, libcall, false}}}};
1960
1961 static const
1962 struct processor_costs core_cost = {
1963 COSTS_N_INSNS (1), /* cost of an add instruction */
 1964   /* On all chips taken into consideration, lea is 2 cycles or more.  With
 1965      this cost, however, our current implementation of synth_mult uses
 1966      unnecessary temporary registers, causing regressions on several
 1967      SPECfp benchmarks.  */
1968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1969 COSTS_N_INSNS (1), /* variable shift costs */
1970 COSTS_N_INSNS (1), /* constant shift costs */
1971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1972 COSTS_N_INSNS (4), /* HI */
1973 COSTS_N_INSNS (3), /* SI */
1974 COSTS_N_INSNS (4), /* DI */
1975 COSTS_N_INSNS (2)}, /* other */
1976 0, /* cost of multiply per each bit set */
1977 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1978 COSTS_N_INSNS (26), /* HI */
1979 COSTS_N_INSNS (42), /* SI */
1980 COSTS_N_INSNS (74), /* DI */
1981 COSTS_N_INSNS (74)}, /* other */
1982 COSTS_N_INSNS (1), /* cost of movsx */
1983 COSTS_N_INSNS (1), /* cost of movzx */
1984 8, /* "large" insn */
1985 17, /* MOVE_RATIO */
1986 4, /* cost for loading QImode using movzbl */
1987 {4, 4, 4}, /* cost of loading integer registers
1988 in QImode, HImode and SImode.
1989 Relative to reg-reg move (2). */
1990 {4, 4, 4}, /* cost of storing integer registers */
1991 4, /* cost of reg,reg fld/fst */
1992 {12, 12, 12}, /* cost of loading fp registers
1993 in SFmode, DFmode and XFmode */
1994 {6, 6, 8}, /* cost of storing fp registers
1995 in SFmode, DFmode and XFmode */
1996 2, /* cost of moving MMX register */
1997 {8, 8}, /* cost of loading MMX registers
1998 in SImode and DImode */
1999 {8, 8}, /* cost of storing MMX registers
2000 in SImode and DImode */
2001 2, /* cost of moving SSE register */
2002 {8, 8, 8}, /* cost of loading SSE registers
2003 in SImode, DImode and TImode */
2004 {8, 8, 8}, /* cost of storing SSE registers
2005 in SImode, DImode and TImode */
2006 5, /* MMX or SSE register to integer */
2007 64, /* size of l1 cache. */
2008 512, /* size of l2 cache. */
2009 64, /* size of prefetch block */
2010 6, /* number of parallel prefetches */
2011 /* FIXME perhaps more appropriate value is 5. */
2012 3, /* Branch cost */
2013 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2014 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2015 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2016 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2017 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2018 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2019 core_memcpy,
2020 core_memset,
2021 1, /* scalar_stmt_cost. */
2022 1, /* scalar load_cost. */
2023 1, /* scalar_store_cost. */
2024 1, /* vec_stmt_cost. */
2025 1, /* vec_to_scalar_cost. */
2026 1, /* scalar_to_vec_cost. */
2027 1, /* vec_align_load_cost. */
2028 2, /* vec_unalign_load_cost. */
2029 1, /* vec_store_cost. */
2030 3, /* cond_taken_branch_cost. */
2031 1, /* cond_not_taken_branch_cost. */
2032 };
2033
2034
2035 /* Set by -mtune. */
2036 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2037
2038 /* Set by -mtune or -Os. */
2039 const struct processor_costs *ix86_cost = &pentium_cost;
2040
2041 /* Processor feature/optimization bitmasks. */
2042 #define m_386 (1<<PROCESSOR_I386)
2043 #define m_486 (1<<PROCESSOR_I486)
2044 #define m_PENT (1<<PROCESSOR_PENTIUM)
2045 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2046 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2047 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2048 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2049 #define m_CORE2 (1<<PROCESSOR_CORE2)
2050 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2051 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2052 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2053 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2054 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2055 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2056 #define m_KNL (1<<PROCESSOR_KNL)
2057 #define m_INTEL (1<<PROCESSOR_INTEL)
2058
2059 #define m_GEODE (1<<PROCESSOR_GEODE)
2060 #define m_K6 (1<<PROCESSOR_K6)
2061 #define m_K6_GEODE (m_K6 | m_GEODE)
2062 #define m_K8 (1<<PROCESSOR_K8)
2063 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2064 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2065 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2066 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2067 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2068 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2069 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2070 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2071 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2072 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2073 #define m_BTVER (m_BTVER1 | m_BTVER2)
2074 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2075
2076 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2077
2078 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2079 #undef DEF_TUNE
2080 #define DEF_TUNE(tune, name, selector) name,
2081 #include "x86-tune.def"
2082 #undef DEF_TUNE
2083 };
2084
2085 /* Feature tests against the various tunings. */
2086 unsigned char ix86_tune_features[X86_TUNE_LAST];
2087
2088 /* Feature tests against the various tunings used to create ix86_tune_features
2089 based on the processor mask. */
2090 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2091 #undef DEF_TUNE
2092 #define DEF_TUNE(tune, name, selector) selector,
2093 #include "x86-tune.def"
2094 #undef DEF_TUNE
2095 };
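   /* For illustration only (a hypothetical entry; the real ones live in
      x86-tune.def): a line such as
         DEF_TUNE (X86_TUNE_FOO, "foo", m_PENT4 | m_GENERIC)
      would contribute the string "foo" to ix86_tune_feature_names and the
      mask m_PENT4 | m_GENERIC to initial_ix86_tune_features, both at index
      X86_TUNE_FOO.  */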
2096
2097 /* Feature tests against the various architecture variations. */
2098 unsigned char ix86_arch_features[X86_ARCH_LAST];
2099
2100 /* Feature tests against the various architecture variations, used to create
2101 ix86_arch_features based on the processor mask. */
2102 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2103 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2104 ~(m_386 | m_486 | m_PENT | m_K6),
2105
2106 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2107 ~m_386,
2108
2109 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2110 ~(m_386 | m_486),
2111
2112 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2113 ~m_386,
2114
2115 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2116 ~m_386,
2117 };
2118
 2119 /* If the average insn count for a single function invocation is
 2120    lower than this constant, emit fast (but longer) prologue and
 2121    epilogue code.  */
2122 #define FAST_PROLOGUE_INSN_COUNT 20
2123
 2124 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively.  */
2125 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2126 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2127 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2128
2129 /* Array of the smallest class containing reg number REGNO, indexed by
2130 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2131
2132 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2133 {
2134 /* ax, dx, cx, bx */
2135 AREG, DREG, CREG, BREG,
2136 /* si, di, bp, sp */
2137 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2138 /* FP registers */
2139 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2140 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2141 /* arg pointer */
2142 NON_Q_REGS,
2143 /* flags, fpsr, fpcr, frame */
2144 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2145 /* SSE registers */
2146 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2147 SSE_REGS, SSE_REGS,
2148 /* MMX registers */
2149 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2150 MMX_REGS, MMX_REGS,
2151 /* REX registers */
2152 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2153 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2154 /* SSE REX registers */
2155 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2156 SSE_REGS, SSE_REGS,
2157 /* AVX-512 SSE registers */
2158 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2159 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2160 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2161 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2162 /* Mask registers. */
2163 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2164 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2165 /* MPX bound registers */
2166 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2167 };
2168
2169 /* The "default" register map used in 32bit mode. */
2170
2171 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2172 {
2173 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2174 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2175 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2176 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2177 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2178 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2179 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2180 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2181 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2182 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2183 101, 102, 103, 104, /* bound registers */
2184 };
2185
2186 /* The "default" register map used in 64bit mode. */
2187
2188 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2189 {
2190 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2191 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2192 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2193 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2194 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2195 8,9,10,11,12,13,14,15, /* extended integer registers */
2196 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2197 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2198 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2199 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2200 126, 127, 128, 129, /* bound registers */
2201 };
2202
2203 /* Define the register numbers to be used in Dwarf debugging information.
2204 The SVR4 reference port C compiler uses the following register numbers
2205 in its Dwarf output code:
2206 0 for %eax (gcc regno = 0)
2207 1 for %ecx (gcc regno = 2)
2208 2 for %edx (gcc regno = 1)
2209 3 for %ebx (gcc regno = 3)
2210 4 for %esp (gcc regno = 7)
2211 5 for %ebp (gcc regno = 6)
2212 6 for %esi (gcc regno = 4)
2213 7 for %edi (gcc regno = 5)
2214 The following three DWARF register numbers are never generated by
2215 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2216 believes these numbers have these meanings.
2217 8 for %eip (no gcc equivalent)
2218 9 for %eflags (gcc regno = 17)
2219 10 for %trapno (no gcc equivalent)
2220 It is not at all clear how we should number the FP stack registers
2221 for the x86 architecture. If the version of SDB on x86/svr4 were
2222 a bit less brain dead with respect to floating-point then we would
2223 have a precedent to follow with respect to DWARF register numbers
2224 for x86 FP registers, but the SDB on x86/svr4 is so completely
2225 broken with respect to FP registers that it is hardly worth thinking
2226 of it as something to strive for compatibility with.
2227 The version of x86/svr4 SDB I have at the moment does (partially)
2228 seem to believe that DWARF register number 11 is associated with
2229 the x86 register %st(0), but that's about all. Higher DWARF
2230 register numbers don't seem to be associated with anything in
2231 particular, and even for DWARF regno 11, SDB only seems to under-
2232 stand that it should say that a variable lives in %st(0) (when
2233 asked via an `=' command) if we said it was in DWARF regno 11,
2234 but SDB still prints garbage when asked for the value of the
2235 variable in question (via a `/' command).
2236 (Also note that the labels SDB prints for various FP stack regs
2237 when doing an `x' command are all wrong.)
2238 Note that these problems generally don't affect the native SVR4
2239 C compiler because it doesn't allow the use of -O with -g and
2240 because when it is *not* optimizing, it allocates a memory
2241 location for each floating-point variable, and the memory
2242 location is what gets described in the DWARF AT_location
2243 attribute for the variable in question.
2244 Regardless of the severe mental illness of the x86/svr4 SDB, we
2245 do something sensible here and we use the following DWARF
2246 register numbers. Note that these are all stack-top-relative
2247 numbers.
2248 11 for %st(0) (gcc regno = 8)
2249 12 for %st(1) (gcc regno = 9)
2250 13 for %st(2) (gcc regno = 10)
2251 14 for %st(3) (gcc regno = 11)
2252 15 for %st(4) (gcc regno = 12)
2253 16 for %st(5) (gcc regno = 13)
2254 17 for %st(6) (gcc regno = 14)
2255 18 for %st(7) (gcc regno = 15)
2256 */
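   /* As a concrete instance of the mapping above: %ecx is gcc regno 2, and
      entry 2 of the table below is 1, its SVR4 DWARF register number;
      likewise %esp (gcc regno 7) maps to DWARF register 4.  */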
2257 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2258 {
2259 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2260 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2261 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2262 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2263 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2264 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2265 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2266 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2267 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2268 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2269 101, 102, 103, 104, /* bound registers */
2270 };
2271
2272 /* Define parameter passing and return registers. */
2273
2274 static int const x86_64_int_parameter_registers[6] =
2275 {
2276 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2277 };
2278
2279 static int const x86_64_ms_abi_int_parameter_registers[4] =
2280 {
2281 CX_REG, DX_REG, R8_REG, R9_REG
2282 };
2283
2284 static int const x86_64_int_return_registers[4] =
2285 {
2286 AX_REG, DX_REG, DI_REG, SI_REG
2287 };
2288
2289 /* Additional registers that are clobbered by SYSV calls. */
2290
2291 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2292 {
2293 SI_REG, DI_REG,
2294 XMM6_REG, XMM7_REG,
2295 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2296 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2297 };
2298
2299 /* Define the structure for the machine field in struct function. */
2300
2301 struct GTY(()) stack_local_entry {
2302 unsigned short mode;
2303 unsigned short n;
2304 rtx rtl;
2305 struct stack_local_entry *next;
2306 };
2307
2308 /* Structure describing stack frame layout.
2309 Stack grows downward:
2310
2311 [arguments]
2312 <- ARG_POINTER
2313 saved pc
2314
2315 saved static chain if ix86_static_chain_on_stack
2316
2317 saved frame pointer if frame_pointer_needed
2318 <- HARD_FRAME_POINTER
2319 [saved regs]
2320 <- regs_save_offset
2321 [padding0]
2322
2323 [saved SSE regs]
2324 <- sse_regs_save_offset
2325 [padding1] |
2326 | <- FRAME_POINTER
2327 [va_arg registers] |
2328 |
2329 [frame] |
2330 |
2331 [padding2] | = to_allocate
2332 <- STACK_POINTER
2333 */
2334 struct ix86_frame
2335 {
2336 int nsseregs;
2337 int nregs;
2338 int va_arg_size;
2339 int red_zone_size;
2340 int outgoing_arguments_size;
2341
2342 /* The offsets relative to ARG_POINTER. */
2343 HOST_WIDE_INT frame_pointer_offset;
2344 HOST_WIDE_INT hard_frame_pointer_offset;
2345 HOST_WIDE_INT stack_pointer_offset;
2346 HOST_WIDE_INT hfp_save_offset;
2347 HOST_WIDE_INT reg_save_offset;
2348 HOST_WIDE_INT sse_reg_save_offset;
2349
2350 /* When save_regs_using_mov is set, emit prologue using
2351 move instead of push instructions. */
2352 bool save_regs_using_mov;
2353 };
2354
2355 /* Which cpu are we scheduling for. */
2356 enum attr_cpu ix86_schedule;
2357
2358 /* Which cpu are we optimizing for. */
2359 enum processor_type ix86_tune;
2360
2361 /* Which instruction set architecture to use. */
2362 enum processor_type ix86_arch;
2363
2364 /* True if processor has SSE prefetch instruction. */
2365 unsigned char x86_prefetch_sse;
2366
2367 /* -mstackrealign option */
2368 static const char ix86_force_align_arg_pointer_string[]
2369 = "force_align_arg_pointer";
2370
2371 static rtx (*ix86_gen_leave) (void);
2372 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2373 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2374 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2375 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2376 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2377 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2378 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2379 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2380 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2381 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2382 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2383
2384 /* Preferred alignment for stack boundary in bits. */
2385 unsigned int ix86_preferred_stack_boundary;
2386
2387 /* Alignment for incoming stack boundary in bits specified at
2388 command line. */
2389 static unsigned int ix86_user_incoming_stack_boundary;
2390
2391 /* Default alignment for incoming stack boundary in bits. */
2392 static unsigned int ix86_default_incoming_stack_boundary;
2393
2394 /* Alignment for incoming stack boundary in bits. */
2395 unsigned int ix86_incoming_stack_boundary;
2396
2397 /* Calling abi specific va_list type nodes. */
2398 static GTY(()) tree sysv_va_list_type_node;
2399 static GTY(()) tree ms_va_list_type_node;
2400
2401 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2402 char internal_label_prefix[16];
2403 int internal_label_prefix_len;
2404
2405 /* Fence to use after loop using movnt. */
2406 tree x86_mfence;
2407
 2408 /* Register class used for passing a given 64-bit part of the argument.
 2409    These represent the classes documented by the psABI, with the exception
 2410    of the SSESF and SSEDF classes, which are basically the SSE class; gcc
 2411    just uses SFmode or DFmode moves instead of DImode moves to avoid
 2412    reformatting penalties.
 2413    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
 2414    whenever possible (the upper half is then just padding).  */
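   /* For illustration only (a sketch, not lifted from the classification
      code elsewhere in this file): a struct { double d; int i; } argument
      occupies two eightbytes; the first would be classified as
      X86_64_SSEDF_CLASS (a lone double, moved in DFmode) and the second as
      X86_64_INTEGERSI_CLASS (only the low 32 bits matter, so an SImode
      move suffices).  */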
2415 enum x86_64_reg_class
2416 {
2417 X86_64_NO_CLASS,
2418 X86_64_INTEGER_CLASS,
2419 X86_64_INTEGERSI_CLASS,
2420 X86_64_SSE_CLASS,
2421 X86_64_SSESF_CLASS,
2422 X86_64_SSEDF_CLASS,
2423 X86_64_SSEUP_CLASS,
2424 X86_64_X87_CLASS,
2425 X86_64_X87UP_CLASS,
2426 X86_64_COMPLEX_X87_CLASS,
2427 X86_64_MEMORY_CLASS
2428 };
2429
2430 #define MAX_CLASSES 8
2431
2432 /* Table of constants used by fldpi, fldln2, etc.... */
2433 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2434 static bool ext_80387_constants_init = 0;
2435
2436 \f
2437 static struct machine_function * ix86_init_machine_status (void);
2438 static rtx ix86_function_value (const_tree, const_tree, bool);
2439 static bool ix86_function_value_regno_p (const unsigned int);
2440 static unsigned int ix86_function_arg_boundary (machine_mode,
2441 const_tree);
2442 static rtx ix86_static_chain (const_tree, bool);
2443 static int ix86_function_regparm (const_tree, const_tree);
2444 static void ix86_compute_frame_layout (struct ix86_frame *);
2445 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2446 rtx, rtx, int);
2447 static void ix86_add_new_builtins (HOST_WIDE_INT);
2448 static tree ix86_canonical_va_list_type (tree);
2449 static void predict_jump (int);
2450 static unsigned int split_stack_prologue_scratch_regno (void);
2451 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2452
2453 enum ix86_function_specific_strings
2454 {
2455 IX86_FUNCTION_SPECIFIC_ARCH,
2456 IX86_FUNCTION_SPECIFIC_TUNE,
2457 IX86_FUNCTION_SPECIFIC_MAX
2458 };
2459
2460 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2461 const char *, enum fpmath_unit, bool);
2462 static void ix86_function_specific_save (struct cl_target_option *,
2463 struct gcc_options *opts);
2464 static void ix86_function_specific_restore (struct gcc_options *opts,
2465 struct cl_target_option *);
2466 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2467 static void ix86_function_specific_print (FILE *, int,
2468 struct cl_target_option *);
2469 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2470 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2471 struct gcc_options *,
2472 struct gcc_options *,
2473 struct gcc_options *);
2474 static bool ix86_can_inline_p (tree, tree);
2475 static void ix86_set_current_function (tree);
2476 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2477
2478 static enum calling_abi ix86_function_abi (const_tree);
2479
2480 \f
2481 #ifndef SUBTARGET32_DEFAULT_CPU
2482 #define SUBTARGET32_DEFAULT_CPU "i386"
2483 #endif
2484
2485 /* Whether -mtune= or -march= were specified */
2486 static int ix86_tune_defaulted;
2487 static int ix86_arch_specified;
2488
2489 /* Vectorization library interface and handlers. */
2490 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2491
2492 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2493 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2494
2495 /* Processor target table, indexed by processor number */
2496 struct ptt
2497 {
2498 const char *const name; /* processor name */
2499 const struct processor_costs *cost; /* Processor costs */
2500 const int align_loop; /* Default alignments. */
2501 const int align_loop_max_skip;
2502 const int align_jump;
2503 const int align_jump_max_skip;
2504 const int align_func;
2505 };
2506
2507 /* This table must be in sync with enum processor_type in i386.h. */
2508 static const struct ptt processor_target_table[PROCESSOR_max] =
2509 {
2510 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2511 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2512 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2513 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2514 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2515 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2516 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2517 {"core2", &core_cost, 16, 10, 16, 10, 16},
2518 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2519 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2520 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2521 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2522 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2523 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2524 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2525 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2526 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2527 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2528 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2529 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2530 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2531 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2532 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2533 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2534 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2535 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2536 };
2537 \f
2538 static unsigned int
2539 rest_of_handle_insert_vzeroupper (void)
2540 {
2541 int i;
2542
 2543   /* vzeroupper instructions are inserted immediately after reload to
 2544      account for possible spills from 256-bit registers.  The pass
 2545      reuses the mode switching infrastructure by re-running the mode
 2546      insertion pass, so disable entities that have already been processed.  */
2547 for (i = 0; i < MAX_386_ENTITIES; i++)
2548 ix86_optimize_mode_switching[i] = 0;
2549
2550 ix86_optimize_mode_switching[AVX_U128] = 1;
2551
2552 /* Call optimize_mode_switching. */
2553 g->get_passes ()->execute_pass_mode_switching ();
2554 return 0;
2555 }
2556
2557 namespace {
2558
2559 const pass_data pass_data_insert_vzeroupper =
2560 {
2561 RTL_PASS, /* type */
2562 "vzeroupper", /* name */
2563 OPTGROUP_NONE, /* optinfo_flags */
2564 TV_NONE, /* tv_id */
2565 0, /* properties_required */
2566 0, /* properties_provided */
2567 0, /* properties_destroyed */
2568 0, /* todo_flags_start */
2569 TODO_df_finish, /* todo_flags_finish */
2570 };
2571
2572 class pass_insert_vzeroupper : public rtl_opt_pass
2573 {
2574 public:
2575 pass_insert_vzeroupper(gcc::context *ctxt)
2576 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2577 {}
2578
2579 /* opt_pass methods: */
2580 virtual bool gate (function *)
2581 {
2582 return TARGET_AVX && !TARGET_AVX512F
2583 && TARGET_VZEROUPPER && flag_expensive_optimizations
2584 && !optimize_size;
2585 }
2586
2587 virtual unsigned int execute (function *)
2588 {
2589 return rest_of_handle_insert_vzeroupper ();
2590 }
2591
2592 }; // class pass_insert_vzeroupper
2593
2594 } // anon namespace
2595
2596 rtl_opt_pass *
2597 make_pass_insert_vzeroupper (gcc::context *ctxt)
2598 {
2599 return new pass_insert_vzeroupper (ctxt);
2600 }
2601
2602 /* Return true if a red-zone is in use. */
2603
2604 static inline bool
2605 ix86_using_red_zone (void)
2606 {
2607 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2608 }
2609 \f
2610 /* Return a string that documents the current -m options. The caller is
2611 responsible for freeing the string. */
2612
2613 static char *
2614 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2615 const char *tune, enum fpmath_unit fpmath,
2616 bool add_nl_p)
2617 {
2618 struct ix86_target_opts
2619 {
2620 const char *option; /* option string */
2621 HOST_WIDE_INT mask; /* isa mask options */
2622 };
2623
 2624   /* This table is ordered so that options like -msse4.2, which imply
 2625      preceding options, are matched first.  */
2626 static struct ix86_target_opts isa_opts[] =
2627 {
2628 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2629 { "-mfma", OPTION_MASK_ISA_FMA },
2630 { "-mxop", OPTION_MASK_ISA_XOP },
2631 { "-mlwp", OPTION_MASK_ISA_LWP },
2632 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2633 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2634 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2635 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2636 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2637 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2638 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2639 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2640 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2641 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2642 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2643 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2644 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2645 { "-msse3", OPTION_MASK_ISA_SSE3 },
2646 { "-msse2", OPTION_MASK_ISA_SSE2 },
2647 { "-msse", OPTION_MASK_ISA_SSE },
2648 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2649 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2650 { "-mmmx", OPTION_MASK_ISA_MMX },
2651 { "-mabm", OPTION_MASK_ISA_ABM },
2652 { "-mbmi", OPTION_MASK_ISA_BMI },
2653 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2654 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2655 { "-mhle", OPTION_MASK_ISA_HLE },
2656 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2657 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2658 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2659 { "-madx", OPTION_MASK_ISA_ADX },
2660 { "-mtbm", OPTION_MASK_ISA_TBM },
2661 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2662 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2663 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2664 { "-maes", OPTION_MASK_ISA_AES },
2665 { "-msha", OPTION_MASK_ISA_SHA },
2666 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2667 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2668 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2669 { "-mf16c", OPTION_MASK_ISA_F16C },
2670 { "-mrtm", OPTION_MASK_ISA_RTM },
2671 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2672 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2673 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2674 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2675 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2676 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2677 { "-mmpx", OPTION_MASK_ISA_MPX },
2678 { "-mclwb", OPTION_MASK_ISA_CLWB },
2679 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
2680 };
2681
2682 /* Flag options. */
2683 static struct ix86_target_opts flag_opts[] =
2684 {
2685 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2686 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2687 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2688 { "-m80387", MASK_80387 },
2689 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2690 { "-malign-double", MASK_ALIGN_DOUBLE },
2691 { "-mcld", MASK_CLD },
2692 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2693 { "-mieee-fp", MASK_IEEE_FP },
2694 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2695 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2696 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2697 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2698 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2699 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2700 { "-mno-red-zone", MASK_NO_RED_ZONE },
2701 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2702 { "-mrecip", MASK_RECIP },
2703 { "-mrtd", MASK_RTD },
2704 { "-msseregparm", MASK_SSEREGPARM },
2705 { "-mstack-arg-probe", MASK_STACK_PROBE },
2706 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2707 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2708 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2709 { "-mvzeroupper", MASK_VZEROUPPER },
2710 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2711 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2712 { "-mprefer-avx128", MASK_PREFER_AVX128},
2713 };
2714
2715 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2716
2717 char isa_other[40];
2718 char target_other[40];
2719 unsigned num = 0;
2720 unsigned i, j;
2721 char *ret;
2722 char *ptr;
2723 size_t len;
2724 size_t line_len;
2725 size_t sep_len;
2726 const char *abi;
2727
2728 memset (opts, '\0', sizeof (opts));
2729
2730 /* Add -march= option. */
2731 if (arch)
2732 {
2733 opts[num][0] = "-march=";
2734 opts[num++][1] = arch;
2735 }
2736
2737 /* Add -mtune= option. */
2738 if (tune)
2739 {
2740 opts[num][0] = "-mtune=";
2741 opts[num++][1] = tune;
2742 }
2743
2744 /* Add -m32/-m64/-mx32. */
2745 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2746 {
2747 if ((isa & OPTION_MASK_ABI_64) != 0)
2748 abi = "-m64";
2749 else
2750 abi = "-mx32";
2751 isa &= ~ (OPTION_MASK_ISA_64BIT
2752 | OPTION_MASK_ABI_64
2753 | OPTION_MASK_ABI_X32);
2754 }
2755 else
2756 abi = "-m32";
2757 opts[num++][0] = abi;
2758
2759 /* Pick out the options in isa options. */
2760 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2761 {
2762 if ((isa & isa_opts[i].mask) != 0)
2763 {
2764 opts[num++][0] = isa_opts[i].option;
2765 isa &= ~ isa_opts[i].mask;
2766 }
2767 }
2768
2769 if (isa && add_nl_p)
2770 {
2771 opts[num++][0] = isa_other;
2772 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2773 isa);
2774 }
2775
2776 /* Add flag options. */
2777 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2778 {
2779 if ((flags & flag_opts[i].mask) != 0)
2780 {
2781 opts[num++][0] = flag_opts[i].option;
2782 flags &= ~ flag_opts[i].mask;
2783 }
2784 }
2785
2786 if (flags && add_nl_p)
2787 {
2788 opts[num++][0] = target_other;
2789 sprintf (target_other, "(other flags: %#x)", flags);
2790 }
2791
2792 /* Add -fpmath= option. */
2793 if (fpmath)
2794 {
2795 opts[num][0] = "-mfpmath=";
2796 switch ((int) fpmath)
2797 {
2798 case FPMATH_387:
2799 opts[num++][1] = "387";
2800 break;
2801
2802 case FPMATH_SSE:
2803 opts[num++][1] = "sse";
2804 break;
2805
2806 case FPMATH_387 | FPMATH_SSE:
2807 opts[num++][1] = "sse+387";
2808 break;
2809
2810 default:
2811 gcc_unreachable ();
2812 }
2813 }
2814
2815 /* Any options? */
2816 if (num == 0)
2817 return NULL;
2818
2819 gcc_assert (num < ARRAY_SIZE (opts));
2820
2821 /* Size the string. */
2822 len = 0;
2823 sep_len = (add_nl_p) ? 3 : 1;
2824 for (i = 0; i < num; i++)
2825 {
2826 len += sep_len;
2827 for (j = 0; j < 2; j++)
2828 if (opts[i][j])
2829 len += strlen (opts[i][j]);
2830 }
2831
2832 /* Build the string. */
2833 ret = ptr = (char *) xmalloc (len);
2834 line_len = 0;
2835
2836 for (i = 0; i < num; i++)
2837 {
2838 size_t len2[2];
2839
2840 for (j = 0; j < 2; j++)
2841 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2842
2843 if (i != 0)
2844 {
2845 *ptr++ = ' ';
2846 line_len++;
2847
2848 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2849 {
2850 *ptr++ = '\\';
2851 *ptr++ = '\n';
2852 line_len = 0;
2853 }
2854 }
2855
2856 for (j = 0; j < 2; j++)
2857 if (opts[i][j])
2858 {
2859 memcpy (ptr, opts[i][j], len2[j]);
2860 ptr += len2[j];
2861 line_len += len2[j];
2862 }
2863 }
2864
2865 *ptr = '\0';
2866 gcc_assert (ret + len >= ptr);
2867
2868 return ret;
2869 }
2870
 2871 /* Return true if profiling code should be emitted before the prologue,
 2872    and false otherwise.  Note: for x86 this is the case only for
 2873    "hotfix" style profiling, i.e. when flag_fentry is set.  */
2874 static bool
2875 ix86_profile_before_prologue (void)
2876 {
2877 return flag_fentry != 0;
2878 }
2879
2880 /* Function that is callable from the debugger to print the current
2881 options. */
2882 void ATTRIBUTE_UNUSED
2883 ix86_debug_options (void)
2884 {
2885 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2886 ix86_arch_string, ix86_tune_string,
2887 ix86_fpmath, true);
2888
2889 if (opts)
2890 {
2891 fprintf (stderr, "%s\n\n", opts);
2892 free (opts);
2893 }
2894 else
2895 fputs ("<no options>\n\n", stderr);
2896
2897 return;
2898 }
2899
2900 static const char *stringop_alg_names[] = {
2901 #define DEF_ENUM
2902 #define DEF_ALG(alg, name) #name,
2903 #include "stringop.def"
2904 #undef DEF_ENUM
2905 #undef DEF_ALG
2906 };
2907
2908 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
 2909    The string has the following form (or a comma-separated list of such entries):
2910
2911 strategy_alg:max_size:[align|noalign]
2912
2913 where the full size range for the strategy is either [0, max_size] or
2914 [min_size, max_size], in which min_size is the max_size + 1 of the
2915 preceding range. The last size range must have max_size == -1.
2916
2917 Examples:
2918
2919 1.
2920 -mmemcpy-strategy=libcall:-1:noalign
2921
2922 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2923
2924
2925 2.
2926 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2927
2928 This is to tell the compiler to use the following strategy for memset
2929 1) when the expected size is between [1, 16], use rep_8byte strategy;
2930 2) when the size is between [17, 2048], use vector_loop;
2931 3) when the size is > 2048, use libcall. */
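   For illustration only (using example 2 above and the stringop_size_range
   fields {max, alg, noalign} defined below), that memset strategy string
   would parse roughly into three ranges:
     {   16, rep_prefix_8_byte, true  }   sizes [1, 16]
     { 2048, vector_loop,       false }   sizes [17, 2048]
     {   -1, libcall,           true  }   sizes above 2048
   which then overwrite the default memset algorithm table for the
   current target (the 32-bit or 64-bit entry).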
2932
2933 struct stringop_size_range
2934 {
2935 int max;
2936 stringop_alg alg;
2937 bool noalign;
2938 };
2939
2940 static void
2941 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2942 {
2943 const struct stringop_algs *default_algs;
2944 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2945 char *curr_range_str, *next_range_str;
2946 int i = 0, n = 0;
2947
2948 if (is_memset)
2949 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2950 else
2951 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2952
2953 curr_range_str = strategy_str;
2954
2955 do
2956 {
2957 int maxs;
2958 char alg_name[128];
2959 char align[16];
2960 next_range_str = strchr (curr_range_str, ',');
2961 if (next_range_str)
2962 *next_range_str++ = '\0';
2963
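/* Each range is written as "alg:max_size:align"; the sscanf field widths
   guard the fixed-size buffers declared above.  */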
2964 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2965 alg_name, &maxs, align))
2966 {
2967 error ("wrong arg %s to option %s", curr_range_str,
2968 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2969 return;
2970 }
2971
2972 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2973 {
2974 error ("size ranges of option %s should be increasing",
2975 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2976 return;
2977 }
2978
2979 for (i = 0; i < last_alg; i++)
2980 if (!strcmp (alg_name, stringop_alg_names[i]))
2981 break;
2982
2983 if (i == last_alg)
2984 {
2985 error ("wrong stringop strategy name %s specified for option %s",
2986 alg_name,
2987 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2988 return;
2989 }
2990
2991 if ((stringop_alg) i == rep_prefix_8_byte
2992 && !TARGET_64BIT)
2993 {
2994 /* rep; movq isn't available in 32-bit code. */
2995 error ("stringop strategy name %s specified for option %s "
2996 "not supported for 32-bit code",
2997 alg_name,
2998 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2999 return;
3000 }
3001
3002 input_ranges[n].max = maxs;
3003 input_ranges[n].alg = (stringop_alg) i;
3004 if (!strcmp (align, "align"))
3005 input_ranges[n].noalign = false;
3006 else if (!strcmp (align, "noalign"))
3007 input_ranges[n].noalign = true;
3008 else
3009 {
3010 error ("unknown alignment %s specified for option %s",
3011 align, is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
3012 return;
3013 }
3014 n++;
3015 curr_range_str = next_range_str;
3016 }
3017 while (curr_range_str);
3018
3019 if (input_ranges[n - 1].max != -1)
3020 {
3021 error ("the max value for the last size range should be -1"
3022 " for option %s",
3023 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
3024 return;
3025 }
3026
3027 if (n > MAX_STRINGOP_ALGS)
3028 {
3029 error ("too many size ranges specified in option %s",
3030 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
3031 return;
3032 }
3033
3034 /* Now override the default algs array. */
3035 for (i = 0; i < n; i++)
3036 {
3037 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3038 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3039 = input_ranges[i].alg;
3040 *const_cast<int *>(&default_algs->size[i].noalign)
3041 = input_ranges[i].noalign;
3042 }
3043 }
3044
3045 \f
3046 /* Parse the -mtune-ctrl= option. When DUMP is true,
3047 print the features that are explicitly set. */
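/* For example (illustrative only -- the valid feature names are those
   defined in x86-tune.def):
   -mtune-ctrl=use_leave,^avoid_mem_opnd_for_cmove
   sets the first feature and clears the second.  */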
3048
3049 static void
3050 parse_mtune_ctrl_str (bool dump)
3051 {
3052 if (!ix86_tune_ctrl_string)
3053 return;
3054
3055 char *next_feature_string = NULL;
3056 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3057 char *orig = curr_feature_string;
3058 int i;
3059 do
3060 {
3061 bool clear = false;
3062
3063 next_feature_string = strchr (curr_feature_string, ',');
3064 if (next_feature_string)
3065 *next_feature_string++ = '\0';
3066 if (*curr_feature_string == '^')
3067 {
3068 curr_feature_string++;
3069 clear = true;
3070 }
3071 for (i = 0; i < X86_TUNE_LAST; i++)
3072 {
3073 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3074 {
3075 ix86_tune_features[i] = !clear;
3076 if (dump)
3077 fprintf (stderr, "Explicitly %s feature %s\n",
3078 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3079 break;
3080 }
3081 }
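/* Report the original token, including any leading '^' we stepped past.  */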
3082 if (i == X86_TUNE_LAST)
3083 error ("unknown parameter to option -mtune-ctrl: %s",
3084 clear ? curr_feature_string - 1 : curr_feature_string);
3085 curr_feature_string = next_feature_string;
3086 }
3087 while (curr_feature_string);
3088 free (orig);
3089 }
3090
3091 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3092 processor type. */
3093
3094 static void
3095 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3096 {
3097 unsigned int ix86_tune_mask = 1u << ix86_tune;
3098 int i;
3099
3100 for (i = 0; i < X86_TUNE_LAST; ++i)
3101 {
3102 if (ix86_tune_no_default)
3103 ix86_tune_features[i] = 0;
3104 else
3105 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3106 }
3107
3108 if (dump)
3109 {
3110 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3111 for (i = 0; i < X86_TUNE_LAST; i++)
3112 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3113 ix86_tune_features[i] ? "on" : "off");
3114 }
3115
3116 parse_mtune_ctrl_str (dump);
3117 }
3118
3119
3120 /* Default align_* from the processor table. */
3121
3122 static void
3123 ix86_default_align (struct gcc_options *opts)
3124 {
3125 if (opts->x_align_loops == 0)
3126 {
3127 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3128 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3129 }
3130 if (opts->x_align_jumps == 0)
3131 {
3132 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3133 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3134 }
3135 if (opts->x_align_functions == 0)
3136 {
3137 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3138 }
3139 }
3140
3141 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
3142
3143 static void
3144 ix86_override_options_after_change (void)
3145 {
3146 ix86_default_align (&global_options);
3147 }
3148
3149 /* Override various settings based on options. If MAIN_ARGS_P, the
3150 options are from the command line, otherwise they are from
3151 attributes. */
3152
3153 static void
3154 ix86_option_override_internal (bool main_args_p,
3155 struct gcc_options *opts,
3156 struct gcc_options *opts_set)
3157 {
3158 int i;
3159 unsigned int ix86_arch_mask;
3160 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3161 const char *prefix;
3162 const char *suffix;
3163 const char *sw;
3164
3165 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3166 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3167 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3168 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3169 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3170 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3171 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3172 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3173 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3174 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3175 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3176 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3177 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3178 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3179 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3180 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3181 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3182 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3183 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3184 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3185 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3186 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3187 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3188 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3189 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3190 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3191 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3192 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3193 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3194 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3195 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3196 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3197 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3198 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3199 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3200 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3201 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3202 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3203 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3204 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3205 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3206 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3207 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3208 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3209 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3210 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3211 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3212 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3213 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3214 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3215 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3216 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3217 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3218 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3219 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3220 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3221 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
3222
3223 #define PTA_CORE2 \
3224 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3225 | PTA_CX16 | PTA_FXSR)
3226 #define PTA_NEHALEM \
3227 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3228 #define PTA_WESTMERE \
3229 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3230 #define PTA_SANDYBRIDGE \
3231 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3232 #define PTA_IVYBRIDGE \
3233 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3234 #define PTA_HASWELL \
3235 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3236 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3237 #define PTA_BROADWELL \
3238 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3239 #define PTA_KNL \
3240 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3241 #define PTA_BONNELL \
3242 (PTA_CORE2 | PTA_MOVBE)
3243 #define PTA_SILVERMONT \
3244 (PTA_WESTMERE | PTA_MOVBE)
3245
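/* For instance, PTA_WESTMERE is the PTA_NEHALEM set plus AES and PCLMUL;
   the -march handling loop further down turns each PTA_* bit into the
   corresponding OPTION_MASK_ISA_* flag unless that ISA was set or cleared
   explicitly on the command line.  */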
3246 /* If this reaches 64, we need to widen the struct pta flags below. */
3247
3248 static struct pta
3249 {
3250 const char *const name; /* processor name or nickname. */
3251 const enum processor_type processor;
3252 const enum attr_cpu schedule;
3253 const unsigned HOST_WIDE_INT flags;
3254 }
3255 const processor_alias_table[] =
3256 {
3257 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3258 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3259 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3260 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3261 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3262 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3263 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3264 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3265 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3266 PTA_MMX | PTA_SSE | PTA_FXSR},
3267 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3268 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3269 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3270 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3271 PTA_MMX | PTA_SSE | PTA_FXSR},
3272 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3273 PTA_MMX | PTA_SSE | PTA_FXSR},
3274 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3275 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3276 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3277 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3278 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3279 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3280 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3281 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3282 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3283 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3284 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3285 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3286 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3287 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3288 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3289 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3290 PTA_SANDYBRIDGE},
3291 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3292 PTA_SANDYBRIDGE},
3293 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3294 PTA_IVYBRIDGE},
3295 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3296 PTA_IVYBRIDGE},
3297 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3298 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3299 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3300 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3301 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3302 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3303 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3304 {"knl", PROCESSOR_KNL, CPU_KNL, PTA_KNL},
3305 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3306 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3307 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3308 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3309 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3310 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3311 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3312 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3313 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3314 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3315 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3316 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3317 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3318 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3319 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3320 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3321 {"x86-64", PROCESSOR_K8, CPU_K8,
3322 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3323 {"k8", PROCESSOR_K8, CPU_K8,
3324 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3325 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3326 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3327 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3328 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3329 {"opteron", PROCESSOR_K8, CPU_K8,
3330 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3331 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3332 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3333 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3334 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3335 {"athlon64", PROCESSOR_K8, CPU_K8,
3336 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3337 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3338 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3339 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3340 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3341 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3342 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3343 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3344 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3345 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3346 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3347 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3348 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3349 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3350 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3351 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3352 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3353 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3354 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3355 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3356 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3357 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3358 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3359 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3360 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3361 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3362 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3363 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3364 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3365 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3366 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3367 | PTA_XSAVEOPT | PTA_FSGSBASE},
3368 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3369 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3370 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3371 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3372 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3373 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3374 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3375 | PTA_MOVBE},
3376 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3377 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3378 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3379 | PTA_FXSR | PTA_XSAVE},
3380 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3381 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3382 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3383 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3384 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3385 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3386
3387 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3388 PTA_64BIT
3389 | PTA_HLE /* flags are only used for -march switch. */ },
3390 };
3391
3392 /* -mrecip options. */
3393 static struct
3394 {
3395 const char *string; /* option name */
3396 unsigned int mask; /* mask bits to set */
3397 }
3398 const recip_options[] =
3399 {
3400 { "all", RECIP_MASK_ALL },
3401 { "none", RECIP_MASK_NONE },
3402 { "div", RECIP_MASK_DIV },
3403 { "sqrt", RECIP_MASK_SQRT },
3404 { "vec-div", RECIP_MASK_VEC_DIV },
3405 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3406 };
3407
3408 int const pta_size = ARRAY_SIZE (processor_alias_table);
3409
3410 /* Set up prefix/suffix so the error messages refer to either the command
3411 line argument, or the attribute(target). */
3412 if (main_args_p)
3413 {
3414 prefix = "-m";
3415 suffix = "";
3416 sw = "switch";
3417 }
3418 else
3419 {
3420 prefix = "option(\"";
3421 suffix = "\")";
3422 sw = "attribute";
3423 }
3424
3425 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3426 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3427 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3428 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3429 #ifdef TARGET_BI_ARCH
3430 else
3431 {
3432 #if TARGET_BI_ARCH == 1
3433 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3434 is on and OPTION_MASK_ABI_X32 is off. We turn off
3435 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3436 -mx32. */
3437 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3438 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3439 #else
3440 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3441 on and OPTION_MASK_ABI_64 is off. We turn off
3442 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3443 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3444 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3445 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3446 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3447 #endif
3448 }
3449 #endif
3450
3451 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3452 {
3453 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3454 OPTION_MASK_ABI_64 for TARGET_X32. */
3455 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3456 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3457 }
3458 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3459 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3460 | OPTION_MASK_ABI_X32
3461 | OPTION_MASK_ABI_64);
3462 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3463 {
3464 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3465 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3466 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3467 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3468 }
3469
3470 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3471 SUBTARGET_OVERRIDE_OPTIONS;
3472 #endif
3473
3474 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3475 SUBSUBTARGET_OVERRIDE_OPTIONS;
3476 #endif
3477
3478 /* -fPIC is the default for x86_64. */
3479 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3480 opts->x_flag_pic = 2;
3481
3482 /* Need to check -mtune=generic first. */
3483 if (opts->x_ix86_tune_string)
3484 {
3485 /* As special support for cross compilers we read -mtune=native
3486 as -mtune=generic. With native compilers we won't see the
3487 -mtune=native, as it was changed by the driver. */
3488 if (!strcmp (opts->x_ix86_tune_string, "native"))
3489 {
3490 opts->x_ix86_tune_string = "generic";
3491 }
3492 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3493 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3494 "%stune=k8%s or %stune=generic%s instead as appropriate",
3495 prefix, suffix, prefix, suffix, prefix, suffix);
3496 }
3497 else
3498 {
3499 if (opts->x_ix86_arch_string)
3500 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3501 if (!opts->x_ix86_tune_string)
3502 {
3503 opts->x_ix86_tune_string
3504 = processor_target_table[TARGET_CPU_DEFAULT].name;
3505 ix86_tune_defaulted = 1;
3506 }
3507
3508 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3509 or defaulted. We need to use a sensible tune option. */
3510 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3511 {
3512 opts->x_ix86_tune_string = "generic";
3513 }
3514 }
3515
3516 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3517 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3518 {
3519 /* rep; movq isn't available in 32-bit code. */
3520 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3521 opts->x_ix86_stringop_alg = no_stringop;
3522 }
3523
3524 if (!opts->x_ix86_arch_string)
3525 opts->x_ix86_arch_string
3526 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3527 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3528 else
3529 ix86_arch_specified = 1;
3530
3531 if (opts_set->x_ix86_pmode)
3532 {
3533 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3534 && opts->x_ix86_pmode == PMODE_SI)
3535 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3536 && opts->x_ix86_pmode == PMODE_DI))
3537 error ("address mode %qs not supported in the %s bit mode",
3538 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3539 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3540 }
3541 else
3542 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3543 ? PMODE_DI : PMODE_SI;
3544
3545 if (!opts_set->x_ix86_abi)
3546 opts->x_ix86_abi = DEFAULT_ABI;
3547
3548 /* For targets using the MS ABI, enable ms-extensions unless they were
3549 explicitly turned off. For non-MS ABIs we turn this
3550 option off. */
3551 if (!opts_set->x_flag_ms_extensions)
3552 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3553
3554 if (opts_set->x_ix86_cmodel)
3555 {
3556 switch (opts->x_ix86_cmodel)
3557 {
3558 case CM_SMALL:
3559 case CM_SMALL_PIC:
3560 if (opts->x_flag_pic)
3561 opts->x_ix86_cmodel = CM_SMALL_PIC;
3562 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3563 error ("code model %qs not supported in the %s bit mode",
3564 "small", "32");
3565 break;
3566
3567 case CM_MEDIUM:
3568 case CM_MEDIUM_PIC:
3569 if (opts->x_flag_pic)
3570 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3571 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3572 error ("code model %qs not supported in the %s bit mode",
3573 "medium", "32");
3574 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3575 error ("code model %qs not supported in x32 mode",
3576 "medium");
3577 break;
3578
3579 case CM_LARGE:
3580 case CM_LARGE_PIC:
3581 if (opts->x_flag_pic)
3582 opts->x_ix86_cmodel = CM_LARGE_PIC;
3583 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3584 error ("code model %qs not supported in the %s bit mode",
3585 "large", "32");
3586 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3587 error ("code model %qs not supported in x32 mode",
3588 "large");
3589 break;
3590
3591 case CM_32:
3592 if (opts->x_flag_pic)
3593 error ("code model %s does not support PIC mode", "32");
3594 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3595 error ("code model %qs not supported in the %s bit mode",
3596 "32", "64");
3597 break;
3598
3599 case CM_KERNEL:
3600 if (opts->x_flag_pic)
3601 {
3602 error ("code model %s does not support PIC mode", "kernel");
3603 opts->x_ix86_cmodel = CM_32;
3604 }
3605 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3606 error ("code model %qs not supported in the %s bit mode",
3607 "kernel", "32");
3608 break;
3609
3610 default:
3611 gcc_unreachable ();
3612 }
3613 }
3614 else
3615 {
3616 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3617 use of rip-relative addressing. This eliminates fixups that
3618 would otherwise be needed if this object is to be placed in a
3619 DLL, and is essentially just as efficient as direct addressing. */
3620 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3621 && (TARGET_RDOS || TARGET_PECOFF))
3622 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3623 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3624 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3625 else
3626 opts->x_ix86_cmodel = CM_32;
3627 }
3628 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3629 {
3630 error ("-masm=intel not supported in this configuration");
3631 opts->x_ix86_asm_dialect = ASM_ATT;
3632 }
3633 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3634 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3635 sorry ("%i-bit mode not compiled in",
3636 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3637
3638 for (i = 0; i < pta_size; i++)
3639 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3640 {
3641 ix86_schedule = processor_alias_table[i].schedule;
3642 ix86_arch = processor_alias_table[i].processor;
3643 /* Default cpu tuning to the architecture. */
3644 ix86_tune = ix86_arch;
3645
3646 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3647 && !(processor_alias_table[i].flags & PTA_64BIT))
3648 error ("CPU you selected does not support x86-64 "
3649 "instruction set");
3650
3651 if (processor_alias_table[i].flags & PTA_MMX
3652 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3653 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3654 if (processor_alias_table[i].flags & PTA_3DNOW
3655 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3656 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3657 if (processor_alias_table[i].flags & PTA_3DNOW_A
3658 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3659 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3660 if (processor_alias_table[i].flags & PTA_SSE
3661 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3662 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3663 if (processor_alias_table[i].flags & PTA_SSE2
3664 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3665 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3666 if (processor_alias_table[i].flags & PTA_SSE3
3667 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3668 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3669 if (processor_alias_table[i].flags & PTA_SSSE3
3670 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3671 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3672 if (processor_alias_table[i].flags & PTA_SSE4_1
3673 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3674 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3675 if (processor_alias_table[i].flags & PTA_SSE4_2
3676 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3677 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3678 if (processor_alias_table[i].flags & PTA_AVX
3679 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3680 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3681 if (processor_alias_table[i].flags & PTA_AVX2
3682 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3683 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3684 if (processor_alias_table[i].flags & PTA_FMA
3685 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3686 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3687 if (processor_alias_table[i].flags & PTA_SSE4A
3688 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3689 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3690 if (processor_alias_table[i].flags & PTA_FMA4
3691 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3692 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3693 if (processor_alias_table[i].flags & PTA_XOP
3694 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3695 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3696 if (processor_alias_table[i].flags & PTA_LWP
3697 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3698 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3699 if (processor_alias_table[i].flags & PTA_ABM
3700 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3701 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3702 if (processor_alias_table[i].flags & PTA_BMI
3703 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3704 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3705 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3706 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3707 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3708 if (processor_alias_table[i].flags & PTA_TBM
3709 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3710 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3711 if (processor_alias_table[i].flags & PTA_BMI2
3712 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3713 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3714 if (processor_alias_table[i].flags & PTA_CX16
3715 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3716 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3717 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3718 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3719 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3720 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3721 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3722 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3723 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3724 if (processor_alias_table[i].flags & PTA_MOVBE
3725 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3726 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3727 if (processor_alias_table[i].flags & PTA_AES
3728 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3729 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3730 if (processor_alias_table[i].flags & PTA_SHA
3731 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3732 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3733 if (processor_alias_table[i].flags & PTA_PCLMUL
3734 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3735 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3736 if (processor_alias_table[i].flags & PTA_FSGSBASE
3737 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3738 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3739 if (processor_alias_table[i].flags & PTA_RDRND
3740 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3741 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3742 if (processor_alias_table[i].flags & PTA_F16C
3743 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3744 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3745 if (processor_alias_table[i].flags & PTA_RTM
3746 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3747 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3748 if (processor_alias_table[i].flags & PTA_HLE
3749 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3750 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3751 if (processor_alias_table[i].flags & PTA_PRFCHW
3752 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3753 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3754 if (processor_alias_table[i].flags & PTA_RDSEED
3755 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3756 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3757 if (processor_alias_table[i].flags & PTA_ADX
3758 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3759 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3760 if (processor_alias_table[i].flags & PTA_FXSR
3761 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3762 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3763 if (processor_alias_table[i].flags & PTA_XSAVE
3764 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3765 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3766 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3767 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3768 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3769 if (processor_alias_table[i].flags & PTA_AVX512F
3770 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3771 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3772 if (processor_alias_table[i].flags & PTA_AVX512ER
3773 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3774 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3775 if (processor_alias_table[i].flags & PTA_AVX512PF
3776 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3777 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3778 if (processor_alias_table[i].flags & PTA_AVX512CD
3779 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3780 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3781 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3782 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3783 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3784 if (processor_alias_table[i].flags & PTA_PCOMMIT
3785 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3786 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3787 if (processor_alias_table[i].flags & PTA_CLWB
3788 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3789 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3790 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3791 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3792 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3793 if (processor_alias_table[i].flags & PTA_XSAVEC
3794 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3795 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3796 if (processor_alias_table[i].flags & PTA_XSAVES
3797 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3798 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3799 if (processor_alias_table[i].flags & PTA_AVX512DQ
3800 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3801 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3802 if (processor_alias_table[i].flags & PTA_AVX512BW
3803 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3804 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3805 if (processor_alias_table[i].flags & PTA_AVX512VL
3806 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3807 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3808 if (processor_alias_table[i].flags & PTA_MPX
3809 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3810 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3811 if (processor_alias_table[i].flags & PTA_AVX512VBMI
3812 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3813 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3814 if (processor_alias_table[i].flags & PTA_AVX512IFMA
3815 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3816 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3817 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3818 x86_prefetch_sse = true;
3819
3820 break;
3821 }
3822
3823 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3824 error ("Intel MPX does not support x32");
3825
3826 if (TARGET_X32 && (ix86_isa_flags & OPTION_MASK_ISA_MPX))
3827 error ("Intel MPX does not support x32");
3828
3829 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3830 error ("generic CPU can be used only for %stune=%s %s",
3831 prefix, suffix, sw);
3832 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3833 error ("intel CPU can be used only for %stune=%s %s",
3834 prefix, suffix, sw);
3835 else if (i == pta_size)
3836 error ("bad value (%s) for %sarch=%s %s",
3837 opts->x_ix86_arch_string, prefix, suffix, sw);
3838
3839 ix86_arch_mask = 1u << ix86_arch;
3840 for (i = 0; i < X86_ARCH_LAST; ++i)
3841 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3842
3843 for (i = 0; i < pta_size; i++)
3844 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3845 {
3846 ix86_schedule = processor_alias_table[i].schedule;
3847 ix86_tune = processor_alias_table[i].processor;
3848 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3849 {
3850 if (!(processor_alias_table[i].flags & PTA_64BIT))
3851 {
3852 if (ix86_tune_defaulted)
3853 {
3854 opts->x_ix86_tune_string = "x86-64";
3855 for (i = 0; i < pta_size; i++)
3856 if (! strcmp (opts->x_ix86_tune_string,
3857 processor_alias_table[i].name))
3858 break;
3859 ix86_schedule = processor_alias_table[i].schedule;
3860 ix86_tune = processor_alias_table[i].processor;
3861 }
3862 else
3863 error ("CPU you selected does not support x86-64 "
3864 "instruction set");
3865 }
3866 }
3867 /* Intel CPUs have always interpreted SSE prefetch instructions as
3868 NOPs; so, we can enable SSE prefetch instructions even when
3869 -mtune (rather than -march) points us to a processor that has them.
3870 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3871 higher processors. */
3872 if (TARGET_CMOV
3873 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3874 x86_prefetch_sse = true;
3875 break;
3876 }
3877
3878 if (ix86_tune_specified && i == pta_size)
3879 error ("bad value (%s) for %stune=%s %s",
3880 opts->x_ix86_tune_string, prefix, suffix, sw);
3881
3882 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3883
3884 #ifndef USE_IX86_FRAME_POINTER
3885 #define USE_IX86_FRAME_POINTER 0
3886 #endif
3887
3888 #ifndef USE_X86_64_FRAME_POINTER
3889 #define USE_X86_64_FRAME_POINTER 0
3890 #endif
3891
3892 /* Set the default values for switches whose default depends on TARGET_64BIT
3893 in case they weren't overwritten by command line options. */
3894 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3895 {
3896 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3897 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3898 if (opts->x_flag_asynchronous_unwind_tables
3899 && !opts_set->x_flag_unwind_tables
3900 && TARGET_64BIT_MS_ABI)
3901 opts->x_flag_unwind_tables = 1;
3902 if (opts->x_flag_asynchronous_unwind_tables == 2)
3903 opts->x_flag_unwind_tables
3904 = opts->x_flag_asynchronous_unwind_tables = 1;
3905 if (opts->x_flag_pcc_struct_return == 2)
3906 opts->x_flag_pcc_struct_return = 0;
3907 }
3908 else
3909 {
3910 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3911 opts->x_flag_omit_frame_pointer
3912 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3913 if (opts->x_flag_asynchronous_unwind_tables == 2)
3914 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3915 if (opts->x_flag_pcc_struct_return == 2)
3916 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3917 }
3918
3919 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3920 /* TODO: ix86_cost should be chosen at instruction or function granularity
3921 so that for cold code we use size_cost even in !optimize_size compilation. */
3922 if (opts->x_optimize_size)
3923 ix86_cost = &ix86_size_cost;
3924 else
3925 ix86_cost = ix86_tune_cost;
3926
3927 /* Arrange to set up i386_stack_locals for all functions. */
3928 init_machine_status = ix86_init_machine_status;
3929
3930 /* Validate -mregparm= value. */
3931 if (opts_set->x_ix86_regparm)
3932 {
3933 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3934 warning (0, "-mregparm is ignored in 64-bit mode");
3935 if (opts->x_ix86_regparm > REGPARM_MAX)
3936 {
3937 error ("-mregparm=%d is not between 0 and %d",
3938 opts->x_ix86_regparm, REGPARM_MAX);
3939 opts->x_ix86_regparm = 0;
3940 }
3941 }
3942 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3943 opts->x_ix86_regparm = REGPARM_MAX;
3944
3945 /* Default align_* from the processor table. */
3946 ix86_default_align (opts);
3947
3948 /* Provide default for -mbranch-cost= value. */
3949 if (!opts_set->x_ix86_branch_cost)
3950 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
3951
3952 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3953 {
3954 opts->x_target_flags
3955 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3956
3957 /* Enable by default the SSE and MMX builtins. Do allow the user to
3958 explicitly disable any of these. In particular, disabling SSE and
3959 MMX for kernel code is extremely useful. */
3960 if (!ix86_arch_specified)
3961 opts->x_ix86_isa_flags
3962 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3963 | TARGET_SUBTARGET64_ISA_DEFAULT)
3964 & ~opts->x_ix86_isa_flags_explicit);
3965
3966 if (TARGET_RTD_P (opts->x_target_flags))
3967 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3968 }
3969 else
3970 {
3971 opts->x_target_flags
3972 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3973
3974 if (!ix86_arch_specified)
3975 opts->x_ix86_isa_flags
3976 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3977
3978 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3979 when the programmer takes care to keep the stack from being clobbered. */
3980 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3981 opts->x_target_flags |= MASK_NO_RED_ZONE;
3982 }
3983
3984 /* Keep nonleaf frame pointers. */
3985 if (opts->x_flag_omit_frame_pointer)
3986 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3987 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3988 opts->x_flag_omit_frame_pointer = 1;
3989
3990 /* If we're doing fast math, we don't care about comparison order
3991 wrt NaNs. This lets us use a shorter comparison sequence. */
3992 if (opts->x_flag_finite_math_only)
3993 opts->x_target_flags &= ~MASK_IEEE_FP;
3994
3995 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3996 since the insns won't need emulation. */
3997 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3998 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3999
4000 /* Likewise, if the target doesn't have a 387, or we've specified
4001 software floating point, don't use 387 inline intrinsics. */
4002 if (!TARGET_80387_P (opts->x_target_flags))
4003 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
4004
4005 /* Turn on MMX builtins for -msse. */
4006 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
4007 opts->x_ix86_isa_flags
4008 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
4009
4010 /* Enable SSE prefetch. */
4011 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
4012 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
4013 x86_prefetch_sse = true;
4014
4015 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
4016 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
4017 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
4018 opts->x_ix86_isa_flags
4019 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
4020
4021 /* Enable popcnt instruction for -msse4.2 or -mabm. */
4022 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
4023 || TARGET_ABM_P (opts->x_ix86_isa_flags))
4024 opts->x_ix86_isa_flags
4025 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4026
4027 /* Enable lzcnt instruction for -mabm. */
4028 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
4029 opts->x_ix86_isa_flags
4030 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
4031
4032 /* Validate -mpreferred-stack-boundary= value or default it to
4033 PREFERRED_STACK_BOUNDARY_DEFAULT. */
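/* As a worked example, -mpreferred-stack-boundary=4 requests
   (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. 16-byte stack alignment.  */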
4034 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4035 if (opts_set->x_ix86_preferred_stack_boundary_arg)
4036 {
4037 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4038 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
4039 int max = (TARGET_SEH ? 4 : 12);
4040
4041 if (opts->x_ix86_preferred_stack_boundary_arg < min
4042 || opts->x_ix86_preferred_stack_boundary_arg > max)
4043 {
4044 if (min == max)
4045 error ("-mpreferred-stack-boundary is not supported "
4046 "for this target");
4047 else
4048 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4049 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4050 }
4051 else
4052 ix86_preferred_stack_boundary
4053 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
4054 }
4055
4056 /* Set the default value for -mstackrealign. */
4057 if (opts->x_ix86_force_align_arg_pointer == -1)
4058 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4059
4060 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4061
4062 /* Validate -mincoming-stack-boundary= value or default it to
4063 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4064 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4065 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4066 {
4067 if (opts->x_ix86_incoming_stack_boundary_arg
4068 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
4069 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4070 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4071 opts->x_ix86_incoming_stack_boundary_arg,
4072 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
4073 else
4074 {
4075 ix86_user_incoming_stack_boundary
4076 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4077 ix86_incoming_stack_boundary
4078 = ix86_user_incoming_stack_boundary;
4079 }
4080 }
4081
4082 #ifndef NO_PROFILE_COUNTERS
4083 if (flag_nop_mcount)
4084 error ("-mnop-mcount is not compatible with this target");
4085 #endif
4086 if (flag_nop_mcount && flag_pic)
4087 error ("-mnop-mcount is not implemented for -fPIC");
4088
4089 /* Accept -msseregparm only if at least SSE support is enabled. */
4090 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4091 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4092 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4093
4094 if (opts_set->x_ix86_fpmath)
4095 {
4096 if (opts->x_ix86_fpmath & FPMATH_SSE)
4097 {
4098 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4099 {
4100 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4101 opts->x_ix86_fpmath = FPMATH_387;
4102 }
4103 else if ((opts->x_ix86_fpmath & FPMATH_387)
4104 && !TARGET_80387_P (opts->x_target_flags))
4105 {
4106 warning (0, "387 instruction set disabled, using SSE arithmetics");
4107 opts->x_ix86_fpmath = FPMATH_SSE;
4108 }
4109 }
4110 }
4111 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4112 -mfpmath=387. The latter is nevertheless the default on many targets,
4113 since the extra 80-bit precision of temporaries is considered part of
4114 the ABI. Override the default at least for -ffast-math.
4115 TODO: -mfpmath=both seems to produce equally performing code with
4116 slightly smaller binaries. It is however not clear whether register
4117 allocation is ready for this setting.
4118 Also, -mfpmath=387 codegen is overall considerably more compact
4119 (about 4-5%) than SSE codegen. We may switch to 387 with -ffast-math
4120 for size-optimized functions. */
4121 else if (fast_math_flags_set_p (&global_options)
4122 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4123 opts->x_ix86_fpmath = FPMATH_SSE;
4124 else
4125 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4126
4127 /* If the i387 is disabled, then do not return values in it. */
4128 if (!TARGET_80387_P (opts->x_target_flags))
4129 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4130
4131 /* Use external vectorized library in vectorizing intrinsics. */
4132 if (opts_set->x_ix86_veclibabi_type)
4133 switch (opts->x_ix86_veclibabi_type)
4134 {
4135 case ix86_veclibabi_type_svml:
4136 ix86_veclib_handler = ix86_veclibabi_svml;
4137 break;
4138
4139 case ix86_veclibabi_type_acml:
4140 ix86_veclib_handler = ix86_veclibabi_acml;
4141 break;
4142
4143 default:
4144 gcc_unreachable ();
4145 }
4146
4147 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4148 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4149 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4150
4151 /* If stack probes are required, the space used for large function
4152 arguments on the stack must also be probed, so enable
4153 -maccumulate-outgoing-args so this happens in the prologue. */
4154 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4155 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4156 {
4157 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4158 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4159 "for correctness", prefix, suffix);
4160 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4161 }
4162
4163 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4164 {
4165 char *p;
4166 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4167 p = strchr (internal_label_prefix, 'X');
4168 internal_label_prefix_len = p - internal_label_prefix;
4169 *p = '\0';
4170 }
4171
4172 /* When the scheduling description is not available, disable the scheduler
4173 pass so that it won't slow down compilation or make x87 code slower. */
4174 if (!TARGET_SCHEDULE)
4175 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4176
4177 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4178 ix86_tune_cost->simultaneous_prefetches,
4179 opts->x_param_values,
4180 opts_set->x_param_values);
4181 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4182 ix86_tune_cost->prefetch_block,
4183 opts->x_param_values,
4184 opts_set->x_param_values);
4185 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4186 ix86_tune_cost->l1_cache_size,
4187 opts->x_param_values,
4188 opts_set->x_param_values);
4189 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4190 ix86_tune_cost->l2_cache_size,
4191 opts->x_param_values,
4192 opts_set->x_param_values);
4193
4194 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4195 if (opts->x_flag_prefetch_loop_arrays < 0
4196 && HAVE_prefetch
4197 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4198 && !opts->x_optimize_size
4199 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4200 opts->x_flag_prefetch_loop_arrays = 1;
4201
4202 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4203 can be optimized to ap = __builtin_next_arg (0). */
4204 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4205 targetm.expand_builtin_va_start = NULL;
4206
4207 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4208 {
4209 ix86_gen_leave = gen_leave_rex64;
4210 if (Pmode == DImode)
4211 {
4212 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4213 ix86_gen_tls_local_dynamic_base_64
4214 = gen_tls_local_dynamic_base_64_di;
4215 }
4216 else
4217 {
4218 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4219 ix86_gen_tls_local_dynamic_base_64
4220 = gen_tls_local_dynamic_base_64_si;
4221 }
4222 }
4223 else
4224 ix86_gen_leave = gen_leave;
4225
4226 if (Pmode == DImode)
4227 {
4228 ix86_gen_add3 = gen_adddi3;
4229 ix86_gen_sub3 = gen_subdi3;
4230 ix86_gen_sub3_carry = gen_subdi3_carry;
4231 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4232 ix86_gen_andsp = gen_anddi3;
4233 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4234 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4235 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4236 ix86_gen_monitor = gen_sse3_monitor_di;
4237 }
4238 else
4239 {
4240 ix86_gen_add3 = gen_addsi3;
4241 ix86_gen_sub3 = gen_subsi3;
4242 ix86_gen_sub3_carry = gen_subsi3_carry;
4243 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4244 ix86_gen_andsp = gen_andsi3;
4245 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4246 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4247 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4248 ix86_gen_monitor = gen_sse3_monitor_si;
4249 }
4250
4251 #ifdef USE_IX86_CLD
4252 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4253 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4254 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4255 #endif
4256
4257 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4258 {
4259 if (opts->x_flag_fentry > 0)
4260 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4261 "with -fpic");
4262 opts->x_flag_fentry = 0;
4263 }
4264 else if (TARGET_SEH)
4265 {
4266 if (opts->x_flag_fentry == 0)
4267 sorry ("-mno-fentry isn%'t compatible with SEH");
4268 opts->x_flag_fentry = 1;
4269 }
4270 else if (opts->x_flag_fentry < 0)
4271 {
4272 #if defined(PROFILE_BEFORE_PROLOGUE)
4273 opts->x_flag_fentry = 1;
4274 #else
4275 opts->x_flag_fentry = 0;
4276 #endif
4277 }
4278
4279 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
4280 opts->x_target_flags |= MASK_VZEROUPPER;
4281 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4282 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4283 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4284 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4285 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4286 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4287 /* Enable 128-bit AVX instruction generation
4288 for the auto-vectorizer. */
4289 if (TARGET_AVX128_OPTIMAL
4290 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4291 opts->x_target_flags |= MASK_PREFER_AVX128;
4292
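/* Parse -mrecip=opt[,opt...]. Illustratively, -mrecip=all,!vec-sqrt turns
   on every reciprocal estimate except the vectorized square root; a
   leading '!' inverts the named mask, and "default" stands for all.  */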
4293 if (opts->x_ix86_recip_name)
4294 {
4295 char *p = ASTRDUP (opts->x_ix86_recip_name);
4296 char *q;
4297 unsigned int mask, i;
4298 bool invert;
4299
4300 while ((q = strtok (p, ",")) != NULL)
4301 {
4302 p = NULL;
4303 if (*q == '!')
4304 {
4305 invert = true;
4306 q++;
4307 }
4308 else
4309 invert = false;
4310
4311 if (!strcmp (q, "default"))
4312 mask = RECIP_MASK_ALL;
4313 else
4314 {
4315 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4316 if (!strcmp (q, recip_options[i].string))
4317 {
4318 mask = recip_options[i].mask;
4319 break;
4320 }
4321
4322 if (i == ARRAY_SIZE (recip_options))
4323 {
4324 error ("unknown option for -mrecip=%s", q);
4325 invert = false;
4326 mask = RECIP_MASK_NONE;
4327 }
4328 }
4329
4330 opts->x_recip_mask_explicit |= mask;
4331 if (invert)
4332 opts->x_recip_mask &= ~mask;
4333 else
4334 opts->x_recip_mask |= mask;
4335 }
4336 }
4337
4338 if (TARGET_RECIP_P (opts->x_target_flags))
4339 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4340 else if (opts_set->x_target_flags & MASK_RECIP)
4341 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4342
4343 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4344 for 64-bit Bionic. */
4345 if (TARGET_HAS_BIONIC
4346 && !(opts_set->x_target_flags
4347 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4348 opts->x_target_flags |= (TARGET_64BIT
4349 ? MASK_LONG_DOUBLE_128
4350 : MASK_LONG_DOUBLE_64);
4351
4352 /* Only one of them can be active. */
4353 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4354 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4355
4356 /* Save the initial options in case the user does function specific
4357 options. */
4358 if (main_args_p)
4359 target_option_default_node = target_option_current_node
4360 = build_target_option_node (opts);
4361
4362 /* Handle the stack protector. */
4363 if (!opts_set->x_ix86_stack_protector_guard)
4364 opts->x_ix86_stack_protector_guard
4365 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4366
4367 /* Handle -mmemcpy-strategy= and -mmemset-strategy=. */
4368 if (opts->x_ix86_tune_memcpy_strategy)
4369 {
4370 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4371 ix86_parse_stringop_strategy_string (str, false);
4372 free (str);
4373 }
4374
4375 if (opts->x_ix86_tune_memset_strategy)
4376 {
4377 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4378 ix86_parse_stringop_strategy_string (str, true);
4379 free (str);
4380 }
4381 }
4382
4383 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4384
4385 static void
4386 ix86_option_override (void)
4387 {
4388 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4389 struct register_pass_info insert_vzeroupper_info
4390 = { pass_insert_vzeroupper, "reload",
4391 1, PASS_POS_INSERT_AFTER
4392 };
4393
4394 ix86_option_override_internal (true, &global_options, &global_options_set);
4395
4396
4397 /* This needs to be done at start up. It's convenient to do it here. */
4398 register_pass (&insert_vzeroupper_info);
4399 }
4400
4401 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4402 static char *
4403 ix86_offload_options (void)
4404 {
4405 if (TARGET_LP64)
4406 return xstrdup ("-foffload-abi=lp64");
4407 return xstrdup ("-foffload-abi=ilp32");
4408 }
4409
4410 /* Update register usage after having seen the compiler flags. */
4411
4412 static void
4413 ix86_conditional_register_usage (void)
4414 {
4415 int i, c_mask;
4416
4417 /* For 32-bit targets, squash the REX registers. */
4418 if (! TARGET_64BIT)
4419 {
4420 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4421 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4422 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4423 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4424 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4425 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4426 }
4427
4428 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4429 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4430 : TARGET_64BIT ? (1 << 2)
4431 : (1 << 1));
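  /* Entries greater than 1 in CALL_USED_REGISTERS encode "call-used only
     under some ABIs" as a bitmask; the loop below keeps a register call-used
     only when its entry has the c_mask bit for the current ABI set.  For
     instance, an initializer value with only the (1 << 2) bit set ends up
     call-used for 64-bit SysV but call-saved for the 64-bit MS ABI and for
     32-bit code.  */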
4432
4433 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4434
4435 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4436 {
4437 /* Set/reset conditionally defined registers from
4438 CALL_USED_REGISTERS initializer. */
4439 if (call_used_regs[i] > 1)
4440 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4441
4442 /* Calculate registers of CLOBBERED_REGS register set
4443 as call used registers from GENERAL_REGS register set. */
4444 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4445 && call_used_regs[i])
4446 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4447 }
4448
4449 /* If MMX is disabled, squash the registers. */
4450 if (! TARGET_MMX)
4451 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4452 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4453 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4454
4455 /* If SSE is disabled, squash the registers. */
4456 if (! TARGET_SSE)
4457 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4458 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4459 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4460
4461 /* If the FPU is disabled, squash the registers. */
4462 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4463 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4464 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4465 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4466
4467 /* If AVX512F is disabled, squash the registers. */
4468 if (! TARGET_AVX512F)
4469 {
4470 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4471 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4472
4473 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4474 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4475 }
4476
4477 /* If MPX is disabled, squash the registers. */
4478 if (! TARGET_MPX)
4479 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4480 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4481 }
4482
4483 \f
4484 /* Save the current options */
4485
4486 static void
4487 ix86_function_specific_save (struct cl_target_option *ptr,
4488 struct gcc_options *opts)
4489 {
4490 ptr->arch = ix86_arch;
4491 ptr->schedule = ix86_schedule;
4492 ptr->prefetch_sse = x86_prefetch_sse;
4493 ptr->tune = ix86_tune;
4494 ptr->branch_cost = ix86_branch_cost;
4495 ptr->tune_defaulted = ix86_tune_defaulted;
4496 ptr->arch_specified = ix86_arch_specified;
4497 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4498 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4499 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4500 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4501 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4502 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4503 ptr->x_ix86_abi = opts->x_ix86_abi;
4504 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4505 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4506 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4507 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4508 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4509 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4510 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4511 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4512 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4513 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4514 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4515 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4516 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4517 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4518 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4519 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4520 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4521 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4522 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4523 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4524
4525 /* The fields are char but the variables are not; make sure the
4526 values fit in the fields. */
4527 gcc_assert (ptr->arch == ix86_arch);
4528 gcc_assert (ptr->schedule == ix86_schedule);
4529 gcc_assert (ptr->tune == ix86_tune);
4530 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4531 }
4532
4533 /* Restore the current options */
4534
4535 static void
4536 ix86_function_specific_restore (struct gcc_options *opts,
4537 struct cl_target_option *ptr)
4538 {
4539 enum processor_type old_tune = ix86_tune;
4540 enum processor_type old_arch = ix86_arch;
4541 unsigned int ix86_arch_mask;
4542 int i;
4543
4544 /* We don't change -fPIC. */
4545 opts->x_flag_pic = flag_pic;
4546
4547 ix86_arch = (enum processor_type) ptr->arch;
4548 ix86_schedule = (enum attr_cpu) ptr->schedule;
4549 ix86_tune = (enum processor_type) ptr->tune;
4550 x86_prefetch_sse = ptr->prefetch_sse;
4551 opts->x_ix86_branch_cost = ptr->branch_cost;
4552 ix86_tune_defaulted = ptr->tune_defaulted;
4553 ix86_arch_specified = ptr->arch_specified;
4554 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4555 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4556 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4557 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4558 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4559 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4560 opts->x_ix86_abi = ptr->x_ix86_abi;
4561 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4562 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4563 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4564 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4565 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4566 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4567 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4568 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4569 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4570 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4571 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4572 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4573 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4574 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4575 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4576 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4577 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4578 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4579 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4580 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4581 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4582 /* TODO: ix86_cost should be chosen at instruction or function granularity
4583 so for cold code we use size_cost even in !optimize_size compilation. */
4584 if (opts->x_optimize_size)
4585 ix86_cost = &ix86_size_cost;
4586 else
4587 ix86_cost = ix86_tune_cost;
4588
4589 /* Recreate the arch feature tests if the arch changed */
4590 if (old_arch != ix86_arch)
4591 {
4592 ix86_arch_mask = 1u << ix86_arch;
4593 for (i = 0; i < X86_ARCH_LAST; ++i)
4594 ix86_arch_features[i]
4595 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4596 }
4597
4598 /* Recreate the tune optimization tests */
4599 if (old_tune != ix86_tune)
4600 set_ix86_tune_features (ix86_tune, false);
4601 }
4602
4603 /* Adjust target options after streaming them in. This is mainly about
4604 reconciling them with global options. */
4605
4606 static void
4607 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
4608 {
4609 /* flag_pic is a global option, but ix86_cmodel is target saved option
4610 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
4611 for PIC, or error out. */
4612 if (flag_pic)
4613 switch (ptr->x_ix86_cmodel)
4614 {
4615 case CM_SMALL:
4616 ptr->x_ix86_cmodel = CM_SMALL_PIC;
4617 break;
4618
4619 case CM_MEDIUM:
4620 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
4621 break;
4622
4623 case CM_LARGE:
4624 ptr->x_ix86_cmodel = CM_LARGE_PIC;
4625 break;
4626
4627 case CM_KERNEL:
4628 error ("code model %s does not support PIC mode", "kernel");
4629 break;
4630
4631 default:
4632 break;
4633 }
4634 else
4635 switch (ptr->x_ix86_cmodel)
4636 {
4637 case CM_SMALL_PIC:
4638 ptr->x_ix86_cmodel = CM_SMALL;
4639 break;
4640
4641 case CM_MEDIUM_PIC:
4642 ptr->x_ix86_cmodel = CM_MEDIUM;
4643 break;
4644
4645 case CM_LARGE_PIC:
4646 ptr->x_ix86_cmodel = CM_LARGE;
4647 break;
4648
4649 default:
4650 break;
4651 }
4652 }
4653
4654 /* Print the current options */
4655
4656 static void
4657 ix86_function_specific_print (FILE *file, int indent,
4658 struct cl_target_option *ptr)
4659 {
4660 char *target_string
4661 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4662 NULL, NULL, ptr->x_ix86_fpmath, false);
4663
4664 gcc_assert (ptr->arch < PROCESSOR_max);
4665 fprintf (file, "%*sarch = %d (%s)\n",
4666 indent, "",
4667 ptr->arch, processor_target_table[ptr->arch].name);
4668
4669 gcc_assert (ptr->tune < PROCESSOR_max);
4670 fprintf (file, "%*stune = %d (%s)\n",
4671 indent, "",
4672 ptr->tune, processor_target_table[ptr->tune].name);
4673
4674 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4675
4676 if (target_string)
4677 {
4678 fprintf (file, "%*s%s\n", indent, "", target_string);
4679 free (target_string);
4680 }
4681 }
4682
4683 \f
4684 /* Inner function to process the attribute((target(...))): take an argument
4685 and set the current options from it. If we have a list, recursively go
4686 over the list. */
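/* For example, a declaration such as

     int foo (void) __attribute__ ((target ("avx2,no-sse4a,arch=haswell")));

   arrives here as a STRING_CST (or a TREE_LIST of them); each comma-separated
   token is matched against the attrs[] table below, with a "no-" prefix
   negating the option.  ("arch=haswell" is only an illustration; the set of
   accepted arch= names is defined elsewhere.)  */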
4687
4688 static bool
4689 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4690 struct gcc_options *opts,
4691 struct gcc_options *opts_set,
4692 struct gcc_options *enum_opts_set)
4693 {
4694 char *next_optstr;
4695 bool ret = true;
4696
4697 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4698 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4699 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4700 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4701 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4702
4703 enum ix86_opt_type
4704 {
4705 ix86_opt_unknown,
4706 ix86_opt_yes,
4707 ix86_opt_no,
4708 ix86_opt_str,
4709 ix86_opt_enum,
4710 ix86_opt_isa
4711 };
4712
4713 static const struct
4714 {
4715 const char *string;
4716 size_t len;
4717 enum ix86_opt_type type;
4718 int opt;
4719 int mask;
4720 } attrs[] = {
4721 /* isa options */
4722 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4723 IX86_ATTR_ISA ("abm", OPT_mabm),
4724 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4725 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4726 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4727 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4728 IX86_ATTR_ISA ("aes", OPT_maes),
4729 IX86_ATTR_ISA ("sha", OPT_msha),
4730 IX86_ATTR_ISA ("avx", OPT_mavx),
4731 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4732 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4733 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4734 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4735 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4736 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4737 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4738 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4739 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4740 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4741 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4742 IX86_ATTR_ISA ("sse", OPT_msse),
4743 IX86_ATTR_ISA ("sse2", OPT_msse2),
4744 IX86_ATTR_ISA ("sse3", OPT_msse3),
4745 IX86_ATTR_ISA ("sse4", OPT_msse4),
4746 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4747 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4748 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4749 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4750 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4751 IX86_ATTR_ISA ("fma", OPT_mfma),
4752 IX86_ATTR_ISA ("xop", OPT_mxop),
4753 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4754 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4755 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4756 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4757 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4758 IX86_ATTR_ISA ("hle", OPT_mhle),
4759 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4760 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4761 IX86_ATTR_ISA ("adx", OPT_madx),
4762 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4763 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4764 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4765 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4766 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4767 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4768 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4769 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
4770 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
4771 IX86_ATTR_ISA ("clwb", OPT_mclwb),
4772 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
4773
4774 /* enum options */
4775 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4776
4777 /* string options */
4778 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4779 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4780
4781 /* flag options */
4782 IX86_ATTR_YES ("cld",
4783 OPT_mcld,
4784 MASK_CLD),
4785
4786 IX86_ATTR_NO ("fancy-math-387",
4787 OPT_mfancy_math_387,
4788 MASK_NO_FANCY_MATH_387),
4789
4790 IX86_ATTR_YES ("ieee-fp",
4791 OPT_mieee_fp,
4792 MASK_IEEE_FP),
4793
4794 IX86_ATTR_YES ("inline-all-stringops",
4795 OPT_minline_all_stringops,
4796 MASK_INLINE_ALL_STRINGOPS),
4797
4798 IX86_ATTR_YES ("inline-stringops-dynamically",
4799 OPT_minline_stringops_dynamically,
4800 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4801
4802 IX86_ATTR_NO ("align-stringops",
4803 OPT_mno_align_stringops,
4804 MASK_NO_ALIGN_STRINGOPS),
4805
4806 IX86_ATTR_YES ("recip",
4807 OPT_mrecip,
4808 MASK_RECIP),
4809
4810 };
4811
4812 /* If this is a list, recurse to get the options. */
4813 if (TREE_CODE (args) == TREE_LIST)
4814 {
4815 bool ret = true;
4816
4817 for (; args; args = TREE_CHAIN (args))
4818 if (TREE_VALUE (args)
4819 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4820 p_strings, opts, opts_set,
4821 enum_opts_set))
4822 ret = false;
4823
4824 return ret;
4825 }
4826
4827 else if (TREE_CODE (args) != STRING_CST)
4828 {
4829 error ("attribute %<target%> argument not a string");
4830 return false;
4831 }
4832
4833 /* Handle multiple arguments separated by commas. */
4834 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4835
4836 while (next_optstr && *next_optstr != '\0')
4837 {
4838 char *p = next_optstr;
4839 char *orig_p = p;
4840 char *comma = strchr (next_optstr, ',');
4841 const char *opt_string;
4842 size_t len, opt_len;
4843 int opt;
4844 bool opt_set_p;
4845 char ch;
4846 unsigned i;
4847 enum ix86_opt_type type = ix86_opt_unknown;
4848 int mask = 0;
4849
4850 if (comma)
4851 {
4852 *comma = '\0';
4853 len = comma - next_optstr;
4854 next_optstr = comma + 1;
4855 }
4856 else
4857 {
4858 len = strlen (p);
4859 next_optstr = NULL;
4860 }
4861
4862 /* Recognize no-xxx. */
4863 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4864 {
4865 opt_set_p = false;
4866 p += 3;
4867 len -= 3;
4868 }
4869 else
4870 opt_set_p = true;
4871
4872 /* Find the option. */
4873 ch = *p;
4874 opt = N_OPTS;
4875 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4876 {
4877 type = attrs[i].type;
4878 opt_len = attrs[i].len;
4879 if (ch == attrs[i].string[0]
4880 && ((type != ix86_opt_str && type != ix86_opt_enum)
4881 ? len == opt_len
4882 : len > opt_len)
4883 && memcmp (p, attrs[i].string, opt_len) == 0)
4884 {
4885 opt = attrs[i].opt;
4886 mask = attrs[i].mask;
4887 opt_string = attrs[i].string;
4888 break;
4889 }
4890 }
4891
4892 /* Process the option. */
4893 if (opt == N_OPTS)
4894 {
4895 error ("attribute(target(\"%s\")) is unknown", orig_p);
4896 ret = false;
4897 }
4898
4899 else if (type == ix86_opt_isa)
4900 {
4901 struct cl_decoded_option decoded;
4902
4903 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4904 ix86_handle_option (opts, opts_set,
4905 &decoded, input_location);
4906 }
4907
4908 else if (type == ix86_opt_yes || type == ix86_opt_no)
4909 {
4910 if (type == ix86_opt_no)
4911 opt_set_p = !opt_set_p;
4912
4913 if (opt_set_p)
4914 opts->x_target_flags |= mask;
4915 else
4916 opts->x_target_flags &= ~mask;
4917 }
4918
4919 else if (type == ix86_opt_str)
4920 {
4921 if (p_strings[opt])
4922 {
4923 error ("option(\"%s\") was already specified", opt_string);
4924 ret = false;
4925 }
4926 else
4927 p_strings[opt] = xstrdup (p + opt_len);
4928 }
4929
4930 else if (type == ix86_opt_enum)
4931 {
4932 bool arg_ok;
4933 int value;
4934
4935 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4936 if (arg_ok)
4937 set_option (opts, enum_opts_set, opt, value,
4938 p + opt_len, DK_UNSPECIFIED, input_location,
4939 global_dc);
4940 else
4941 {
4942 error ("attribute(target(\"%s\")) is unknown", orig_p);
4943 ret = false;
4944 }
4945 }
4946
4947 else
4948 gcc_unreachable ();
4949 }
4950
4951 return ret;
4952 }
4953
4954 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4955
4956 tree
4957 ix86_valid_target_attribute_tree (tree args,
4958 struct gcc_options *opts,
4959 struct gcc_options *opts_set)
4960 {
4961 const char *orig_arch_string = opts->x_ix86_arch_string;
4962 const char *orig_tune_string = opts->x_ix86_tune_string;
4963 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4964 int orig_tune_defaulted = ix86_tune_defaulted;
4965 int orig_arch_specified = ix86_arch_specified;
4966 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4967 tree t = NULL_TREE;
4968 int i;
4969 struct cl_target_option *def
4970 = TREE_TARGET_OPTION (target_option_default_node);
4971 struct gcc_options enum_opts_set;
4972
4973 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4974
4975 /* Process each of the options on the chain. */
4976 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4977 opts_set, &enum_opts_set))
4978 return error_mark_node;
4979
4980 /* If the changed options are different from the default, rerun
4981 ix86_option_override_internal, and then save the options away.
4982 The string options are attribute options, and will be undone
4983 when we copy the save structure. */
4984 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4985 || opts->x_target_flags != def->x_target_flags
4986 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4987 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4988 || enum_opts_set.x_ix86_fpmath)
4989 {
4990 /* If we are using the default tune= or arch=, undo the string assigned,
4991 and use the default. */
4992 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4993 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4994 else if (!orig_arch_specified)
4995 opts->x_ix86_arch_string = NULL;
4996
4997 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4998 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4999 else if (orig_tune_defaulted)
5000 opts->x_ix86_tune_string = NULL;
5001
5002 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
5003 if (enum_opts_set.x_ix86_fpmath)
5004 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5005 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
5006 && TARGET_SSE_P (opts->x_ix86_isa_flags))
5007 {
5008 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
5009 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5010 }
5011
5012 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
5013 ix86_option_override_internal (false, opts, opts_set);
5014
5015 /* Add any builtin functions with the new isa if any. */
5016 ix86_add_new_builtins (opts->x_ix86_isa_flags);
5017
5018 /* Save the current options unless we are validating options for
5019 #pragma. */
5020 t = build_target_option_node (opts);
5021
5022 opts->x_ix86_arch_string = orig_arch_string;
5023 opts->x_ix86_tune_string = orig_tune_string;
5024 opts_set->x_ix86_fpmath = orig_fpmath_set;
5025
5026 /* Free up memory allocated to hold the strings */
5027 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5028 free (option_strings[i]);
5029 }
5030
5031 return t;
5032 }
5033
5034 /* Hook to validate attribute((target("string"))). */
5035
5036 static bool
5037 ix86_valid_target_attribute_p (tree fndecl,
5038 tree ARG_UNUSED (name),
5039 tree args,
5040 int ARG_UNUSED (flags))
5041 {
5042 struct gcc_options func_options;
5043 tree new_target, new_optimize;
5044 bool ret = true;
5045
5046 /* attribute((target("default"))) does nothing, beyond
5047 affecting multi-versioning. */
5048 if (TREE_VALUE (args)
5049 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5050 && TREE_CHAIN (args) == NULL_TREE
5051 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5052 return true;
5053
5054 tree old_optimize = build_optimization_node (&global_options);
5055
5056 /* Get the optimization options of the current function. */
5057 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
5058
5059 if (!func_optimize)
5060 func_optimize = old_optimize;
5061
5062 /* Init func_options. */
5063 memset (&func_options, 0, sizeof (func_options));
5064 init_options_struct (&func_options, NULL);
5065 lang_hooks.init_options_struct (&func_options);
5066
5067 cl_optimization_restore (&func_options,
5068 TREE_OPTIMIZATION (func_optimize));
5069
5070 /* Initialize func_options to the default before its target options can
5071 be set. */
5072 cl_target_option_restore (&func_options,
5073 TREE_TARGET_OPTION (target_option_default_node));
5074
5075 new_target = ix86_valid_target_attribute_tree (args, &func_options,
5076 &global_options_set);
5077
5078 new_optimize = build_optimization_node (&func_options);
5079
5080 if (new_target == error_mark_node)
5081 ret = false;
5082
5083 else if (fndecl && new_target)
5084 {
5085 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5086
5087 if (old_optimize != new_optimize)
5088 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5089 }
5090
5091 return ret;
5092 }
5093
5094 \f
5095 /* Hook to determine if one function can safely inline another. */
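/* In practice this means, for example, that a function compiled with
   __attribute__ ((target ("avx2"))) will not be inlined into a caller built
   for plain SSE2, because the callee's ISA flags would not be a subset of
   the caller's.  */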
5096
5097 static bool
5098 ix86_can_inline_p (tree caller, tree callee)
5099 {
5100 bool ret = false;
5101 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5102 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5103
5104 /* If callee has no option attributes, then it is ok to inline. */
5105 if (!callee_tree)
5106 ret = true;
5107
5108 /* If caller has no option attributes, but callee does, then it is not ok
5109 to inline. */
5110 else if (!caller_tree)
5111 ret = false;
5112
5113 else
5114 {
5115 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5116 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5117
5118 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
5119 function can inline an SSE2 function but an SSE2 function can't inline
5120 an SSE4 function. */
5121 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5122 != callee_opts->x_ix86_isa_flags)
5123 ret = false;
5124
5125 /* See if we have the same non-isa options. */
5126 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5127 ret = false;
5128
5129 /* See if arch, tune, etc. are the same. */
5130 else if (caller_opts->arch != callee_opts->arch)
5131 ret = false;
5132
5133 else if (caller_opts->tune != callee_opts->tune)
5134 ret = false;
5135
5136 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5137 ret = false;
5138
5139 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5140 ret = false;
5141
5142 else
5143 ret = true;
5144 }
5145
5146 return ret;
5147 }
5148
5149 \f
5150 /* Remember the last target of ix86_set_current_function. */
5151 static GTY(()) tree ix86_previous_fndecl;
5152
5153 /* Set target globals to the default (or current #pragma GCC target
5154 if active). Invalidate the ix86_previous_fndecl cache. */
5155
5156 void
5157 ix86_reset_previous_fndecl (void)
5158 {
5159 tree new_tree = target_option_current_node;
5160 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5161 if (TREE_TARGET_GLOBALS (new_tree))
5162 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5163 else if (new_tree == target_option_default_node)
5164 restore_target_globals (&default_target_globals);
5165 else
5166 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5167 ix86_previous_fndecl = NULL_TREE;
5168 }
5169
5170 /* Establish appropriate back-end context for processing the function
5171 FNDECL. The argument might be NULL to indicate processing at top
5172 level, outside of any function scope. */
5173 static void
5174 ix86_set_current_function (tree fndecl)
5175 {
5176 /* Only change the context if the function changes. This hook is called
5177 several times in the course of compiling a function, and we don't want to
5178 slow things down too much or call target_reinit when it isn't safe. */
5179 if (fndecl == ix86_previous_fndecl)
5180 return;
5181
5182 tree old_tree;
5183 if (ix86_previous_fndecl == NULL_TREE)
5184 old_tree = target_option_current_node;
5185 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5186 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5187 else
5188 old_tree = target_option_default_node;
5189
5190 if (fndecl == NULL_TREE)
5191 {
5192 if (old_tree != target_option_current_node)
5193 ix86_reset_previous_fndecl ();
5194 return;
5195 }
5196
5197 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5198 if (new_tree == NULL_TREE)
5199 new_tree = target_option_default_node;
5200
5201 if (old_tree != new_tree)
5202 {
5203 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5204 if (TREE_TARGET_GLOBALS (new_tree))
5205 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5206 else if (new_tree == target_option_default_node)
5207 restore_target_globals (&default_target_globals);
5208 else
5209 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5210 }
5211 ix86_previous_fndecl = fndecl;
5212 }
5213
5214 \f
5215 /* Return true if this goes in large data/bss. */
5216
5217 static bool
5218 ix86_in_large_data_p (tree exp)
5219 {
5220 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5221 return false;
5222
5223 /* Functions are never large data. */
5224 if (TREE_CODE (exp) == FUNCTION_DECL)
5225 return false;
5226
5227 /* Automatic variables are never large data. */
5228 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5229 return false;
5230
5231 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5232 {
5233 const char *section = DECL_SECTION_NAME (exp);
5234 if (strcmp (section, ".ldata") == 0
5235 || strcmp (section, ".lbss") == 0)
5236 return true;
5237 return false;
5238 }
5239 else
5240 {
5241 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5242
5243 /* If this is an incomplete type with size 0, then we can't put it
5244 in data because it might be too big when completed. Also,
5245 int_size_in_bytes returns -1 if size can vary or is larger than
5246 an integer in which case also it is safer to assume that it goes in
5247 large data. */
5248 if (size <= 0 || size > ix86_section_threshold)
5249 return true;
5250 }
5251
5252 return false;
5253 }
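/* So with -mcmodel=medium, a variable counts as "large" once its size
   exceeds ix86_section_threshold (controlled by -mlarge-data-threshold) or
   when it was explicitly placed in .ldata/.lbss via a section attribute;
   everything else stays in the normal small-model sections.  */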
5254
5255 /* Switch to the appropriate section for output of DECL.
5256 DECL is either a `VAR_DECL' node or a constant of some sort.
5257 RELOC indicates whether forming the initial value of DECL requires
5258 link-time relocations. */
5259
5260 ATTRIBUTE_UNUSED static section *
5261 x86_64_elf_select_section (tree decl, int reloc,
5262 unsigned HOST_WIDE_INT align)
5263 {
5264 if (ix86_in_large_data_p (decl))
5265 {
5266 const char *sname = NULL;
5267 unsigned int flags = SECTION_WRITE;
5268 switch (categorize_decl_for_section (decl, reloc))
5269 {
5270 case SECCAT_DATA:
5271 sname = ".ldata";
5272 break;
5273 case SECCAT_DATA_REL:
5274 sname = ".ldata.rel";
5275 break;
5276 case SECCAT_DATA_REL_LOCAL:
5277 sname = ".ldata.rel.local";
5278 break;
5279 case SECCAT_DATA_REL_RO:
5280 sname = ".ldata.rel.ro";
5281 break;
5282 case SECCAT_DATA_REL_RO_LOCAL:
5283 sname = ".ldata.rel.ro.local";
5284 break;
5285 case SECCAT_BSS:
5286 sname = ".lbss";
5287 flags |= SECTION_BSS;
5288 break;
5289 case SECCAT_RODATA:
5290 case SECCAT_RODATA_MERGE_STR:
5291 case SECCAT_RODATA_MERGE_STR_INIT:
5292 case SECCAT_RODATA_MERGE_CONST:
5293 sname = ".lrodata";
5294 flags = 0;
5295 break;
5296 case SECCAT_SRODATA:
5297 case SECCAT_SDATA:
5298 case SECCAT_SBSS:
5299 gcc_unreachable ();
5300 case SECCAT_TEXT:
5301 case SECCAT_TDATA:
5302 case SECCAT_TBSS:
5303 /* We don't split these for the medium model. Place them into
5304 default sections and hope for the best. */
5305 break;
5306 }
5307 if (sname)
5308 {
5309 /* We might get called with string constants, but get_named_section
5310 doesn't like them as they are not DECLs. Also, we need to set
5311 flags in that case. */
5312 if (!DECL_P (decl))
5313 return get_section (sname, flags, NULL);
5314 return get_named_section (decl, sname, reloc);
5315 }
5316 }
5317 return default_elf_select_section (decl, reloc, align);
5318 }
5319
5320 /* Select a set of attributes for section NAME based on the properties
5321 of DECL and whether or not RELOC indicates that DECL's initializer
5322 might contain runtime relocations. */
5323
5324 static unsigned int ATTRIBUTE_UNUSED
5325 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5326 {
5327 unsigned int flags = default_section_type_flags (decl, name, reloc);
5328
5329 if (decl == NULL_TREE
5330 && (strcmp (name, ".ldata.rel.ro") == 0
5331 || strcmp (name, ".ldata.rel.ro.local") == 0))
5332 flags |= SECTION_RELRO;
5333
5334 if (strcmp (name, ".lbss") == 0
5335 || strncmp (name, ".lbss.", 6) == 0
5336 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5337 flags |= SECTION_BSS;
5338
5339 return flags;
5340 }
5341
5342 /* Build up a unique section name, expressed as a
5343 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5344 RELOC indicates whether the initial value of EXP requires
5345 link-time relocations. */
5346
5347 static void ATTRIBUTE_UNUSED
5348 x86_64_elf_unique_section (tree decl, int reloc)
5349 {
5350 if (ix86_in_large_data_p (decl))
5351 {
5352 const char *prefix = NULL;
5353 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5354 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5355
5356 switch (categorize_decl_for_section (decl, reloc))
5357 {
5358 case SECCAT_DATA:
5359 case SECCAT_DATA_REL:
5360 case SECCAT_DATA_REL_LOCAL:
5361 case SECCAT_DATA_REL_RO:
5362 case SECCAT_DATA_REL_RO_LOCAL:
5363 prefix = one_only ? ".ld" : ".ldata";
5364 break;
5365 case SECCAT_BSS:
5366 prefix = one_only ? ".lb" : ".lbss";
5367 break;
5368 case SECCAT_RODATA:
5369 case SECCAT_RODATA_MERGE_STR:
5370 case SECCAT_RODATA_MERGE_STR_INIT:
5371 case SECCAT_RODATA_MERGE_CONST:
5372 prefix = one_only ? ".lr" : ".lrodata";
5373 break;
5374 case SECCAT_SRODATA:
5375 case SECCAT_SDATA:
5376 case SECCAT_SBSS:
5377 gcc_unreachable ();
5378 case SECCAT_TEXT:
5379 case SECCAT_TDATA:
5380 case SECCAT_TBSS:
5381 /* We don't split these for the medium model. Place them into
5382 default sections and hope for the best. */
5383 break;
5384 }
5385 if (prefix)
5386 {
5387 const char *name, *linkonce;
5388 char *string;
5389
5390 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5391 name = targetm.strip_name_encoding (name);
5392
5393 /* If we're using one_only, then there needs to be a .gnu.linkonce
5394 prefix to the section name. */
5395 linkonce = one_only ? ".gnu.linkonce" : "";
5396
5397 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5398
5399 set_decl_section_name (decl, string);
5400 return;
5401 }
5402 }
5403 default_unique_section (decl, reloc);
5404 }
5405
5406 #ifdef COMMON_ASM_OP
5407 /* This says how to output assembler code to declare an
5408 uninitialized external linkage data object.
5409
5410 For medium-model x86-64 we need to use the .largecomm directive for
5411 large objects. */
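/* For instance, for a hypothetical 1 MiB zero-initialized global "buf" with
   32-byte alignment under -mcmodel=medium this emits roughly

       .largecomm  buf,1048576,32

   whereas a small object goes through the ordinary COMMON_ASM_OP path.  */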
5412 void
5413 x86_elf_aligned_common (FILE *file,
5414 const char *name, unsigned HOST_WIDE_INT size,
5415 int align)
5416 {
5417 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5418 && size > (unsigned int)ix86_section_threshold)
5419 fputs ("\t.largecomm\t", file);
5420 else
5421 fputs (COMMON_ASM_OP, file);
5422 assemble_name (file, name);
5423 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5424 size, align / BITS_PER_UNIT);
5425 }
5426 #endif
5427
5428 /* Utility function for targets to use in implementing
5429 ASM_OUTPUT_ALIGNED_BSS. */
5430
5431 void
5432 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5433 unsigned HOST_WIDE_INT size, int align)
5434 {
5435 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5436 && size > (unsigned int)ix86_section_threshold)
5437 switch_to_section (get_named_section (decl, ".lbss", 0));
5438 else
5439 switch_to_section (bss_section);
5440 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5441 #ifdef ASM_DECLARE_OBJECT_NAME
5442 last_assemble_variable_decl = decl;
5443 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5444 #else
5445 /* The standard thing is just to output a label for the object. */
5446 ASM_OUTPUT_LABEL (file, name);
5447 #endif /* ASM_DECLARE_OBJECT_NAME */
5448 ASM_OUTPUT_SKIP (file, size ? size : 1);
5449 }
5450 \f
5451 /* Decide whether we must probe the stack before any space allocation
5452 on this target. It's essentially TARGET_STACK_PROBE except when
5453 -fstack-check causes the stack to be already probed differently. */
5454
5455 bool
5456 ix86_target_stack_probe (void)
5457 {
5458 /* Do not probe the stack twice if static stack checking is enabled. */
5459 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5460 return false;
5461
5462 return TARGET_STACK_PROBE;
5463 }
5464 \f
5465 /* Decide whether we can make a sibling call to a function. DECL is the
5466 declaration of the function being targeted by the call and EXP is the
5467 CALL_EXPR representing the call. */
5468
5469 static bool
5470 ix86_function_ok_for_sibcall (tree decl, tree exp)
5471 {
5472 tree type, decl_or_type;
5473 rtx a, b;
5474
5475 /* If we are generating position-independent code, we cannot sibcall
5476 optimize direct calls to global functions, as the PLT requires
5477 %ebx be live. (Darwin does not have a PLT.) */
5478 if (!TARGET_MACHO
5479 && !TARGET_64BIT
5480 && flag_pic
5481 && decl && !targetm.binds_local_p (decl))
5482 return false;
5483
5484 /* If we need to align the outgoing stack, then sibcalling would
5485 unalign the stack, which may break the called function. */
5486 if (ix86_minimum_incoming_stack_boundary (true)
5487 < PREFERRED_STACK_BOUNDARY)
5488 return false;
5489
5490 if (decl)
5491 {
5492 decl_or_type = decl;
5493 type = TREE_TYPE (decl);
5494 }
5495 else
5496 {
5497 /* We're looking at the CALL_EXPR, we need the type of the function. */
5498 type = CALL_EXPR_FN (exp); /* pointer expression */
5499 type = TREE_TYPE (type); /* pointer type */
5500 type = TREE_TYPE (type); /* function type */
5501 decl_or_type = type;
5502 }
5503
5504 /* Check that the return value locations are the same. Like
5505 if we are returning floats on the 80387 register stack, we cannot
5506 make a sibcall from a function that doesn't return a float to a
5507 function that does or, conversely, from a function that does return
5508 a float to a function that doesn't; the necessary stack adjustment
5509 would not be executed. This is also the place we notice
5510 differences in the return value ABI. Note that it is ok for one
5511 of the functions to have void return type as long as the return
5512 value of the other is passed in a register. */
5513 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5514 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5515 cfun->decl, false);
5516 if (STACK_REG_P (a) || STACK_REG_P (b))
5517 {
5518 if (!rtx_equal_p (a, b))
5519 return false;
5520 }
5521 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5522 ;
5523 else if (!rtx_equal_p (a, b))
5524 return false;
5525
5526 if (TARGET_64BIT)
5527 {
5528 /* The SYSV ABI has more call-clobbered registers;
5529 disallow sibcalls from MS to SYSV. */
5530 if (cfun->machine->call_abi == MS_ABI
5531 && ix86_function_type_abi (type) == SYSV_ABI)
5532 return false;
5533 }
5534 else
5535 {
5536 /* If this call is indirect, we'll need to be able to use a
5537 call-clobbered register for the address of the target function.
5538 Make sure that all such registers are not used for passing
5539 parameters. Note that DLLIMPORT functions are indirect. */
5540 if (!decl
5541 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5542 {
5543 if (ix86_function_regparm (type, NULL) >= 3)
5544 {
5545 /* ??? Need to count the actual number of registers to be used,
5546 not the possible number of registers. Fix later. */
5547 return false;
5548 }
5549 }
5550 }
5551
5552 /* Otherwise okay. That also includes certain types of indirect calls. */
5553 return true;
5554 }
5555
5556 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5557 and "sseregparm" calling convention attributes;
5558 arguments as in struct attribute_spec.handler. */
5559
5560 static tree
5561 ix86_handle_cconv_attribute (tree *node, tree name,
5562 tree args,
5563 int,
5564 bool *no_add_attrs)
5565 {
5566 if (TREE_CODE (*node) != FUNCTION_TYPE
5567 && TREE_CODE (*node) != METHOD_TYPE
5568 && TREE_CODE (*node) != FIELD_DECL
5569 && TREE_CODE (*node) != TYPE_DECL)
5570 {
5571 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5572 name);
5573 *no_add_attrs = true;
5574 return NULL_TREE;
5575 }
5576
5577 /* Can combine regparm with all attributes but fastcall and thiscall. */
5578 if (is_attribute_p ("regparm", name))
5579 {
5580 tree cst;
5581
5582 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5583 {
5584 error ("fastcall and regparm attributes are not compatible");
5585 }
5586
5587 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5588 {
5589 error ("regparam and thiscall attributes are not compatible");
5590 }
5591
5592 cst = TREE_VALUE (args);
5593 if (TREE_CODE (cst) != INTEGER_CST)
5594 {
5595 warning (OPT_Wattributes,
5596 "%qE attribute requires an integer constant argument",
5597 name);
5598 *no_add_attrs = true;
5599 }
5600 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5601 {
5602 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5603 name, REGPARM_MAX);
5604 *no_add_attrs = true;
5605 }
5606
5607 return NULL_TREE;
5608 }
5609
5610 if (TARGET_64BIT)
5611 {
5612 /* Do not warn when emulating the MS ABI. */
5613 if ((TREE_CODE (*node) != FUNCTION_TYPE
5614 && TREE_CODE (*node) != METHOD_TYPE)
5615 || ix86_function_type_abi (*node) != MS_ABI)
5616 warning (OPT_Wattributes, "%qE attribute ignored",
5617 name);
5618 *no_add_attrs = true;
5619 return NULL_TREE;
5620 }
5621
5622 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5623 if (is_attribute_p ("fastcall", name))
5624 {
5625 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5626 {
5627 error ("fastcall and cdecl attributes are not compatible");
5628 }
5629 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5630 {
5631 error ("fastcall and stdcall attributes are not compatible");
5632 }
5633 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5634 {
5635 error ("fastcall and regparm attributes are not compatible");
5636 }
5637 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5638 {
5639 error ("fastcall and thiscall attributes are not compatible");
5640 }
5641 }
5642
5643 /* Can combine stdcall with fastcall (redundant), regparm and
5644 sseregparm. */
5645 else if (is_attribute_p ("stdcall", name))
5646 {
5647 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5648 {
5649 error ("stdcall and cdecl attributes are not compatible");
5650 }
5651 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5652 {
5653 error ("stdcall and fastcall attributes are not compatible");
5654 }
5655 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5656 {
5657 error ("stdcall and thiscall attributes are not compatible");
5658 }
5659 }
5660
5661 /* Can combine cdecl with regparm and sseregparm. */
5662 else if (is_attribute_p ("cdecl", name))
5663 {
5664 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5665 {
5666 error ("stdcall and cdecl attributes are not compatible");
5667 }
5668 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5669 {
5670 error ("fastcall and cdecl attributes are not compatible");
5671 }
5672 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5673 {
5674 error ("cdecl and thiscall attributes are not compatible");
5675 }
5676 }
5677 else if (is_attribute_p ("thiscall", name))
5678 {
5679 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5680 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5681 name);
5682 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5683 {
5684 error ("stdcall and thiscall attributes are not compatible");
5685 }
5686 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5687 {
5688 error ("fastcall and thiscall attributes are not compatible");
5689 }
5690 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5691 {
5692 error ("cdecl and thiscall attributes are not compatible");
5693 }
5694 }
5695
5696 /* Can combine sseregparm with all attributes. */
5697
5698 return NULL_TREE;
5699 }
5700
5701 /* The transactional memory builtins are implicitly regparm or fastcall
5702 depending on the ABI. Override the generic do-nothing attribute that
5703 these builtins were declared with, and replace it with one of the two
5704 attributes that we expect elsewhere. */
5705
5706 static tree
5707 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5708 int flags, bool *no_add_attrs)
5709 {
5710 tree alt;
5711
5712 /* In no case do we want to add the placeholder attribute. */
5713 *no_add_attrs = true;
5714
5715 /* The 64-bit ABI is unchanged for transactional memory. */
5716 if (TARGET_64BIT)
5717 return NULL_TREE;
5718
5719 /* ??? Is there a better way to validate 32-bit windows? We have
5720 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5721 if (CHECK_STACK_LIMIT > 0)
5722 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5723 else
5724 {
5725 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5726 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5727 }
5728 decl_attributes (node, alt, flags);
5729
5730 return NULL_TREE;
5731 }
5732
5733 /* This function determines from TYPE the calling-convention. */
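/* For instance, on 32-bit targets a declaration such as

     void __attribute__ ((stdcall)) f (int);

   yields IX86_CALLCVT_STDCALL here, while a plain prototype falls through to
   the default: IX86_CALLCVT_CDECL, or IX86_CALLCVT_STDCALL when -mrtd is in
   effect and the function is not variadic.  */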
5734
5735 unsigned int
5736 ix86_get_callcvt (const_tree type)
5737 {
5738 unsigned int ret = 0;
5739 bool is_stdarg;
5740 tree attrs;
5741
5742 if (TARGET_64BIT)
5743 return IX86_CALLCVT_CDECL;
5744
5745 attrs = TYPE_ATTRIBUTES (type);
5746 if (attrs != NULL_TREE)
5747 {
5748 if (lookup_attribute ("cdecl", attrs))
5749 ret |= IX86_CALLCVT_CDECL;
5750 else if (lookup_attribute ("stdcall", attrs))
5751 ret |= IX86_CALLCVT_STDCALL;
5752 else if (lookup_attribute ("fastcall", attrs))
5753 ret |= IX86_CALLCVT_FASTCALL;
5754 else if (lookup_attribute ("thiscall", attrs))
5755 ret |= IX86_CALLCVT_THISCALL;
5756
5757 /* Regparm isn't allowed for thiscall and fastcall. */
5758 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5759 {
5760 if (lookup_attribute ("regparm", attrs))
5761 ret |= IX86_CALLCVT_REGPARM;
5762 if (lookup_attribute ("sseregparm", attrs))
5763 ret |= IX86_CALLCVT_SSEREGPARM;
5764 }
5765
5766 if (IX86_BASE_CALLCVT(ret) != 0)
5767 return ret;
5768 }
5769
5770 is_stdarg = stdarg_p (type);
5771 if (TARGET_RTD && !is_stdarg)
5772 return IX86_CALLCVT_STDCALL | ret;
5773
5774 if (ret != 0
5775 || is_stdarg
5776 || TREE_CODE (type) != METHOD_TYPE
5777 || ix86_function_type_abi (type) != MS_ABI)
5778 return IX86_CALLCVT_CDECL | ret;
5779
5780 return IX86_CALLCVT_THISCALL;
5781 }
5782
5783 /* Return 0 if the attributes for two types are incompatible, 1 if they
5784 are compatible, and 2 if they are nearly compatible (which causes a
5785 warning to be generated). */
5786
5787 static int
5788 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5789 {
5790 unsigned int ccvt1, ccvt2;
5791
5792 if (TREE_CODE (type1) != FUNCTION_TYPE
5793 && TREE_CODE (type1) != METHOD_TYPE)
5794 return 1;
5795
5796 ccvt1 = ix86_get_callcvt (type1);
5797 ccvt2 = ix86_get_callcvt (type2);
5798 if (ccvt1 != ccvt2)
5799 return 0;
5800 if (ix86_function_regparm (type1, NULL)
5801 != ix86_function_regparm (type2, NULL))
5802 return 0;
5803
5804 return 1;
5805 }
5806 \f
5807 /* Return the regparm value for a function with the indicated TYPE and DECL.
5808 DECL may be NULL when calling function indirectly
5809 or considering a libcall. */
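/* For example, with __attribute__ ((regparm (3))) the first three integral
   arguments are passed in registers rather than on the stack (EAX, EDX and
   ECX on ia32); fastcall implies two register arguments and thiscall one,
   which is what the constants returned below reflect.  */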
5810
5811 static int
5812 ix86_function_regparm (const_tree type, const_tree decl)
5813 {
5814 tree attr;
5815 int regparm;
5816 unsigned int ccvt;
5817
5818 if (TARGET_64BIT)
5819 return (ix86_function_type_abi (type) == SYSV_ABI
5820 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5821 ccvt = ix86_get_callcvt (type);
5822 regparm = ix86_regparm;
5823
5824 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5825 {
5826 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5827 if (attr)
5828 {
5829 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5830 return regparm;
5831 }
5832 }
5833 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5834 return 2;
5835 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5836 return 1;
5837
5838 /* Use register calling convention for local functions when possible. */
5839 if (decl
5840 && TREE_CODE (decl) == FUNCTION_DECL)
5841 {
5842 cgraph_node *target = cgraph_node::get (decl);
5843 if (target)
5844 target = target->function_symbol ();
5845
5846 /* Caller and callee must agree on the calling convention, so checking
5847 just the current function's optimize setting here would mean that with
5848 __attribute__((optimize (...))) the caller could use the regparm
5849 convention while the callee does not, or vice versa. Instead look at
5850 whether the callee itself is optimized or not. */
5851 if (target && opt_for_fn (target->decl, optimize)
5852 && !(profile_flag && !flag_fentry))
5853 {
5854 cgraph_local_info *i = &target->local;
5855 if (i && i->local && i->can_change_signature)
5856 {
5857 int local_regparm, globals = 0, regno;
5858
5859 /* Make sure no regparm register is taken by a
5860 fixed register variable. */
5861 for (local_regparm = 0; local_regparm < REGPARM_MAX;
5862 local_regparm++)
5863 if (fixed_regs[local_regparm])
5864 break;
5865
5866 /* We don't want to use regparm(3) for nested functions as
5867 these use a static chain pointer in the third argument. */
5868 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
5869 local_regparm = 2;
5870
5871 /* Save a register for the split stack. */
5872 if (local_regparm == 3 && flag_split_stack)
5873 local_regparm = 2;
5874
5875 /* Each fixed register usage increases register pressure,
5876 so fewer registers should be used for argument passing.
5877 This functionality can be overridden by an explicit
5878 regparm value. */
5879 for (regno = AX_REG; regno <= DI_REG; regno++)
5880 if (fixed_regs[regno])
5881 globals++;
5882
5883 local_regparm
5884 = globals < local_regparm ? local_regparm - globals : 0;
5885
5886 if (local_regparm > regparm)
5887 regparm = local_regparm;
5888 }
5889 }
5890 }
5891
5892 return regparm;
5893 }
5894
5895 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5896 DFmode (2) arguments in SSE registers for a function with the
5897 indicated TYPE and DECL. DECL may be NULL when calling function
5898 indirectly or considering a libcall. Return -1 if any FP parameter
5899 should be rejected by error. This is used in siutation we imply SSE
5900 calling convetion but the function is called from another function with
5901 SSE disabled. Otherwise return 0. */
5902
5903 static int
5904 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5905 {
5906 gcc_assert (!TARGET_64BIT);
5907
5908 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5909 by the sseregparm attribute. */
5910 if (TARGET_SSEREGPARM
5911 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5912 {
5913 if (!TARGET_SSE)
5914 {
5915 if (warn)
5916 {
5917 if (decl)
5918 error ("calling %qD with attribute sseregparm without "
5919 "SSE/SSE2 enabled", decl);
5920 else
5921 error ("calling %qT with attribute sseregparm without "
5922 "SSE/SSE2 enabled", type);
5923 }
5924 return 0;
5925 }
5926
5927 return 2;
5928 }
5929
5930 if (!decl)
5931 return 0;
5932
5933 cgraph_node *target = cgraph_node::get (decl);
5934 if (target)
5935 target = target->function_symbol ();
5936
5937 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5938 (and DFmode for SSE2) arguments in SSE registers. */
5939 if (target
5940 /* TARGET_SSE_MATH */
5941 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
5942 && opt_for_fn (target->decl, optimize)
5943 && !(profile_flag && !flag_fentry))
5944 {
5945 cgraph_local_info *i = &target->local;
5946 if (i && i->local && i->can_change_signature)
5947 {
5948 /* Refuse to produce wrong code when local function with SSE enabled
5949 is called from SSE disabled function.
5950 FIXME: We need a way to detect these cases cross-ltrans partition
5951 and avoid using SSE calling conventions on local functions called
5952 from function with SSE disabled. For now at least delay the
5953 warning until we know we are going to produce wrong code.
5954 See PR66047 */
5955 if (!TARGET_SSE && warn)
5956 return -1;
5957 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
5958 ->x_ix86_isa_flags) ? 2 : 1;
5959 }
5960 }
5961
5962 return 0;
5963 }
5964
5965 /* Return true if EAX is live at the start of the function. Used by
5966 ix86_expand_prologue to determine if we need special help before
5967 calling allocate_stack_worker. */
5968
5969 static bool
5970 ix86_eax_live_at_start_p (void)
5971 {
5972 /* Cheat. Don't bother working forward from ix86_function_regparm
5973 to the function type to whether an actual argument is located in
5974 eax. Instead just look at cfg info, which is still close enough
5975 to correct at this point. This gives false positives for broken
5976 functions that might use uninitialized data that happens to be
5977 allocated in eax, but who cares? */
5978 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5979 }
5980
5981 static bool
5982 ix86_keep_aggregate_return_pointer (tree fntype)
5983 {
5984 tree attr;
5985
5986 if (!TARGET_64BIT)
5987 {
5988 attr = lookup_attribute ("callee_pop_aggregate_return",
5989 TYPE_ATTRIBUTES (fntype));
5990 if (attr)
5991 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5992
5993 /* For 32-bit MS-ABI the default is to keep aggregate
5994 return pointer. */
5995 if (ix86_function_type_abi (fntype) == MS_ABI)
5996 return true;
5997 }
5998 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5999 }
6000
6001 /* Value is the number of bytes of arguments automatically
6002 popped when returning from a subroutine call.
6003 FUNDECL is the declaration node of the function (as a tree),
6004 FUNTYPE is the data type of the function (as a tree),
6005 or for a library call it is an identifier node for the subroutine name.
6006 SIZE is the number of bytes of arguments passed on the stack.
6007
6008 On the 80386, the RTD insn may be used to pop them if the number
6009 of args is fixed, but if the number is variable then the caller
6010 must pop them all. RTD can't be used for library calls now
6011 because the library is compiled with the Unix compiler.
6012 Use of RTD is a selectable option, since it is incompatible with
6013 standard Unix calling sequences. If the option is not selected,
6014 the caller must always pop the args.
6015
6016 The attribute stdcall is equivalent to RTD on a per module basis. */
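/* For example, a non-variadic stdcall function taking two int arguments
   returns with "ret $8", so this hook reports 8 for it, while for a plain
   cdecl function it reports 0 and the caller pops the arguments.  */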
6017
6018 static int
6019 ix86_return_pops_args (tree fundecl, tree funtype, int size)
6020 {
6021 unsigned int ccvt;
6022
6023 /* None of the 64-bit ABIs pop arguments. */
6024 if (TARGET_64BIT)
6025 return 0;
6026
6027 ccvt = ix86_get_callcvt (funtype);
6028
6029 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6030 | IX86_CALLCVT_THISCALL)) != 0
6031 && ! stdarg_p (funtype))
6032 return size;
6033
6034 /* Lose any fake structure return argument if it is passed on the stack. */
6035 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6036 && !ix86_keep_aggregate_return_pointer (funtype))
6037 {
6038 int nregs = ix86_function_regparm (funtype, fundecl);
6039 if (nregs == 0)
6040 return GET_MODE_SIZE (Pmode);
6041 }
6042
6043 return 0;
6044 }
6045
6046 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
6047
6048 static bool
6049 ix86_legitimate_combined_insn (rtx_insn *insn)
6050 {
6051 /* Check operand constraints in case hard registers were propagated
6052 into insn pattern. This check prevents combine pass from
6053 generating insn patterns with invalid hard register operands.
6054 These invalid insns can eventually confuse reload to error out
6055 with a spill failure. See also PRs 46829 and 46843. */
6056 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
6057 {
6058 int i;
6059
6060 extract_insn (insn);
6061 preprocess_constraints (insn);
6062
6063 int n_operands = recog_data.n_operands;
6064 int n_alternatives = recog_data.n_alternatives;
6065 for (i = 0; i < n_operands; i++)
6066 {
6067 rtx op = recog_data.operand[i];
6068 machine_mode mode = GET_MODE (op);
6069 const operand_alternative *op_alt;
6070 int offset = 0;
6071 bool win;
6072 int j;
6073
6074 /* For pre-AVX disallow unaligned loads/stores where the
6075 instructions don't support it. */
6076 if (!TARGET_AVX
6077 && VECTOR_MODE_P (GET_MODE (op))
6078 && misaligned_operand (op, GET_MODE (op)))
6079 {
6080 int min_align = get_attr_ssememalign (insn);
6081 if (min_align == 0)
6082 return false;
6083 }
6084
6085 /* A unary operator may be accepted by the predicate, but it
6086 is irrelevant for matching constraints. */
6087 if (UNARY_P (op))
6088 op = XEXP (op, 0);
6089
6090 if (GET_CODE (op) == SUBREG)
6091 {
6092 if (REG_P (SUBREG_REG (op))
6093 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6094 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6095 GET_MODE (SUBREG_REG (op)),
6096 SUBREG_BYTE (op),
6097 GET_MODE (op));
6098 op = SUBREG_REG (op);
6099 }
6100
6101 if (!(REG_P (op) && HARD_REGISTER_P (op)))
6102 continue;
6103
6104 op_alt = recog_op_alt;
6105
6106 /* Operand has no constraints, anything is OK. */
6107 win = !n_alternatives;
6108
6109 alternative_mask preferred = get_preferred_alternatives (insn);
6110 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6111 {
6112 if (!TEST_BIT (preferred, j))
6113 continue;
6114 if (op_alt[i].anything_ok
6115 || (op_alt[i].matches != -1
6116 && operands_match_p
6117 (recog_data.operand[i],
6118 recog_data.operand[op_alt[i].matches]))
6119 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6120 {
6121 win = true;
6122 break;
6123 }
6124 }
6125
6126 if (!win)
6127 return false;
6128 }
6129 }
6130
6131 return true;
6132 }
6133 \f
6134 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
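/* AddressSanitizer maps an address to its shadow byte roughly as
   (addr >> 3) + ix86_asan_shadow_offset (), so the constants below just pick
   an offset that keeps the shadow region out of the way of the normal
   address space for the LP64, Mach-O/LP64 and ILP32 cases respectively.  */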
6135
6136 static unsigned HOST_WIDE_INT
6137 ix86_asan_shadow_offset (void)
6138 {
6139 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6140 : HOST_WIDE_INT_C (0x7fff8000))
6141 : (HOST_WIDE_INT_1 << 29);
6142 }
6143 \f
6144 /* Argument support functions. */
6145
6146 /* Return true when register may be used to pass function parameters. */
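/* In 64-bit mode the integer argument registers are RDI, RSI, RDX, RCX, R8
   and R9 for the SysV ABI, and RCX, RDX, R8 and R9 for the MS ABI; these are
   the x86_64_int_parameter_registers and
   x86_64_ms_abi_int_parameter_registers arrays consulted below.  */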
6147 bool
6148 ix86_function_arg_regno_p (int regno)
6149 {
6150 int i;
6151 const int *parm_regs;
6152
6153 if (TARGET_MPX && BND_REGNO_P (regno))
6154 return true;
6155
6156 if (!TARGET_64BIT)
6157 {
6158 if (TARGET_MACHO)
6159 return (regno < REGPARM_MAX
6160 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6161 else
6162 return (regno < REGPARM_MAX
6163 || (TARGET_MMX && MMX_REGNO_P (regno)
6164 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6165 || (TARGET_SSE && SSE_REGNO_P (regno)
6166 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6167 }
6168
6169 if (TARGET_SSE && SSE_REGNO_P (regno)
6170 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6171 return true;
6172
6173 /* TODO: The function should depend on current function ABI but
6174 builtins.c would need updating then. Therefore we use the
6175 default ABI. */
6176
6177 /* RAX is used as hidden argument to va_arg functions. */
6178 if (ix86_abi == SYSV_ABI && regno == AX_REG)
6179 return true;
6180
6181 if (ix86_abi == MS_ABI)
6182 parm_regs = x86_64_ms_abi_int_parameter_registers;
6183 else
6184 parm_regs = x86_64_int_parameter_registers;
6185 for (i = 0; i < (ix86_abi == MS_ABI
6186 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6187 if (regno == parm_regs[i])
6188 return true;
6189 return false;
6190 }
6191
6192 /* Return true if we do not know how to pass TYPE solely in registers.  */
6193
6194 static bool
6195 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6196 {
6197 if (must_pass_in_stack_var_size_or_pad (mode, type))
6198 return true;
6199
6200 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6201 The layout_type routine is crafty and tries to trick us into passing
6202 currently unsupported vector types on the stack by using TImode. */
6203 return (!TARGET_64BIT && mode == TImode
6204 && type && TREE_CODE (type) != VECTOR_TYPE);
6205 }
6206
6207 /* Return the size, in bytes, of the area reserved for arguments passed
6208    in registers for the function represented by FNDECL, depending on the
6209    ABI format used.  */
6210 int
6211 ix86_reg_parm_stack_space (const_tree fndecl)
6212 {
6213 enum calling_abi call_abi = SYSV_ABI;
6214 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6215 call_abi = ix86_function_abi (fndecl);
6216 else
6217 call_abi = ix86_function_type_abi (fndecl);
6218 if (TARGET_64BIT && call_abi == MS_ABI)
6219 return 32;
6220 return 0;
6221 }
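/* For example, assuming the Windows 64-bit calling convention selected
   above, a caller such as

     extern void __attribute__ ((ms_abi)) callee (int a, int b);
     void caller (void) { callee (1, 2); }

   must still reserve 32 bytes of stack (the "home" area for the four
   register parameters) for the callee, which is the value returned here;
   SYSV callers reserve nothing.  CALLEE and CALLER are illustrative names
   only.  */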
6222
6223 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
6224 call abi used. */
6225 enum calling_abi
6226 ix86_function_type_abi (const_tree fntype)
6227 {
6228 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6229 {
6230 enum calling_abi abi = ix86_abi;
6231 if (abi == SYSV_ABI)
6232 {
6233 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6234 {
6235 if (TARGET_X32)
6236 {
6237 static bool warned = false;
6238 if (!warned)
6239 {
6240 error ("X32 does not support ms_abi attribute");
6241 warned = true;
6242 }
6243 }
6244 abi = MS_ABI;
6245 }
6246 }
6247 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6248 abi = SYSV_ABI;
6249 return abi;
6250 }
6251 return ix86_abi;
6252 }
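/* A short usage sketch for the attributes handled above: on an x86-64 ELF
   target where ix86_abi is SYSV_ABI,

     void sysv_fn (int);                           -- SYSV_ABI (the default)
     void __attribute__ ((ms_abi)) ms_fn (int);    -- MS_ABI

   while on a native MS ABI target the sysv_abi attribute performs the
   opposite switch; combining ms_abi with -mx32 is rejected with the error
   above.  The function names are illustrative only.  */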
6253
6254 /* We add this as a workaround in order to use libc_has_function
6255 hook in i386.md. */
6256 bool
6257 ix86_libc_has_function (enum function_class fn_class)
6258 {
6259 return targetm.libc_has_function (fn_class);
6260 }
6261
6262 static bool
6263 ix86_function_ms_hook_prologue (const_tree fn)
6264 {
6265 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6266 {
6267 if (decl_function_context (fn) != NULL_TREE)
6268 error_at (DECL_SOURCE_LOCATION (fn),
6269 "ms_hook_prologue is not compatible with nested function");
6270 else
6271 return true;
6272 }
6273 return false;
6274 }
6275
6276 static enum calling_abi
6277 ix86_function_abi (const_tree fndecl)
6278 {
6279 if (! fndecl)
6280 return ix86_abi;
6281 return ix86_function_type_abi (TREE_TYPE (fndecl));
6282 }
6283
6284 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
6285 call abi used. */
6286 enum calling_abi
6287 ix86_cfun_abi (void)
6288 {
6289 if (! cfun)
6290 return ix86_abi;
6291 return cfun->machine->call_abi;
6292 }
6293
6294 /* Write the extra assembler code needed to declare a function properly. */
6295
6296 void
6297 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6298 tree decl)
6299 {
6300 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6301
6302 if (is_ms_hook)
6303 {
6304 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6305 unsigned int filler_cc = 0xcccccccc;
6306
6307 for (i = 0; i < filler_count; i += 4)
6308 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6309 }
6310
6311 #ifdef SUBTARGET_ASM_UNWIND_INIT
6312 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6313 #endif
6314
6315 ASM_OUTPUT_LABEL (asm_out_file, fname);
6316
6317 /* Output magic byte marker, if hot-patch attribute is set. */
6318 if (is_ms_hook)
6319 {
6320 if (TARGET_64BIT)
6321 {
6322 /* leaq [%rsp + 0], %rsp */
6323 asm_fprintf (asm_out_file, ASM_BYTE
6324 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6325 }
6326 else
6327 {
6328 /* movl.s %edi, %edi
6329 push %ebp
6330 movl.s %esp, %ebp */
6331 asm_fprintf (asm_out_file, ASM_BYTE
6332 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6333 }
6334 }
6335 }
6336
6337 /* regclass.c */
6338 extern void init_regs (void);
6339
6340 /* Implementation of the call ABI switching target hook.  Set up the
6341    call register sets specific to FNDECL.  See also
6342    ix86_conditional_register_usage for more details.  */
6343 void
6344 ix86_call_abi_override (const_tree fndecl)
6345 {
6346 if (fndecl == NULL_TREE)
6347 cfun->machine->call_abi = ix86_abi;
6348 else
6349 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6350 }
6351
6352 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
6353    Avoid expensive re-initialization of init_regs each time we switch
6354    function context, since this is needed only during RTL expansion.  */
6355 static void
6356 ix86_maybe_switch_abi (void)
6357 {
6358 if (TARGET_64BIT &&
6359 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6360 reinit_regs ();
6361 }
6362
6363 /* Return true if a pseudo register should be created and used to hold
6364    the GOT address for PIC code.  */
6365 bool
6366 ix86_use_pseudo_pic_reg (void)
6367 {
6368 if ((TARGET_64BIT
6369 && (ix86_cmodel == CM_SMALL_PIC
6370 || TARGET_PECOFF))
6371 || !flag_pic)
6372 return false;
6373 return true;
6374 }
6375
6376 /* Initialize large model PIC register. */
6377
6378 static void
6379 ix86_init_large_pic_reg (unsigned int tmp_regno)
6380 {
6381 rtx_code_label *label;
6382 rtx tmp_reg;
6383
6384 gcc_assert (Pmode == DImode);
6385 label = gen_label_rtx ();
6386 emit_label (label);
6387 LABEL_PRESERVE_P (label) = 1;
6388 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6389 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6390 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6391 label));
6392 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6393 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6394 pic_offset_table_rtx, tmp_reg));
6395 }
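/* Informally, for -mcmodel=large -fPIC the sequence emitted above is
   roughly (with %r11 as TMP_REGNO):

     .L1: lea    .L1(%rip), <pic_reg>                  gen_set_rip_rex64
          movabs $_GLOBAL_OFFSET_TABLE_-.L1, %r11      gen_set_got_offset_rex64
          add    %r11, <pic_reg>

   i.e. the GOT address is recovered as the current RIP plus the distance
   from the label to the GOT.  */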
6396
6397 /* Create and initialize PIC register if required. */
6398 static void
6399 ix86_init_pic_reg (void)
6400 {
6401 edge entry_edge;
6402 rtx_insn *seq;
6403
6404 if (!ix86_use_pseudo_pic_reg ())
6405 return;
6406
6407 start_sequence ();
6408
6409 if (TARGET_64BIT)
6410 {
6411 if (ix86_cmodel == CM_LARGE_PIC)
6412 ix86_init_large_pic_reg (R11_REG);
6413 else
6414 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6415 }
6416 else
6417 {
6418 /* If there is a future mcount call in the function, it is more profitable
6419    to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
6420 rtx reg = crtl->profile
6421 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6422 : pic_offset_table_rtx;
6423 rtx_insn *insn = emit_insn (gen_set_got (reg));
6424 RTX_FRAME_RELATED_P (insn) = 1;
6425 if (crtl->profile)
6426 emit_move_insn (pic_offset_table_rtx, reg);
6427 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6428 }
6429
6430 seq = get_insns ();
6431 end_sequence ();
6432
6433 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6434 insert_insn_on_edge (seq, entry_edge);
6435 commit_one_edge_insertion (entry_edge);
6436 }
6437
6438 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6439 for a call to a function whose data type is FNTYPE.
6440 For a library call, FNTYPE is 0. */
6441
6442 void
6443 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6444 tree fntype, /* tree ptr for function decl */
6445 rtx libname, /* SYMBOL_REF of library name or 0 */
6446 tree fndecl,
6447 int caller)
6448 {
6449 struct cgraph_local_info *i = NULL;
6450 struct cgraph_node *target = NULL;
6451
6452 memset (cum, 0, sizeof (*cum));
6453
6454 if (fndecl)
6455 {
6456 target = cgraph_node::get (fndecl);
6457 if (target)
6458 {
6459 target = target->function_symbol ();
6460 i = cgraph_node::local_info (target->decl);
6461 cum->call_abi = ix86_function_abi (target->decl);
6462 }
6463 else
6464 cum->call_abi = ix86_function_abi (fndecl);
6465 }
6466 else
6467 cum->call_abi = ix86_function_type_abi (fntype);
6468
6469 cum->caller = caller;
6470
6471 /* Set up the number of registers to use for passing arguments. */
6472 cum->nregs = ix86_regparm;
6473 if (TARGET_64BIT)
6474 {
6475 cum->nregs = (cum->call_abi == SYSV_ABI
6476 ? X86_64_REGPARM_MAX
6477 : X86_64_MS_REGPARM_MAX);
6478 }
6479 if (TARGET_SSE)
6480 {
6481 cum->sse_nregs = SSE_REGPARM_MAX;
6482 if (TARGET_64BIT)
6483 {
6484 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6485 ? X86_64_SSE_REGPARM_MAX
6486 : X86_64_MS_SSE_REGPARM_MAX);
6487 }
6488 }
6489 if (TARGET_MMX)
6490 cum->mmx_nregs = MMX_REGPARM_MAX;
6491 cum->warn_avx512f = true;
6492 cum->warn_avx = true;
6493 cum->warn_sse = true;
6494 cum->warn_mmx = true;
6495
6496 /* Because the type might mismatch between caller and callee, we need to
6497    use the actual type of the function for local calls.
6498    FIXME: cgraph_analyze can be told to actually record whether a function
6499    uses va_start, so for local functions maybe_vaarg can be made more
6500    aggressive, helping K&R code.
6501    FIXME: once the type system is fixed, we won't need this code anymore.  */
6502 if (i && i->local && i->can_change_signature)
6503 fntype = TREE_TYPE (target->decl);
6504 cum->stdarg = stdarg_p (fntype);
6505 cum->maybe_vaarg = (fntype
6506 ? (!prototype_p (fntype) || stdarg_p (fntype))
6507 : !libname);
6508
6509 cum->bnd_regno = FIRST_BND_REG;
6510 cum->bnds_in_bt = 0;
6511 cum->force_bnd_pass = 0;
6512 cum->decl = fndecl;
6513
6514 if (!TARGET_64BIT)
6515 {
6516 /* If there are variable arguments, then we won't pass anything
6517 in registers in 32-bit mode. */
6518 if (stdarg_p (fntype))
6519 {
6520 cum->nregs = 0;
6521 cum->sse_nregs = 0;
6522 cum->mmx_nregs = 0;
6523 cum->warn_avx512f = false;
6524 cum->warn_avx = false;
6525 cum->warn_sse = false;
6526 cum->warn_mmx = false;
6527 return;
6528 }
6529
6530 /* Use ecx and edx registers if function has fastcall attribute,
6531 else look for regparm information. */
6532 if (fntype)
6533 {
6534 unsigned int ccvt = ix86_get_callcvt (fntype);
6535 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6536 {
6537 cum->nregs = 1;
6538 cum->fastcall = 1; /* Same first register as in fastcall. */
6539 }
6540 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6541 {
6542 cum->nregs = 2;
6543 cum->fastcall = 1;
6544 }
6545 else
6546 cum->nregs = ix86_function_regparm (fntype, fndecl);
6547 }
6548
6549 /* Set up the number of SSE registers used for passing SFmode
6550 and DFmode arguments. Warn for mismatching ABI. */
6551 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6552 }
6553 }
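/* A concrete 32-bit example of the setup above (the function name is
   illustrative):

     void __attribute__ ((fastcall)) f (int a, int b);

   starts with cum->nregs == 2 and cum->fastcall == 1, so A is assigned to
   %ecx and B to %edx, whereas a plain cdecl function starts with
   cum->nregs == ix86_regparm (0 by default) and passes everything on the
   stack.  */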
6554
6555 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6556 But in the case of vector types, it is some vector mode.
6557
6558 When we have only some of our vector isa extensions enabled, then there
6559 are some modes for which vector_mode_supported_p is false. For these
6560 modes, the generic vector support in gcc will choose some non-vector mode
6561 in order to implement the type. By computing the natural mode, we'll
6562 select the proper ABI location for the operand and not depend on whatever
6563 the middle-end decides to do with these vector types.
6564
6565    The middle-end can't deal with vector types larger than 16 bytes.  In
6566    this case, we return the original mode and warn about the ABI change
6567    if CUM isn't NULL.
6568
6569    If IN_RETURN is true, warn about the ABI change if the vector mode
6570    isn't available for the function return value.  */
6571
6572 static machine_mode
6573 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6574 bool in_return)
6575 {
6576 machine_mode mode = TYPE_MODE (type);
6577
6578 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6579 {
6580 HOST_WIDE_INT size = int_size_in_bytes (type);
6581 if ((size == 8 || size == 16 || size == 32 || size == 64)
6582 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6583 && TYPE_VECTOR_SUBPARTS (type) > 1)
6584 {
6585 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6586
6587 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6588 mode = MIN_MODE_VECTOR_FLOAT;
6589 else
6590 mode = MIN_MODE_VECTOR_INT;
6591
6592 /* Get the mode which has this inner mode and number of units. */
6593 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6594 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6595 && GET_MODE_INNER (mode) == innermode)
6596 {
6597 if (size == 64 && !TARGET_AVX512F)
6598 {
6599 static bool warnedavx512f;
6600 static bool warnedavx512f_ret;
6601
6602 if (cum && cum->warn_avx512f && !warnedavx512f)
6603 {
6604 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6605 "without AVX512F enabled changes the ABI"))
6606 warnedavx512f = true;
6607 }
6608 else if (in_return && !warnedavx512f_ret)
6609 {
6610 if (warning (OPT_Wpsabi, "AVX512F vector return "
6611 "without AVX512F enabled changes the ABI"))
6612 warnedavx512f_ret = true;
6613 }
6614
6615 return TYPE_MODE (type);
6616 }
6617 else if (size == 32 && !TARGET_AVX)
6618 {
6619 static bool warnedavx;
6620 static bool warnedavx_ret;
6621
6622 if (cum && cum->warn_avx && !warnedavx)
6623 {
6624 if (warning (OPT_Wpsabi, "AVX vector argument "
6625 "without AVX enabled changes the ABI"))
6626 warnedavx = true;
6627 }
6628 else if (in_return && !warnedavx_ret)
6629 {
6630 if (warning (OPT_Wpsabi, "AVX vector return "
6631 "without AVX enabled changes the ABI"))
6632 warnedavx_ret = true;
6633 }
6634
6635 return TYPE_MODE (type);
6636 }
6637 else if (((size == 8 && TARGET_64BIT) || size == 16)
6638 && !TARGET_SSE)
6639 {
6640 static bool warnedsse;
6641 static bool warnedsse_ret;
6642
6643 if (cum && cum->warn_sse && !warnedsse)
6644 {
6645 if (warning (OPT_Wpsabi, "SSE vector argument "
6646 "without SSE enabled changes the ABI"))
6647 warnedsse = true;
6648 }
6649 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6650 {
6651 if (warning (OPT_Wpsabi, "SSE vector return "
6652 "without SSE enabled changes the ABI"))
6653 warnedsse_ret = true;
6654 }
6655 }
6656 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6657 {
6658 static bool warnedmmx;
6659 static bool warnedmmx_ret;
6660
6661 if (cum && cum->warn_mmx && !warnedmmx)
6662 {
6663 if (warning (OPT_Wpsabi, "MMX vector argument "
6664 "without MMX enabled changes the ABI"))
6665 warnedmmx = true;
6666 }
6667 else if (in_return && !warnedmmx_ret)
6668 {
6669 if (warning (OPT_Wpsabi, "MMX vector return "
6670 "without MMX enabled changes the ABI"))
6671 warnedmmx_ret = true;
6672 }
6673 }
6674 return mode;
6675 }
6676
6677 gcc_unreachable ();
6678 }
6679 }
6680
6681 return mode;
6682 }
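/* A small -Wpsabi example for the checks above: compiling

     typedef double v4df __attribute__ ((vector_size (32)));
     v4df f (v4df x) { return x; }

   without -mavx leaves the 32-byte vector in its original non-vector mode,
   and the first such argument and the first such return value each trigger
   the "changes the ABI" warning issued above.  */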
6683
6684 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6685 this may not agree with the mode that the type system has chosen for the
6686 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6687 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6688
6689 static rtx
6690 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6691 unsigned int regno)
6692 {
6693 rtx tmp;
6694
6695 if (orig_mode != BLKmode)
6696 tmp = gen_rtx_REG (orig_mode, regno);
6697 else
6698 {
6699 tmp = gen_rtx_REG (mode, regno);
6700 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6701 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6702 }
6703
6704 return tmp;
6705 }
6706
6707 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6708 of this code is to classify each 8bytes of incoming argument by the register
6709 class and assign registers accordingly. */
6710
6711 /* Return the union class of CLASS1 and CLASS2.
6712 See the x86-64 PS ABI for details. */
6713
6714 static enum x86_64_reg_class
6715 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6716 {
6717 /* Rule #1: If both classes are equal, this is the resulting class. */
6718 if (class1 == class2)
6719 return class1;
6720
6721 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6722 the other class. */
6723 if (class1 == X86_64_NO_CLASS)
6724 return class2;
6725 if (class2 == X86_64_NO_CLASS)
6726 return class1;
6727
6728 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6729 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6730 return X86_64_MEMORY_CLASS;
6731
6732 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6733 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6734 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6735 return X86_64_INTEGERSI_CLASS;
6736 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6737 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6738 return X86_64_INTEGER_CLASS;
6739
6740 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6741 MEMORY is used. */
6742 if (class1 == X86_64_X87_CLASS
6743 || class1 == X86_64_X87UP_CLASS
6744 || class1 == X86_64_COMPLEX_X87_CLASS
6745 || class2 == X86_64_X87_CLASS
6746 || class2 == X86_64_X87UP_CLASS
6747 || class2 == X86_64_COMPLEX_X87_CLASS)
6748 return X86_64_MEMORY_CLASS;
6749
6750 /* Rule #6: Otherwise class SSE is used. */
6751 return X86_64_SSE_CLASS;
6752 }
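/* A few worked examples of the rules above: merging INTEGERSI with SSESF
   yields INTEGERSI (the first test of rule #4), INTEGER with SSE yields
   INTEGER (rule #4), X87 with SSE yields MEMORY (rule #5), and SSESF with
   SSEDF yields SSE (rule #6).  */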
6753
6754 /* Classify the argument of type TYPE and mode MODE.
6755 CLASSES will be filled by the register class used to pass each word
6756 of the operand. The number of words is returned. In case the parameter
6757 should be passed in memory, 0 is returned. As a special case for zero
6758 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6759
6760    BIT_OFFSET is used internally for handling records; it specifies the
6761    offset in bits modulo 512 to avoid overflow cases.
6762
6763 See the x86-64 PS ABI for details.
6764 */
6765
6766 static int
6767 classify_argument (machine_mode mode, const_tree type,
6768 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6769 {
6770 HOST_WIDE_INT bytes =
6771 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6772 int words
6773 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6774
6775 /* Variable sized entities are always passed/returned in memory. */
6776 if (bytes < 0)
6777 return 0;
6778
6779 if (mode != VOIDmode
6780 && targetm.calls.must_pass_in_stack (mode, type))
6781 return 0;
6782
6783 if (type && AGGREGATE_TYPE_P (type))
6784 {
6785 int i;
6786 tree field;
6787 enum x86_64_reg_class subclasses[MAX_CLASSES];
6788
6789 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6790 if (bytes > 64)
6791 return 0;
6792
6793 for (i = 0; i < words; i++)
6794 classes[i] = X86_64_NO_CLASS;
6795
6796 /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
6797    signal the memory class, so handle this as a special case.  */
6798 if (!words)
6799 {
6800 classes[0] = X86_64_NO_CLASS;
6801 return 1;
6802 }
6803
6804 /* Classify each field of record and merge classes. */
6805 switch (TREE_CODE (type))
6806 {
6807 case RECORD_TYPE:
6808 /* And now merge the fields of structure. */
6809 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6810 {
6811 if (TREE_CODE (field) == FIELD_DECL)
6812 {
6813 int num;
6814
6815 if (TREE_TYPE (field) == error_mark_node)
6816 continue;
6817
6818 /* Bitfields are always classified as integer. Handle them
6819 early, since later code would consider them to be
6820 misaligned integers. */
6821 if (DECL_BIT_FIELD (field))
6822 {
6823 for (i = (int_bit_position (field)
6824 + (bit_offset % 64)) / 8 / 8;
6825 i < ((int_bit_position (field) + (bit_offset % 64))
6826 + tree_to_shwi (DECL_SIZE (field))
6827 + 63) / 8 / 8; i++)
6828 classes[i] =
6829 merge_classes (X86_64_INTEGER_CLASS,
6830 classes[i]);
6831 }
6832 else
6833 {
6834 int pos;
6835
6836 type = TREE_TYPE (field);
6837
6838 /* Flexible array member is ignored. */
6839 if (TYPE_MODE (type) == BLKmode
6840 && TREE_CODE (type) == ARRAY_TYPE
6841 && TYPE_SIZE (type) == NULL_TREE
6842 && TYPE_DOMAIN (type) != NULL_TREE
6843 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6844 == NULL_TREE))
6845 {
6846 static bool warned;
6847
6848 if (!warned && warn_psabi)
6849 {
6850 warned = true;
6851 inform (input_location,
6852 "the ABI of passing struct with"
6853 " a flexible array member has"
6854 " changed in GCC 4.4");
6855 }
6856 continue;
6857 }
6858 num = classify_argument (TYPE_MODE (type), type,
6859 subclasses,
6860 (int_bit_position (field)
6861 + bit_offset) % 512);
6862 if (!num)
6863 return 0;
6864 pos = (int_bit_position (field)
6865 + (bit_offset % 64)) / 8 / 8;
6866 for (i = 0; i < num && (i + pos) < words; i++)
6867 classes[i + pos] =
6868 merge_classes (subclasses[i], classes[i + pos]);
6869 }
6870 }
6871 }
6872 break;
6873
6874 case ARRAY_TYPE:
6875 /* Arrays are handled as small records. */
6876 {
6877 int num;
6878 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6879 TREE_TYPE (type), subclasses, bit_offset);
6880 if (!num)
6881 return 0;
6882
6883 /* The partial classes are now full classes. */
6884 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6885 subclasses[0] = X86_64_SSE_CLASS;
6886 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6887 && !((bit_offset % 64) == 0 && bytes == 4))
6888 subclasses[0] = X86_64_INTEGER_CLASS;
6889
6890 for (i = 0; i < words; i++)
6891 classes[i] = subclasses[i % num];
6892
6893 break;
6894 }
6895 case UNION_TYPE:
6896 case QUAL_UNION_TYPE:
6897 /* Unions are similar to RECORD_TYPE but offset is always 0.  */
6898
6899 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6900 {
6901 if (TREE_CODE (field) == FIELD_DECL)
6902 {
6903 int num;
6904
6905 if (TREE_TYPE (field) == error_mark_node)
6906 continue;
6907
6908 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6909 TREE_TYPE (field), subclasses,
6910 bit_offset);
6911 if (!num)
6912 return 0;
6913 for (i = 0; i < num && i < words; i++)
6914 classes[i] = merge_classes (subclasses[i], classes[i]);
6915 }
6916 }
6917 break;
6918
6919 default:
6920 gcc_unreachable ();
6921 }
6922
6923 if (words > 2)
6924 {
6925 /* When the size is > 16 bytes, if the first eightbyte isn't
6926    X86_64_SSE_CLASS or any of the remaining ones isn't
6927    X86_64_SSEUP_CLASS, everything should be passed in
6928    memory.  */
6929 if (classes[0] != X86_64_SSE_CLASS)
6930 return 0;
6931
6932 for (i = 1; i < words; i++)
6933 if (classes[i] != X86_64_SSEUP_CLASS)
6934 return 0;
6935 }
6936
6937 /* Final merger cleanup. */
6938 for (i = 0; i < words; i++)
6939 {
6940 /* If one class is MEMORY, everything should be passed in
6941 memory. */
6942 if (classes[i] == X86_64_MEMORY_CLASS)
6943 return 0;
6944
6945 /* X86_64_SSEUP_CLASS should always be preceded by
6946    X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
6947 if (classes[i] == X86_64_SSEUP_CLASS
6948 && classes[i - 1] != X86_64_SSE_CLASS
6949 && classes[i - 1] != X86_64_SSEUP_CLASS)
6950 {
6951 /* The first one should never be X86_64_SSEUP_CLASS. */
6952 gcc_assert (i != 0);
6953 classes[i] = X86_64_SSE_CLASS;
6954 }
6955
6956 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6957 everything should be passed in memory. */
6958 if (classes[i] == X86_64_X87UP_CLASS
6959 && (classes[i - 1] != X86_64_X87_CLASS))
6960 {
6961 static bool warned;
6962
6963 /* The first one should never be X86_64_X87UP_CLASS. */
6964 gcc_assert (i != 0);
6965 if (!warned && warn_psabi)
6966 {
6967 warned = true;
6968 inform (input_location,
6969 "the ABI of passing union with long double"
6970 " has changed in GCC 4.4");
6971 }
6972 return 0;
6973 }
6974 }
6975 return words;
6976 }
6977
6978 /* Compute the alignment needed.  We align all types to natural boundaries,
6979    with the exception of XFmode, which is aligned to 64 bits.  */
6980 if (mode != VOIDmode && mode != BLKmode)
6981 {
6982 int mode_alignment = GET_MODE_BITSIZE (mode);
6983
6984 if (mode == XFmode)
6985 mode_alignment = 128;
6986 else if (mode == XCmode)
6987 mode_alignment = 256;
6988 if (COMPLEX_MODE_P (mode))
6989 mode_alignment /= 2;
6990 /* Misaligned fields are always returned in memory. */
6991 if (bit_offset % mode_alignment)
6992 return 0;
6993 }
6994
6995 /* for V1xx modes, just use the base mode */
6996 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6997 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6998 mode = GET_MODE_INNER (mode);
6999
7000 /* Classification of atomic types. */
7001 switch (mode)
7002 {
7003 case SDmode:
7004 case DDmode:
7005 classes[0] = X86_64_SSE_CLASS;
7006 return 1;
7007 case TDmode:
7008 classes[0] = X86_64_SSE_CLASS;
7009 classes[1] = X86_64_SSEUP_CLASS;
7010 return 2;
7011 case DImode:
7012 case SImode:
7013 case HImode:
7014 case QImode:
7015 case CSImode:
7016 case CHImode:
7017 case CQImode:
7018 {
7019 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
7020
7021 /* Analyze last 128 bits only. */
7022 size = (size - 1) & 0x7f;
7023
7024 if (size < 32)
7025 {
7026 classes[0] = X86_64_INTEGERSI_CLASS;
7027 return 1;
7028 }
7029 else if (size < 64)
7030 {
7031 classes[0] = X86_64_INTEGER_CLASS;
7032 return 1;
7033 }
7034 else if (size < 64+32)
7035 {
7036 classes[0] = X86_64_INTEGER_CLASS;
7037 classes[1] = X86_64_INTEGERSI_CLASS;
7038 return 2;
7039 }
7040 else if (size < 64+64)
7041 {
7042 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7043 return 2;
7044 }
7045 else
7046 gcc_unreachable ();
7047 }
7048 case CDImode:
7049 case TImode:
7050 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7051 return 2;
7052 case COImode:
7053 case OImode:
7054 /* OImode shouldn't be used directly. */
7055 gcc_unreachable ();
7056 case CTImode:
7057 return 0;
7058 case SFmode:
7059 if (!(bit_offset % 64))
7060 classes[0] = X86_64_SSESF_CLASS;
7061 else
7062 classes[0] = X86_64_SSE_CLASS;
7063 return 1;
7064 case DFmode:
7065 classes[0] = X86_64_SSEDF_CLASS;
7066 return 1;
7067 case XFmode:
7068 classes[0] = X86_64_X87_CLASS;
7069 classes[1] = X86_64_X87UP_CLASS;
7070 return 2;
7071 case TFmode:
7072 classes[0] = X86_64_SSE_CLASS;
7073 classes[1] = X86_64_SSEUP_CLASS;
7074 return 2;
7075 case SCmode:
7076 classes[0] = X86_64_SSE_CLASS;
7077 if (!(bit_offset % 64))
7078 return 1;
7079 else
7080 {
7081 static bool warned;
7082
7083 if (!warned && warn_psabi)
7084 {
7085 warned = true;
7086 inform (input_location,
7087 "the ABI of passing structure with complex float"
7088 " member has changed in GCC 4.4");
7089 }
7090 classes[1] = X86_64_SSESF_CLASS;
7091 return 2;
7092 }
7093 case DCmode:
7094 classes[0] = X86_64_SSEDF_CLASS;
7095 classes[1] = X86_64_SSEDF_CLASS;
7096 return 2;
7097 case XCmode:
7098 classes[0] = X86_64_COMPLEX_X87_CLASS;
7099 return 1;
7100 case TCmode:
7101 /* This mode is larger than 16 bytes.  */
7102 return 0;
7103 case V8SFmode:
7104 case V8SImode:
7105 case V32QImode:
7106 case V16HImode:
7107 case V4DFmode:
7108 case V4DImode:
7109 classes[0] = X86_64_SSE_CLASS;
7110 classes[1] = X86_64_SSEUP_CLASS;
7111 classes[2] = X86_64_SSEUP_CLASS;
7112 classes[3] = X86_64_SSEUP_CLASS;
7113 return 4;
7114 case V8DFmode:
7115 case V16SFmode:
7116 case V8DImode:
7117 case V16SImode:
7118 case V32HImode:
7119 case V64QImode:
7120 classes[0] = X86_64_SSE_CLASS;
7121 classes[1] = X86_64_SSEUP_CLASS;
7122 classes[2] = X86_64_SSEUP_CLASS;
7123 classes[3] = X86_64_SSEUP_CLASS;
7124 classes[4] = X86_64_SSEUP_CLASS;
7125 classes[5] = X86_64_SSEUP_CLASS;
7126 classes[6] = X86_64_SSEUP_CLASS;
7127 classes[7] = X86_64_SSEUP_CLASS;
7128 return 8;
7129 case V4SFmode:
7130 case V4SImode:
7131 case V16QImode:
7132 case V8HImode:
7133 case V2DFmode:
7134 case V2DImode:
7135 classes[0] = X86_64_SSE_CLASS;
7136 classes[1] = X86_64_SSEUP_CLASS;
7137 return 2;
7138 case V1TImode:
7139 case V1DImode:
7140 case V2SFmode:
7141 case V2SImode:
7142 case V4HImode:
7143 case V8QImode:
7144 classes[0] = X86_64_SSE_CLASS;
7145 return 1;
7146 case BLKmode:
7147 case VOIDmode:
7148 return 0;
7149 default:
7150 gcc_assert (VECTOR_MODE_P (mode));
7151
7152 if (bytes > 16)
7153 return 0;
7154
7155 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7156
7157 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7158 classes[0] = X86_64_INTEGERSI_CLASS;
7159 else
7160 classes[0] = X86_64_INTEGER_CLASS;
7161 classes[1] = X86_64_INTEGER_CLASS;
7162 return 1 + (bytes > 8);
7163 }
7164 }
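/* Informal examples of the classification above, following the SysV x86-64
   psABI (one class per eightbyte; register assignments assume the value is
   the first argument of a non-varargs SysV call):

     struct { long l; double d; };   -> INTEGER, SSEDF   (%rdi and %xmm0)
     struct { int i; float f; };     -> INTEGER          (a single GP reg)
     struct { double a, b; };        -> SSEDF, SSEDF     (%xmm0 and %xmm1)
     struct { char c[24]; };         -> memory           (> 16 bytes and not
                                                          an SSE-only type)  */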
7165
7166 /* Examine the argument and set the number of registers required in each
7167    class.  Return true iff the parameter should be passed in memory.  */
7168
7169 static bool
7170 examine_argument (machine_mode mode, const_tree type, int in_return,
7171 int *int_nregs, int *sse_nregs)
7172 {
7173 enum x86_64_reg_class regclass[MAX_CLASSES];
7174 int n = classify_argument (mode, type, regclass, 0);
7175
7176 *int_nregs = 0;
7177 *sse_nregs = 0;
7178
7179 if (!n)
7180 return true;
7181 for (n--; n >= 0; n--)
7182 switch (regclass[n])
7183 {
7184 case X86_64_INTEGER_CLASS:
7185 case X86_64_INTEGERSI_CLASS:
7186 (*int_nregs)++;
7187 break;
7188 case X86_64_SSE_CLASS:
7189 case X86_64_SSESF_CLASS:
7190 case X86_64_SSEDF_CLASS:
7191 (*sse_nregs)++;
7192 break;
7193 case X86_64_NO_CLASS:
7194 case X86_64_SSEUP_CLASS:
7195 break;
7196 case X86_64_X87_CLASS:
7197 case X86_64_X87UP_CLASS:
7198 case X86_64_COMPLEX_X87_CLASS:
7199 if (!in_return)
7200 return true;
7201 break;
7202 case X86_64_MEMORY_CLASS:
7203 gcc_unreachable ();
7204 }
7205
7206 return false;
7207 }
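/* Continuing the examples above: for struct { long l; double d; } this
   sets *INT_NREGS = 1 and *SSE_NREGS = 1 and returns false, while for an
   XFmode long double (X87 + X87UP) it returns true for an argument (pass
   it in memory) but false when IN_RETURN is nonzero, since the x87
   classes are legal for return values.  */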
7208
7209 /* Construct container for the argument used by GCC interface. See
7210 FUNCTION_ARG for the detailed description. */
7211
7212 static rtx
7213 construct_container (machine_mode mode, machine_mode orig_mode,
7214 const_tree type, int in_return, int nintregs, int nsseregs,
7215 const int *intreg, int sse_regno)
7216 {
7217 /* The following variables hold the static issued_error state. */
7218 static bool issued_sse_arg_error;
7219 static bool issued_sse_ret_error;
7220 static bool issued_x87_ret_error;
7221
7222 machine_mode tmpmode;
7223 int bytes =
7224 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7225 enum x86_64_reg_class regclass[MAX_CLASSES];
7226 int n;
7227 int i;
7228 int nexps = 0;
7229 int needed_sseregs, needed_intregs;
7230 rtx exp[MAX_CLASSES];
7231 rtx ret;
7232
7233 n = classify_argument (mode, type, regclass, 0);
7234 if (!n)
7235 return NULL;
7236 if (examine_argument (mode, type, in_return, &needed_intregs,
7237 &needed_sseregs))
7238 return NULL;
7239 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7240 return NULL;
7241
7242 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7243 some less clueful developer tries to use floating-point anyway. */
7244 if (needed_sseregs && !TARGET_SSE)
7245 {
7246 if (in_return)
7247 {
7248 if (!issued_sse_ret_error)
7249 {
7250 error ("SSE register return with SSE disabled");
7251 issued_sse_ret_error = true;
7252 }
7253 }
7254 else if (!issued_sse_arg_error)
7255 {
7256 error ("SSE register argument with SSE disabled");
7257 issued_sse_arg_error = true;
7258 }
7259 return NULL;
7260 }
7261
7262 /* Likewise, error if the ABI requires us to return values in the
7263 x87 registers and the user specified -mno-80387. */
7264 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7265 for (i = 0; i < n; i++)
7266 if (regclass[i] == X86_64_X87_CLASS
7267 || regclass[i] == X86_64_X87UP_CLASS
7268 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7269 {
7270 if (!issued_x87_ret_error)
7271 {
7272 error ("x87 register return with x87 disabled");
7273 issued_x87_ret_error = true;
7274 }
7275 return NULL;
7276 }
7277
7278 /* First construct simple cases. Avoid SCmode, since we want to use
7279 single register to pass this type. */
7280 if (n == 1 && mode != SCmode)
7281 switch (regclass[0])
7282 {
7283 case X86_64_INTEGER_CLASS:
7284 case X86_64_INTEGERSI_CLASS:
7285 return gen_rtx_REG (mode, intreg[0]);
7286 case X86_64_SSE_CLASS:
7287 case X86_64_SSESF_CLASS:
7288 case X86_64_SSEDF_CLASS:
7289 if (mode != BLKmode)
7290 return gen_reg_or_parallel (mode, orig_mode,
7291 SSE_REGNO (sse_regno));
7292 break;
7293 case X86_64_X87_CLASS:
7294 case X86_64_COMPLEX_X87_CLASS:
7295 return gen_rtx_REG (mode, FIRST_STACK_REG);
7296 case X86_64_NO_CLASS:
7297 /* Zero sized array, struct or class. */
7298 return NULL;
7299 default:
7300 gcc_unreachable ();
7301 }
7302 if (n == 2
7303 && regclass[0] == X86_64_SSE_CLASS
7304 && regclass[1] == X86_64_SSEUP_CLASS
7305 && mode != BLKmode)
7306 return gen_reg_or_parallel (mode, orig_mode,
7307 SSE_REGNO (sse_regno));
7308 if (n == 4
7309 && regclass[0] == X86_64_SSE_CLASS
7310 && regclass[1] == X86_64_SSEUP_CLASS
7311 && regclass[2] == X86_64_SSEUP_CLASS
7312 && regclass[3] == X86_64_SSEUP_CLASS
7313 && mode != BLKmode)
7314 return gen_reg_or_parallel (mode, orig_mode,
7315 SSE_REGNO (sse_regno));
7316 if (n == 8
7317 && regclass[0] == X86_64_SSE_CLASS
7318 && regclass[1] == X86_64_SSEUP_CLASS
7319 && regclass[2] == X86_64_SSEUP_CLASS
7320 && regclass[3] == X86_64_SSEUP_CLASS
7321 && regclass[4] == X86_64_SSEUP_CLASS
7322 && regclass[5] == X86_64_SSEUP_CLASS
7323 && regclass[6] == X86_64_SSEUP_CLASS
7324 && regclass[7] == X86_64_SSEUP_CLASS
7325 && mode != BLKmode)
7326 return gen_reg_or_parallel (mode, orig_mode,
7327 SSE_REGNO (sse_regno));
7328 if (n == 2
7329 && regclass[0] == X86_64_X87_CLASS
7330 && regclass[1] == X86_64_X87UP_CLASS)
7331 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7332
7333 if (n == 2
7334 && regclass[0] == X86_64_INTEGER_CLASS
7335 && regclass[1] == X86_64_INTEGER_CLASS
7336 && (mode == CDImode || mode == TImode)
7337 && intreg[0] + 1 == intreg[1])
7338 return gen_rtx_REG (mode, intreg[0]);
7339
7340 /* Otherwise figure out the entries of the PARALLEL. */
7341 for (i = 0; i < n; i++)
7342 {
7343 int pos;
7344
7345 switch (regclass[i])
7346 {
7347 case X86_64_NO_CLASS:
7348 break;
7349 case X86_64_INTEGER_CLASS:
7350 case X86_64_INTEGERSI_CLASS:
7351 /* Merge TImodes on aligned occasions here too. */
7352 if (i * 8 + 8 > bytes)
7353 tmpmode
7354 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7355 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7356 tmpmode = SImode;
7357 else
7358 tmpmode = DImode;
7359 /* We've requested 24 bytes for which
7360    we don't have a mode.  Use DImode.  */
7361 if (tmpmode == BLKmode)
7362 tmpmode = DImode;
7363 exp [nexps++]
7364 = gen_rtx_EXPR_LIST (VOIDmode,
7365 gen_rtx_REG (tmpmode, *intreg),
7366 GEN_INT (i*8));
7367 intreg++;
7368 break;
7369 case X86_64_SSESF_CLASS:
7370 exp [nexps++]
7371 = gen_rtx_EXPR_LIST (VOIDmode,
7372 gen_rtx_REG (SFmode,
7373 SSE_REGNO (sse_regno)),
7374 GEN_INT (i*8));
7375 sse_regno++;
7376 break;
7377 case X86_64_SSEDF_CLASS:
7378 exp [nexps++]
7379 = gen_rtx_EXPR_LIST (VOIDmode,
7380 gen_rtx_REG (DFmode,
7381 SSE_REGNO (sse_regno)),
7382 GEN_INT (i*8));
7383 sse_regno++;
7384 break;
7385 case X86_64_SSE_CLASS:
7386 pos = i;
7387 switch (n)
7388 {
7389 case 1:
7390 tmpmode = DImode;
7391 break;
7392 case 2:
7393 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7394 {
7395 tmpmode = TImode;
7396 i++;
7397 }
7398 else
7399 tmpmode = DImode;
7400 break;
7401 case 4:
7402 gcc_assert (i == 0
7403 && regclass[1] == X86_64_SSEUP_CLASS
7404 && regclass[2] == X86_64_SSEUP_CLASS
7405 && regclass[3] == X86_64_SSEUP_CLASS);
7406 tmpmode = OImode;
7407 i += 3;
7408 break;
7409 case 8:
7410 gcc_assert (i == 0
7411 && regclass[1] == X86_64_SSEUP_CLASS
7412 && regclass[2] == X86_64_SSEUP_CLASS
7413 && regclass[3] == X86_64_SSEUP_CLASS
7414 && regclass[4] == X86_64_SSEUP_CLASS
7415 && regclass[5] == X86_64_SSEUP_CLASS
7416 && regclass[6] == X86_64_SSEUP_CLASS
7417 && regclass[7] == X86_64_SSEUP_CLASS);
7418 tmpmode = XImode;
7419 i += 7;
7420 break;
7421 default:
7422 gcc_unreachable ();
7423 }
7424 exp [nexps++]
7425 = gen_rtx_EXPR_LIST (VOIDmode,
7426 gen_rtx_REG (tmpmode,
7427 SSE_REGNO (sse_regno)),
7428 GEN_INT (pos*8));
7429 sse_regno++;
7430 break;
7431 default:
7432 gcc_unreachable ();
7433 }
7434 }
7435
7436 /* Empty aligned struct, union or class. */
7437 if (nexps == 0)
7438 return NULL;
7439
7440 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7441 for (i = 0; i < nexps; i++)
7442 XVECEXP (ret, 0, i) = exp [i];
7443 return ret;
7444 }
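/* For instance, for the struct { long l; double d; } example the
   container built here is roughly

     (parallel [(expr_list (reg:DI di) (const_int 0))
                (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. byte 0 of the argument lives in %rdi and byte 8 in %xmm0.  */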
7445
7446 /* Update the data in CUM to advance over an argument of mode MODE
7447 and data type TYPE. (TYPE is null for libcalls where that information
7448 may not be available.)
7449
7450    Return the number of integer registers advanced over.  */
7451
7452 static int
7453 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7454 const_tree type, HOST_WIDE_INT bytes,
7455 HOST_WIDE_INT words)
7456 {
7457 int res = 0;
7458 bool error_p = false;
7459
7460 switch (mode)
7461 {
7462 default:
7463 break;
7464
7465 case BLKmode:
7466 if (bytes < 0)
7467 break;
7468 /* FALLTHRU */
7469
7470 case DImode:
7471 case SImode:
7472 case HImode:
7473 case QImode:
7474 cum->words += words;
7475 cum->nregs -= words;
7476 cum->regno += words;
7477 if (cum->nregs >= 0)
7478 res = words;
7479 if (cum->nregs <= 0)
7480 {
7481 cum->nregs = 0;
7482 cum->regno = 0;
7483 }
7484 break;
7485
7486 case OImode:
7487 /* OImode shouldn't be used directly. */
7488 gcc_unreachable ();
7489
7490 case DFmode:
7491 if (cum->float_in_sse == -1)
7492 error_p = 1;
7493 if (cum->float_in_sse < 2)
7494 break;
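      /* FALLTHRU */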
7495 case SFmode:
7496 if (cum->float_in_sse == -1)
7497 error_p = 1;
7498 if (cum->float_in_sse < 1)
7499 break;
7500 /* FALLTHRU */
7501
7502 case V8SFmode:
7503 case V8SImode:
7504 case V64QImode:
7505 case V32HImode:
7506 case V16SImode:
7507 case V8DImode:
7508 case V16SFmode:
7509 case V8DFmode:
7510 case V32QImode:
7511 case V16HImode:
7512 case V4DFmode:
7513 case V4DImode:
7514 case TImode:
7515 case V16QImode:
7516 case V8HImode:
7517 case V4SImode:
7518 case V2DImode:
7519 case V4SFmode:
7520 case V2DFmode:
7521 if (!type || !AGGREGATE_TYPE_P (type))
7522 {
7523 cum->sse_words += words;
7524 cum->sse_nregs -= 1;
7525 cum->sse_regno += 1;
7526 if (cum->sse_nregs <= 0)
7527 {
7528 cum->sse_nregs = 0;
7529 cum->sse_regno = 0;
7530 }
7531 }
7532 break;
7533
7534 case V8QImode:
7535 case V4HImode:
7536 case V2SImode:
7537 case V2SFmode:
7538 case V1TImode:
7539 case V1DImode:
7540 if (!type || !AGGREGATE_TYPE_P (type))
7541 {
7542 cum->mmx_words += words;
7543 cum->mmx_nregs -= 1;
7544 cum->mmx_regno += 1;
7545 if (cum->mmx_nregs <= 0)
7546 {
7547 cum->mmx_nregs = 0;
7548 cum->mmx_regno = 0;
7549 }
7550 }
7551 break;
7552 }
7553 if (error_p)
7554 {
7555 cum->float_in_sse = 0;
7556 error ("calling %qD with SSE calling convention without "
7557 "SSE/SSE2 enabled", cum->decl);
7558 sorry ("this is a GCC bug that can be worked around by adding "
7559 "attribute used to function called");
7560 }
7561
7562 return res;
7563 }
7564
7565 static int
7566 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7567 const_tree type, HOST_WIDE_INT words, bool named)
7568 {
7569 int int_nregs, sse_nregs;
7570
7571 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
7572 if (!named && (VALID_AVX512F_REG_MODE (mode)
7573 || VALID_AVX256_REG_MODE (mode)))
7574 return 0;
7575
7576 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7577 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7578 {
7579 cum->nregs -= int_nregs;
7580 cum->sse_nregs -= sse_nregs;
7581 cum->regno += int_nregs;
7582 cum->sse_regno += sse_nregs;
7583 return int_nregs;
7584 }
7585 else
7586 {
7587 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7588 cum->words = (cum->words + align - 1) & ~(align - 1);
7589 cum->words += words;
7590 return 0;
7591 }
7592 }
7593
7594 static int
7595 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7596 HOST_WIDE_INT words)
7597 {
7598 /* Otherwise, this should be passed indirectly.  */
7599 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7600
7601 cum->words += words;
7602 if (cum->nregs > 0)
7603 {
7604 cum->nregs -= 1;
7605 cum->regno += 1;
7606 return 1;
7607 }
7608 return 0;
7609 }
7610
7611 /* Update the data in CUM to advance over an argument of mode MODE and
7612 data type TYPE. (TYPE is null for libcalls where that information
7613 may not be available.) */
7614
7615 static void
7616 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7617 const_tree type, bool named)
7618 {
7619 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7620 HOST_WIDE_INT bytes, words;
7621 int nregs;
7622
7623 if (mode == BLKmode)
7624 bytes = int_size_in_bytes (type);
7625 else
7626 bytes = GET_MODE_SIZE (mode);
7627 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7628
7629 if (type)
7630 mode = type_natural_mode (type, NULL, false);
7631
7632 if ((type && POINTER_BOUNDS_TYPE_P (type))
7633 || POINTER_BOUNDS_MODE_P (mode))
7634 {
7635 /* If we pass bounds in BT then just update remained bounds count. */
7636 if (cum->bnds_in_bt)
7637 {
7638 cum->bnds_in_bt--;
7639 return;
7640 }
7641
7642 /* Update remained number of bounds to force. */
7643 if (cum->force_bnd_pass)
7644 cum->force_bnd_pass--;
7645
7646 cum->bnd_regno++;
7647
7648 return;
7649 }
7650
7651 /* The first arg not going to Bounds Tables resets this counter. */
7652 cum->bnds_in_bt = 0;
7653 /* For unnamed args we always pass bounds to avoid bounds mess when
7654 passed and received types do not match. If bounds do not follow
7655 unnamed arg, still pretend required number of bounds were passed. */
7656 if (cum->force_bnd_pass)
7657 {
7658 cum->bnd_regno += cum->force_bnd_pass;
7659 cum->force_bnd_pass = 0;
7660 }
7661
7662 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7663 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7664 else if (TARGET_64BIT)
7665 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7666 else
7667 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7668
7669 /* For stdarg we expect bounds to be passed for each value passed
7670 in register. */
7671 if (cum->stdarg)
7672 cum->force_bnd_pass = nregs;
7673 /* For pointers passed in memory we expect bounds passed in Bounds
7674 Table. */
7675 if (!nregs)
7676 cum->bnds_in_bt = chkp_type_bounds_count (type);
7677 }
7678
7679 /* Define where to put the arguments to a function.
7680 Value is zero to push the argument on the stack,
7681 or a hard register in which to store the argument.
7682
7683 MODE is the argument's machine mode.
7684 TYPE is the data type of the argument (as a tree).
7685 This is null for libcalls where that information may
7686 not be available.
7687 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7688 the preceding args and about the function being called.
7689 NAMED is nonzero if this argument is a named parameter
7690 (otherwise it is an extra parameter matching an ellipsis). */
7691
7692 static rtx
7693 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7694 machine_mode orig_mode, const_tree type,
7695 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7696 {
7697 bool error_p = false;
7698 /* Avoid the AL settings for the Unix64 ABI. */
7699 if (mode == VOIDmode)
7700 return constm1_rtx;
7701
7702 switch (mode)
7703 {
7704 default:
7705 break;
7706
7707 case BLKmode:
7708 if (bytes < 0)
7709 break;
7710 /* FALLTHRU */
7711 case DImode:
7712 case SImode:
7713 case HImode:
7714 case QImode:
7715 if (words <= cum->nregs)
7716 {
7717 int regno = cum->regno;
7718
7719 /* Fastcall allocates the first two DWORD (SImode) or
7720 smaller arguments to ECX and EDX if it isn't an
7721    aggregate type.  */
7722 if (cum->fastcall)
7723 {
7724 if (mode == BLKmode
7725 || mode == DImode
7726 || (type && AGGREGATE_TYPE_P (type)))
7727 break;
7728
7729 /* ECX not EAX is the first allocated register. */
7730 if (regno == AX_REG)
7731 regno = CX_REG;
7732 }
7733 return gen_rtx_REG (mode, regno);
7734 }
7735 break;
7736
7737 case DFmode:
7738 if (cum->float_in_sse == -1)
7739 error_p = 1;
7740 if (cum->float_in_sse < 2)
7741 break;
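      /* FALLTHRU */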
7742 case SFmode:
7743 if (cum->float_in_sse == -1)
7744 error_p = 1;
7745 if (cum->float_in_sse < 1)
7746 break;
7747 /* FALLTHRU */
7748 case TImode:
7749 /* In 32bit, we pass TImode in xmm registers. */
7750 case V16QImode:
7751 case V8HImode:
7752 case V4SImode:
7753 case V2DImode:
7754 case V4SFmode:
7755 case V2DFmode:
7756 if (!type || !AGGREGATE_TYPE_P (type))
7757 {
7758 if (cum->sse_nregs)
7759 return gen_reg_or_parallel (mode, orig_mode,
7760 cum->sse_regno + FIRST_SSE_REG);
7761 }
7762 break;
7763
7764 case OImode:
7765 case XImode:
7766 /* OImode and XImode shouldn't be used directly. */
7767 gcc_unreachable ();
7768
7769 case V64QImode:
7770 case V32HImode:
7771 case V16SImode:
7772 case V8DImode:
7773 case V16SFmode:
7774 case V8DFmode:
7775 case V8SFmode:
7776 case V8SImode:
7777 case V32QImode:
7778 case V16HImode:
7779 case V4DFmode:
7780 case V4DImode:
7781 if (!type || !AGGREGATE_TYPE_P (type))
7782 {
7783 if (cum->sse_nregs)
7784 return gen_reg_or_parallel (mode, orig_mode,
7785 cum->sse_regno + FIRST_SSE_REG);
7786 }
7787 break;
7788
7789 case V8QImode:
7790 case V4HImode:
7791 case V2SImode:
7792 case V2SFmode:
7793 case V1TImode:
7794 case V1DImode:
7795 if (!type || !AGGREGATE_TYPE_P (type))
7796 {
7797 if (cum->mmx_nregs)
7798 return gen_reg_or_parallel (mode, orig_mode,
7799 cum->mmx_regno + FIRST_MMX_REG);
7800 }
7801 break;
7802 }
7803 if (error_p)
7804 {
7805 cum->float_in_sse = 0;
7806 error ("calling %qD with SSE calling convention without "
7807 "SSE/SSE2 enabled", cum->decl);
7808 sorry ("this is a GCC bug that can be worked around by adding "
7809 "attribute used to function called");
7810 }
7811
7812 return NULL_RTX;
7813 }
7814
7815 static rtx
7816 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7817 machine_mode orig_mode, const_tree type, bool named)
7818 {
7819 /* Handle a hidden AL argument containing number of registers
7820 for varargs x86-64 functions. */
7821 if (mode == VOIDmode)
7822 return GEN_INT (cum->maybe_vaarg
7823 ? (cum->sse_nregs < 0
7824 ? X86_64_SSE_REGPARM_MAX
7825 : cum->sse_regno)
7826 : -1);
7827
7828 switch (mode)
7829 {
7830 default:
7831 break;
7832
7833 case V8SFmode:
7834 case V8SImode:
7835 case V32QImode:
7836 case V16HImode:
7837 case V4DFmode:
7838 case V4DImode:
7839 case V16SFmode:
7840 case V16SImode:
7841 case V64QImode:
7842 case V32HImode:
7843 case V8DFmode:
7844 case V8DImode:
7845 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
7846 if (!named)
7847 return NULL;
7848 break;
7849 }
7850
7851 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7852 cum->sse_nregs,
7853 &x86_64_int_parameter_registers [cum->regno],
7854 cum->sse_regno);
7855 }
7856
7857 static rtx
7858 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7859 machine_mode orig_mode, bool named,
7860 HOST_WIDE_INT bytes)
7861 {
7862 unsigned int regno;
7863
7864 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7865    We use the value -2 to specify that the current function call uses MS_ABI.  */
7866 if (mode == VOIDmode)
7867 return GEN_INT (-2);
7868
7869 /* If we've run out of registers, it goes on the stack. */
7870 if (cum->nregs == 0)
7871 return NULL_RTX;
7872
7873 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7874
7875 /* Only floating point modes are passed in anything but integer regs. */
7876 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7877 {
7878 if (named)
7879 regno = cum->regno + FIRST_SSE_REG;
7880 else
7881 {
7882 rtx t1, t2;
7883
7884 /* Unnamed floating parameters are passed in both the
7885 SSE and integer registers. */
7886 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7887 t2 = gen_rtx_REG (mode, regno);
7888 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7889 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7890 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7891 }
7892 }
7893 /* Handle aggregate types passed in registers.  */
7894 if (orig_mode == BLKmode)
7895 {
7896 if (bytes > 0 && bytes <= 8)
7897 mode = (bytes > 4 ? DImode : SImode);
7898 if (mode == BLKmode)
7899 mode = DImode;
7900 }
7901
7902 return gen_reg_or_parallel (mode, orig_mode, regno);
7903 }
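/* A short illustration of the MS ABI handling above (the function name is
   illustrative): for

     void f (int a, double b, int c, double d);

   the argument slot, not the type, selects the register, so A goes in
   %ecx, B in %xmm1, C in %r8d and D in %xmm3; an unnamed double in a
   varargs call is additionally duplicated into the corresponding integer
   register, which is what the two-element PARALLEL built above
   expresses.  */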
7904
7905 /* Return where to put the arguments to a function.
7906 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7907
7908 MODE is the argument's machine mode. TYPE is the data type of the
7909 argument. It is null for libcalls where that information may not be
7910 available. CUM gives information about the preceding args and about
7911 the function being called. NAMED is nonzero if this argument is a
7912 named parameter (otherwise it is an extra parameter matching an
7913 ellipsis). */
7914
7915 static rtx
7916 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
7917 const_tree type, bool named)
7918 {
7919 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7920 machine_mode mode = omode;
7921 HOST_WIDE_INT bytes, words;
7922 rtx arg;
7923
7924 /* All pointer bounds arguments are handled separately here.  */
7925 if ((type && POINTER_BOUNDS_TYPE_P (type))
7926 || POINTER_BOUNDS_MODE_P (mode))
7927 {
7928 /* Return NULL if bounds are forced to go in Bounds Table. */
7929 if (cum->bnds_in_bt)
7930 arg = NULL;
7931 /* Return the next available bound reg if any. */
7932 else if (cum->bnd_regno <= LAST_BND_REG)
7933 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
7934 /* Return the next special slot number otherwise. */
7935 else
7936 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
7937
7938 return arg;
7939 }
7940
7941 if (mode == BLKmode)
7942 bytes = int_size_in_bytes (type);
7943 else
7944 bytes = GET_MODE_SIZE (mode);
7945 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7946
7947 /* To simplify the code below, represent vector types with a vector mode
7948 even if MMX/SSE are not active. */
7949 if (type && TREE_CODE (type) == VECTOR_TYPE)
7950 mode = type_natural_mode (type, cum, false);
7951
7952 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7953 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7954 else if (TARGET_64BIT)
7955 arg = function_arg_64 (cum, mode, omode, type, named);
7956 else
7957 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7958
7959 return arg;
7960 }
7961
7962 /* A C expression that indicates when an argument must be passed by
7963 reference. If nonzero for an argument, a copy of that argument is
7964 made in memory and a pointer to the argument is passed instead of
7965 the argument itself. The pointer is passed in whatever way is
7966 appropriate for passing a pointer to that type. */
7967
7968 static bool
7969 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7970 const_tree type, bool)
7971 {
7972 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7973
7974 /* Bounds are never passed by reference. */
7975 if ((type && POINTER_BOUNDS_TYPE_P (type))
7976 || POINTER_BOUNDS_MODE_P (mode))
7977 return false;
7978
7979 /* See Windows x64 Software Convention. */
7980 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7981 {
7982 int msize = (int) GET_MODE_SIZE (mode);
7983 if (type)
7984 {
7985 /* Arrays are passed by reference. */
7986 if (TREE_CODE (type) == ARRAY_TYPE)
7987 return true;
7988
7989 if (AGGREGATE_TYPE_P (type))
7990 {
7991 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7992 are passed by reference. */
7993 msize = int_size_in_bytes (type);
7994 }
7995 }
7996
7997 /* __m128 is passed by reference. */
7998 switch (msize) {
7999 case 1: case 2: case 4: case 8:
8000 break;
8001 default:
8002 return true;
8003 }
8004 }
8005 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
8006 return true;
8007
8008 return false;
8009 }
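/* Examples of the Windows x64 rules above: an aggregate of 1, 2, 4 or 8
   bytes (e.g. struct { char c[8]; }) is passed by value in a register or
   stack slot, while struct { char c[12]; } and __m128 are passed by
   reference, i.e. the caller makes a copy and passes its address.  */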
8010
8011 /* Return true when TYPE should be 128bit aligned for 32bit argument
8012 passing ABI. XXX: This function is obsolete and is only used for
8013 checking psABI compatibility with previous versions of GCC. */
8014
8015 static bool
8016 ix86_compat_aligned_value_p (const_tree type)
8017 {
8018 machine_mode mode = TYPE_MODE (type);
8019 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
8020 || mode == TDmode
8021 || mode == TFmode
8022 || mode == TCmode)
8023 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
8024 return true;
8025 if (TYPE_ALIGN (type) < 128)
8026 return false;
8027
8028 if (AGGREGATE_TYPE_P (type))
8029 {
8030 /* Walk the aggregates recursively. */
8031 switch (TREE_CODE (type))
8032 {
8033 case RECORD_TYPE:
8034 case UNION_TYPE:
8035 case QUAL_UNION_TYPE:
8036 {
8037 tree field;
8038
8039 /* Walk all the structure fields. */
8040 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8041 {
8042 if (TREE_CODE (field) == FIELD_DECL
8043 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
8044 return true;
8045 }
8046 break;
8047 }
8048
8049 case ARRAY_TYPE:
8050 /* Just for use if some languages pass arrays by value.  */
8051 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
8052 return true;
8053 break;
8054
8055 default:
8056 gcc_unreachable ();
8057 }
8058 }
8059 return false;
8060 }
8061
8062 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
8063 XXX: This function is obsolete and is only used for checking psABI
8064 compatibility with previous versions of GCC. */
8065
8066 static unsigned int
8067 ix86_compat_function_arg_boundary (machine_mode mode,
8068 const_tree type, unsigned int align)
8069 {
8070 /* In 32bit, only _Decimal128 and __float128 are aligned to their
8071 natural boundaries. */
8072 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
8073 {
8074 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
8075 make an exception for SSE modes since these require 128bit
8076 alignment.
8077
8078 The handling here differs from field_alignment. ICC aligns MMX
8079 arguments to 4 byte boundaries, while structure fields are aligned
8080 to 8 byte boundaries. */
8081 if (!type)
8082 {
8083 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
8084 align = PARM_BOUNDARY;
8085 }
8086 else
8087 {
8088 if (!ix86_compat_aligned_value_p (type))
8089 align = PARM_BOUNDARY;
8090 }
8091 }
8092 if (align > BIGGEST_ALIGNMENT)
8093 align = BIGGEST_ALIGNMENT;
8094 return align;
8095 }
8096
8097 /* Return true when TYPE should be 128bit aligned for 32bit argument
8098 passing ABI. */
8099
8100 static bool
8101 ix86_contains_aligned_value_p (const_tree type)
8102 {
8103 machine_mode mode = TYPE_MODE (type);
8104
8105 if (mode == XFmode || mode == XCmode)
8106 return false;
8107
8108 if (TYPE_ALIGN (type) < 128)
8109 return false;
8110
8111 if (AGGREGATE_TYPE_P (type))
8112 {
8113 /* Walk the aggregates recursively. */
8114 switch (TREE_CODE (type))
8115 {
8116 case RECORD_TYPE:
8117 case UNION_TYPE:
8118 case QUAL_UNION_TYPE:
8119 {
8120 tree field;
8121
8122 /* Walk all the structure fields. */
8123 for (field = TYPE_FIELDS (type);
8124 field;
8125 field = DECL_CHAIN (field))
8126 {
8127 if (TREE_CODE (field) == FIELD_DECL
8128 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
8129 return true;
8130 }
8131 break;
8132 }
8133
8134 case ARRAY_TYPE:
8135 /* Just for use if some languages pass arrays by value.  */
8136 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
8137 return true;
8138 break;
8139
8140 default:
8141 gcc_unreachable ();
8142 }
8143 }
8144 else
8145 return TYPE_ALIGN (type) >= 128;
8146
8147 return false;
8148 }
8149
8150 /* Gives the alignment boundary, in bits, of an argument with the
8151 specified mode and type. */
8152
8153 static unsigned int
8154 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8155 {
8156 unsigned int align;
8157 if (type)
8158 {
8159 /* Since the main variant type is used for the call, convert the
8160    type to its main variant.  */
8161 type = TYPE_MAIN_VARIANT (type);
8162 align = TYPE_ALIGN (type);
8163 }
8164 else
8165 align = GET_MODE_ALIGNMENT (mode);
8166 if (align < PARM_BOUNDARY)
8167 align = PARM_BOUNDARY;
8168 else
8169 {
8170 static bool warned;
8171 unsigned int saved_align = align;
8172
8173 if (!TARGET_64BIT)
8174 {
8175 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8176 if (!type)
8177 {
8178 if (mode == XFmode || mode == XCmode)
8179 align = PARM_BOUNDARY;
8180 }
8181 else if (!ix86_contains_aligned_value_p (type))
8182 align = PARM_BOUNDARY;
8183
8184 if (align < 128)
8185 align = PARM_BOUNDARY;
8186 }
8187
8188 if (warn_psabi
8189 && !warned
8190 && align != ix86_compat_function_arg_boundary (mode, type,
8191 saved_align))
8192 {
8193 warned = true;
8194 inform (input_location,
8195 "The ABI for passing parameters with %d-byte"
8196 " alignment has changed in GCC 4.6",
8197 align / BITS_PER_UNIT);
8198 }
8199 }
8200
8201 return align;
8202 }
8203
8204 /* Return true if N is a possible register number of function value. */
8205
8206 static bool
8207 ix86_function_value_regno_p (const unsigned int regno)
8208 {
8209 switch (regno)
8210 {
8211 case AX_REG:
8212 return true;
8213 case DX_REG:
8214 return (!TARGET_64BIT || ix86_abi != MS_ABI);
8215 case DI_REG:
8216 case SI_REG:
8217 return TARGET_64BIT && ix86_abi != MS_ABI;
8218
8219 case FIRST_BND_REG:
8220 return chkp_function_instrumented_p (current_function_decl);
8221
8222 /* Complex values are returned in %st(0)/%st(1) pair. */
8223 case ST0_REG:
8224 case ST1_REG:
8225 /* TODO: The function should depend on current function ABI but
8226 builtins.c would need updating then. Therefore we use the
8227 default ABI. */
8228 if (TARGET_64BIT && ix86_abi == MS_ABI)
8229 return false;
8230 return TARGET_FLOAT_RETURNS_IN_80387;
8231
8232 /* Complex values are returned in %xmm0/%xmm1 pair. */
8233 case XMM0_REG:
8234 case XMM1_REG:
8235 return TARGET_SSE;
8236
8237 case MM0_REG:
8238 if (TARGET_MACHO || TARGET_64BIT)
8239 return false;
8240 return TARGET_MMX;
8241 }
8242
8243 return false;
8244 }
8245
8246 /* Define how to find the value returned by a function.
8247 VALTYPE is the data type of the value (as a tree).
8248 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8249 otherwise, FUNC is 0. */
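/* In short, for the 32-bit ABI implemented below: 8-byte vectors are
   returned in %mm0, TImode and 16/32/64-byte vectors in %xmm0/%ymm0/%zmm0,
   x87 floats in %st(0) unless -mno-fp-ret-in-387, and everything else in
   %eax; SFmode and DFmode may be redirected to %xmm0 for sseregparm
   functions. */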
8250
8251 static rtx
8252 function_value_32 (machine_mode orig_mode, machine_mode mode,
8253 const_tree fntype, const_tree fn)
8254 {
8255 unsigned int regno;
8256
8257 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8258 we normally prevent this case when mmx is not available. However
8259 some ABIs may require the result to be returned like DImode. */
8260 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8261 regno = FIRST_MMX_REG;
8262
8263 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8264 we prevent this case when sse is not available. However some ABIs
8265 may require the result to be returned like integer TImode. */
8266 else if (mode == TImode
8267 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8268 regno = FIRST_SSE_REG;
8269
8270 /* 32-byte vector modes in %ymm0. */
8271 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8272 regno = FIRST_SSE_REG;
8273
8274 /* 64-byte vector modes in %zmm0. */
8275 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8276 regno = FIRST_SSE_REG;
8277
8278 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8279 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8280 regno = FIRST_FLOAT_REG;
8281 else
8282 /* Most things go in %eax. */
8283 regno = AX_REG;
8284
8285 /* Override FP return register with %xmm0 for local functions when
8286 SSE math is enabled or for functions with sseregparm attribute. */
8287 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8288 {
8289 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8290 if (sse_level == -1)
8291 {
8292 error ("calling %qD with SSE caling convention without "
8293 "SSE/SSE2 enabled", fn);
8294 sorry ("this is a GCC bug that can be worked around by adding "
8295 "attribute used to function called");
8296 }
8297 else if ((sse_level >= 1 && mode == SFmode)
8298 || (sse_level == 2 && mode == DFmode))
8299 regno = FIRST_SSE_REG;
8300 }
8301
8302 /* OImode shouldn't be used directly. */
8303 gcc_assert (mode != OImode);
8304
8305 return gen_rtx_REG (orig_mode, regno);
8306 }
8307
8308 static rtx
8309 function_value_64 (machine_mode orig_mode, machine_mode mode,
8310 const_tree valtype)
8311 {
8312 rtx ret;
8313
8314 /* Handle libcalls, which don't provide a type node. */
8315 if (valtype == NULL)
8316 {
8317 unsigned int regno;
8318
8319 switch (mode)
8320 {
8321 case SFmode:
8322 case SCmode:
8323 case DFmode:
8324 case DCmode:
8325 case TFmode:
8326 case SDmode:
8327 case DDmode:
8328 case TDmode:
8329 regno = FIRST_SSE_REG;
8330 break;
8331 case XFmode:
8332 case XCmode:
8333 regno = FIRST_FLOAT_REG;
8334 break;
8335 case TCmode:
8336 return NULL;
8337 default:
8338 regno = AX_REG;
8339 }
8340
8341 return gen_rtx_REG (mode, regno);
8342 }
8343 else if (POINTER_TYPE_P (valtype))
8344 {
8345 /* Pointers are always returned in word_mode. */
8346 mode = word_mode;
8347 }
8348
8349 ret = construct_container (mode, orig_mode, valtype, 1,
8350 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8351 x86_64_int_return_registers, 0);
8352
8353 /* For zero-sized structures, construct_container returns NULL, but we
8354 need to keep the rest of the compiler happy by returning a meaningful value. */
8355 if (!ret)
8356 ret = gen_rtx_REG (orig_mode, AX_REG);
8357
8358 return ret;
8359 }
8360
8361 static rtx
8362 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8363 const_tree valtype)
8364 {
8365 unsigned int regno = AX_REG;
8366
8367 if (TARGET_SSE)
8368 {
8369 switch (GET_MODE_SIZE (mode))
8370 {
8371 case 16:
8372 if (valtype != NULL_TREE
8373 && !VECTOR_INTEGER_TYPE_P (valtype)
8374 && !VECTOR_INTEGER_TYPE_P (valtype)
8375 && !INTEGRAL_TYPE_P (valtype)
8376 && !VECTOR_FLOAT_TYPE_P (valtype))
8377 break;
8378 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8379 && !COMPLEX_MODE_P (mode))
8380 regno = FIRST_SSE_REG;
8381 break;
8382 case 8:
8383 case 4:
8384 if (mode == SFmode || mode == DFmode)
8385 regno = FIRST_SSE_REG;
8386 break;
8387 default:
8388 break;
8389 }
8390 }
8391 return gen_rtx_REG (orig_mode, regno);
8392 }
8393
8394 static rtx
8395 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8396 machine_mode orig_mode, machine_mode mode)
8397 {
8398 const_tree fn, fntype;
8399
8400 fn = NULL_TREE;
8401 if (fntype_or_decl && DECL_P (fntype_or_decl))
8402 fn = fntype_or_decl;
8403 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8404
8405 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8406 || POINTER_BOUNDS_MODE_P (mode))
8407 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8408 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8409 return function_value_ms_64 (orig_mode, mode, valtype);
8410 else if (TARGET_64BIT)
8411 return function_value_64 (orig_mode, mode, valtype);
8412 else
8413 return function_value_32 (orig_mode, mode, fntype, fn);
8414 }
8415
8416 static rtx
8417 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8418 {
8419 machine_mode mode, orig_mode;
8420
8421 orig_mode = TYPE_MODE (valtype);
8422 mode = type_natural_mode (valtype, NULL, true);
8423 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8424 }
8425
8426 /* Return an RTX representing a place where a function returns
8427 or receives pointer bounds, or NULL if no bounds are returned.
8428
8429 VALTYPE is a data type of a value returned by the function.
8430
8431 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8432 or FUNCTION_TYPE of the function.
8433
8434 If OUTGOING is false, return a place in which the caller will
8435 see the return value. Otherwise, return a place where a
8436 function returns a value. */
8437
8438 static rtx
8439 ix86_function_value_bounds (const_tree valtype,
8440 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8441 bool outgoing ATTRIBUTE_UNUSED)
8442 {
8443 rtx res = NULL_RTX;
8444
8445 if (BOUNDED_TYPE_P (valtype))
8446 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8447 else if (chkp_type_has_pointer (valtype))
8448 {
8449 bitmap slots;
8450 rtx bounds[2];
8451 bitmap_iterator bi;
8452 unsigned i, bnd_no = 0;
8453
8454 bitmap_obstack_initialize (NULL);
8455 slots = BITMAP_ALLOC (NULL);
8456 chkp_find_bound_slots (valtype, slots);
8457
8458 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8459 {
8460 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8461 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8462 gcc_assert (bnd_no < 2);
8463 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8464 }
8465
8466 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8467
8468 BITMAP_FREE (slots);
8469 bitmap_obstack_release (NULL);
8470 }
8471 else
8472 res = NULL_RTX;
8473
8474 return res;
8475 }
8476
8477 /* Pointer function arguments and return values are promoted to
8478 word_mode. */
8479
8480 static machine_mode
8481 ix86_promote_function_mode (const_tree type, machine_mode mode,
8482 int *punsignedp, const_tree fntype,
8483 int for_return)
8484 {
8485 if (type != NULL_TREE && POINTER_TYPE_P (type))
8486 {
8487 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8488 return word_mode;
8489 }
8490 return default_promote_function_mode (type, mode, punsignedp, fntype,
8491 for_return);
8492 }
8493
8494 /* Return true if a structure, union or array with MODE containing FIELD
8495 should be accessed using BLKmode. */
8496
8497 static bool
8498 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8499 {
8500 /* Union with XFmode must be in BLKmode. */
8501 return (mode == XFmode
8502 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8503 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8504 }
8505
8506 rtx
8507 ix86_libcall_value (machine_mode mode)
8508 {
8509 return ix86_function_value_1 (NULL, NULL, mode, mode);
8510 }
8511
8512 /* Return true iff type is returned in memory. */
8513
8514 static bool
8515 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8516 {
8517 #ifdef SUBTARGET_RETURN_IN_MEMORY
8518 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8519 #else
8520 const machine_mode mode = type_natural_mode (type, NULL, true);
8521 HOST_WIDE_INT size;
8522
8523 if (POINTER_BOUNDS_TYPE_P (type))
8524 return false;
8525
8526 if (TARGET_64BIT)
8527 {
8528 if (ix86_function_type_abi (fntype) == MS_ABI)
8529 {
8530 size = int_size_in_bytes (type);
8531
8532 /* __m128 is returned in xmm0. */
8533 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8534 || INTEGRAL_TYPE_P (type)
8535 || VECTOR_FLOAT_TYPE_P (type))
8536 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8537 && !COMPLEX_MODE_P (mode)
8538 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8539 return false;
8540
8541 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8542 return size != 1 && size != 2 && size != 4 && size != 8;
8543 }
8544 else
8545 {
8546 int needed_intregs, needed_sseregs;
8547
8548 return examine_argument (mode, type, 1,
8549 &needed_intregs, &needed_sseregs);
8550 }
8551 }
8552 else
8553 {
8554 if (mode == BLKmode)
8555 return true;
8556
8557 size = int_size_in_bytes (type);
8558
8559 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8560 return false;
8561
8562 if (VECTOR_MODE_P (mode) || mode == TImode)
8563 {
8564 /* User-created vectors small enough to fit in EAX. */
8565 if (size < 8)
8566 return false;
8567
8568 /* Unless the ABI prescribes otherwise,
8569 MMX/3dNow values are returned in MM0 if available. */
8570
8571 if (size == 8)
8572 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8573
8574 /* SSE values are returned in XMM0 if available. */
8575 if (size == 16)
8576 return !TARGET_SSE;
8577
8578 /* AVX values are returned in YMM0 if available. */
8579 if (size == 32)
8580 return !TARGET_AVX;
8581
8582 /* AVX512F values are returned in ZMM0 if available. */
8583 if (size == 64)
8584 return !TARGET_AVX512F;
8585 }
8586
8587 if (mode == XFmode)
8588 return false;
8589
8590 if (size > 12)
8591 return true;
8592
8593 /* OImode shouldn't be used directly. */
8594 gcc_assert (mode != OImode);
8595
8596 return false;
8597 }
8598 #endif
8599 }
8600
8601 \f
8602 /* Create the va_list data type. */
8603
8604 /* Returns the calling-convention-specific va_list data type.
8605 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
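/* For reference, the 64-bit SYSV record built below corresponds roughly to
   the following C-level layout (an illustrative sketch; the field names
   match the FIELD_DECLs created here):

     typedef struct __va_list_tag {
       unsigned int gp_offset;    - byte offset into reg_save_area for GPRs
       unsigned int fp_offset;    - byte offset into reg_save_area for XMMs
       void *overflow_arg_area;   - arguments passed on the stack
       void *reg_save_area;       - register save area filled by the prologue
     } __builtin_va_list[1];  */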
8606
8607 static tree
8608 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8609 {
8610 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8611
8612 /* For i386 we use a plain pointer to the argument area. */
8613 if (!TARGET_64BIT || abi == MS_ABI)
8614 return build_pointer_type (char_type_node);
8615
8616 record = lang_hooks.types.make_type (RECORD_TYPE);
8617 type_decl = build_decl (BUILTINS_LOCATION,
8618 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8619
8620 f_gpr = build_decl (BUILTINS_LOCATION,
8621 FIELD_DECL, get_identifier ("gp_offset"),
8622 unsigned_type_node);
8623 f_fpr = build_decl (BUILTINS_LOCATION,
8624 FIELD_DECL, get_identifier ("fp_offset"),
8625 unsigned_type_node);
8626 f_ovf = build_decl (BUILTINS_LOCATION,
8627 FIELD_DECL, get_identifier ("overflow_arg_area"),
8628 ptr_type_node);
8629 f_sav = build_decl (BUILTINS_LOCATION,
8630 FIELD_DECL, get_identifier ("reg_save_area"),
8631 ptr_type_node);
8632
8633 va_list_gpr_counter_field = f_gpr;
8634 va_list_fpr_counter_field = f_fpr;
8635
8636 DECL_FIELD_CONTEXT (f_gpr) = record;
8637 DECL_FIELD_CONTEXT (f_fpr) = record;
8638 DECL_FIELD_CONTEXT (f_ovf) = record;
8639 DECL_FIELD_CONTEXT (f_sav) = record;
8640
8641 TYPE_STUB_DECL (record) = type_decl;
8642 TYPE_NAME (record) = type_decl;
8643 TYPE_FIELDS (record) = f_gpr;
8644 DECL_CHAIN (f_gpr) = f_fpr;
8645 DECL_CHAIN (f_fpr) = f_ovf;
8646 DECL_CHAIN (f_ovf) = f_sav;
8647
8648 layout_type (record);
8649
8650 /* The correct type is an array type of one element. */
8651 return build_array_type (record, build_index_type (size_zero_node));
8652 }
8653
8654 /* Set up the builtin va_list data type and, for 64-bit, the additional
8655 calling-convention-specific va_list data types. */
8656
8657 static tree
8658 ix86_build_builtin_va_list (void)
8659 {
8660 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8661
8662 /* Initialize abi specific va_list builtin types. */
8663 if (TARGET_64BIT)
8664 {
8665 tree t;
8666 if (ix86_abi == MS_ABI)
8667 {
8668 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8669 if (TREE_CODE (t) != RECORD_TYPE)
8670 t = build_variant_type_copy (t);
8671 sysv_va_list_type_node = t;
8672 }
8673 else
8674 {
8675 t = ret;
8676 if (TREE_CODE (t) != RECORD_TYPE)
8677 t = build_variant_type_copy (t);
8678 sysv_va_list_type_node = t;
8679 }
8680 if (ix86_abi != MS_ABI)
8681 {
8682 t = ix86_build_builtin_va_list_abi (MS_ABI);
8683 if (TREE_CODE (t) != RECORD_TYPE)
8684 t = build_variant_type_copy (t);
8685 ms_va_list_type_node = t;
8686 }
8687 else
8688 {
8689 t = ret;
8690 if (TREE_CODE (t) != RECORD_TYPE)
8691 t = build_variant_type_copy (t);
8692 ms_va_list_type_node = t;
8693 }
8694 }
8695
8696 return ret;
8697 }
8698
8699 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8700
8701 static void
8702 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8703 {
8704 rtx save_area, mem;
8705 alias_set_type set;
8706 int i, max;
8707
8708 /* GPR size of varargs save area. */
8709 if (cfun->va_list_gpr_size)
8710 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8711 else
8712 ix86_varargs_gpr_size = 0;
8713
8714 /* FPR size of varargs save area. We don't need it if we don't pass
8715 anything in SSE registers. */
8716 if (TARGET_SSE && cfun->va_list_fpr_size)
8717 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8718 else
8719 ix86_varargs_fpr_size = 0;
8720
8721 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8722 return;
8723
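/* Layout of the save area built below: one word-sized slot per GPR at
   offset i * UNITS_PER_WORD, followed by one 16-byte slot per XMM register
   at offset ix86_varargs_gpr_size + i * 16.  ix86_va_start and
   ix86_gimplify_va_arg compute gp_offset/fp_offset against this layout. */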
8724 save_area = frame_pointer_rtx;
8725 set = get_varargs_alias_set ();
8726
8727 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8728 if (max > X86_64_REGPARM_MAX)
8729 max = X86_64_REGPARM_MAX;
8730
8731 for (i = cum->regno; i < max; i++)
8732 {
8733 mem = gen_rtx_MEM (word_mode,
8734 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8735 MEM_NOTRAP_P (mem) = 1;
8736 set_mem_alias_set (mem, set);
8737 emit_move_insn (mem,
8738 gen_rtx_REG (word_mode,
8739 x86_64_int_parameter_registers[i]));
8740 }
8741
8742 if (ix86_varargs_fpr_size)
8743 {
8744 machine_mode smode;
8745 rtx_code_label *label;
8746 rtx test;
8747
8748 /* Now emit code to save SSE registers. The AX parameter contains the
8749 number of SSE parameter registers used to call this function, though all
8750 we actually check here is the zero/non-zero status. */
8751
8752 label = gen_label_rtx ();
8753 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8754 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8755 label));
8756
8757 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8758 we used movdqa (i.e. TImode) instead? Perhaps even better would
8759 be if we could determine the real mode of the data, via a hook
8760 into pass_stdarg. Ignore all that for now. */
8761 smode = V4SFmode;
8762 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8763 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8764
8765 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8766 if (max > X86_64_SSE_REGPARM_MAX)
8767 max = X86_64_SSE_REGPARM_MAX;
8768
8769 for (i = cum->sse_regno; i < max; ++i)
8770 {
8771 mem = plus_constant (Pmode, save_area,
8772 i * 16 + ix86_varargs_gpr_size);
8773 mem = gen_rtx_MEM (smode, mem);
8774 MEM_NOTRAP_P (mem) = 1;
8775 set_mem_alias_set (mem, set);
8776 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8777
8778 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8779 }
8780
8781 emit_label (label);
8782 }
8783 }
8784
8785 static void
8786 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8787 {
8788 alias_set_type set = get_varargs_alias_set ();
8789 int i;
8790
8791 /* Reset to zero, as a SYSV va_arg might have been used
8792 before. */
8793 ix86_varargs_gpr_size = 0;
8794 ix86_varargs_fpr_size = 0;
8795
8796 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8797 {
8798 rtx reg, mem;
8799
8800 mem = gen_rtx_MEM (Pmode,
8801 plus_constant (Pmode, virtual_incoming_args_rtx,
8802 i * UNITS_PER_WORD));
8803 MEM_NOTRAP_P (mem) = 1;
8804 set_mem_alias_set (mem, set);
8805
8806 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8807 emit_move_insn (mem, reg);
8808 }
8809 }
8810
8811 static void
8812 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8813 tree type, int *, int no_rtl)
8814 {
8815 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8816 CUMULATIVE_ARGS next_cum;
8817 tree fntype;
8818
8819 /* This argument doesn't appear to be used anymore, which is good,
8820 because the old code here didn't suppress rtl generation. */
8821 gcc_assert (!no_rtl);
8822
8823 if (!TARGET_64BIT)
8824 return;
8825
8826 fntype = TREE_TYPE (current_function_decl);
8827
8828 /* For varargs, we do not want to skip the dummy va_dcl argument.
8829 For stdargs, we do want to skip the last named argument. */
8830 next_cum = *cum;
8831 if (stdarg_p (fntype))
8832 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8833 true);
8834
8835 if (cum->call_abi == MS_ABI)
8836 setup_incoming_varargs_ms_64 (&next_cum);
8837 else
8838 setup_incoming_varargs_64 (&next_cum);
8839 }
8840
8841 static void
8842 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8843 enum machine_mode mode,
8844 tree type,
8845 int *pretend_size ATTRIBUTE_UNUSED,
8846 int no_rtl)
8847 {
8848 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8849 CUMULATIVE_ARGS next_cum;
8850 tree fntype;
8851 rtx save_area;
8852 int bnd_reg, i, max;
8853
8854 gcc_assert (!no_rtl);
8855
8856 /* Do nothing if we use a plain pointer to the argument area. */
8857 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8858 return;
8859
8860 fntype = TREE_TYPE (current_function_decl);
8861
8862 /* For varargs, we do not want to skip the dummy va_dcl argument.
8863 For stdargs, we do want to skip the last named argument. */
8864 next_cum = *cum;
8865 if (stdarg_p (fntype))
8866 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8867 true);
8868 save_area = frame_pointer_rtx;
8869
8870 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8871 if (max > X86_64_REGPARM_MAX)
8872 max = X86_64_REGPARM_MAX;
8873
8874 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8875 if (chkp_function_instrumented_p (current_function_decl))
8876 for (i = cum->regno; i < max; i++)
8877 {
8878 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8879 rtx reg = gen_rtx_REG (DImode,
8880 x86_64_int_parameter_registers[i]);
8881 rtx ptr = reg;
8882 rtx bounds;
8883
8884 if (bnd_reg <= LAST_BND_REG)
8885 bounds = gen_rtx_REG (BNDmode, bnd_reg);
8886 else
8887 {
8888 rtx ldx_addr =
8889 plus_constant (Pmode, arg_pointer_rtx,
8890 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8891 bounds = gen_reg_rtx (BNDmode);
8892 emit_insn (BNDmode == BND64mode
8893 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8894 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
8895 }
8896
8897 emit_insn (BNDmode == BND64mode
8898 ? gen_bnd64_stx (addr, ptr, bounds)
8899 : gen_bnd32_stx (addr, ptr, bounds));
8900
8901 bnd_reg++;
8902 }
8903 }
8904
8905
8906 /* Check whether TYPE is a va_list of the plain char * kind. */
8907
8908 static bool
8909 is_va_list_char_pointer (tree type)
8910 {
8911 tree canonic;
8912
8913 /* For 32-bit it is always true. */
8914 if (!TARGET_64BIT)
8915 return true;
8916 canonic = ix86_canonical_va_list_type (type);
8917 return (canonic == ms_va_list_type_node
8918 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8919 }
8920
8921 /* Implement va_start. */
8922
8923 static void
8924 ix86_va_start (tree valist, rtx nextarg)
8925 {
8926 HOST_WIDE_INT words, n_gpr, n_fpr;
8927 tree f_gpr, f_fpr, f_ovf, f_sav;
8928 tree gpr, fpr, ovf, sav, t;
8929 tree type;
8930 rtx ovf_rtx;
8931
8932 if (flag_split_stack
8933 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8934 {
8935 unsigned int scratch_regno;
8936
8937 /* When we are splitting the stack, we can't refer to the stack
8938 arguments using internal_arg_pointer, because they may be on
8939 the old stack. The split stack prologue will arrange to
8940 leave a pointer to the old stack arguments in a scratch
8941 register, which we here copy to a pseudo-register. The split
8942 stack prologue can't set the pseudo-register directly because
8943 it (the prologue) runs before any registers have been saved. */
8944
8945 scratch_regno = split_stack_prologue_scratch_regno ();
8946 if (scratch_regno != INVALID_REGNUM)
8947 {
8948 rtx reg;
8949 rtx_insn *seq;
8950
8951 reg = gen_reg_rtx (Pmode);
8952 cfun->machine->split_stack_varargs_pointer = reg;
8953
8954 start_sequence ();
8955 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8956 seq = get_insns ();
8957 end_sequence ();
8958
8959 push_topmost_sequence ();
8960 emit_insn_after (seq, entry_of_function ());
8961 pop_topmost_sequence ();
8962 }
8963 }
8964
8965 /* Only 64bit target needs something special. */
8966 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8967 {
8968 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8969 std_expand_builtin_va_start (valist, nextarg);
8970 else
8971 {
8972 rtx va_r, next;
8973
8974 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8975 next = expand_binop (ptr_mode, add_optab,
8976 cfun->machine->split_stack_varargs_pointer,
8977 crtl->args.arg_offset_rtx,
8978 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8979 convert_move (va_r, next, 0);
8980
8981 /* Store zero bounds for va_list. */
8982 if (chkp_function_instrumented_p (current_function_decl))
8983 chkp_expand_bounds_reset_for_mem (valist,
8984 make_tree (TREE_TYPE (valist),
8985 next));
8986
8987 }
8988 return;
8989 }
8990
8991 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8992 f_fpr = DECL_CHAIN (f_gpr);
8993 f_ovf = DECL_CHAIN (f_fpr);
8994 f_sav = DECL_CHAIN (f_ovf);
8995
8996 valist = build_simple_mem_ref (valist);
8997 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8998 /* The following should be folded into the MEM_REF offset. */
8999 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
9000 f_gpr, NULL_TREE);
9001 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
9002 f_fpr, NULL_TREE);
9003 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
9004 f_ovf, NULL_TREE);
9005 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
9006 f_sav, NULL_TREE);
9007
9008 /* Count number of gp and fp argument registers used. */
9009 words = crtl->args.info.words;
9010 n_gpr = crtl->args.info.regno;
9011 n_fpr = crtl->args.info.sse_regno;
9012
9013 if (cfun->va_list_gpr_size)
9014 {
9015 type = TREE_TYPE (gpr);
9016 t = build2 (MODIFY_EXPR, type,
9017 gpr, build_int_cst (type, n_gpr * 8));
9018 TREE_SIDE_EFFECTS (t) = 1;
9019 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9020 }
9021
9022 if (TARGET_SSE && cfun->va_list_fpr_size)
9023 {
9024 type = TREE_TYPE (fpr);
9025 t = build2 (MODIFY_EXPR, type, fpr,
9026 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
9027 TREE_SIDE_EFFECTS (t) = 1;
9028 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9029 }
9030
9031 /* Find the overflow area. */
9032 type = TREE_TYPE (ovf);
9033 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9034 ovf_rtx = crtl->args.internal_arg_pointer;
9035 else
9036 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
9037 t = make_tree (type, ovf_rtx);
9038 if (words != 0)
9039 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
9040
9041 /* Store zero bounds for overflow area pointer. */
9042 if (chkp_function_instrumented_p (current_function_decl))
9043 chkp_expand_bounds_reset_for_mem (ovf, t);
9044
9045 t = build2 (MODIFY_EXPR, type, ovf, t);
9046 TREE_SIDE_EFFECTS (t) = 1;
9047 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9048
9049 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
9050 {
9051 /* Find the register save area.
9052 The function prologue saves it right above the stack frame. */
9053 type = TREE_TYPE (sav);
9054 t = make_tree (type, frame_pointer_rtx);
9055 if (!ix86_varargs_gpr_size)
9056 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
9057
9058 /* Store zero bounds for save area pointer. */
9059 if (chkp_function_instrumented_p (current_function_decl))
9060 chkp_expand_bounds_reset_for_mem (sav, t);
9061
9062 t = build2 (MODIFY_EXPR, type, sav, t);
9063 TREE_SIDE_EFFECTS (t) = 1;
9064 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9065 }
9066 }
9067
9068 /* Implement va_arg. */
9069
9070 static tree
9071 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9072 gimple_seq *post_p)
9073 {
9074 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
9075 tree f_gpr, f_fpr, f_ovf, f_sav;
9076 tree gpr, fpr, ovf, sav, t;
9077 int size, rsize;
9078 tree lab_false, lab_over = NULL_TREE;
9079 tree addr, t2;
9080 rtx container;
9081 int indirect_p = 0;
9082 tree ptrtype;
9083 machine_mode nat_mode;
9084 unsigned int arg_boundary;
9085
9086 /* Only 64bit target needs something special. */
9087 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
9088 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
9089
9090 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9091 f_fpr = DECL_CHAIN (f_gpr);
9092 f_ovf = DECL_CHAIN (f_fpr);
9093 f_sav = DECL_CHAIN (f_ovf);
9094
9095 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
9096 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
9097 valist = build_va_arg_indirect_ref (valist);
9098 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9099 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9100 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9101
9102 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9103 if (indirect_p)
9104 type = build_pointer_type (type);
9105 size = int_size_in_bytes (type);
9106 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
9107
9108 nat_mode = type_natural_mode (type, NULL, false);
9109 switch (nat_mode)
9110 {
9111 case V8SFmode:
9112 case V8SImode:
9113 case V32QImode:
9114 case V16HImode:
9115 case V4DFmode:
9116 case V4DImode:
9117 case V16SFmode:
9118 case V16SImode:
9119 case V64QImode:
9120 case V32HImode:
9121 case V8DFmode:
9122 case V8DImode:
9123 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
9124 if (!TARGET_64BIT_MS_ABI)
9125 {
9126 container = NULL;
9127 break;
9128 }
9129
9130 default:
9131 container = construct_container (nat_mode, TYPE_MODE (type),
9132 type, 0, X86_64_REGPARM_MAX,
9133 X86_64_SSE_REGPARM_MAX, intreg,
9134 0);
9135 break;
9136 }
9137
9138 /* Pull the value out of the saved registers. */
9139
9140 addr = create_tmp_var (ptr_type_node, "addr");
9141
9142 if (container)
9143 {
9144 int needed_intregs, needed_sseregs;
9145 bool need_temp;
9146 tree int_addr, sse_addr;
9147
9148 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9149 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9150
9151 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
9152
9153 need_temp = (!REG_P (container)
9154 && ((needed_intregs && TYPE_ALIGN (type) > 64)
9155 || TYPE_ALIGN (type) > 128));
9156
9157 /* If we are passing a structure, verify that it forms a consecutive block
9158 in the register save area. If not, we need to do moves. */
9159 if (!need_temp && !REG_P (container))
9160 {
9161 /* Verify that all registers are strictly consecutive */
9162 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9163 {
9164 int i;
9165
9166 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9167 {
9168 rtx slot = XVECEXP (container, 0, i);
9169 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9170 || INTVAL (XEXP (slot, 1)) != i * 16)
9171 need_temp = true;
9172 }
9173 }
9174 else
9175 {
9176 int i;
9177
9178 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9179 {
9180 rtx slot = XVECEXP (container, 0, i);
9181 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9182 || INTVAL (XEXP (slot, 1)) != i * 8)
9183 need_temp = true;
9184 }
9185 }
9186 }
9187 if (!need_temp)
9188 {
9189 int_addr = addr;
9190 sse_addr = addr;
9191 }
9192 else
9193 {
9194 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9195 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9196 }
9197
9198 /* First ensure that we fit completely in registers. */
9199 if (needed_intregs)
9200 {
9201 t = build_int_cst (TREE_TYPE (gpr),
9202 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9203 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9204 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9205 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9206 gimplify_and_add (t, pre_p);
9207 }
9208 if (needed_sseregs)
9209 {
9210 t = build_int_cst (TREE_TYPE (fpr),
9211 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9212 + X86_64_REGPARM_MAX * 8);
9213 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9214 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9215 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9216 gimplify_and_add (t, pre_p);
9217 }
9218
9219 /* Compute index to start of area used for integer regs. */
9220 if (needed_intregs)
9221 {
9222 /* int_addr = gpr + sav; */
9223 t = fold_build_pointer_plus (sav, gpr);
9224 gimplify_assign (int_addr, t, pre_p);
9225 }
9226 if (needed_sseregs)
9227 {
9228 /* sse_addr = fpr + sav; */
9229 t = fold_build_pointer_plus (sav, fpr);
9230 gimplify_assign (sse_addr, t, pre_p);
9231 }
9232 if (need_temp)
9233 {
9234 int i, prev_size = 0;
9235 tree temp = create_tmp_var (type, "va_arg_tmp");
9236
9237 /* addr = &temp; */
9238 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9239 gimplify_assign (addr, t, pre_p);
9240
9241 for (i = 0; i < XVECLEN (container, 0); i++)
9242 {
9243 rtx slot = XVECEXP (container, 0, i);
9244 rtx reg = XEXP (slot, 0);
9245 machine_mode mode = GET_MODE (reg);
9246 tree piece_type;
9247 tree addr_type;
9248 tree daddr_type;
9249 tree src_addr, src;
9250 int src_offset;
9251 tree dest_addr, dest;
9252 int cur_size = GET_MODE_SIZE (mode);
9253
9254 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9255 prev_size = INTVAL (XEXP (slot, 1));
9256 if (prev_size + cur_size > size)
9257 {
9258 cur_size = size - prev_size;
9259 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9260 if (mode == BLKmode)
9261 mode = QImode;
9262 }
9263 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9264 if (mode == GET_MODE (reg))
9265 addr_type = build_pointer_type (piece_type);
9266 else
9267 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9268 true);
9269 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9270 true);
9271
9272 if (SSE_REGNO_P (REGNO (reg)))
9273 {
9274 src_addr = sse_addr;
9275 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9276 }
9277 else
9278 {
9279 src_addr = int_addr;
9280 src_offset = REGNO (reg) * 8;
9281 }
9282 src_addr = fold_convert (addr_type, src_addr);
9283 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9284
9285 dest_addr = fold_convert (daddr_type, addr);
9286 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9287 if (cur_size == GET_MODE_SIZE (mode))
9288 {
9289 src = build_va_arg_indirect_ref (src_addr);
9290 dest = build_va_arg_indirect_ref (dest_addr);
9291
9292 gimplify_assign (dest, src, pre_p);
9293 }
9294 else
9295 {
9296 tree copy
9297 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9298 3, dest_addr, src_addr,
9299 size_int (cur_size));
9300 gimplify_and_add (copy, pre_p);
9301 }
9302 prev_size += cur_size;
9303 }
9304 }
9305
9306 if (needed_intregs)
9307 {
9308 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9309 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9310 gimplify_assign (gpr, t, pre_p);
9311 }
9312
9313 if (needed_sseregs)
9314 {
9315 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9316 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9317 gimplify_assign (fpr, t, pre_p);
9318 }
9319
9320 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9321
9322 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9323 }
9324
9325 /* ... otherwise out of the overflow area. */
9326
9327 /* When the caller aligns a parameter on the stack, an alignment request
9328 beyond MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
9329 MAX_SUPPORTED_STACK_ALIGNMENT. Match that behaviour here on the
9330 callee side. */
9331 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9332 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9333 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9334
9335 /* Care for on-stack alignment if needed. */
9336 if (arg_boundary <= 64 || size == 0)
9337 t = ovf;
9338 else
9339 {
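/* Round the overflow-area pointer up to the next ALIGN-byte
   boundary: t = (ovf + align - 1) & -align. */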
9340 HOST_WIDE_INT align = arg_boundary / 8;
9341 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9342 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9343 build_int_cst (TREE_TYPE (t), -align));
9344 }
9345
9346 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9347 gimplify_assign (addr, t, pre_p);
9348
9349 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9350 gimplify_assign (unshare_expr (ovf), t, pre_p);
9351
9352 if (container)
9353 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9354
9355 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9356 addr = fold_convert (ptrtype, addr);
9357
9358 if (indirect_p)
9359 addr = build_va_arg_indirect_ref (addr);
9360 return build_va_arg_indirect_ref (addr);
9361 }
9362 \f
9363 /* Return true if OPNUM's MEM should be matched
9364 in movabs* patterns. */
9365
9366 bool
9367 ix86_check_movabs (rtx insn, int opnum)
9368 {
9369 rtx set, mem;
9370
9371 set = PATTERN (insn);
9372 if (GET_CODE (set) == PARALLEL)
9373 set = XVECEXP (set, 0, 0);
9374 gcc_assert (GET_CODE (set) == SET);
9375 mem = XEXP (set, opnum);
9376 while (GET_CODE (mem) == SUBREG)
9377 mem = SUBREG_REG (mem);
9378 gcc_assert (MEM_P (mem));
9379 return volatile_ok || !MEM_VOLATILE_P (mem);
9380 }
9381 \f
9382 /* Initialize the table of extra 80387 mathematical constants. */
9383
9384 static void
9385 init_ext_80387_constants (void)
9386 {
9387 static const char * cst[5] =
9388 {
9389 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9390 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9391 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9392 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9393 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9394 };
9395 int i;
9396
9397 for (i = 0; i < 5; i++)
9398 {
9399 real_from_string (&ext_80387_constants_table[i], cst[i]);
9400 /* Ensure each constant is rounded to XFmode precision. */
9401 real_convert (&ext_80387_constants_table[i],
9402 XFmode, &ext_80387_constants_table[i]);
9403 }
9404
9405 ext_80387_constants_init = 1;
9406 }
9407
9408 /* Return non-zero if the constant is something that
9409 can be loaded with a special instruction. */
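/* The return value encodes the constant: -1 if X is not an 80387 float
   constant at all, 0 if it is not special, 1 for 0.0 (fldz), 2 for 1.0
   (fld1), 3..7 for the ext_80387_constants_table entries (fldlg2, fldln2,
   fldl2e, fldl2t, fldpi), and 8/9 for -0.0 and -1.0, which are split into
   fldz/fld1 followed by fchs. */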
9410
9411 int
9412 standard_80387_constant_p (rtx x)
9413 {
9414 machine_mode mode = GET_MODE (x);
9415
9416 REAL_VALUE_TYPE r;
9417
9418 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
9419 return -1;
9420
9421 if (x == CONST0_RTX (mode))
9422 return 1;
9423 if (x == CONST1_RTX (mode))
9424 return 2;
9425
9426 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9427
9428 /* For XFmode constants, try to find a special 80387 instruction when
9429 optimizing for size or on those CPUs that benefit from them. */
9430 if (mode == XFmode
9431 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9432 {
9433 int i;
9434
9435 if (! ext_80387_constants_init)
9436 init_ext_80387_constants ();
9437
9438 for (i = 0; i < 5; i++)
9439 if (real_identical (&r, &ext_80387_constants_table[i]))
9440 return i + 3;
9441 }
9442
9443 /* Load of the constant -0.0 or -1.0 will be split as
9444 fldz;fchs or fld1;fchs sequence. */
9445 if (real_isnegzero (&r))
9446 return 8;
9447 if (real_identical (&r, &dconstm1))
9448 return 9;
9449
9450 return 0;
9451 }
9452
9453 /* Return the opcode of the special instruction to be used to load
9454 the constant X. */
9455
9456 const char *
9457 standard_80387_constant_opcode (rtx x)
9458 {
9459 switch (standard_80387_constant_p (x))
9460 {
9461 case 1:
9462 return "fldz";
9463 case 2:
9464 return "fld1";
9465 case 3:
9466 return "fldlg2";
9467 case 4:
9468 return "fldln2";
9469 case 5:
9470 return "fldl2e";
9471 case 6:
9472 return "fldl2t";
9473 case 7:
9474 return "fldpi";
9475 case 8:
9476 case 9:
9477 return "#";
9478 default:
9479 gcc_unreachable ();
9480 }
9481 }
9482
9483 /* Return the CONST_DOUBLE representing the 80387 constant that is
9484 loaded by the specified special instruction. The argument IDX
9485 matches the return value from standard_80387_constant_p. */
9486
9487 rtx
9488 standard_80387_constant_rtx (int idx)
9489 {
9490 int i;
9491
9492 if (! ext_80387_constants_init)
9493 init_ext_80387_constants ();
9494
9495 switch (idx)
9496 {
9497 case 3:
9498 case 4:
9499 case 5:
9500 case 6:
9501 case 7:
9502 i = idx - 3;
9503 break;
9504
9505 default:
9506 gcc_unreachable ();
9507 }
9508
9509 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9510 XFmode);
9511 }
9512
9513 /* Return 1 if X is all zeros and 2 if X is all ones,
9514 in a supported SSE/AVX vector mode. */
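/* A nonzero return means the constant can be synthesized without a
   constant-pool load: all-zeros via a pxor/xorps-style idiom and all-ones
   via pcmpeqd (or vpternlogd for 512-bit modes); see
   standard_sse_constant_opcode below. */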
9515
9516 int
9517 standard_sse_constant_p (rtx x)
9518 {
9519 machine_mode mode;
9520
9521 if (!TARGET_SSE)
9522 return 0;
9523
9524 mode = GET_MODE (x);
9525
9526 if (x == const0_rtx || x == CONST0_RTX (mode))
9527 return 1;
9528 if (vector_all_ones_operand (x, mode))
9529 switch (mode)
9530 {
9531 case V16QImode:
9532 case V8HImode:
9533 case V4SImode:
9534 case V2DImode:
9535 if (TARGET_SSE2)
9536 return 2;
9537 case V32QImode:
9538 case V16HImode:
9539 case V8SImode:
9540 case V4DImode:
9541 if (TARGET_AVX2)
9542 return 2;
9543 case V64QImode:
9544 case V32HImode:
9545 case V16SImode:
9546 case V8DImode:
9547 if (TARGET_AVX512F)
9548 return 2;
9549 default:
9550 break;
9551 }
9552
9553 return 0;
9554 }
9555
9556 /* Return the opcode of the special instruction to be used to load
9557 the constant X. */
9558
9559 const char *
9560 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9561 {
9562 switch (standard_sse_constant_p (x))
9563 {
9564 case 1:
9565 switch (get_attr_mode (insn))
9566 {
9567 case MODE_XI:
9568 return "vpxord\t%g0, %g0, %g0";
9569 case MODE_V16SF:
9570 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9571 : "vpxord\t%g0, %g0, %g0";
9572 case MODE_V8DF:
9573 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9574 : "vpxorq\t%g0, %g0, %g0";
9575 case MODE_TI:
9576 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9577 : "%vpxor\t%0, %d0";
9578 case MODE_V2DF:
9579 return "%vxorpd\t%0, %d0";
9580 case MODE_V4SF:
9581 return "%vxorps\t%0, %d0";
9582
9583 case MODE_OI:
9584 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9585 : "vpxor\t%x0, %x0, %x0";
9586 case MODE_V4DF:
9587 return "vxorpd\t%x0, %x0, %x0";
9588 case MODE_V8SF:
9589 return "vxorps\t%x0, %x0, %x0";
9590
9591 default:
9592 break;
9593 }
9594
9595 case 2:
9596 if (TARGET_AVX512VL
9597 || get_attr_mode (insn) == MODE_XI
9598 || get_attr_mode (insn) == MODE_V8DF
9599 || get_attr_mode (insn) == MODE_V16SF)
9600 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9601 if (TARGET_AVX)
9602 return "vpcmpeqd\t%0, %0, %0";
9603 else
9604 return "pcmpeqd\t%0, %0";
9605
9606 default:
9607 break;
9608 }
9609 gcc_unreachable ();
9610 }
9611
9612 /* Returns true if OP contains a symbol reference */
9613
9614 bool
9615 symbolic_reference_mentioned_p (rtx op)
9616 {
9617 const char *fmt;
9618 int i;
9619
9620 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9621 return true;
9622
9623 fmt = GET_RTX_FORMAT (GET_CODE (op));
9624 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9625 {
9626 if (fmt[i] == 'E')
9627 {
9628 int j;
9629
9630 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9631 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9632 return true;
9633 }
9634
9635 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9636 return true;
9637 }
9638
9639 return false;
9640 }
9641
9642 /* Return true if it is appropriate to emit `ret' instructions in the
9643 body of a function. Do this only if the epilogue is simple, needing a
9644 couple of insns. Prior to reloading, we can't tell how many registers
9645 must be saved, so return false then. Return false if there is no frame
9646 marker to de-allocate. */
9647
9648 bool
9649 ix86_can_use_return_insn_p (void)
9650 {
9651 struct ix86_frame frame;
9652
9653 if (! reload_completed || frame_pointer_needed)
9654 return 0;
9655
9656 /* Don't allow more than 32k pop, since that's all we can do
9657 with one instruction. */
9658 if (crtl->args.pops_args && crtl->args.size >= 32768)
9659 return 0;
9660
9661 ix86_compute_frame_layout (&frame);
9662 return (frame.stack_pointer_offset == UNITS_PER_WORD
9663 && (frame.nregs + frame.nsseregs) == 0);
9664 }
9665 \f
9666 /* Value should be nonzero if functions must have frame pointers.
9667 Zero means the frame pointer need not be set up (and parms may
9668 be accessed via the stack pointer) in functions that seem suitable. */
9669
9670 static bool
9671 ix86_frame_pointer_required (void)
9672 {
9673 /* If we accessed previous frames, then the generated code expects
9674 to be able to access the saved ebp value in our frame. */
9675 if (cfun->machine->accesses_prev_frame)
9676 return true;
9677
9678 /* Several x86 OSes need a frame pointer for other reasons,
9679 usually pertaining to setjmp. */
9680 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9681 return true;
9682
9683 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
9684 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9685 return true;
9686
9687 /* Under Win64 SEH, very large frames need a frame pointer, as the maximum
9688 stack allocation is 4GB. */
9689 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9690 return true;
9691
9692 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9693 turns off the frame pointer by default. Turn it back on now if
9694 we've not got a leaf function. */
9695 if (TARGET_OMIT_LEAF_FRAME_POINTER
9696 && (!crtl->is_leaf
9697 || ix86_current_function_calls_tls_descriptor))
9698 return true;
9699
9700 if (crtl->profile && !flag_fentry)
9701 return true;
9702
9703 return false;
9704 }
9705
9706 /* Record that the current function accesses previous call frames. */
9707
9708 void
9709 ix86_setup_frame_addresses (void)
9710 {
9711 cfun->machine->accesses_prev_frame = 1;
9712 }
9713 \f
9714 #ifndef USE_HIDDEN_LINKONCE
9715 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9716 # define USE_HIDDEN_LINKONCE 1
9717 # else
9718 # define USE_HIDDEN_LINKONCE 0
9719 # endif
9720 #endif
9721
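/* Bitmask of the registers for which a __x86.get_pc_thunk.<reg> thunk has
   been requested (set by output_set_got); ix86_code_end emits the
   corresponding thunk bodies. */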
9722 static int pic_labels_used;
9723
9724 /* Fills in the label name that should be used for a pc thunk for
9725 the given register. */
9726
9727 static void
9728 get_pc_thunk_name (char name[32], unsigned int regno)
9729 {
9730 gcc_assert (!TARGET_64BIT);
9731
9732 if (USE_HIDDEN_LINKONCE)
9733 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9734 else
9735 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9736 }
9737
9738
9739 /* This function generates the pc thunks used for -fpic: each loads its
9740 target register with the return address of the caller and then returns. */
9741
9742 static void
9743 ix86_code_end (void)
9744 {
9745 rtx xops[2];
9746 int regno;
9747
9748 for (regno = AX_REG; regno <= SP_REG; regno++)
9749 {
9750 char name[32];
9751 tree decl;
9752
9753 if (!(pic_labels_used & (1 << regno)))
9754 continue;
9755
9756 get_pc_thunk_name (name, regno);
9757
9758 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9759 get_identifier (name),
9760 build_function_type_list (void_type_node, NULL_TREE));
9761 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9762 NULL_TREE, void_type_node);
9763 TREE_PUBLIC (decl) = 1;
9764 TREE_STATIC (decl) = 1;
9765 DECL_IGNORED_P (decl) = 1;
9766
9767 #if TARGET_MACHO
9768 if (TARGET_MACHO)
9769 {
9770 switch_to_section (darwin_sections[text_coal_section]);
9771 fputs ("\t.weak_definition\t", asm_out_file);
9772 assemble_name (asm_out_file, name);
9773 fputs ("\n\t.private_extern\t", asm_out_file);
9774 assemble_name (asm_out_file, name);
9775 putc ('\n', asm_out_file);
9776 ASM_OUTPUT_LABEL (asm_out_file, name);
9777 DECL_WEAK (decl) = 1;
9778 }
9779 else
9780 #endif
9781 if (USE_HIDDEN_LINKONCE)
9782 {
9783 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9784
9785 targetm.asm_out.unique_section (decl, 0);
9786 switch_to_section (get_named_section (decl, NULL, 0));
9787
9788 targetm.asm_out.globalize_label (asm_out_file, name);
9789 fputs ("\t.hidden\t", asm_out_file);
9790 assemble_name (asm_out_file, name);
9791 putc ('\n', asm_out_file);
9792 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9793 }
9794 else
9795 {
9796 switch_to_section (text_section);
9797 ASM_OUTPUT_LABEL (asm_out_file, name);
9798 }
9799
9800 DECL_INITIAL (decl) = make_node (BLOCK);
9801 current_function_decl = decl;
9802 init_function_start (decl);
9803 first_function_block_is_cold = false;
9804 /* Make sure unwind info is emitted for the thunk if needed. */
9805 final_start_function (emit_barrier (), asm_out_file, 1);
9806
9807 /* Pad stack IP move with 4 instructions (two NOPs count
9808 as one instruction). */
9809 if (TARGET_PAD_SHORT_FUNCTION)
9810 {
9811 int i = 8;
9812
9813 while (i--)
9814 fputs ("\tnop\n", asm_out_file);
9815 }
9816
9817 xops[0] = gen_rtx_REG (Pmode, regno);
9818 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9819 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9820 output_asm_insn ("%!ret", NULL);
9821 final_end_function ();
9822 init_insn_lengths ();
9823 free_after_compilation (cfun);
9824 set_cfun (NULL);
9825 current_function_decl = NULL;
9826 }
9827
9828 if (flag_split_stack)
9829 file_end_indicate_split_stack ();
9830 }
9831
9832 /* Emit code for the SET_GOT patterns. */
9833
9834 const char *
9835 output_set_got (rtx dest, rtx label)
9836 {
9837 rtx xops[3];
9838
9839 xops[0] = dest;
9840
9841 if (TARGET_VXWORKS_RTP && flag_pic)
9842 {
9843 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9844 xops[2] = gen_rtx_MEM (Pmode,
9845 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9846 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9847
9848 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9849 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9850 an unadorned address. */
9851 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9852 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9853 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9854 return "";
9855 }
9856
9857 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9858
9859 if (!flag_pic)
9860 {
9861 if (TARGET_MACHO)
9862 /* We don't need a pic base, we're not producing pic. */
9863 gcc_unreachable ();
9864
9865 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9866 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9867 targetm.asm_out.internal_label (asm_out_file, "L",
9868 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9869 }
9870 else
9871 {
9872 char name[32];
9873 get_pc_thunk_name (name, REGNO (dest));
9874 pic_labels_used |= 1 << REGNO (dest);
9875
9876 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9877 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9878 output_asm_insn ("%!call\t%X2", xops);
9879
9880 #if TARGET_MACHO
9881 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9882 This is what will be referenced by the Mach-O PIC subsystem. */
9883 if (machopic_should_output_picbase_label () || !label)
9884 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9885
9886 /* When we are restoring the pic base at the site of a nonlocal label,
9887 and we decided to emit the pic base above, we will still output a
9888 local label used for calculating the correction offset (even though
9889 the offset will be 0 in that case). */
9890 if (label)
9891 targetm.asm_out.internal_label (asm_out_file, "L",
9892 CODE_LABEL_NUMBER (label));
9893 #endif
9894 }
9895
9896 if (!TARGET_MACHO)
9897 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9898
9899 return "";
9900 }
9901
9902 /* Generate an "push" pattern for input ARG. */
9903
9904 static rtx
9905 gen_push (rtx arg)
9906 {
9907 struct machine_function *m = cfun->machine;
9908
9909 if (m->fs.cfa_reg == stack_pointer_rtx)
9910 m->fs.cfa_offset += UNITS_PER_WORD;
9911 m->fs.sp_offset += UNITS_PER_WORD;
9912
9913 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9914 arg = gen_rtx_REG (word_mode, REGNO (arg));
9915
9916 return gen_rtx_SET (gen_rtx_MEM (word_mode,
9917 gen_rtx_PRE_DEC (Pmode,
9918 stack_pointer_rtx)),
9919 arg);
9920 }
9921
9922 /* Generate an "pop" pattern for input ARG. */
9923
9924 static rtx
9925 gen_pop (rtx arg)
9926 {
9927 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9928 arg = gen_rtx_REG (word_mode, REGNO (arg));
9929
9930 return gen_rtx_SET (arg,
9931 gen_rtx_MEM (word_mode,
9932 gen_rtx_POST_INC (Pmode,
9933 stack_pointer_rtx)));
9934 }
9935
9936 /* Return >= 0 if there is an unused call-clobbered register available
9937 for the entire function. */
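/* Only %eax, %edx and %ecx (hard registers 0..2) are considered, and only
   in leaf functions that neither profile nor call a TLS descriptor, and
   when no pseudo PIC register is in use. */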
9938
9939 static unsigned int
9940 ix86_select_alt_pic_regnum (void)
9941 {
9942 if (ix86_use_pseudo_pic_reg ())
9943 return INVALID_REGNUM;
9944
9945 if (crtl->is_leaf
9946 && !crtl->profile
9947 && !ix86_current_function_calls_tls_descriptor)
9948 {
9949 int i, drap;
9950 /* Can't use the same register for both PIC and DRAP. */
9951 if (crtl->drap_reg)
9952 drap = REGNO (crtl->drap_reg);
9953 else
9954 drap = -1;
9955 for (i = 2; i >= 0; --i)
9956 if (i != drap && !df_regs_ever_live_p (i))
9957 return i;
9958 }
9959
9960 return INVALID_REGNUM;
9961 }
9962
9963 /* Return TRUE if we need to save REGNO. */
9964
9965 static bool
9966 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9967 {
9968 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
9969 && pic_offset_table_rtx)
9970 {
9971 if (ix86_use_pseudo_pic_reg ())
9972 {
9973 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
9974 _mcount in prologue. */
9975 if (!TARGET_64BIT && flag_pic && crtl->profile)
9976 return true;
9977 }
9978 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9979 || crtl->profile
9980 || crtl->calls_eh_return
9981 || crtl->uses_const_pool
9982 || cfun->has_nonlocal_label)
9983 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9984 }
9985
9986 if (crtl->calls_eh_return && maybe_eh_return)
9987 {
9988 unsigned i;
9989 for (i = 0; ; i++)
9990 {
9991 unsigned test = EH_RETURN_DATA_REGNO (i);
9992 if (test == INVALID_REGNUM)
9993 break;
9994 if (test == regno)
9995 return true;
9996 }
9997 }
9998
9999 if (crtl->drap_reg
10000 && regno == REGNO (crtl->drap_reg)
10001 && !cfun->machine->no_drap_save_restore)
10002 return true;
10003
10004 return (df_regs_ever_live_p (regno)
10005 && !call_used_regs[regno]
10006 && !fixed_regs[regno]
10007 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
10008 }
10009
10010 /* Return the number of saved general purpose registers. */
10011
10012 static int
10013 ix86_nsaved_regs (void)
10014 {
10015 int nregs = 0;
10016 int regno;
10017
10018 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10019 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10020 nregs ++;
10021 return nregs;
10022 }
10023
10024 /* Return the number of saved SSE registers. */
10025
10026 static int
10027 ix86_nsaved_sseregs (void)
10028 {
10029 int nregs = 0;
10030 int regno;
10031
10032 if (!TARGET_64BIT_MS_ABI)
10033 return 0;
10034 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10035 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10036 nregs ++;
10037 return nregs;
10038 }
10039
10040 /* Given FROM and TO register numbers, say whether this elimination is
10041 allowed. If stack alignment is needed, we can only replace argument
10042 pointer with hard frame pointer, or replace frame pointer with stack
10043 pointer. Otherwise, frame pointer elimination is automatically
10044 handled and all other eliminations are valid. */
10045
10046 static bool
10047 ix86_can_eliminate (const int from, const int to)
10048 {
10049 if (stack_realign_fp)
10050 return ((from == ARG_POINTER_REGNUM
10051 && to == HARD_FRAME_POINTER_REGNUM)
10052 || (from == FRAME_POINTER_REGNUM
10053 && to == STACK_POINTER_REGNUM));
10054 else
10055 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
10056 }
10057
10058 /* Return the offset between two registers, one to be eliminated, and the other
10059 its replacement, at the start of a routine. */
10060
10061 HOST_WIDE_INT
10062 ix86_initial_elimination_offset (int from, int to)
10063 {
10064 struct ix86_frame frame;
10065 ix86_compute_frame_layout (&frame);
10066
10067 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10068 return frame.hard_frame_pointer_offset;
10069 else if (from == FRAME_POINTER_REGNUM
10070 && to == HARD_FRAME_POINTER_REGNUM)
10071 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
10072 else
10073 {
10074 gcc_assert (to == STACK_POINTER_REGNUM);
10075
10076 if (from == ARG_POINTER_REGNUM)
10077 return frame.stack_pointer_offset;
10078
10079 gcc_assert (from == FRAME_POINTER_REGNUM);
10080 return frame.stack_pointer_offset - frame.frame_pointer_offset;
10081 }
10082 }
10083
10084 /* In a dynamically-aligned function, we can't know the offset from
10085 stack pointer to frame pointer, so we must ensure that setjmp
10086 eliminates fp against the hard fp (%ebp) rather than trying to
10087 index from %esp up to the top of the frame across a gap that is
10088 of unknown (at compile-time) size. */
10089 static rtx
10090 ix86_builtin_setjmp_frame_value (void)
10091 {
10092 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
10093 }
10094
10095 /* When using -fsplit-stack, the allocation routines set a field in
10096 the TCB to the bottom of the stack plus this much space, measured
10097 in bytes. */
10098
10099 #define SPLIT_STACK_AVAILABLE 256
10100
10101 /* Fill the ix86_frame structure describing the frame of the currently
compiled function. */
10102
10103 static void
10104 ix86_compute_frame_layout (struct ix86_frame *frame)
10105 {
10106 unsigned HOST_WIDE_INT stack_alignment_needed;
10107 HOST_WIDE_INT offset;
10108 unsigned HOST_WIDE_INT preferred_alignment;
10109 HOST_WIDE_INT size = get_frame_size ();
10110 HOST_WIDE_INT to_allocate;
10111
10112 frame->nregs = ix86_nsaved_regs ();
10113 frame->nsseregs = ix86_nsaved_sseregs ();
10114
10115 /* The 64-bit MS ABI seems to require the stack alignment to be always 16,
10116 except in function prologues and in leaf functions. */
10117 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
10118 && (!crtl->is_leaf || cfun->calls_alloca != 0
10119 || ix86_current_function_calls_tls_descriptor))
10120 {
10121 crtl->preferred_stack_boundary = 128;
10122 crtl->stack_alignment_needed = 128;
10123 }
10124 /* preferred_stack_boundary is never updated for calls
10125 expanded from a TLS descriptor. Update it here. We don't update it
10126 at expand time because, according to the comments before
10127 ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
10128 away. */
10129 else if (ix86_current_function_calls_tls_descriptor
10130 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
10131 {
10132 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
10133 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
10134 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
10135 }
10136
10137 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
10138 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
10139
10140 gcc_assert (!size || stack_alignment_needed);
10141 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
10142 gcc_assert (preferred_alignment <= stack_alignment_needed);
10143
10144 /* For SEH we have to limit the amount of code movement into the prologue.
10145 At present we do this via a BLOCKAGE, at which point there's very little
10146 scheduling that can be done, which means that there's very little point
10147 in doing anything except PUSHs. */
10148 if (TARGET_SEH)
10149 cfun->machine->use_fast_prologue_epilogue = false;
10150
10151 /* During reload iterations the number of registers saved can change.
10152 Recompute the value as needed. Do not recompute when the number of
10153 registers didn't change, as reload makes multiple calls to this function
10154 and does not expect the decision to change within a single iteration. */
10155 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
10156 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
10157 {
10158 int count = frame->nregs;
10159 struct cgraph_node *node = cgraph_node::get (current_function_decl);
10160
10161 cfun->machine->use_fast_prologue_epilogue_nregs = count;
10162
10163 /* The fast prologue uses moves instead of pushes to save registers. This
10164 is significantly longer, but it also executes faster, as modern hardware
10165 can execute the moves in parallel but cannot do so for push/pop.
10166
10167 Be careful about choosing which prologue to emit: when the function
10168 takes many instructions to execute we may use the slow version, as we
10169 also do when the function is known to be outside a hot spot (this is
10170 known with feedback only). Weight the size of the function by the number
10171 of registers to save, as it is cheap to use one or two push instructions
10172 but very slow to use many of them. */
10173 if (count)
10174 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
10175 if (node->frequency < NODE_FREQUENCY_NORMAL
10176 || (flag_branch_probabilities
10177 && node->frequency < NODE_FREQUENCY_HOT))
10178 cfun->machine->use_fast_prologue_epilogue = false;
10179 else
10180 cfun->machine->use_fast_prologue_epilogue
10181 = !expensive_function_p (count);
10182 }
10183
10184 frame->save_regs_using_mov
10185 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10186 /* If static stack checking is enabled and done with probes,
10187 the registers need to be saved before allocating the frame. */
10188 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
10189
10190 /* Skip return address. */
10191 offset = UNITS_PER_WORD;
10192
10193 /* Skip pushed static chain. */
10194 if (ix86_static_chain_on_stack)
10195 offset += UNITS_PER_WORD;
10196
10197 /* Skip saved base pointer. */
10198 if (frame_pointer_needed)
10199 offset += UNITS_PER_WORD;
10200 frame->hfp_save_offset = offset;
10201
10202 /* The traditional frame pointer location is at the top of the frame. */
10203 frame->hard_frame_pointer_offset = offset;
10204
10205 /* Register save area */
10206 offset += frame->nregs * UNITS_PER_WORD;
10207 frame->reg_save_offset = offset;
10208
10209 /* On SEH target, registers are pushed just before the frame pointer
10210 location. */
10211 if (TARGET_SEH)
10212 frame->hard_frame_pointer_offset = offset;
10213
10214 /* Align and set SSE register save area. */
10215 if (frame->nsseregs)
10216 {
10217 /* The only ABI that has saved SSE registers (Win64) also has a
10218 16-byte aligned default stack, and thus we don't need to be
10219 within the re-aligned local stack frame to save them. */
10220 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
10221 offset = (offset + 16 - 1) & -16;
10222 offset += frame->nsseregs * 16;
10223 }
10224 frame->sse_reg_save_offset = offset;
10225
10226 /* The re-aligned stack starts here. Values before this point are not
10227 directly comparable with values below this point. In order to make
10228 sure that no value happens to be the same before and after, force
10229 the alignment computation below to add a non-zero value. */
10230 if (stack_realign_fp)
10231 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
10232
10233 /* Va-arg area */
10234 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10235 offset += frame->va_arg_size;
10236
10237 /* Align start of frame for local function. */
10238 if (stack_realign_fp
10239 || offset != frame->sse_reg_save_offset
10240 || size != 0
10241 || !crtl->is_leaf
10242 || cfun->calls_alloca
10243 || ix86_current_function_calls_tls_descriptor)
10244 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10245
10246 /* Frame pointer points here. */
10247 frame->frame_pointer_offset = offset;
10248
10249 offset += size;
10250
10251 /* Add the outgoing arguments area. It can be skipped if we eliminated
10252 all the function calls as dead code.
10253 Skipping is however impossible when the function calls alloca: the
10254 alloca expander assumes that the last crtl->outgoing_args_size bytes
10255 of the stack frame are unused. */
10256 if (ACCUMULATE_OUTGOING_ARGS
10257 && (!crtl->is_leaf || cfun->calls_alloca
10258 || ix86_current_function_calls_tls_descriptor))
10259 {
10260 offset += crtl->outgoing_args_size;
10261 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10262 }
10263 else
10264 frame->outgoing_arguments_size = 0;
10265
10266 /* Align stack boundary. Only needed if we're calling another function
10267 or using alloca. */
10268 if (!crtl->is_leaf || cfun->calls_alloca
10269 || ix86_current_function_calls_tls_descriptor)
10270 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10271
10272 /* We've reached end of stack frame. */
10273 frame->stack_pointer_offset = offset;
10274
10275 /* Size prologue needs to allocate. */
10276 to_allocate = offset - frame->sse_reg_save_offset;
10277
10278 if ((!to_allocate && frame->nregs <= 1)
10279 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10280 frame->save_regs_using_mov = false;
10281
10282 if (ix86_using_red_zone ()
10283 && crtl->sp_is_unchanging
10284 && crtl->is_leaf
10285 && !ix86_current_function_calls_tls_descriptor)
10286 {
10287 frame->red_zone_size = to_allocate;
10288 if (frame->save_regs_using_mov)
10289 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10290 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10291 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10292 }
10293 else
10294 frame->red_zone_size = 0;
10295 frame->stack_pointer_offset -= frame->red_zone_size;
10296
10297 /* The SEH frame pointer location is near the bottom of the frame.
10298 This is enforced by the fact that the difference between the
10299 stack pointer and the frame pointer is limited to 240 bytes in
10300 the unwind data structure. */
10301 if (TARGET_SEH)
10302 {
10303 HOST_WIDE_INT diff;
10304
10305 /* If we can leave the frame pointer where it is, do so. Also, returns
10306 the establisher frame for __builtin_frame_address (0). */
10307 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10308 if (diff <= SEH_MAX_FRAME_SIZE
10309 && (diff > 240 || (diff & 15) != 0)
10310 && !crtl->accesses_prior_frames)
10311 {
10312 /* Ideally we'd determine what portion of the local stack frame
10313 (within the constraint of the lowest 240) is most heavily used.
10314 But without that complication, simply bias the frame pointer
10315 by 128 bytes so as to maximize the amount of the local stack
10316 frame that is addressable with 8-bit offsets. */
10317 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
10318 }
10319 }
10320 }
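
/* Illustrative summary (not part of the original source): for a typical
   64-bit function the offsets computed above grow downward from the CFA
   roughly in this order:

     UNITS_PER_WORD              return address pushed by the caller
     (+ UNITS_PER_WORD)          optional pushed static chain
     hfp_save_offset             past the saved frame pointer, if any
     hard_frame_pointer_offset   same point (moved on SEH targets, see above)
     reg_save_offset             past the GP register save area (nregs words)
     sse_reg_save_offset         past the 16-byte aligned SSE area (Win64)
     frame_pointer_offset        past the va_arg area, suitably aligned
     stack_pointer_offset        past locals and outgoing arguments, less
                                 any red-zone bytes subtracted above.  */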
10321
10322 /* This is semi-inlined memory_address_length, but simplified
10323 since we know that we're always dealing with reg+offset, and
10324 to avoid having to create and discard all that rtl. */
10325
10326 static inline int
10327 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10328 {
10329 int len = 4;
10330
10331 if (offset == 0)
10332 {
10333 /* EBP and R13 cannot be encoded without an offset. */
10334 len = (regno == BP_REG || regno == R13_REG);
10335 }
10336 else if (IN_RANGE (offset, -128, 127))
10337 len = 1;
10338
10339 /* ESP and R12 must be encoded with a SIB byte. */
10340 if (regno == SP_REG || regno == R12_REG)
10341 len++;
10342
10343 return len;
10344 }
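
/* A few worked examples of the length computed above (illustrative, not
   part of the original source); the value counts displacement and SIB
   bytes only:

     (EAX, 0)   -> 0  base register alone
     (EBP, 0)   -> 1  EBP/R13 always need a displacement, here a zero disp8
     (ESP, 8)   -> 2  disp8 plus the mandatory SIB byte for ESP/R12
     (EBX, 512) -> 4  an offset outside [-128, 127] forces a disp32
     (R12, 512) -> 5  disp32 plus the SIB byte.  */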
10345
10346 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10347 The valid base registers are taken from CFUN->MACHINE->FS. */
10348
10349 static rtx
10350 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10351 {
10352 const struct machine_function *m = cfun->machine;
10353 rtx base_reg = NULL;
10354 HOST_WIDE_INT base_offset = 0;
10355
10356 if (m->use_fast_prologue_epilogue)
10357 {
10358 /* Choose the base register most likely to allow the most scheduling
10359 opportunities. Generally FP is valid throughout the function,
10360 while DRAP must be reloaded within the epilogue. But prefer either
10361 one over SP, which has a larger address encoding. */
10362
10363 if (m->fs.fp_valid)
10364 {
10365 base_reg = hard_frame_pointer_rtx;
10366 base_offset = m->fs.fp_offset - cfa_offset;
10367 }
10368 else if (m->fs.drap_valid)
10369 {
10370 base_reg = crtl->drap_reg;
10371 base_offset = 0 - cfa_offset;
10372 }
10373 else if (m->fs.sp_valid)
10374 {
10375 base_reg = stack_pointer_rtx;
10376 base_offset = m->fs.sp_offset - cfa_offset;
10377 }
10378 }
10379 else
10380 {
10381 HOST_WIDE_INT toffset;
10382 int len = 16, tlen;
10383
10384 /* Choose the base register with the smallest address encoding.
10385 With a tie, choose FP > DRAP > SP. */
10386 if (m->fs.sp_valid)
10387 {
10388 base_reg = stack_pointer_rtx;
10389 base_offset = m->fs.sp_offset - cfa_offset;
10390 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10391 }
10392 if (m->fs.drap_valid)
10393 {
10394 toffset = 0 - cfa_offset;
10395 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10396 if (tlen <= len)
10397 {
10398 base_reg = crtl->drap_reg;
10399 base_offset = toffset;
10400 len = tlen;
10401 }
10402 }
10403 if (m->fs.fp_valid)
10404 {
10405 toffset = m->fs.fp_offset - cfa_offset;
10406 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10407 if (tlen <= len)
10408 {
10409 base_reg = hard_frame_pointer_rtx;
10410 base_offset = toffset;
10411 len = tlen;
10412 }
10413 }
10414 }
10415 gcc_assert (base_reg != NULL);
10416
10417 return plus_constant (Pmode, base_reg, base_offset);
10418 }
10419
10420 /* Emit code to save registers in the prologue. */
10421
10422 static void
10423 ix86_emit_save_regs (void)
10424 {
10425 unsigned int regno;
10426 rtx_insn *insn;
10427
10428 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10429 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10430 {
10431 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10432 RTX_FRAME_RELATED_P (insn) = 1;
10433 }
10434 }
10435
10436 /* Emit a single register save at CFA - CFA_OFFSET. */
10437
10438 static void
10439 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10440 HOST_WIDE_INT cfa_offset)
10441 {
10442 struct machine_function *m = cfun->machine;
10443 rtx reg = gen_rtx_REG (mode, regno);
10444 rtx mem, addr, base, insn;
10445
10446 addr = choose_baseaddr (cfa_offset);
10447 mem = gen_frame_mem (mode, addr);
10448
10449 /* For SSE saves, we need to indicate the 128-bit alignment. */
10450 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10451
10452 insn = emit_move_insn (mem, reg);
10453 RTX_FRAME_RELATED_P (insn) = 1;
10454
10455 base = addr;
10456 if (GET_CODE (base) == PLUS)
10457 base = XEXP (base, 0);
10458 gcc_checking_assert (REG_P (base));
10459
10460 /* When saving registers into a re-aligned local stack frame, avoid
10461 any tricky guessing by dwarf2out. */
10462 if (m->fs.realigned)
10463 {
10464 gcc_checking_assert (stack_realign_drap);
10465
10466 if (regno == REGNO (crtl->drap_reg))
10467 {
10468 /* A bit of a hack. We force the DRAP register to be saved in
10469 the re-aligned stack frame, which provides us with a copy
10470 of the CFA that will last past the prologue. Install it. */
10471 gcc_checking_assert (cfun->machine->fs.fp_valid);
10472 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10473 cfun->machine->fs.fp_offset - cfa_offset);
10474 mem = gen_rtx_MEM (mode, addr);
10475 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10476 }
10477 else
10478 {
10479 /* The frame pointer is a stable reference within the
10480 aligned frame. Use it. */
10481 gcc_checking_assert (cfun->machine->fs.fp_valid);
10482 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10483 cfun->machine->fs.fp_offset - cfa_offset);
10484 mem = gen_rtx_MEM (mode, addr);
10485 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
10486 }
10487 }
10488
10489 /* The memory may not be relative to the current CFA register,
10490 which means that we may need to generate a new pattern for
10491 use by the unwind info. */
10492 else if (base != m->fs.cfa_reg)
10493 {
10494 addr = plus_constant (Pmode, m->fs.cfa_reg,
10495 m->fs.cfa_offset - cfa_offset);
10496 mem = gen_rtx_MEM (mode, addr);
10497 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
10498 }
10499 }
10500
10501 /* Emit code to save registers using MOV insns.
10502 First register is stored at CFA - CFA_OFFSET. */
10503 static void
10504 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10505 {
10506 unsigned int regno;
10507
10508 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10509 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10510 {
10511 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10512 cfa_offset -= UNITS_PER_WORD;
10513 }
10514 }
10515
10516 /* Emit code to save SSE registers using MOV insns.
10517 First register is stored at CFA - CFA_OFFSET. */
10518 static void
10519 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10520 {
10521 unsigned int regno;
10522
10523 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10524 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10525 {
10526 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10527 cfa_offset -= 16;
10528 }
10529 }
10530
10531 static GTY(()) rtx queued_cfa_restores;
10532
10533 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
10534 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10535 Don't add the note if the previously saved value will be left untouched
10536 within the stack red zone until return, as unwinders can find the same
10537 value in the register and on the stack. */
10538
10539 static void
10540 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
10541 {
10542 if (!crtl->shrink_wrapped
10543 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10544 return;
10545
10546 if (insn)
10547 {
10548 add_reg_note (insn, REG_CFA_RESTORE, reg);
10549 RTX_FRAME_RELATED_P (insn) = 1;
10550 }
10551 else
10552 queued_cfa_restores
10553 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10554 }
10555
10556 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
10557
10558 static void
10559 ix86_add_queued_cfa_restore_notes (rtx insn)
10560 {
10561 rtx last;
10562 if (!queued_cfa_restores)
10563 return;
10564 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10565 ;
10566 XEXP (last, 1) = REG_NOTES (insn);
10567 REG_NOTES (insn) = queued_cfa_restores;
10568 queued_cfa_restores = NULL_RTX;
10569 RTX_FRAME_RELATED_P (insn) = 1;
10570 }
10571
10572 /* Expand prologue or epilogue stack adjustment.
10573 The pattern exists to put a dependency on all ebp-based memory accesses.
10574 STYLE should be negative if instructions should be marked as frame related,
10575 zero if %r11 register is live and cannot be freely used and positive
10576 otherwise. */
10577
10578 static void
10579 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10580 int style, bool set_cfa)
10581 {
10582 struct machine_function *m = cfun->machine;
10583 rtx insn;
10584 bool add_frame_related_expr = false;
10585
10586 if (Pmode == SImode)
10587 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10588 else if (x86_64_immediate_operand (offset, DImode))
10589 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10590 else
10591 {
10592 rtx tmp;
10593 /* r11 is used by indirect sibcall return as well, set before the
10594 epilogue and used after the epilogue. */
10595 if (style)
10596 tmp = gen_rtx_REG (DImode, R11_REG);
10597 else
10598 {
10599 gcc_assert (src != hard_frame_pointer_rtx
10600 && dest != hard_frame_pointer_rtx);
10601 tmp = hard_frame_pointer_rtx;
10602 }
10603 insn = emit_insn (gen_rtx_SET (tmp, offset));
10604 if (style < 0)
10605 add_frame_related_expr = true;
10606
10607 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10608 }
10609
10610 insn = emit_insn (insn);
10611 if (style >= 0)
10612 ix86_add_queued_cfa_restore_notes (insn);
10613
10614 if (set_cfa)
10615 {
10616 rtx r;
10617
10618 gcc_assert (m->fs.cfa_reg == src);
10619 m->fs.cfa_offset += INTVAL (offset);
10620 m->fs.cfa_reg = dest;
10621
10622 r = gen_rtx_PLUS (Pmode, src, offset);
10623 r = gen_rtx_SET (dest, r);
10624 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10625 RTX_FRAME_RELATED_P (insn) = 1;
10626 }
10627 else if (style < 0)
10628 {
10629 RTX_FRAME_RELATED_P (insn) = 1;
10630 if (add_frame_related_expr)
10631 {
10632 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10633 r = gen_rtx_SET (dest, r);
10634 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10635 }
10636 }
10637
10638 if (dest == stack_pointer_rtx)
10639 {
10640 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10641 bool valid = m->fs.sp_valid;
10642
10643 if (src == hard_frame_pointer_rtx)
10644 {
10645 valid = m->fs.fp_valid;
10646 ooffset = m->fs.fp_offset;
10647 }
10648 else if (src == crtl->drap_reg)
10649 {
10650 valid = m->fs.drap_valid;
10651 ooffset = 0;
10652 }
10653 else
10654 {
10655 /* Else there are two possibilities: SP itself, which we set
10656 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10657 taken care of by hand along the eh_return path. */
10658 gcc_checking_assert (src == stack_pointer_rtx
10659 || offset == const0_rtx);
10660 }
10661
10662 m->fs.sp_offset = ooffset - INTVAL (offset);
10663 m->fs.sp_valid = valid;
10664 }
10665 }
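
/* For reference (illustrative), a typical use from the prologue below is

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                GEN_INT (-allocate), -1,
                                m->fs.cfa_reg == stack_pointer_rtx);

   i.e. DEST and SRC are both the stack pointer, STYLE == -1 marks the
   adjustment as frame related, and SET_CFA is true only while the stack
   pointer is still the CFA register.  */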
10666
10667 /* Find an available register to be used as the dynamic realign argument
10668 pointer register. Such a register will be written in the prologue and
10669 used at the beginning of the body, so it must not be
10670 1. a parameter passing register.
10671 2. the GOT pointer.
10672 We reuse the static-chain register if it is available. Otherwise, we
10673 use DI for i386 and R10 for x86-64, falling back to R13 when R10
10674 cannot be used (see below).
10675
10676 Return: the regno of the chosen register. */
10677
10678 static unsigned int
10679 find_drap_reg (void)
10680 {
10681 tree decl = cfun->decl;
10682
10683 if (TARGET_64BIT)
10684 {
10685 /* Use R13 for nested functions or functions that need a static chain.
10686 Since a function with a tail call may use any caller-saved
10687 register in the epilogue, DRAP must not use a caller-saved
10688 register in that case. */
10689 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10690 return R13_REG;
10691
10692 return R10_REG;
10693 }
10694 else
10695 {
10696 /* Use DI for nested functions or functions that need a static chain.
10697 Since a function with a tail call may use any caller-saved
10698 register in the epilogue, DRAP must not use a caller-saved
10699 register in that case. */
10700 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10701 return DI_REG;
10702
10703 /* Reuse static chain register if it isn't used for parameter
10704 passing. */
10705 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10706 {
10707 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10708 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10709 return CX_REG;
10710 }
10711 return DI_REG;
10712 }
10713 }
10714
10715 /* Return minimum incoming stack alignment. */
10716
10717 static unsigned int
10718 ix86_minimum_incoming_stack_boundary (bool sibcall)
10719 {
10720 unsigned int incoming_stack_boundary;
10721
10722 /* Prefer the one specified at command line. */
10723 if (ix86_user_incoming_stack_boundary)
10724 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10725 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10726 if -mstackrealign is used, this isn't a sibcall check, and the
10727 estimated stack alignment is 128 bits. */
10728 else if (!sibcall
10729 && !TARGET_64BIT
10730 && ix86_force_align_arg_pointer
10731 && crtl->stack_alignment_estimated == 128)
10732 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10733 else
10734 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10735
10736 /* Incoming stack alignment can be changed on individual functions
10737 via force_align_arg_pointer attribute. We use the smallest
10738 incoming stack boundary. */
10739 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10740 && lookup_attribute (ix86_force_align_arg_pointer_string,
10741 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10742 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10743
10744 /* The incoming stack frame has to be aligned at least at
10745 parm_stack_boundary. */
10746 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10747 incoming_stack_boundary = crtl->parm_stack_boundary;
10748
10749 /* Stack at entrance of main is aligned by runtime. We use the
10750 smallest incoming stack boundary. */
10751 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10752 && DECL_NAME (current_function_decl)
10753 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10754 && DECL_FILE_SCOPE_P (current_function_decl))
10755 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10756
10757 return incoming_stack_boundary;
10758 }
10759
10760 /* Update incoming stack boundary and estimated stack alignment. */
10761
10762 static void
10763 ix86_update_stack_boundary (void)
10764 {
10765 ix86_incoming_stack_boundary
10766 = ix86_minimum_incoming_stack_boundary (false);
10767
10768 /* x86_64 varargs need a 16-byte stack alignment for the register save
10769 area. */
10770 if (TARGET_64BIT
10771 && cfun->stdarg
10772 && crtl->stack_alignment_estimated < 128)
10773 crtl->stack_alignment_estimated = 128;
10774 }
10775
10776 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10777 needed or an rtx for DRAP otherwise. */
10778
10779 static rtx
10780 ix86_get_drap_rtx (void)
10781 {
10782 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10783 crtl->need_drap = true;
10784
10785 if (stack_realign_drap)
10786 {
10787 /* Assign DRAP to vDRAP and return vDRAP. */
10788 unsigned int regno = find_drap_reg ();
10789 rtx drap_vreg;
10790 rtx arg_ptr;
10791 rtx_insn *seq, *insn;
10792
10793 arg_ptr = gen_rtx_REG (Pmode, regno);
10794 crtl->drap_reg = arg_ptr;
10795
10796 start_sequence ();
10797 drap_vreg = copy_to_reg (arg_ptr);
10798 seq = get_insns ();
10799 end_sequence ();
10800
10801 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10802 if (!optimize)
10803 {
10804 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10805 RTX_FRAME_RELATED_P (insn) = 1;
10806 }
10807 return drap_vreg;
10808 }
10809 else
10810 return NULL;
10811 }
10812
10813 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10814
10815 static rtx
10816 ix86_internal_arg_pointer (void)
10817 {
10818 return virtual_incoming_args_rtx;
10819 }
10820
10821 struct scratch_reg {
10822 rtx reg;
10823 bool saved;
10824 };
10825
10826 /* Return a short-lived scratch register for use on function entry.
10827 In 32-bit mode, it is valid only after the registers are saved
10828 in the prologue. This register must be released by means of
10829 release_scratch_register_on_entry once it is dead. */
10830
10831 static void
10832 get_scratch_register_on_entry (struct scratch_reg *sr)
10833 {
10834 int regno;
10835
10836 sr->saved = false;
10837
10838 if (TARGET_64BIT)
10839 {
10840 /* We always use R11 in 64-bit mode. */
10841 regno = R11_REG;
10842 }
10843 else
10844 {
10845 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10846 bool fastcall_p
10847 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10848 bool thiscall_p
10849 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10850 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10851 int regparm = ix86_function_regparm (fntype, decl);
10852 int drap_regno
10853 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10854
10855 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10856 for the static chain register. */
10857 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10858 && drap_regno != AX_REG)
10859 regno = AX_REG;
10860 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10861 for the static chain register. */
10862 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10863 regno = AX_REG;
10864 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10865 regno = DX_REG;
10866 /* ecx is the static chain register. */
10867 else if (regparm < 3 && !fastcall_p && !thiscall_p
10868 && !static_chain_p
10869 && drap_regno != CX_REG)
10870 regno = CX_REG;
10871 else if (ix86_save_reg (BX_REG, true))
10872 regno = BX_REG;
10873 /* esi is the static chain register. */
10874 else if (!(regparm == 3 && static_chain_p)
10875 && ix86_save_reg (SI_REG, true))
10876 regno = SI_REG;
10877 else if (ix86_save_reg (DI_REG, true))
10878 regno = DI_REG;
10879 else
10880 {
10881 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10882 sr->saved = true;
10883 }
10884 }
10885
10886 sr->reg = gen_rtx_REG (Pmode, regno);
10887 if (sr->saved)
10888 {
10889 rtx_insn *insn = emit_insn (gen_push (sr->reg));
10890 RTX_FRAME_RELATED_P (insn) = 1;
10891 }
10892 }
10893
10894 /* Release a scratch register obtained from the preceding function. */
10895
10896 static void
10897 release_scratch_register_on_entry (struct scratch_reg *sr)
10898 {
10899 if (sr->saved)
10900 {
10901 struct machine_function *m = cfun->machine;
10902 rtx x, insn = emit_insn (gen_pop (sr->reg));
10903
10904 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10905 RTX_FRAME_RELATED_P (insn) = 1;
10906 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10907 x = gen_rtx_SET (stack_pointer_rtx, x);
10908 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10909 m->fs.sp_offset -= UNITS_PER_WORD;
10910 }
10911 }
10912
10913 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10914
10915 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10916
10917 static void
10918 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10919 {
10920 /* We skip the probe for the first interval + a small dope of 4 words and
10921 probe that many bytes past the specified size to maintain a protection
10922 area at the bottom of the stack. */
10923 const int dope = 4 * UNITS_PER_WORD;
10924 rtx size_rtx = GEN_INT (size), last;
10925
10926 /* See if we have a constant small number of probes to generate. If so,
10927 that's the easy case. The run-time loop is made up of 11 insns in the
10928 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10929 for n # of intervals. */
10930 if (size <= 5 * PROBE_INTERVAL)
10931 {
10932 HOST_WIDE_INT i, adjust;
10933 bool first_probe = true;
10934
10935 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10936 values of N from 1 until it exceeds SIZE. If only one probe is
10937 needed, this will not generate any code. Then adjust and probe
10938 to PROBE_INTERVAL + SIZE. */
10939 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10940 {
10941 if (first_probe)
10942 {
10943 adjust = 2 * PROBE_INTERVAL + dope;
10944 first_probe = false;
10945 }
10946 else
10947 adjust = PROBE_INTERVAL;
10948
10949 emit_insn (gen_rtx_SET (stack_pointer_rtx,
10950 plus_constant (Pmode, stack_pointer_rtx,
10951 -adjust)));
10952 emit_stack_probe (stack_pointer_rtx);
10953 }
10954
10955 if (first_probe)
10956 adjust = size + PROBE_INTERVAL + dope;
10957 else
10958 adjust = size + PROBE_INTERVAL - i;
10959
10960 emit_insn (gen_rtx_SET (stack_pointer_rtx,
10961 plus_constant (Pmode, stack_pointer_rtx,
10962 -adjust)));
10963 emit_stack_probe (stack_pointer_rtx);
10964
10965 /* Adjust back to account for the additional first interval. */
10966 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
10967 plus_constant (Pmode, stack_pointer_rtx,
10968 PROBE_INTERVAL + dope)));
10969 }
10970
10971 /* Otherwise, do the same as above, but in a loop. Note that we must be
10972 extra careful with variables wrapping around because we might be at
10973 the very top (or the very bottom) of the address space and we have
10974 to be able to handle this case properly; in particular, we use an
10975 equality test for the loop condition. */
10976 else
10977 {
10978 HOST_WIDE_INT rounded_size;
10979 struct scratch_reg sr;
10980
10981 get_scratch_register_on_entry (&sr);
10982
10983
10984 /* Step 1: round SIZE to the previous multiple of the interval. */
10985
10986 rounded_size = size & -PROBE_INTERVAL;
10987
10988
10989 /* Step 2: compute initial and final value of the loop counter. */
10990
10991 /* SP = SP_0 + PROBE_INTERVAL. */
10992 emit_insn (gen_rtx_SET (stack_pointer_rtx,
10993 plus_constant (Pmode, stack_pointer_rtx,
10994 - (PROBE_INTERVAL + dope))));
10995
10996 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10997 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10998 emit_insn (gen_rtx_SET (sr.reg,
10999 gen_rtx_PLUS (Pmode, sr.reg,
11000 stack_pointer_rtx)));
11001
11002
11003 /* Step 3: the loop
11004
11005 while (SP != LAST_ADDR)
11006 {
11007 SP = SP + PROBE_INTERVAL
11008 probe at SP
11009 }
11010
11011 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
11012 values of N from 1 until it is equal to ROUNDED_SIZE. */
11013
11014 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
11015
11016
11017 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
11018 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
11019
11020 if (size != rounded_size)
11021 {
11022 emit_insn (gen_rtx_SET (stack_pointer_rtx,
11023 plus_constant (Pmode, stack_pointer_rtx,
11024 rounded_size - size)));
11025 emit_stack_probe (stack_pointer_rtx);
11026 }
11027
11028 /* Adjust back to account for the additional first interval. */
11029 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
11030 plus_constant (Pmode, stack_pointer_rtx,
11031 PROBE_INTERVAL + dope)));
11032
11033 release_scratch_register_on_entry (&sr);
11034 }
11035
11036 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
11037
11038 /* Even if the stack pointer isn't the CFA register, we need to correctly
11039 describe the adjustments made to it, in particular differentiate the
11040 frame-related ones from the frame-unrelated ones. */
11041 if (size > 0)
11042 {
11043 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
11044 XVECEXP (expr, 0, 0)
11045 = gen_rtx_SET (stack_pointer_rtx,
11046 plus_constant (Pmode, stack_pointer_rtx, -size));
11047 XVECEXP (expr, 0, 1)
11048 = gen_rtx_SET (stack_pointer_rtx,
11049 plus_constant (Pmode, stack_pointer_rtx,
11050 PROBE_INTERVAL + dope + size));
11051 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
11052 RTX_FRAME_RELATED_P (last) = 1;
11053
11054 cfun->machine->fs.sp_offset += size;
11055 }
11056
11057 /* Make sure nothing is scheduled before we are done. */
11058 emit_insn (gen_blockage ());
11059 }
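
/* Worked example (illustrative, assuming PROBE_INTERVAL is 4096 and a
   64-bit target, so the dope is 32 bytes): for SIZE == 10000 the
   constant-size path above emits, in effect,

     sub $8224, %rsp ; probe      2 * 4096 + 32
     sub $4096, %rsp ; probe
     sub $1808, %rsp ; probe      10000 + 4096 - 8192
     add $4128, %rsp              re-adjust by 4096 + 32

   for a net adjustment of exactly -10000, with consecutive probes never
   more than PROBE_INTERVAL bytes apart.  */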
11060
11061 /* Adjust the stack pointer up to REG while probing it. */
11062
11063 const char *
11064 output_adjust_stack_and_probe (rtx reg)
11065 {
11066 static int labelno = 0;
11067 char loop_lab[32], end_lab[32];
11068 rtx xops[2];
11069
11070 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11071 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11072
11073 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11074
11075 /* Jump to END_LAB if SP == LAST_ADDR. */
11076 xops[0] = stack_pointer_rtx;
11077 xops[1] = reg;
11078 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11079 fputs ("\tje\t", asm_out_file);
11080 assemble_name_raw (asm_out_file, end_lab);
11081 fputc ('\n', asm_out_file);
11082
11083 /* SP = SP + PROBE_INTERVAL. */
11084 xops[1] = GEN_INT (PROBE_INTERVAL);
11085 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11086
11087 /* Probe at SP. */
11088 xops[1] = const0_rtx;
11089 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
11090
11091 fprintf (asm_out_file, "\tjmp\t");
11092 assemble_name_raw (asm_out_file, loop_lab);
11093 fputc ('\n', asm_out_file);
11094
11095 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11096
11097 return "";
11098 }
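
/* Roughly (illustrative, assuming the default 4096-byte probe interval on
   x86-64 with %r11 as the scratch register holding LAST_ADDR), the loop
   printed above comes out as

     .LPSRL0:
             cmpq    %r11, %rsp
             je      .LPSRE0
             subq    $4096, %rsp
             orq     $0, (%rsp)
             jmp     .LPSRL0
     .LPSRE0:                                                              */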
11099
11100 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
11101 inclusive. These are offsets from the current stack pointer. */
11102
11103 static void
11104 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
11105 {
11106 /* See if we have a constant small number of probes to generate. If so,
11107 that's the easy case. The run-time loop is made up of 7 insns in the
11108 generic case while the compile-time loop is made up of n insns for n #
11109 of intervals. */
11110 if (size <= 7 * PROBE_INTERVAL)
11111 {
11112 HOST_WIDE_INT i;
11113
11114 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
11115 it exceeds SIZE. If only one probe is needed, this will not
11116 generate any code. Then probe at FIRST + SIZE. */
11117 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
11118 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11119 -(first + i)));
11120
11121 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11122 -(first + size)));
11123 }
11124
11125 /* Otherwise, do the same as above, but in a loop. Note that we must be
11126 extra careful with variables wrapping around because we might be at
11127 the very top (or the very bottom) of the address space and we have
11128 to be able to handle this case properly; in particular, we use an
11129 equality test for the loop condition. */
11130 else
11131 {
11132 HOST_WIDE_INT rounded_size, last;
11133 struct scratch_reg sr;
11134
11135 get_scratch_register_on_entry (&sr);
11136
11137
11138 /* Step 1: round SIZE to the previous multiple of the interval. */
11139
11140 rounded_size = size & -PROBE_INTERVAL;
11141
11142
11143 /* Step 2: compute initial and final value of the loop counter. */
11144
11145 /* TEST_OFFSET = FIRST. */
11146 emit_move_insn (sr.reg, GEN_INT (-first));
11147
11148 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
11149 last = first + rounded_size;
11150
11151
11152 /* Step 3: the loop
11153
11154 while (TEST_ADDR != LAST_ADDR)
11155 {
11156 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
11157 probe at TEST_ADDR
11158 }
11159
11160 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
11161 until it is equal to ROUNDED_SIZE. */
11162
11163 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
11164
11165
11166 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
11167 that SIZE is equal to ROUNDED_SIZE. */
11168
11169 if (size != rounded_size)
11170 emit_stack_probe (plus_constant (Pmode,
11171 gen_rtx_PLUS (Pmode,
11172 stack_pointer_rtx,
11173 sr.reg),
11174 rounded_size - size));
11175
11176 release_scratch_register_on_entry (&sr);
11177 }
11178
11179 /* Make sure nothing is scheduled before we are done. */
11180 emit_insn (gen_blockage ());
11181 }
11182
11183 /* Probe a range of stack addresses from REG to END, inclusive. These are
11184 offsets from the current stack pointer. */
11185
11186 const char *
11187 output_probe_stack_range (rtx reg, rtx end)
11188 {
11189 static int labelno = 0;
11190 char loop_lab[32], end_lab[32];
11191 rtx xops[3];
11192
11193 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11194 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11195
11196 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11197
11198 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
11199 xops[0] = reg;
11200 xops[1] = end;
11201 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11202 fputs ("\tje\t", asm_out_file);
11203 assemble_name_raw (asm_out_file, end_lab);
11204 fputc ('\n', asm_out_file);
11205
11206 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
11207 xops[1] = GEN_INT (PROBE_INTERVAL);
11208 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11209
11210 /* Probe at TEST_ADDR. */
11211 xops[0] = stack_pointer_rtx;
11212 xops[1] = reg;
11213 xops[2] = const0_rtx;
11214 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11215
11216 fprintf (asm_out_file, "\tjmp\t");
11217 assemble_name_raw (asm_out_file, loop_lab);
11218 fputc ('\n', asm_out_file);
11219
11220 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11221
11222 return "";
11223 }
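
/* Roughly the same loop as output_adjust_stack_and_probe above
   (illustrative), except that the stack pointer itself is not moved: the
   scratch register walks from -FIRST down to -(FIRST + ROUNDED_SIZE) and
   each probe is an "orq $0, (%rsp,%r11)" style store at that offset from
   the stack pointer.  */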
11224
11225 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
11226 to be generated in correct form. */
11227 static void
11228 ix86_finalize_stack_realign_flags (void)
11229 {
11230 /* Check whether stack realignment is really needed after reload, and
11231 store the result in cfun. */
11232 unsigned int incoming_stack_boundary
11233 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11234 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11235 unsigned int stack_realign = (incoming_stack_boundary
11236 < (crtl->is_leaf
11237 ? crtl->max_used_stack_slot_alignment
11238 : crtl->stack_alignment_needed));
11239
11240 if (crtl->stack_realign_finalized)
11241 {
11242 /* After stack_realign_needed is finalized, we can no longer
11243 change it. */
11244 gcc_assert (crtl->stack_realign_needed == stack_realign);
11245 return;
11246 }
11247
11248 /* If the only reason for frame_pointer_needed is that we conservatively
11249 assumed stack realignment might be needed, but in the end nothing that
11250 needed the stack alignment had been spilled, clear frame_pointer_needed
11251 and say we don't need stack realignment. */
11252 if (stack_realign
11253 && frame_pointer_needed
11254 && crtl->is_leaf
11255 && flag_omit_frame_pointer
11256 && crtl->sp_is_unchanging
11257 && !ix86_current_function_calls_tls_descriptor
11258 && !crtl->accesses_prior_frames
11259 && !cfun->calls_alloca
11260 && !crtl->calls_eh_return
11261 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
11262 && !ix86_frame_pointer_required ()
11263 && get_frame_size () == 0
11264 && ix86_nsaved_sseregs () == 0
11265 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11266 {
11267 HARD_REG_SET set_up_by_prologue, prologue_used;
11268 basic_block bb;
11269
11270 CLEAR_HARD_REG_SET (prologue_used);
11271 CLEAR_HARD_REG_SET (set_up_by_prologue);
11272 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11273 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11274 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11275 HARD_FRAME_POINTER_REGNUM);
11276 FOR_EACH_BB_FN (bb, cfun)
11277 {
11278 rtx_insn *insn;
11279 FOR_BB_INSNS (bb, insn)
11280 if (NONDEBUG_INSN_P (insn)
11281 && requires_stack_frame_p (insn, prologue_used,
11282 set_up_by_prologue))
11283 {
11284 crtl->stack_realign_needed = stack_realign;
11285 crtl->stack_realign_finalized = true;
11286 return;
11287 }
11288 }
11289
11290 /* If drap has been set, but it actually isn't live at the start
11291 of the function, there is no reason to set it up. */
11292 if (crtl->drap_reg)
11293 {
11294 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11295 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11296 {
11297 crtl->drap_reg = NULL_RTX;
11298 crtl->need_drap = false;
11299 }
11300 }
11301 else
11302 cfun->machine->no_drap_save_restore = true;
11303
11304 frame_pointer_needed = false;
11305 stack_realign = false;
11306 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11307 crtl->stack_alignment_needed = incoming_stack_boundary;
11308 crtl->stack_alignment_estimated = incoming_stack_boundary;
11309 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11310 crtl->preferred_stack_boundary = incoming_stack_boundary;
11311 df_finish_pass (true);
11312 df_scan_alloc (NULL);
11313 df_scan_blocks ();
11314 df_compute_regs_ever_live (true);
11315 df_analyze ();
11316 }
11317
11318 crtl->stack_realign_needed = stack_realign;
11319 crtl->stack_realign_finalized = true;
11320 }
11321
11322 /* Delete SET_GOT right after entry block if it is allocated to reg. */
11323
11324 static void
11325 ix86_elim_entry_set_got (rtx reg)
11326 {
11327 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11328 rtx_insn *c_insn = BB_HEAD (bb);
11329 if (!NONDEBUG_INSN_P (c_insn))
11330 c_insn = next_nonnote_nondebug_insn (c_insn);
11331 if (c_insn && NONJUMP_INSN_P (c_insn))
11332 {
11333 rtx pat = PATTERN (c_insn);
11334 if (GET_CODE (pat) == PARALLEL)
11335 {
11336 rtx vec = XVECEXP (pat, 0, 0);
11337 if (GET_CODE (vec) == SET
11338 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11339 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11340 delete_insn (c_insn);
11341 }
11342 }
11343 }
11344
11345 /* Expand the prologue into a bunch of separate insns. */
11346
11347 void
11348 ix86_expand_prologue (void)
11349 {
11350 struct machine_function *m = cfun->machine;
11351 rtx insn, t;
11352 struct ix86_frame frame;
11353 HOST_WIDE_INT allocate;
11354 bool int_registers_saved;
11355 bool sse_registers_saved;
11356
11357 ix86_finalize_stack_realign_flags ();
11358
11359 /* DRAP should not coexist with stack_realign_fp */
11360 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11361
11362 memset (&m->fs, 0, sizeof (m->fs));
11363
11364 /* Initialize CFA state for before the prologue. */
11365 m->fs.cfa_reg = stack_pointer_rtx;
11366 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11367
11368 /* Track SP offset to the CFA. We continue tracking this after we've
11369 swapped the CFA register away from SP. In the case of re-alignment
11370 this is fudged; we're interested in offsets within the local frame. */
11371 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11372 m->fs.sp_valid = true;
11373
11374 ix86_compute_frame_layout (&frame);
11375
11376 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11377 {
11378 /* We should have already generated an error for any use of
11379 ms_hook on a nested function. */
11380 gcc_checking_assert (!ix86_static_chain_on_stack);
11381
11382 /* Check if profiling is active and we shall use the profiling-before-
11383 prologue variant. If so, issue a sorry. */
11384 if (crtl->profile && flag_fentry != 0)
11385 sorry ("ms_hook_prologue attribute isn%'t compatible "
11386 "with -mfentry for 32-bit");
11387
11388 /* In ix86_asm_output_function_label we emitted:
11389 8b ff movl.s %edi,%edi
11390 55 push %ebp
11391 8b ec movl.s %esp,%ebp
11392
11393 This matches the hookable function prologue in Win32 API
11394 functions in Microsoft Windows XP Service Pack 2 and newer.
11395 Wine uses this to enable Windows apps to hook the Win32 API
11396 functions provided by Wine.
11397
11398 What that means is that we've already set up the frame pointer. */
11399
11400 if (frame_pointer_needed
11401 && !(crtl->drap_reg && crtl->stack_realign_needed))
11402 {
11403 rtx push, mov;
11404
11405 /* We've decided to use the frame pointer already set up.
11406 Describe this to the unwinder by pretending that both
11407 push and mov insns happen right here.
11408
11409 Putting the unwind info here at the end of the ms_hook
11410 is done so that we can make absolutely certain we get
11411 the required byte sequence at the start of the function,
11412 rather than relying on an assembler that can produce
11413 the exact encoding required.
11414
11415 However it does mean (in the unpatched case) that we have
11416 a 1 insn window where the asynchronous unwind info is
11417 incorrect. However, if we placed the unwind info at
11418 its correct location we would have incorrect unwind info
11419 in the patched case. Which is probably all moot since
11420 I don't expect Wine generates dwarf2 unwind info for the
11421 system libraries that use this feature. */
11422
11423 insn = emit_insn (gen_blockage ());
11424
11425 push = gen_push (hard_frame_pointer_rtx);
11426 mov = gen_rtx_SET (hard_frame_pointer_rtx,
11427 stack_pointer_rtx);
11428 RTX_FRAME_RELATED_P (push) = 1;
11429 RTX_FRAME_RELATED_P (mov) = 1;
11430
11431 RTX_FRAME_RELATED_P (insn) = 1;
11432 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11433 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11434
11435 /* Note that gen_push incremented m->fs.cfa_offset, even
11436 though we didn't emit the push insn here. */
11437 m->fs.cfa_reg = hard_frame_pointer_rtx;
11438 m->fs.fp_offset = m->fs.cfa_offset;
11439 m->fs.fp_valid = true;
11440 }
11441 else
11442 {
11443 /* The frame pointer is not needed so pop %ebp again.
11444 This leaves us with a pristine state. */
11445 emit_insn (gen_pop (hard_frame_pointer_rtx));
11446 }
11447 }
11448
11449 /* The first insn of a function that accepts its static chain on the
11450 stack is to push the register that would be filled in by a direct
11451 call. This insn will be skipped by the trampoline. */
11452 else if (ix86_static_chain_on_stack)
11453 {
11454 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11455 emit_insn (gen_blockage ());
11456
11457 /* We don't want to interpret this push insn as a register save,
11458 only as a stack adjustment. The real copy of the register as
11459 a save will be done later, if needed. */
11460 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11461 t = gen_rtx_SET (stack_pointer_rtx, t);
11462 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11463 RTX_FRAME_RELATED_P (insn) = 1;
11464 }
11465
11466 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
11467 DRAP is needed and stack realignment is really needed after reload. */
11468 if (stack_realign_drap)
11469 {
11470 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11471
11472 /* Only need to push parameter pointer reg if it is caller saved. */
11473 if (!call_used_regs[REGNO (crtl->drap_reg)])
11474 {
11475 /* Push arg pointer reg */
11476 insn = emit_insn (gen_push (crtl->drap_reg));
11477 RTX_FRAME_RELATED_P (insn) = 1;
11478 }
11479
11480 /* Grab the argument pointer. */
11481 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11482 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
11483 RTX_FRAME_RELATED_P (insn) = 1;
11484 m->fs.cfa_reg = crtl->drap_reg;
11485 m->fs.cfa_offset = 0;
11486
11487 /* Align the stack. */
11488 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11489 stack_pointer_rtx,
11490 GEN_INT (-align_bytes)));
11491 RTX_FRAME_RELATED_P (insn) = 1;
11492
11493 /* Replicate the return address on the stack so that return
11494 address can be reached via (argp - 1) slot. This is needed
11495 to implement macro RETURN_ADDR_RTX and intrinsic function
11496 expand_builtin_return_addr etc. */
11497 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11498 t = gen_frame_mem (word_mode, t);
11499 insn = emit_insn (gen_push (t));
11500 RTX_FRAME_RELATED_P (insn) = 1;
11501
11502 /* For the purposes of frame and register save area addressing,
11503 we've started over with a new frame. */
11504 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11505 m->fs.realigned = true;
11506 }
11507
11508 int_registers_saved = (frame.nregs == 0);
11509 sse_registers_saved = (frame.nsseregs == 0);
11510
11511 if (frame_pointer_needed && !m->fs.fp_valid)
11512 {
11513 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11514 slower on all targets. Also sdb doesn't like it. */
11515 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11516 RTX_FRAME_RELATED_P (insn) = 1;
11517
11518 /* Push registers now, before setting the frame pointer
11519 on SEH target. */
11520 if (!int_registers_saved
11521 && TARGET_SEH
11522 && !frame.save_regs_using_mov)
11523 {
11524 ix86_emit_save_regs ();
11525 int_registers_saved = true;
11526 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11527 }
11528
11529 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11530 {
11531 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11532 RTX_FRAME_RELATED_P (insn) = 1;
11533
11534 if (m->fs.cfa_reg == stack_pointer_rtx)
11535 m->fs.cfa_reg = hard_frame_pointer_rtx;
11536 m->fs.fp_offset = m->fs.sp_offset;
11537 m->fs.fp_valid = true;
11538 }
11539 }
11540
11541 if (!int_registers_saved)
11542 {
11543 /* If saving registers via PUSH, do so now. */
11544 if (!frame.save_regs_using_mov)
11545 {
11546 ix86_emit_save_regs ();
11547 int_registers_saved = true;
11548 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11549 }
11550
11551 /* When using the red zone we may start saving registers before allocating
11552 the stack frame, saving one cycle of the prologue. However, avoid
11553 doing this if we have to probe the stack; at least on x86_64 the
11554 stack probe can turn into a call that clobbers a red zone location. */
11555 else if (ix86_using_red_zone ()
11556 && (! TARGET_STACK_PROBE
11557 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11558 {
11559 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11560 int_registers_saved = true;
11561 }
11562 }
11563
11564 if (stack_realign_fp)
11565 {
11566 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11567 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11568
11569 /* The computation of the size of the re-aligned stack frame means
11570 that we must allocate the size of the register save area before
11571 performing the actual alignment. Otherwise we cannot guarantee
11572 that there's enough storage above the realignment point. */
11573 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11574 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11575 GEN_INT (m->fs.sp_offset
11576 - frame.sse_reg_save_offset),
11577 -1, false);
11578
11579 /* Align the stack. */
11580 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11581 stack_pointer_rtx,
11582 GEN_INT (-align_bytes)));
11583
11584 /* For the purposes of register save area addressing, the stack
11585 pointer is no longer valid. As for the value of sp_offset,
11586 see ix86_compute_frame_layout, which we need to match in order
11587 to pass verification of stack_pointer_offset at the end. */
11588 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11589 m->fs.sp_valid = false;
11590 }
11591
11592 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11593
11594 if (flag_stack_usage_info)
11595 {
11596 /* We start to count from ARG_POINTER. */
11597 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11598
11599 /* If it was realigned, take into account the fake frame. */
11600 if (stack_realign_drap)
11601 {
11602 if (ix86_static_chain_on_stack)
11603 stack_size += UNITS_PER_WORD;
11604
11605 if (!call_used_regs[REGNO (crtl->drap_reg)])
11606 stack_size += UNITS_PER_WORD;
11607
11608 /* This over-estimates by 1 minimal-stack-alignment-unit but
11609 mitigates that by counting in the new return address slot. */
11610 current_function_dynamic_stack_size
11611 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11612 }
11613
11614 current_function_static_stack_size = stack_size;
11615 }
11616
11617 /* On SEH target with very large frame size, allocate an area to save
11618 SSE registers (as the very large allocation won't be described). */
11619 if (TARGET_SEH
11620 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11621 && !sse_registers_saved)
11622 {
11623 HOST_WIDE_INT sse_size =
11624 frame.sse_reg_save_offset - frame.reg_save_offset;
11625
11626 gcc_assert (int_registers_saved);
11627
11628 /* No need to do stack checking as the area will be immediately
11629 written. */
11630 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11631 GEN_INT (-sse_size), -1,
11632 m->fs.cfa_reg == stack_pointer_rtx);
11633 allocate -= sse_size;
11634 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11635 sse_registers_saved = true;
11636 }
11637
11638 /* The stack has already been decremented by the instruction calling us
11639 so probe if the size is non-negative to preserve the protection area. */
11640 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11641 {
11642 /* We expect the registers to be saved when probes are used. */
11643 gcc_assert (int_registers_saved);
11644
11645 if (STACK_CHECK_MOVING_SP)
11646 {
11647 if (!(crtl->is_leaf && !cfun->calls_alloca
11648 && allocate <= PROBE_INTERVAL))
11649 {
11650 ix86_adjust_stack_and_probe (allocate);
11651 allocate = 0;
11652 }
11653 }
11654 else
11655 {
11656 HOST_WIDE_INT size = allocate;
11657
11658 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11659 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11660
11661 if (TARGET_STACK_PROBE)
11662 {
11663 if (crtl->is_leaf && !cfun->calls_alloca)
11664 {
11665 if (size > PROBE_INTERVAL)
11666 ix86_emit_probe_stack_range (0, size);
11667 }
11668 else
11669 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11670 }
11671 else
11672 {
11673 if (crtl->is_leaf && !cfun->calls_alloca)
11674 {
11675 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11676 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11677 size - STACK_CHECK_PROTECT);
11678 }
11679 else
11680 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11681 }
11682 }
11683 }
11684
11685 if (allocate == 0)
11686 ;
11687 else if (!ix86_target_stack_probe ()
11688 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11689 {
11690 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11691 GEN_INT (-allocate), -1,
11692 m->fs.cfa_reg == stack_pointer_rtx);
11693 }
11694 else
11695 {
11696 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11697 rtx r10 = NULL;
11698 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11699 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11700 bool eax_live = ix86_eax_live_at_start_p ();
11701 bool r10_live = false;
11702
11703 if (TARGET_64BIT)
11704 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11705
11706 if (eax_live)
11707 {
11708 insn = emit_insn (gen_push (eax));
11709 allocate -= UNITS_PER_WORD;
11710 /* Note that SEH directives need to continue tracking the stack
11711 pointer even after the frame pointer has been set up. */
11712 if (sp_is_cfa_reg || TARGET_SEH)
11713 {
11714 if (sp_is_cfa_reg)
11715 m->fs.cfa_offset += UNITS_PER_WORD;
11716 RTX_FRAME_RELATED_P (insn) = 1;
11717 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11718 gen_rtx_SET (stack_pointer_rtx,
11719 plus_constant (Pmode, stack_pointer_rtx,
11720 -UNITS_PER_WORD)));
11721 }
11722 }
11723
11724 if (r10_live)
11725 {
11726 r10 = gen_rtx_REG (Pmode, R10_REG);
11727 insn = emit_insn (gen_push (r10));
11728 allocate -= UNITS_PER_WORD;
11729 if (sp_is_cfa_reg || TARGET_SEH)
11730 {
11731 if (sp_is_cfa_reg)
11732 m->fs.cfa_offset += UNITS_PER_WORD;
11733 RTX_FRAME_RELATED_P (insn) = 1;
11734 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11735 gen_rtx_SET (stack_pointer_rtx,
11736 plus_constant (Pmode, stack_pointer_rtx,
11737 -UNITS_PER_WORD)));
11738 }
11739 }
11740
11741 emit_move_insn (eax, GEN_INT (allocate));
11742 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11743
11744 /* Use the fact that AX still contains ALLOCATE. */
11745 adjust_stack_insn = (Pmode == DImode
11746 ? gen_pro_epilogue_adjust_stack_di_sub
11747 : gen_pro_epilogue_adjust_stack_si_sub);
11748
11749 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11750 stack_pointer_rtx, eax));
11751
11752 if (sp_is_cfa_reg || TARGET_SEH)
11753 {
11754 if (sp_is_cfa_reg)
11755 m->fs.cfa_offset += allocate;
11756 RTX_FRAME_RELATED_P (insn) = 1;
11757 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11758 gen_rtx_SET (stack_pointer_rtx,
11759 plus_constant (Pmode, stack_pointer_rtx,
11760 -allocate)));
11761 }
11762 m->fs.sp_offset += allocate;
11763
11764 /* Use stack_pointer_rtx for relative addressing so that code
11765 works for realigned stack, too. */
11766 if (r10_live && eax_live)
11767 {
11768 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11769 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11770 gen_frame_mem (word_mode, t));
11771 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11772 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11773 gen_frame_mem (word_mode, t));
11774 }
11775 else if (eax_live || r10_live)
11776 {
11777 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11778 emit_move_insn (gen_rtx_REG (word_mode,
11779 (eax_live ? AX_REG : R10_REG)),
11780 gen_frame_mem (word_mode, t));
11781 }
11782 }
11783 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11784
11785   /* If we haven't already set up the frame pointer, do so now.  */
11786 if (frame_pointer_needed && !m->fs.fp_valid)
11787 {
11788 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11789 GEN_INT (frame.stack_pointer_offset
11790 - frame.hard_frame_pointer_offset));
11791 insn = emit_insn (insn);
11792 RTX_FRAME_RELATED_P (insn) = 1;
11793 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11794
11795 if (m->fs.cfa_reg == stack_pointer_rtx)
11796 m->fs.cfa_reg = hard_frame_pointer_rtx;
11797 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11798 m->fs.fp_valid = true;
11799 }
11800
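  /* Registers that were not pushed earlier are saved with moves now that
     the full frame has been allocated.  */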
11801 if (!int_registers_saved)
11802 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11803 if (!sse_registers_saved)
11804 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11805
11806   /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
11807      in the prologue.  */
11808 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11809 {
11810 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11811 insn = emit_insn (gen_set_got (pic));
11812 RTX_FRAME_RELATED_P (insn) = 1;
11813 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11814 emit_insn (gen_prologue_use (pic));
11815       /* Delete the already emitted SET_GOT, if it exists and is allocated to
11816 	 REAL_PIC_OFFSET_TABLE_REGNUM.  */
11817 ix86_elim_entry_set_got (pic);
11818 }
11819
11820 if (crtl->drap_reg && !crtl->stack_realign_needed)
11821 {
11822       /* vDRAP is set up, but after reload it turns out that stack realignment
11823          isn't necessary; here we emit the prologue to set up DRAP
11824          without the stack realignment adjustment.  */
11825 t = choose_baseaddr (0);
11826 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
11827 }
11828
11829   /* Prevent instructions from being scheduled into the register save push
11830      sequence when access to the redzone area is done through the frame pointer.
11831      The offset between the frame pointer and the stack pointer is calculated
11832      relative to the value of the stack pointer at the end of the function
11833      prologue, and moving instructions that access the redzone area via the frame
11834      pointer inside the push sequence violates this assumption.  */
11835 if (frame_pointer_needed && frame.red_zone_size)
11836 emit_insn (gen_memory_blockage ());
11837
11838 /* Emit cld instruction if stringops are used in the function. */
11839 if (TARGET_CLD && ix86_current_function_needs_cld)
11840 emit_insn (gen_cld ());
11841
11842 /* SEH requires that the prologue end within 256 bytes of the start of
11843 the function. Prevent instruction schedules that would extend that.
11844 Further, prevent alloca modifications to the stack pointer from being
11845 combined with prologue modifications. */
11846 if (TARGET_SEH)
11847 emit_insn (gen_prologue_use (stack_pointer_rtx));
11848 }
11849
11850 /* Emit code to restore REG using a POP insn. */
11851
11852 static void
11853 ix86_emit_restore_reg_using_pop (rtx reg)
11854 {
11855 struct machine_function *m = cfun->machine;
11856 rtx_insn *insn = emit_insn (gen_pop (reg));
11857
11858 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11859 m->fs.sp_offset -= UNITS_PER_WORD;
11860
11861 if (m->fs.cfa_reg == crtl->drap_reg
11862 && REGNO (reg) == REGNO (crtl->drap_reg))
11863 {
11864 /* Previously we'd represented the CFA as an expression
11865 like *(%ebp - 8). We've just popped that value from
11866 the stack, which means we need to reset the CFA to
11867 the drap register. This will remain until we restore
11868 the stack pointer. */
11869 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11870 RTX_FRAME_RELATED_P (insn) = 1;
11871
11872 /* This means that the DRAP register is valid for addressing too. */
11873 m->fs.drap_valid = true;
11874 return;
11875 }
11876
11877 if (m->fs.cfa_reg == stack_pointer_rtx)
11878 {
11879 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11880 x = gen_rtx_SET (stack_pointer_rtx, x);
11881 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11882 RTX_FRAME_RELATED_P (insn) = 1;
11883
11884 m->fs.cfa_offset -= UNITS_PER_WORD;
11885 }
11886
11887 /* When the frame pointer is the CFA, and we pop it, we are
11888 swapping back to the stack pointer as the CFA. This happens
11889 for stack frames that don't allocate other data, so we assume
11890 the stack pointer is now pointing at the return address, i.e.
11891 the function entry state, which makes the offset be 1 word. */
11892 if (reg == hard_frame_pointer_rtx)
11893 {
11894 m->fs.fp_valid = false;
11895 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11896 {
11897 m->fs.cfa_reg = stack_pointer_rtx;
11898 m->fs.cfa_offset -= UNITS_PER_WORD;
11899
11900 add_reg_note (insn, REG_CFA_DEF_CFA,
11901 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11902 GEN_INT (m->fs.cfa_offset)));
11903 RTX_FRAME_RELATED_P (insn) = 1;
11904 }
11905 }
11906 }
11907
11908 /* Emit code to restore saved registers using POP insns. */
11909
11910 static void
11911 ix86_emit_restore_regs_using_pop (void)
11912 {
11913 unsigned int regno;
11914
11915 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11916 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11917 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11918 }
11919
11920 /* Emit code and notes for the LEAVE instruction. */
11921
11922 static void
11923 ix86_emit_leave (void)
11924 {
11925 struct machine_function *m = cfun->machine;
11926 rtx_insn *insn = emit_insn (ix86_gen_leave ());
11927
11928 ix86_add_queued_cfa_restore_notes (insn);
11929
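  /* LEAVE restores the stack pointer from the frame pointer and pops the
     saved frame pointer, so update the tracked frame state accordingly.  */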
11930 gcc_assert (m->fs.fp_valid);
11931 m->fs.sp_valid = true;
11932 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11933 m->fs.fp_valid = false;
11934
11935 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11936 {
11937 m->fs.cfa_reg = stack_pointer_rtx;
11938 m->fs.cfa_offset = m->fs.sp_offset;
11939
11940 add_reg_note (insn, REG_CFA_DEF_CFA,
11941 plus_constant (Pmode, stack_pointer_rtx,
11942 m->fs.sp_offset));
11943 RTX_FRAME_RELATED_P (insn) = 1;
11944 }
11945 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11946 m->fs.fp_offset);
11947 }
11948
11949 /* Emit code to restore saved registers using MOV insns.
11950 First register is restored from CFA - CFA_OFFSET. */
11951 static void
11952 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11953 bool maybe_eh_return)
11954 {
11955 struct machine_function *m = cfun->machine;
11956 unsigned int regno;
11957
11958 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11959 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11960 {
11961 rtx reg = gen_rtx_REG (word_mode, regno);
11962 rtx mem;
11963 rtx_insn *insn;
11964
11965 mem = choose_baseaddr (cfa_offset);
11966 mem = gen_frame_mem (word_mode, mem);
11967 insn = emit_move_insn (reg, mem);
11968
11969 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11970 {
11971 /* Previously we'd represented the CFA as an expression
11972 like *(%ebp - 8). We've just popped that value from
11973 the stack, which means we need to reset the CFA to
11974 the drap register. This will remain until we restore
11975 the stack pointer. */
11976 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11977 RTX_FRAME_RELATED_P (insn) = 1;
11978
11979 /* This means that the DRAP register is valid for addressing. */
11980 m->fs.drap_valid = true;
11981 }
11982 else
11983 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
11984
11985 cfa_offset -= UNITS_PER_WORD;
11986 }
11987 }
11988
11989 /* Emit code to restore saved SSE registers using MOV insns.
11990 First register is restored from CFA - CFA_OFFSET. */
11991 static void
11992 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11993 bool maybe_eh_return)
11994 {
11995 unsigned int regno;
11996
11997 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11998 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11999 {
12000 rtx reg = gen_rtx_REG (V4SFmode, regno);
12001 rtx mem;
12002
12003 mem = choose_baseaddr (cfa_offset);
12004 mem = gen_rtx_MEM (V4SFmode, mem);
12005 set_mem_align (mem, 128);
12006 emit_move_insn (reg, mem);
12007
12008 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
12009
12010 cfa_offset -= 16;
12011 }
12012 }
12013
12014 /* Restore function stack, frame, and registers. */
12015
12016 void
12017 ix86_expand_epilogue (int style)
12018 {
12019 struct machine_function *m = cfun->machine;
12020 struct machine_frame_state frame_state_save = m->fs;
12021 struct ix86_frame frame;
12022 bool restore_regs_via_mov;
12023 bool using_drap;
12024
12025 ix86_finalize_stack_realign_flags ();
12026 ix86_compute_frame_layout (&frame);
12027
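  /* The stack pointer still addresses the frame if no frame pointer is
     needed, or if it was left unchanging and the stack was not realigned
     off the frame pointer.  */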
12028 m->fs.sp_valid = (!frame_pointer_needed
12029 || (crtl->sp_is_unchanging
12030 && !stack_realign_fp));
12031 gcc_assert (!m->fs.sp_valid
12032 || m->fs.sp_offset == frame.stack_pointer_offset);
12033
12034 /* The FP must be valid if the frame pointer is present. */
12035 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
12036 gcc_assert (!m->fs.fp_valid
12037 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
12038
12039 /* We must have *some* valid pointer to the stack frame. */
12040 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
12041
12042 /* The DRAP is never valid at this point. */
12043 gcc_assert (!m->fs.drap_valid);
12044
12045 /* See the comment about red zone and frame
12046 pointer usage in ix86_expand_prologue. */
12047 if (frame_pointer_needed && frame.red_zone_size)
12048 emit_insn (gen_memory_blockage ());
12049
12050 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
12051 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
12052
12053 /* Determine the CFA offset of the end of the red-zone. */
12054 m->fs.red_zone_offset = 0;
12055 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
12056 {
12057 /* The red-zone begins below the return address. */
12058 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
12059
12060 /* When the register save area is in the aligned portion of
12061 the stack, determine the maximum runtime displacement that
12062 matches up with the aligned frame. */
12063 if (stack_realign_drap)
12064 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
12065 + UNITS_PER_WORD);
12066 }
12067
12068 /* Special care must be taken for the normal return case of a function
12069 using eh_return: the eax and edx registers are marked as saved, but
12070 not restored along this path. Adjust the save location to match. */
12071 if (crtl->calls_eh_return && style != 2)
12072 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
12073
12074 /* EH_RETURN requires the use of moves to function properly. */
12075 if (crtl->calls_eh_return)
12076 restore_regs_via_mov = true;
12077 /* SEH requires the use of pops to identify the epilogue. */
12078 else if (TARGET_SEH)
12079 restore_regs_via_mov = false;
12080   /* If we're only restoring one register and sp is not valid, then
12081      use a move instruction to restore the register, since it's
12082      less work than reloading sp and popping the register.  */
12083 else if (!m->fs.sp_valid && frame.nregs <= 1)
12084 restore_regs_via_mov = true;
12085 else if (TARGET_EPILOGUE_USING_MOVE
12086 && cfun->machine->use_fast_prologue_epilogue
12087 && (frame.nregs > 1
12088 || m->fs.sp_offset != frame.reg_save_offset))
12089 restore_regs_via_mov = true;
12090 else if (frame_pointer_needed
12091 && !frame.nregs
12092 && m->fs.sp_offset != frame.reg_save_offset)
12093 restore_regs_via_mov = true;
12094 else if (frame_pointer_needed
12095 && TARGET_USE_LEAVE
12096 && cfun->machine->use_fast_prologue_epilogue
12097 && frame.nregs == 1)
12098 restore_regs_via_mov = true;
12099 else
12100 restore_regs_via_mov = false;
12101
12102 if (restore_regs_via_mov || frame.nsseregs)
12103 {
12104 /* Ensure that the entire register save area is addressable via
12105 the stack pointer, if we will restore via sp. */
12106 if (TARGET_64BIT
12107 && m->fs.sp_offset > 0x7fffffff
12108 && !(m->fs.fp_valid || m->fs.drap_valid)
12109 && (frame.nsseregs + frame.nregs) != 0)
12110 {
12111 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12112 GEN_INT (m->fs.sp_offset
12113 - frame.sse_reg_save_offset),
12114 style,
12115 m->fs.cfa_reg == stack_pointer_rtx);
12116 }
12117 }
12118
12119 /* If there are any SSE registers to restore, then we have to do it
12120 via moves, since there's obviously no pop for SSE regs. */
12121 if (frame.nsseregs)
12122 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
12123 style == 2);
12124
12125 if (restore_regs_via_mov)
12126 {
12127 rtx t;
12128
12129 if (frame.nregs)
12130 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
12131
12132 /* eh_return epilogues need %ecx added to the stack pointer. */
12133 if (style == 2)
12134 {
12135 rtx sa = EH_RETURN_STACKADJ_RTX;
12136 rtx_insn *insn;
12137
12138 /* Stack align doesn't work with eh_return. */
12139 gcc_assert (!stack_realign_drap);
12140       /* Neither do regparm nested functions.  */
12141 gcc_assert (!ix86_static_chain_on_stack);
12142
12143 if (frame_pointer_needed)
12144 {
12145 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
12146 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
12147 emit_insn (gen_rtx_SET (sa, t));
12148
12149 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
12150 insn = emit_move_insn (hard_frame_pointer_rtx, t);
12151
12152 /* Note that we use SA as a temporary CFA, as the return
12153 address is at the proper place relative to it. We
12154 pretend this happens at the FP restore insn because
12155 prior to this insn the FP would be stored at the wrong
12156 offset relative to SA, and after this insn we have no
12157 other reasonable register to use for the CFA. We don't
12158 bother resetting the CFA to the SP for the duration of
12159 the return insn. */
12160 add_reg_note (insn, REG_CFA_DEF_CFA,
12161 plus_constant (Pmode, sa, UNITS_PER_WORD));
12162 ix86_add_queued_cfa_restore_notes (insn);
12163 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
12164 RTX_FRAME_RELATED_P (insn) = 1;
12165
12166 m->fs.cfa_reg = sa;
12167 m->fs.cfa_offset = UNITS_PER_WORD;
12168 m->fs.fp_valid = false;
12169
12170 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
12171 const0_rtx, style, false);
12172 }
12173 else
12174 {
12175 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
12176 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
12177 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
12178 ix86_add_queued_cfa_restore_notes (insn);
12179
12180 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
12181 if (m->fs.cfa_offset != UNITS_PER_WORD)
12182 {
12183 m->fs.cfa_offset = UNITS_PER_WORD;
12184 add_reg_note (insn, REG_CFA_DEF_CFA,
12185 plus_constant (Pmode, stack_pointer_rtx,
12186 UNITS_PER_WORD));
12187 RTX_FRAME_RELATED_P (insn) = 1;
12188 }
12189 }
12190 m->fs.sp_offset = UNITS_PER_WORD;
12191 m->fs.sp_valid = true;
12192 }
12193 }
12194 else
12195 {
12196 /* SEH requires that the function end with (1) a stack adjustment
12197 if necessary, (2) a sequence of pops, and (3) a return or
12198 jump instruction. Prevent insns from the function body from
12199 being scheduled into this sequence. */
12200 if (TARGET_SEH)
12201 {
12202 /* Prevent a catch region from being adjacent to the standard
12203 	     epilogue sequence.  Unfortunately, neither crtl->uses_eh_lsda nor
12204 	     several other flags that would be interesting to test are
12205 	     yet set up.  */
12206 if (flag_non_call_exceptions)
12207 emit_insn (gen_nops (const1_rtx));
12208 else
12209 emit_insn (gen_blockage ());
12210 }
12211
12212       /* The first step is to deallocate the stack frame so that we can
12213 	 pop the registers.  Also do it on SEH targets for very large
12214 	 frames, as the emitted instructions aren't allowed by the ABI in
12215 	 epilogues.  */
12216 if (!m->fs.sp_valid
12217 || (TARGET_SEH
12218 && (m->fs.sp_offset - frame.reg_save_offset
12219 >= SEH_MAX_FRAME_SIZE)))
12220 {
12221 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12222 GEN_INT (m->fs.fp_offset
12223 - frame.reg_save_offset),
12224 style, false);
12225 }
12226 else if (m->fs.sp_offset != frame.reg_save_offset)
12227 {
12228 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12229 GEN_INT (m->fs.sp_offset
12230 - frame.reg_save_offset),
12231 style,
12232 m->fs.cfa_reg == stack_pointer_rtx);
12233 }
12234
12235 ix86_emit_restore_regs_using_pop ();
12236 }
12237
12238   /* If we used a frame pointer and haven't already got rid of it,
12239 then do so now. */
12240 if (m->fs.fp_valid)
12241 {
12242 /* If the stack pointer is valid and pointing at the frame
12243 pointer store address, then we only need a pop. */
12244 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12245 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12246 /* Leave results in shorter dependency chains on CPUs that are
12247 able to grok it fast. */
12248 else if (TARGET_USE_LEAVE
12249 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12250 || !cfun->machine->use_fast_prologue_epilogue)
12251 ix86_emit_leave ();
12252 else
12253 {
12254 pro_epilogue_adjust_stack (stack_pointer_rtx,
12255 hard_frame_pointer_rtx,
12256 const0_rtx, style, !using_drap);
12257 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12258 }
12259 }
12260
12261 if (using_drap)
12262 {
12263 int param_ptr_offset = UNITS_PER_WORD;
12264 rtx_insn *insn;
12265
12266 gcc_assert (stack_realign_drap);
12267
12268 if (ix86_static_chain_on_stack)
12269 param_ptr_offset += UNITS_PER_WORD;
12270 if (!call_used_regs[REGNO (crtl->drap_reg)])
12271 param_ptr_offset += UNITS_PER_WORD;
12272
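      /* Restore the stack pointer from the DRAP register; param_ptr_offset
	 accounts for the return address and, if present, the static chain
	 and the saved DRAP register.  */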
12273 insn = emit_insn (gen_rtx_SET
12274 (stack_pointer_rtx,
12275 gen_rtx_PLUS (Pmode,
12276 crtl->drap_reg,
12277 GEN_INT (-param_ptr_offset))));
12278 m->fs.cfa_reg = stack_pointer_rtx;
12279 m->fs.cfa_offset = param_ptr_offset;
12280 m->fs.sp_offset = param_ptr_offset;
12281 m->fs.realigned = false;
12282
12283 add_reg_note (insn, REG_CFA_DEF_CFA,
12284 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12285 GEN_INT (param_ptr_offset)));
12286 RTX_FRAME_RELATED_P (insn) = 1;
12287
12288 if (!call_used_regs[REGNO (crtl->drap_reg)])
12289 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12290 }
12291
12292 /* At this point the stack pointer must be valid, and we must have
12293 restored all of the registers. We may not have deallocated the
12294 entire stack frame. We've delayed this until now because it may
12295 be possible to merge the local stack deallocation with the
12296 deallocation forced by ix86_static_chain_on_stack. */
12297 gcc_assert (m->fs.sp_valid);
12298 gcc_assert (!m->fs.fp_valid);
12299 gcc_assert (!m->fs.realigned);
12300 if (m->fs.sp_offset != UNITS_PER_WORD)
12301 {
12302 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12303 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12304 style, true);
12305 }
12306 else
12307 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12308
12309 /* Sibcall epilogues don't want a return instruction. */
12310 if (style == 0)
12311 {
12312 m->fs = frame_state_save;
12313 return;
12314 }
12315
12316 if (crtl->args.pops_args && crtl->args.size)
12317 {
12318 rtx popc = GEN_INT (crtl->args.pops_args);
12319
12320       /* i386 can only pop 64K bytes.  If asked to pop more, pop the return
12321 	 address, do an explicit add, and jump indirectly to the caller.  */
12322
12323 if (crtl->args.pops_args >= 65536)
12324 {
12325 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12326 rtx_insn *insn;
12327
12328 /* There is no "pascal" calling convention in any 64bit ABI. */
12329 gcc_assert (!TARGET_64BIT);
12330
12331 insn = emit_insn (gen_pop (ecx));
12332 m->fs.cfa_offset -= UNITS_PER_WORD;
12333 m->fs.sp_offset -= UNITS_PER_WORD;
12334
12335 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12336 x = gen_rtx_SET (stack_pointer_rtx, x);
12337 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12338 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
12339 RTX_FRAME_RELATED_P (insn) = 1;
12340
12341 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12342 popc, -1, true);
12343 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12344 }
12345 else
12346 emit_jump_insn (gen_simple_return_pop_internal (popc));
12347 }
12348 else
12349 emit_jump_insn (gen_simple_return_internal ());
12350
12351 /* Restore the state back to the state from the prologue,
12352 so that it's correct for the next epilogue. */
12353 m->fs = frame_state_save;
12354 }
12355
12356 /* Reset from the function's potential modifications. */
12357
12358 static void
12359 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12360 {
12361 if (pic_offset_table_rtx
12362 && !ix86_use_pseudo_pic_reg ())
12363 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12364 #if TARGET_MACHO
12365 /* Mach-O doesn't support labels at the end of objects, so if
12366 it looks like we might want one, insert a NOP. */
12367 {
12368 rtx_insn *insn = get_last_insn ();
12369 rtx_insn *deleted_debug_label = NULL;
12370 while (insn
12371 && NOTE_P (insn)
12372 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12373 {
12374 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
12375 	   notes only; instead set their CODE_LABEL_NUMBER to -1,
12376 	   otherwise there would be code generation differences
12377 	   between -g and -g0.  */
12378 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12379 deleted_debug_label = insn;
12380 insn = PREV_INSN (insn);
12381 }
12382 if (insn
12383 && (LABEL_P (insn)
12384 || (NOTE_P (insn)
12385 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12386 fputs ("\tnop\n", file);
12387 else if (deleted_debug_label)
12388 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12389 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12390 CODE_LABEL_NUMBER (insn) = -1;
12391 }
12392 #endif
12393
12394 }
12395
12396 /* Return a scratch register to use in the split stack prologue. The
12397    split stack prologue is used for -fsplit-stack.  It consists of the first
12398    instructions in the function, emitted even before the regular prologue.
12399 The scratch register can be any caller-saved register which is not
12400 used for parameters or for the static chain. */
12401
12402 static unsigned int
12403 split_stack_prologue_scratch_regno (void)
12404 {
12405 if (TARGET_64BIT)
12406 return R11_REG;
12407 else
12408 {
12409 bool is_fastcall, is_thiscall;
12410 int regparm;
12411
12412 is_fastcall = (lookup_attribute ("fastcall",
12413 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12414 != NULL);
12415 is_thiscall = (lookup_attribute ("thiscall",
12416 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12417 != NULL);
12418 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12419
12420 if (is_fastcall)
12421 {
12422 if (DECL_STATIC_CHAIN (cfun->decl))
12423 {
12424 sorry ("-fsplit-stack does not support fastcall with "
12425 "nested function");
12426 return INVALID_REGNUM;
12427 }
12428 return AX_REG;
12429 }
12430 else if (is_thiscall)
12431 {
12432 if (!DECL_STATIC_CHAIN (cfun->decl))
12433 return DX_REG;
12434 return AX_REG;
12435 }
12436 else if (regparm < 3)
12437 {
12438 if (!DECL_STATIC_CHAIN (cfun->decl))
12439 return CX_REG;
12440 else
12441 {
12442 if (regparm >= 2)
12443 {
12444 sorry ("-fsplit-stack does not support 2 register "
12445 "parameters for a nested function");
12446 return INVALID_REGNUM;
12447 }
12448 return DX_REG;
12449 }
12450 }
12451 else
12452 {
12453 /* FIXME: We could make this work by pushing a register
12454 around the addition and comparison. */
12455 sorry ("-fsplit-stack does not support 3 register parameters");
12456 return INVALID_REGNUM;
12457 }
12458 }
12459 }
12460
12461 /* A SYMBOL_REF for the function which allocates new stack space for
12462 -fsplit-stack. */
12463
12464 static GTY(()) rtx split_stack_fn;
12465
12466 /* A SYMBOL_REF for the more stack function when using the large
12467 model. */
12468
12469 static GTY(()) rtx split_stack_fn_large;
12470
12471 /* Handle -fsplit-stack. These are the first instructions in the
12472 function, even before the regular prologue. */
12473
12474 void
12475 ix86_expand_split_stack_prologue (void)
12476 {
12477 struct ix86_frame frame;
12478 HOST_WIDE_INT allocate;
12479 unsigned HOST_WIDE_INT args_size;
12480 rtx_code_label *label;
12481 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12482 rtx scratch_reg = NULL_RTX;
12483 rtx_code_label *varargs_label = NULL;
12484 rtx fn;
12485
12486 gcc_assert (flag_split_stack && reload_completed);
12487
12488 ix86_finalize_stack_realign_flags ();
12489 ix86_compute_frame_layout (&frame);
12490 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12491
12492 /* This is the label we will branch to if we have enough stack
12493 space. We expect the basic block reordering pass to reverse this
12494 branch if optimizing, so that we branch in the unlikely case. */
12495 label = gen_label_rtx ();
12496
12497 /* We need to compare the stack pointer minus the frame size with
12498 the stack boundary in the TCB. The stack boundary always gives
12499 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12500 can compare directly. Otherwise we need to do an addition. */
12501
12502 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12503 UNSPEC_STACK_CHECK);
12504 limit = gen_rtx_CONST (Pmode, limit);
12505 limit = gen_rtx_MEM (Pmode, limit);
12506 if (allocate < SPLIT_STACK_AVAILABLE)
12507 current = stack_pointer_rtx;
12508 else
12509 {
12510 unsigned int scratch_regno;
12511 rtx offset;
12512
12513 /* We need a scratch register to hold the stack pointer minus
12514 the required frame size. Since this is the very start of the
12515 function, the scratch register can be any caller-saved
12516 register which is not used for parameters. */
12517 offset = GEN_INT (- allocate);
12518 scratch_regno = split_stack_prologue_scratch_regno ();
12519 if (scratch_regno == INVALID_REGNUM)
12520 return;
12521 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12522 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12523 {
12524 /* We don't use ix86_gen_add3 in this case because it will
12525 want to split to lea, but when not optimizing the insn
12526 will not be split after this point. */
12527 emit_insn (gen_rtx_SET (scratch_reg,
12528 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12529 offset)));
12530 }
12531 else
12532 {
12533 emit_move_insn (scratch_reg, offset);
12534 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12535 stack_pointer_rtx));
12536 }
12537 current = scratch_reg;
12538 }
12539
12540 ix86_expand_branch (GEU, current, limit, label);
12541 jump_insn = get_last_insn ();
12542 JUMP_LABEL (jump_insn) = label;
12543
12544 /* Mark the jump as very likely to be taken. */
12545 add_int_reg_note (jump_insn, REG_BR_PROB,
12546 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12547
12548 if (split_stack_fn == NULL_RTX)
12549 {
12550 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12551 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12552 }
12553 fn = split_stack_fn;
12554
12555 /* Get more stack space. We pass in the desired stack space and the
12556 size of the arguments to copy to the new stack. In 32-bit mode
12557 we push the parameters; __morestack will return on a new stack
12558 anyhow. In 64-bit mode we pass the parameters in r10 and
12559 r11. */
12560 allocate_rtx = GEN_INT (allocate);
12561 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12562 call_fusage = NULL_RTX;
12563 if (TARGET_64BIT)
12564 {
12565 rtx reg10, reg11;
12566
12567 reg10 = gen_rtx_REG (Pmode, R10_REG);
12568 reg11 = gen_rtx_REG (Pmode, R11_REG);
12569
12570 /* If this function uses a static chain, it will be in %r10.
12571 Preserve it across the call to __morestack. */
12572 if (DECL_STATIC_CHAIN (cfun->decl))
12573 {
12574 rtx rax;
12575
12576 rax = gen_rtx_REG (word_mode, AX_REG);
12577 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12578 use_reg (&call_fusage, rax);
12579 }
12580
12581 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12582 && !TARGET_PECOFF)
12583 {
12584 HOST_WIDE_INT argval;
12585
12586 gcc_assert (Pmode == DImode);
12587 /* When using the large model we need to load the address
12588 into a register, and we've run out of registers. So we
12589 switch to a different calling convention, and we call a
12590 	     different function: __morestack_large_model.  We pass the
12591 argument size in the upper 32 bits of r10 and pass the
12592 frame size in the lower 32 bits. */
12593 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12594 gcc_assert ((args_size & 0xffffffff) == args_size);
12595
12596 if (split_stack_fn_large == NULL_RTX)
12597 {
12598 split_stack_fn_large =
12599 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12600 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12601 }
12602 if (ix86_cmodel == CM_LARGE_PIC)
12603 {
12604 rtx_code_label *label;
12605 rtx x;
12606
12607 label = gen_label_rtx ();
12608 emit_label (label);
12609 LABEL_PRESERVE_P (label) = 1;
12610 emit_insn (gen_set_rip_rex64 (reg10, label));
12611 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12612 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12613 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12614 UNSPEC_GOT);
12615 x = gen_rtx_CONST (Pmode, x);
12616 emit_move_insn (reg11, x);
12617 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12618 x = gen_const_mem (Pmode, x);
12619 emit_move_insn (reg11, x);
12620 }
12621 else
12622 emit_move_insn (reg11, split_stack_fn_large);
12623
12624 fn = reg11;
12625
12626 argval = ((args_size << 16) << 16) + allocate;
12627 emit_move_insn (reg10, GEN_INT (argval));
12628 }
12629 else
12630 {
12631 emit_move_insn (reg10, allocate_rtx);
12632 emit_move_insn (reg11, GEN_INT (args_size));
12633 use_reg (&call_fusage, reg11);
12634 }
12635
12636 use_reg (&call_fusage, reg10);
12637 }
12638 else
12639 {
12640 emit_insn (gen_push (GEN_INT (args_size)));
12641 emit_insn (gen_push (allocate_rtx));
12642 }
12643 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12644 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12645 NULL_RTX, false);
12646 add_function_usage_to (call_insn, call_fusage);
12647
12648 /* In order to make call/return prediction work right, we now need
12649 to execute a return instruction. See
12650 libgcc/config/i386/morestack.S for the details on how this works.
12651
12652 For flow purposes gcc must not see this as a return
12653 instruction--we need control flow to continue at the subsequent
12654 label. Therefore, we use an unspec. */
12655 gcc_assert (crtl->args.pops_args < 65536);
12656 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12657
12658 /* If we are in 64-bit mode and this function uses a static chain,
12659      we saved %r10 in %rax before calling __morestack.  */
12660 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12661 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12662 gen_rtx_REG (word_mode, AX_REG));
12663
12664 /* If this function calls va_start, we need to store a pointer to
12665      the arguments on the old stack, because they may not all have been
12666      copied to the new stack.  At this point the old stack can be
12667 found at the frame pointer value used by __morestack, because
12668 __morestack has set that up before calling back to us. Here we
12669 store that pointer in a scratch register, and in
12670 ix86_expand_prologue we store the scratch register in a stack
12671 slot. */
12672 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12673 {
12674 unsigned int scratch_regno;
12675 rtx frame_reg;
12676 int words;
12677
12678 scratch_regno = split_stack_prologue_scratch_regno ();
12679 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12680 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12681
12682 /* 64-bit:
12683 fp -> old fp value
12684 return address within this function
12685 return address of caller of this function
12686 stack arguments
12687 So we add three words to get to the stack arguments.
12688
12689 32-bit:
12690 fp -> old fp value
12691 return address within this function
12692 first argument to __morestack
12693 second argument to __morestack
12694 return address of caller of this function
12695 stack arguments
12696 So we add five words to get to the stack arguments.
12697 */
12698 words = TARGET_64BIT ? 3 : 5;
12699 emit_insn (gen_rtx_SET (scratch_reg,
12700 gen_rtx_PLUS (Pmode, frame_reg,
12701 GEN_INT (words * UNITS_PER_WORD))));
12702
12703 varargs_label = gen_label_rtx ();
12704 emit_jump_insn (gen_jump (varargs_label));
12705 JUMP_LABEL (get_last_insn ()) = varargs_label;
12706
12707 emit_barrier ();
12708 }
12709
12710 emit_label (label);
12711 LABEL_NUSES (label) = 1;
12712
12713 /* If this function calls va_start, we now have to set the scratch
12714 register for the case where we do not call __morestack. In this
12715 case we need to set it based on the stack pointer. */
12716 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12717 {
12718 emit_insn (gen_rtx_SET (scratch_reg,
12719 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12720 GEN_INT (UNITS_PER_WORD))));
12721
12722 emit_label (varargs_label);
12723 LABEL_NUSES (varargs_label) = 1;
12724 }
12725 }
12726
12727 /* We may have to tell the dataflow pass that the split stack prologue
12728 is initializing a scratch register. */
12729
12730 static void
12731 ix86_live_on_entry (bitmap regs)
12732 {
12733 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12734 {
12735 gcc_assert (flag_split_stack);
12736 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12737 }
12738 }
12739 \f
12740 /* Extract the parts of an RTL expression that is a valid memory address
12741 for an instruction. Return 0 if the structure of the address is
12742    grossly off.  Return -1 if the address contains ASHIFT, so it is not
12743    strictly valid, but is still used for computing the length of the lea instruction.  */
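/* For example, the address 16(%ebx,%eax,4), i.e.
   (plus (plus (mult (reg eax) (const_int 4)) (reg ebx)) (const_int 16)),
   decomposes into base = %ebx, index = %eax, scale = 4 and disp = 16.  */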
12744
12745 int
12746 ix86_decompose_address (rtx addr, struct ix86_address *out)
12747 {
12748 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12749 rtx base_reg, index_reg;
12750 HOST_WIDE_INT scale = 1;
12751 rtx scale_rtx = NULL_RTX;
12752 rtx tmp;
12753 int retval = 1;
12754 enum ix86_address_seg seg = SEG_DEFAULT;
12755
12756   /* Allow zero-extended SImode addresses;
12757      they will be emitted with the addr32 prefix.  */
12758 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12759 {
12760 if (GET_CODE (addr) == ZERO_EXTEND
12761 && GET_MODE (XEXP (addr, 0)) == SImode)
12762 {
12763 addr = XEXP (addr, 0);
12764 if (CONST_INT_P (addr))
12765 return 0;
12766 }
12767 else if (GET_CODE (addr) == AND
12768 && const_32bit_mask (XEXP (addr, 1), DImode))
12769 {
12770 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12771 if (addr == NULL_RTX)
12772 return 0;
12773
12774 if (CONST_INT_P (addr))
12775 return 0;
12776 }
12777 }
12778
12779   /* Allow SImode subregs of DImode addresses;
12780      they will be emitted with the addr32 prefix.  */
12781 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12782 {
12783 if (GET_CODE (addr) == SUBREG
12784 && GET_MODE (SUBREG_REG (addr)) == DImode)
12785 {
12786 addr = SUBREG_REG (addr);
12787 if (CONST_INT_P (addr))
12788 return 0;
12789 }
12790 }
12791
12792 if (REG_P (addr))
12793 base = addr;
12794 else if (GET_CODE (addr) == SUBREG)
12795 {
12796 if (REG_P (SUBREG_REG (addr)))
12797 base = addr;
12798 else
12799 return 0;
12800 }
12801 else if (GET_CODE (addr) == PLUS)
12802 {
12803 rtx addends[4], op;
12804 int n = 0, i;
12805
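      /* Flatten the chain of PLUS operands; a valid address has at most
	 four addends (base, scaled index, displacement and a segment
	 unspec).  */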
12806 op = addr;
12807 do
12808 {
12809 if (n >= 4)
12810 return 0;
12811 addends[n++] = XEXP (op, 1);
12812 op = XEXP (op, 0);
12813 }
12814 while (GET_CODE (op) == PLUS);
12815 if (n >= 4)
12816 return 0;
12817 addends[n] = op;
12818
12819 for (i = n; i >= 0; --i)
12820 {
12821 op = addends[i];
12822 switch (GET_CODE (op))
12823 {
12824 case MULT:
12825 if (index)
12826 return 0;
12827 index = XEXP (op, 0);
12828 scale_rtx = XEXP (op, 1);
12829 break;
12830
12831 case ASHIFT:
12832 if (index)
12833 return 0;
12834 index = XEXP (op, 0);
12835 tmp = XEXP (op, 1);
12836 if (!CONST_INT_P (tmp))
12837 return 0;
12838 scale = INTVAL (tmp);
12839 if ((unsigned HOST_WIDE_INT) scale > 3)
12840 return 0;
12841 scale = 1 << scale;
12842 break;
12843
12844 case ZERO_EXTEND:
12845 op = XEXP (op, 0);
12846 if (GET_CODE (op) != UNSPEC)
12847 return 0;
12848 /* FALLTHRU */
12849
12850 case UNSPEC:
12851 if (XINT (op, 1) == UNSPEC_TP
12852 && TARGET_TLS_DIRECT_SEG_REFS
12853 && seg == SEG_DEFAULT)
12854 seg = DEFAULT_TLS_SEG_REG;
12855 else
12856 return 0;
12857 break;
12858
12859 case SUBREG:
12860 if (!REG_P (SUBREG_REG (op)))
12861 return 0;
12862 /* FALLTHRU */
12863
12864 case REG:
12865 if (!base)
12866 base = op;
12867 else if (!index)
12868 index = op;
12869 else
12870 return 0;
12871 break;
12872
12873 case CONST:
12874 case CONST_INT:
12875 case SYMBOL_REF:
12876 case LABEL_REF:
12877 if (disp)
12878 return 0;
12879 disp = op;
12880 break;
12881
12882 default:
12883 return 0;
12884 }
12885 }
12886 }
12887 else if (GET_CODE (addr) == MULT)
12888 {
12889 index = XEXP (addr, 0); /* index*scale */
12890 scale_rtx = XEXP (addr, 1);
12891 }
12892 else if (GET_CODE (addr) == ASHIFT)
12893 {
12894 /* We're called for lea too, which implements ashift on occasion. */
12895 index = XEXP (addr, 0);
12896 tmp = XEXP (addr, 1);
12897 if (!CONST_INT_P (tmp))
12898 return 0;
12899 scale = INTVAL (tmp);
12900 if ((unsigned HOST_WIDE_INT) scale > 3)
12901 return 0;
12902 scale = 1 << scale;
12903 retval = -1;
12904 }
12905 else
12906 disp = addr; /* displacement */
12907
12908 if (index)
12909 {
12910 if (REG_P (index))
12911 ;
12912 else if (GET_CODE (index) == SUBREG
12913 && REG_P (SUBREG_REG (index)))
12914 ;
12915 else
12916 return 0;
12917 }
12918
12919 /* Extract the integral value of scale. */
12920 if (scale_rtx)
12921 {
12922 if (!CONST_INT_P (scale_rtx))
12923 return 0;
12924 scale = INTVAL (scale_rtx);
12925 }
12926
12927 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12928 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12929
12930 /* Avoid useless 0 displacement. */
12931 if (disp == const0_rtx && (base || index))
12932 disp = NULL_RTX;
12933
12934   /* Allow the arg pointer and stack pointer as index if there is no scaling.  */
12935 if (base_reg && index_reg && scale == 1
12936 && (index_reg == arg_pointer_rtx
12937 || index_reg == frame_pointer_rtx
12938 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12939 {
12940 std::swap (base, index);
12941 std::swap (base_reg, index_reg);
12942 }
12943
12944 /* Special case: %ebp cannot be encoded as a base without a displacement.
12945 Similarly %r13. */
12946 if (!disp
12947 && base_reg
12948 && (base_reg == hard_frame_pointer_rtx
12949 || base_reg == frame_pointer_rtx
12950 || base_reg == arg_pointer_rtx
12951 || (REG_P (base_reg)
12952 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12953 || REGNO (base_reg) == R13_REG))))
12954 disp = const0_rtx;
12955
12956   /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
12957 Avoid this by transforming to [%esi+0].
12958 Reload calls address legitimization without cfun defined, so we need
12959 to test cfun for being non-NULL. */
12960 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12961 && base_reg && !index_reg && !disp
12962 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12963 disp = const0_rtx;
12964
12965 /* Special case: encode reg+reg instead of reg*2. */
12966 if (!base && index && scale == 2)
12967 base = index, base_reg = index_reg, scale = 1;
12968
12969 /* Special case: scaling cannot be encoded without base or displacement. */
12970 if (!base && !disp && index && scale != 1)
12971 disp = const0_rtx;
12972
12973 out->base = base;
12974 out->index = index;
12975 out->disp = disp;
12976 out->scale = scale;
12977 out->seg = seg;
12978
12979 return retval;
12980 }
12981 \f
12982 /* Return cost of the memory address x.
12983 For i386, it is better to use a complex address than let gcc copy
12984 the address into a reg and make a new pseudo. But not if the address
12985    requires two regs - that would mean more pseudos with longer
12986 lifetimes. */
12987 static int
12988 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
12989 {
12990 struct ix86_address parts;
12991 int cost = 1;
12992 int ok = ix86_decompose_address (x, &parts);
12993
12994 gcc_assert (ok);
12995
12996 if (parts.base && GET_CODE (parts.base) == SUBREG)
12997 parts.base = SUBREG_REG (parts.base);
12998 if (parts.index && GET_CODE (parts.index) == SUBREG)
12999 parts.index = SUBREG_REG (parts.index);
13000
13001   /* Attempt to minimize the number of registers in the address by increasing
13002      the address cost for each register used.  We don't increase the address cost
13003      for "pic_offset_table_rtx".  When a memory operand with "pic_offset_table_rtx"
13004      is not invariant itself, it most likely means that the base or index is not
13005      invariant.  Therefore only "pic_offset_table_rtx" could be hoisted out,
13006      which is not profitable for x86.  */
13007 if (parts.base
13008 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
13009 && (current_pass->type == GIMPLE_PASS
13010 || !pic_offset_table_rtx
13011 || !REG_P (parts.base)
13012 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
13013 cost++;
13014
13015 if (parts.index
13016 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
13017 && (current_pass->type == GIMPLE_PASS
13018 || !pic_offset_table_rtx
13019 || !REG_P (parts.index)
13020 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
13021 cost++;
13022
13023   /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
13024      since its predecode logic can't detect the length of such instructions
13025      and decoding degenerates to the vector decoder.  Increase the cost of such
13026      addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
13027 to split such addresses or even refuse such addresses at all.
13028
13029 Following addressing modes are affected:
13030 [base+scale*index]
13031 [scale*index+disp]
13032 [base+index]
13033
13034 The first and last case may be avoidable by explicitly coding the zero in
13035      the memory address, but I don't have an AMD-K6 machine handy to check this
13036 theory. */
13037
13038 if (TARGET_K6
13039 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
13040 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
13041 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
13042 cost += 10;
13043
13044 return cost;
13045 }
13046 \f
13047 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
13048    this is used to form addresses to local data when -fPIC is in
13049 use. */
13050
13051 static bool
13052 darwin_local_data_pic (rtx disp)
13053 {
13054 return (GET_CODE (disp) == UNSPEC
13055 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
13056 }
13057
13058 /* Determine if a given RTX is a valid constant. We already know this
13059 satisfies CONSTANT_P. */
13060
13061 static bool
13062 ix86_legitimate_constant_p (machine_mode, rtx x)
13063 {
13064 /* Pointer bounds constants are not valid. */
13065 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
13066 return false;
13067
13068 switch (GET_CODE (x))
13069 {
13070 case CONST:
13071 x = XEXP (x, 0);
13072
13073 if (GET_CODE (x) == PLUS)
13074 {
13075 if (!CONST_INT_P (XEXP (x, 1)))
13076 return false;
13077 x = XEXP (x, 0);
13078 }
13079
13080 if (TARGET_MACHO && darwin_local_data_pic (x))
13081 return true;
13082
13083 /* Only some unspecs are valid as "constants". */
13084 if (GET_CODE (x) == UNSPEC)
13085 switch (XINT (x, 1))
13086 {
13087 case UNSPEC_GOT:
13088 case UNSPEC_GOTOFF:
13089 case UNSPEC_PLTOFF:
13090 return TARGET_64BIT;
13091 case UNSPEC_TPOFF:
13092 case UNSPEC_NTPOFF:
13093 x = XVECEXP (x, 0, 0);
13094 return (GET_CODE (x) == SYMBOL_REF
13095 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13096 case UNSPEC_DTPOFF:
13097 x = XVECEXP (x, 0, 0);
13098 return (GET_CODE (x) == SYMBOL_REF
13099 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
13100 default:
13101 return false;
13102 }
13103
13104 /* We must have drilled down to a symbol. */
13105 if (GET_CODE (x) == LABEL_REF)
13106 return true;
13107 if (GET_CODE (x) != SYMBOL_REF)
13108 return false;
13109 /* FALLTHRU */
13110
13111 case SYMBOL_REF:
13112 /* TLS symbols are never valid. */
13113 if (SYMBOL_REF_TLS_MODEL (x))
13114 return false;
13115
13116 /* DLLIMPORT symbols are never valid. */
13117 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13118 && SYMBOL_REF_DLLIMPORT_P (x))
13119 return false;
13120
13121 #if TARGET_MACHO
13122 /* mdynamic-no-pic */
13123 if (MACHO_DYNAMIC_NO_PIC_P)
13124 return machopic_symbol_defined_p (x);
13125 #endif
13126 break;
13127
13128 case CONST_WIDE_INT:
13129 if (!TARGET_64BIT && !standard_sse_constant_p (x))
13130 return false;
13131 break;
13132
13133 case CONST_VECTOR:
13134 if (!standard_sse_constant_p (x))
13135 return false;
13136
13137 default:
13138 break;
13139 }
13140
13141 /* Otherwise we handle everything else in the move patterns. */
13142 return true;
13143 }
13144
13145 /* Determine if it's legal to put X into the constant pool. This
13146 is not possible for the address of thread-local symbols, which
13147 is checked above. */
13148
13149 static bool
13150 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
13151 {
13152 /* We can always put integral constants and vectors in memory. */
13153 switch (GET_CODE (x))
13154 {
13155 case CONST_INT:
13156 case CONST_WIDE_INT:
13157 case CONST_DOUBLE:
13158 case CONST_VECTOR:
13159 return false;
13160
13161 default:
13162 break;
13163 }
13164 return !ix86_legitimate_constant_p (mode, x);
13165 }
13166
13167 /* Return true if the symbol is marked as dllimport or as a stub-variable,
13168    otherwise false.  */
13169
13170 static bool
13171 is_imported_p (rtx x)
13172 {
13173 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
13174 || GET_CODE (x) != SYMBOL_REF)
13175 return false;
13176
13177 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
13178 }
13179
13180
13181 /* Nonzero if the constant value X is a legitimate general operand
13182 when generating PIC code. It is given that flag_pic is on and
13183 that X satisfies CONSTANT_P. */
13184
13185 bool
13186 legitimate_pic_operand_p (rtx x)
13187 {
13188 rtx inner;
13189
13190 switch (GET_CODE (x))
13191 {
13192 case CONST:
13193 inner = XEXP (x, 0);
13194 if (GET_CODE (inner) == PLUS
13195 && CONST_INT_P (XEXP (inner, 1)))
13196 inner = XEXP (inner, 0);
13197
13198 /* Only some unspecs are valid as "constants". */
13199 if (GET_CODE (inner) == UNSPEC)
13200 switch (XINT (inner, 1))
13201 {
13202 case UNSPEC_GOT:
13203 case UNSPEC_GOTOFF:
13204 case UNSPEC_PLTOFF:
13205 return TARGET_64BIT;
13206 case UNSPEC_TPOFF:
13207 x = XVECEXP (inner, 0, 0);
13208 return (GET_CODE (x) == SYMBOL_REF
13209 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13210 case UNSPEC_MACHOPIC_OFFSET:
13211 return legitimate_pic_address_disp_p (x);
13212 default:
13213 return false;
13214 }
13215 /* FALLTHRU */
13216
13217 case SYMBOL_REF:
13218 case LABEL_REF:
13219 return legitimate_pic_address_disp_p (x);
13220
13221 default:
13222 return true;
13223 }
13224 }
13225
13226 /* Determine if a given CONST RTX is a valid memory displacement
13227 in PIC mode. */
13228
13229 bool
13230 legitimate_pic_address_disp_p (rtx disp)
13231 {
13232 bool saw_plus;
13233
13234 /* In 64bit mode we can allow direct addresses of symbols and labels
13235 when they are not dynamic symbols. */
13236 if (TARGET_64BIT)
13237 {
13238 rtx op0 = disp, op1;
13239
13240 switch (GET_CODE (disp))
13241 {
13242 case LABEL_REF:
13243 return true;
13244
13245 case CONST:
13246 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13247 break;
13248 op0 = XEXP (XEXP (disp, 0), 0);
13249 op1 = XEXP (XEXP (disp, 0), 1);
13250 if (!CONST_INT_P (op1)
13251 || INTVAL (op1) >= 16*1024*1024
13252 || INTVAL (op1) < -16*1024*1024)
13253 break;
13254 if (GET_CODE (op0) == LABEL_REF)
13255 return true;
13256 if (GET_CODE (op0) == CONST
13257 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13258 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13259 return true;
13260 if (GET_CODE (op0) == UNSPEC
13261 && XINT (op0, 1) == UNSPEC_PCREL)
13262 return true;
13263 if (GET_CODE (op0) != SYMBOL_REF)
13264 break;
13265 /* FALLTHRU */
13266
13267 case SYMBOL_REF:
13268 /* TLS references should always be enclosed in UNSPEC.
13269 	     A dllimported symbol always needs to be resolved.  */
13270 if (SYMBOL_REF_TLS_MODEL (op0)
13271 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13272 return false;
13273
13274 if (TARGET_PECOFF)
13275 {
13276 if (is_imported_p (op0))
13277 return true;
13278
13279 if (SYMBOL_REF_FAR_ADDR_P (op0)
13280 || !SYMBOL_REF_LOCAL_P (op0))
13281 break;
13282
13283 	      /* Function symbols need to be resolved only for
13284 	         the large model.
13285 		 For the small model we don't need to resolve anything
13286 		 here.  */
13287 if ((ix86_cmodel != CM_LARGE_PIC
13288 && SYMBOL_REF_FUNCTION_P (op0))
13289 || ix86_cmodel == CM_SMALL_PIC)
13290 return true;
13291 	      /* Non-external symbols don't need to be resolved for the
13292 	         large and medium models.  */
13293 if ((ix86_cmodel == CM_LARGE_PIC
13294 || ix86_cmodel == CM_MEDIUM_PIC)
13295 && !SYMBOL_REF_EXTERNAL_P (op0))
13296 return true;
13297 }
13298 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13299 && (SYMBOL_REF_LOCAL_P (op0)
13300 || (HAVE_LD_PIE_COPYRELOC
13301 && flag_pie
13302 && !SYMBOL_REF_WEAK (op0)
13303 && !SYMBOL_REF_FUNCTION_P (op0)))
13304 && ix86_cmodel != CM_LARGE_PIC)
13305 return true;
13306 break;
13307
13308 default:
13309 break;
13310 }
13311 }
13312 if (GET_CODE (disp) != CONST)
13313 return false;
13314 disp = XEXP (disp, 0);
13315
13316 if (TARGET_64BIT)
13317 {
13318       /* It is unsafe to allow PLUS expressions.  This limits the allowed distance
13319 	 of GOT tables.  We should not need these anyway.  */
13320 if (GET_CODE (disp) != UNSPEC
13321 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13322 && XINT (disp, 1) != UNSPEC_GOTOFF
13323 && XINT (disp, 1) != UNSPEC_PCREL
13324 && XINT (disp, 1) != UNSPEC_PLTOFF))
13325 return false;
13326
13327 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13328 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
13329 return false;
13330 return true;
13331 }
13332
13333 saw_plus = false;
13334 if (GET_CODE (disp) == PLUS)
13335 {
13336 if (!CONST_INT_P (XEXP (disp, 1)))
13337 return false;
13338 disp = XEXP (disp, 0);
13339 saw_plus = true;
13340 }
13341
13342 if (TARGET_MACHO && darwin_local_data_pic (disp))
13343 return true;
13344
13345 if (GET_CODE (disp) != UNSPEC)
13346 return false;
13347
13348 switch (XINT (disp, 1))
13349 {
13350 case UNSPEC_GOT:
13351 if (saw_plus)
13352 return false;
13353 /* We need to check for both symbols and labels because VxWorks loads
13354 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13355 details. */
13356 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13357 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13358 case UNSPEC_GOTOFF:
13359 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13360 	 While the ABI also specifies a 32bit relocation, we don't produce it in
13361 	 the small PIC model at all.  */
13362 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13363 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13364 && !TARGET_64BIT)
13365 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13366 return false;
13367 case UNSPEC_GOTTPOFF:
13368 case UNSPEC_GOTNTPOFF:
13369 case UNSPEC_INDNTPOFF:
13370 if (saw_plus)
13371 return false;
13372 disp = XVECEXP (disp, 0, 0);
13373 return (GET_CODE (disp) == SYMBOL_REF
13374 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13375 case UNSPEC_NTPOFF:
13376 disp = XVECEXP (disp, 0, 0);
13377 return (GET_CODE (disp) == SYMBOL_REF
13378 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13379 case UNSPEC_DTPOFF:
13380 disp = XVECEXP (disp, 0, 0);
13381 return (GET_CODE (disp) == SYMBOL_REF
13382 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13383 }
13384
13385 return false;
13386 }
13387
13388 /* Determine if op is suitable RTX for an address register.
13389 Return naked register if a register or a register subreg is
13390 found, otherwise return NULL_RTX. */
13391
13392 static rtx
13393 ix86_validate_address_register (rtx op)
13394 {
13395 machine_mode mode = GET_MODE (op);
13396
13397 /* Only SImode or DImode registers can form the address. */
13398 if (mode != SImode && mode != DImode)
13399 return NULL_RTX;
13400
13401 if (REG_P (op))
13402 return op;
13403 else if (GET_CODE (op) == SUBREG)
13404 {
13405 rtx reg = SUBREG_REG (op);
13406
13407 if (!REG_P (reg))
13408 return NULL_RTX;
13409
13410 mode = GET_MODE (reg);
13411
13412 /* Don't allow SUBREGs that span more than a word. It can
13413 lead to spill failures when the register is one word out
13414 of a two word structure. */
13415 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13416 return NULL_RTX;
13417
13418 /* Allow only SUBREGs of non-eliminable hard registers. */
13419 if (register_no_elim_operand (reg, mode))
13420 return reg;
13421 }
13422
13423 /* Op is not a register. */
13424 return NULL_RTX;
13425 }
13426
13427 /* Recognizes RTL expressions that are valid memory addresses for an
13428 instruction. The MODE argument is the machine mode for the MEM
13429 expression that wants to use this address.
13430
13431    It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
13432 convert common non-canonical forms to canonical form so that they will
13433 be recognized. */
13434
13435 static bool
13436 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13437 {
13438 struct ix86_address parts;
13439 rtx base, index, disp;
13440 HOST_WIDE_INT scale;
13441 enum ix86_address_seg seg;
13442
13443 if (ix86_decompose_address (addr, &parts) <= 0)
13444 /* Decomposition failed. */
13445 return false;
13446
13447 base = parts.base;
13448 index = parts.index;
13449 disp = parts.disp;
13450 scale = parts.scale;
13451 seg = parts.seg;
13452
13453 /* Validate base register. */
13454 if (base)
13455 {
13456 rtx reg = ix86_validate_address_register (base);
13457
13458 if (reg == NULL_RTX)
13459 return false;
13460
13461 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13462 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13463 /* Base is not valid. */
13464 return false;
13465 }
13466
13467 /* Validate index register. */
13468 if (index)
13469 {
13470 rtx reg = ix86_validate_address_register (index);
13471
13472 if (reg == NULL_RTX)
13473 return false;
13474
13475 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13476 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13477 /* Index is not valid. */
13478 return false;
13479 }
13480
13481 /* Index and base should have the same mode. */
13482 if (base && index
13483 && GET_MODE (base) != GET_MODE (index))
13484 return false;
13485
13486 /* Address override works only on the (%reg) part of %fs:(%reg). */
13487 if (seg != SEG_DEFAULT
13488 && ((base && GET_MODE (base) != word_mode)
13489 || (index && GET_MODE (index) != word_mode)))
13490 return false;
13491
13492 /* Validate scale factor. */
13493 if (scale != 1)
13494 {
13495 if (!index)
13496 /* Scale without index. */
13497 return false;
13498
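      /* The SIB byte can only encode scale factors of 1, 2, 4 and 8.  */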
13499 if (scale != 2 && scale != 4 && scale != 8)
13500 /* Scale is not a valid multiplier. */
13501 return false;
13502 }
13503
13504 /* Validate displacement. */
13505 if (disp)
13506 {
13507 if (GET_CODE (disp) == CONST
13508 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13509 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13510 switch (XINT (XEXP (disp, 0), 1))
13511 {
13512 	    /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
13513 	       when used.  While the ABI also specifies 32bit relocations, we
13514 	       don't produce them at all and use IP relative addressing instead.  */
13515 case UNSPEC_GOT:
13516 case UNSPEC_GOTOFF:
13517 gcc_assert (flag_pic);
13518 if (!TARGET_64BIT)
13519 goto is_legitimate_pic;
13520
13521 /* 64bit address unspec. */
13522 return false;
13523
13524 case UNSPEC_GOTPCREL:
13525 case UNSPEC_PCREL:
13526 gcc_assert (flag_pic);
13527 goto is_legitimate_pic;
13528
13529 case UNSPEC_GOTTPOFF:
13530 case UNSPEC_GOTNTPOFF:
13531 case UNSPEC_INDNTPOFF:
13532 case UNSPEC_NTPOFF:
13533 case UNSPEC_DTPOFF:
13534 break;
13535
13536 case UNSPEC_STACK_CHECK:
13537 gcc_assert (flag_split_stack);
13538 break;
13539
13540 default:
13541 /* Invalid address unspec. */
13542 return false;
13543 }
13544
13545 else if (SYMBOLIC_CONST (disp)
13546 && (flag_pic
13547 || (TARGET_MACHO
13548 #if TARGET_MACHO
13549 && MACHOPIC_INDIRECT
13550 && !machopic_operand_p (disp)
13551 #endif
13552 )))
13553 {
13554
13555 is_legitimate_pic:
13556 if (TARGET_64BIT && (index || base))
13557 {
13558 /* foo@dtpoff(%rX) is ok. */
13559 if (GET_CODE (disp) != CONST
13560 || GET_CODE (XEXP (disp, 0)) != PLUS
13561 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13562 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13563 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13564 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13565 /* Non-constant pic memory reference. */
13566 return false;
13567 }
13568 else if ((!TARGET_MACHO || flag_pic)
13569 && ! legitimate_pic_address_disp_p (disp))
13570 /* Displacement is an invalid pic construct. */
13571 return false;
13572 #if TARGET_MACHO
13573 else if (MACHO_DYNAMIC_NO_PIC_P
13574 && !ix86_legitimate_constant_p (Pmode, disp))
13575 	/* Displacement must be referenced via non_lazy_pointer.  */
13576 return false;
13577 #endif
13578
13579 	  /* This code used to verify that a symbolic pic displacement
13580 	     includes the pic_offset_table_rtx register.
13581 
13582 	     While this is a good idea, unfortunately these constructs may
13583 	     be created by the "adds using lea" optimization for incorrect
13584 	     code like:
13585 
13586 	     int a;
13587 	     int foo(int i)
13588 	     {
13589 	       return *(&a+i);
13590 	     }
13591 
13592 	     This code is nonsensical, but results in addressing the
13593 	     GOT table with a pic_offset_table_rtx base.  We can't
13594 	     just refuse it easily, since it gets matched by the
13595 	     "addsi3" pattern, which later gets split to an lea when
13596 	     the output register differs from the input.  While this
13597 	     could be handled by a separate addsi pattern for this case
13598 	     that never results in an lea, disabling this test seems to
13599 	     be the easier and correct fix for the crash.  */
13600 }
13601 else if (GET_CODE (disp) != LABEL_REF
13602 && !CONST_INT_P (disp)
13603 && (GET_CODE (disp) != CONST
13604 || !ix86_legitimate_constant_p (Pmode, disp))
13605 && (GET_CODE (disp) != SYMBOL_REF
13606 || !ix86_legitimate_constant_p (Pmode, disp)))
13607 /* Displacement is not constant. */
13608 return false;
13609 else if (TARGET_64BIT
13610 && !x86_64_immediate_operand (disp, VOIDmode))
13611 /* Displacement is out of range. */
13612 return false;
13613 /* In x32 mode, constant addresses are sign extended to 64bit, so
13614 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13615 else if (TARGET_X32 && !(index || base)
13616 && CONST_INT_P (disp)
13617 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13618 return false;
13619 }
13620
13621 /* Everything looks valid. */
13622 return true;
13623 }
13624
13625 /* Determine if a given RTX is a valid constant address. */
13626
13627 bool
13628 constant_address_p (rtx x)
13629 {
13630 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13631 }
13632 \f
13633 /* Return a unique alias set for the GOT. */
13634
13635 static alias_set_type
13636 ix86_GOT_alias_set (void)
13637 {
13638 static alias_set_type set = -1;
13639 if (set == -1)
13640 set = new_alias_set ();
13641 return set;
13642 }
13643
13644 /* Return a legitimate reference for ORIG (an address) using the
13645 register REG. If REG is 0, a new pseudo is generated.
13646
13647 There are two types of references that must be handled:
13648
13649 1. Global data references must load the address from the GOT, via
13650 the PIC reg. An insn is emitted to do this load, and the reg is
13651 returned.
13652
13653 2. Static data references, constant pool addresses, and code labels
13654 compute the address as an offset from the GOT, whose base is in
13655 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13656 differentiate them from global data objects. The returned
13657 address is the PIC reg + an unspec constant.
13658
13659 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13660 reg also appears in the address. */
13661
13662 static rtx
13663 legitimize_pic_address (rtx orig, rtx reg)
13664 {
13665 rtx addr = orig;
13666 rtx new_rtx = orig;
13667
13668 #if TARGET_MACHO
13669 if (TARGET_MACHO && !TARGET_64BIT)
13670 {
13671 if (reg == 0)
13672 reg = gen_reg_rtx (Pmode);
13673 /* Use the generic Mach-O PIC machinery. */
13674 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13675 }
13676 #endif
13677
13678 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13679 {
13680 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13681 if (tmp)
13682 return tmp;
13683 }
13684
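  /* In 64-bit mode a displacement that is already a legitimate PIC
     operand (e.g. one that can be used %rip-relatively) needs no
     transformation; use the address unchanged.  */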
13685 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13686 new_rtx = addr;
13687 else if (TARGET_64BIT && !TARGET_PECOFF
13688 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13689 {
13690 rtx tmpreg;
13691 /* This symbol may be referenced via a displacement from the PIC
13692 base address (@GOTOFF). */
13693
13694 if (GET_CODE (addr) == CONST)
13695 addr = XEXP (addr, 0);
13696 if (GET_CODE (addr) == PLUS)
13697 {
13698 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13699 UNSPEC_GOTOFF);
13700 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13701 }
13702 else
13703 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13704 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13705 if (!reg)
13706 tmpreg = gen_reg_rtx (Pmode);
13707 else
13708 tmpreg = reg;
13709 emit_move_insn (tmpreg, new_rtx);
13710
13711 if (reg != 0)
13712 {
13713 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13714 tmpreg, 1, OPTAB_DIRECT);
13715 new_rtx = reg;
13716 }
13717 else
13718 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13719 }
13720 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13721 {
13722 /* This symbol may be referenced via a displacement from the PIC
13723 base address (@GOTOFF). */
13724
13725 if (GET_CODE (addr) == CONST)
13726 addr = XEXP (addr, 0);
13727 if (GET_CODE (addr) == PLUS)
13728 {
13729 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13730 UNSPEC_GOTOFF);
13731 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13732 }
13733 else
13734 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13735 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13736 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13737
13738 if (reg != 0)
13739 {
13740 emit_move_insn (reg, new_rtx);
13741 new_rtx = reg;
13742 }
13743 }
13744 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13745 /* We can't use @GOTOFF for text labels on VxWorks;
13746 see gotoff_operand. */
13747 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13748 {
13749 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13750 if (tmp)
13751 return tmp;
13752
13753 	  /* For x64 PE-COFF there is no GOT table, so we use the address
13754 	     directly.  */
13755 if (TARGET_64BIT && TARGET_PECOFF)
13756 {
13757 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13758 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13759
13760 if (reg == 0)
13761 reg = gen_reg_rtx (Pmode);
13762 emit_move_insn (reg, new_rtx);
13763 new_rtx = reg;
13764 }
13765 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13766 {
13767 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13768 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13769 new_rtx = gen_const_mem (Pmode, new_rtx);
13770 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13771
13772 if (reg == 0)
13773 reg = gen_reg_rtx (Pmode);
13774 	  /* Use gen_movsi directly, otherwise the address is loaded
13775 	     into a register for CSE.  We don't want to CSE this address;
13776 	     instead we CSE addresses from the GOT table, so skip this.  */
13777 emit_insn (gen_movsi (reg, new_rtx));
13778 new_rtx = reg;
13779 }
13780 else
13781 {
13782 /* This symbol must be referenced via a load from the
13783 Global Offset Table (@GOT). */
13784
13785 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13786 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13787 if (TARGET_64BIT)
13788 new_rtx = force_reg (Pmode, new_rtx);
13789 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13790 new_rtx = gen_const_mem (Pmode, new_rtx);
13791 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13792
13793 if (reg == 0)
13794 reg = gen_reg_rtx (Pmode);
13795 emit_move_insn (reg, new_rtx);
13796 new_rtx = reg;
13797 }
13798 }
13799 else
13800 {
13801 if (CONST_INT_P (addr)
13802 && !x86_64_immediate_operand (addr, VOIDmode))
13803 {
13804 if (reg)
13805 {
13806 emit_move_insn (reg, addr);
13807 new_rtx = reg;
13808 }
13809 else
13810 new_rtx = force_reg (Pmode, addr);
13811 }
13812 else if (GET_CODE (addr) == CONST)
13813 {
13814 addr = XEXP (addr, 0);
13815
13816 /* We must match stuff we generate before. Assume the only
13817 unspecs that can get here are ours. Not that we could do
13818 anything with them anyway.... */
13819 if (GET_CODE (addr) == UNSPEC
13820 || (GET_CODE (addr) == PLUS
13821 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13822 return orig;
13823 gcc_assert (GET_CODE (addr) == PLUS);
13824 }
13825 if (GET_CODE (addr) == PLUS)
13826 {
13827 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13828
13829 /* Check first to see if this is a constant offset from a @GOTOFF
13830 symbol reference. */
13831 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13832 && CONST_INT_P (op1))
13833 {
13834 if (!TARGET_64BIT)
13835 {
13836 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13837 UNSPEC_GOTOFF);
13838 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13839 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13840 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13841
13842 if (reg != 0)
13843 {
13844 emit_move_insn (reg, new_rtx);
13845 new_rtx = reg;
13846 }
13847 }
13848 else
13849 {
13850 if (INTVAL (op1) < -16*1024*1024
13851 || INTVAL (op1) >= 16*1024*1024)
13852 {
13853 if (!x86_64_immediate_operand (op1, Pmode))
13854 op1 = force_reg (Pmode, op1);
13855 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13856 }
13857 }
13858 }
13859 else
13860 {
13861 rtx base = legitimize_pic_address (op0, reg);
13862 machine_mode mode = GET_MODE (base);
13863 new_rtx
13864 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13865
13866 if (CONST_INT_P (new_rtx))
13867 {
13868 if (INTVAL (new_rtx) < -16*1024*1024
13869 || INTVAL (new_rtx) >= 16*1024*1024)
13870 {
13871 if (!x86_64_immediate_operand (new_rtx, mode))
13872 new_rtx = force_reg (mode, new_rtx);
13873 new_rtx
13874 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13875 }
13876 else
13877 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13878 }
13879 else
13880 {
13881 		  /* For %rip addressing, we have to use just disp32, not
13882 		     base or index.  */
13883 if (TARGET_64BIT
13884 && (GET_CODE (base) == SYMBOL_REF
13885 || GET_CODE (base) == LABEL_REF))
13886 base = force_reg (mode, base);
13887 if (GET_CODE (new_rtx) == PLUS
13888 && CONSTANT_P (XEXP (new_rtx, 1)))
13889 {
13890 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13891 new_rtx = XEXP (new_rtx, 1);
13892 }
13893 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13894 }
13895 }
13896 }
13897 }
13898 return new_rtx;
13899 }
13900 \f
13901 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13902
13903 static rtx
13904 get_thread_pointer (machine_mode tp_mode, bool to_reg)
13905 {
13906 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13907
13908 if (GET_MODE (tp) != tp_mode)
13909 {
13910 gcc_assert (GET_MODE (tp) == SImode);
13911 gcc_assert (tp_mode == DImode);
13912
13913 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13914 }
13915
13916 if (to_reg)
13917 tp = copy_to_mode_reg (tp_mode, tp);
13918
13919 return tp;
13920 }
13921
13922 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13923
13924 static GTY(()) rtx ix86_tls_symbol;
13925
13926 static rtx
13927 ix86_tls_get_addr (void)
13928 {
13929 if (!ix86_tls_symbol)
13930 {
13931 const char *sym
13932 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13933 ? "___tls_get_addr" : "__tls_get_addr");
13934
13935 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13936 }
13937
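  /* In the large PIC code model __tls_get_addr cannot be assumed to be
     within 32-bit PC-relative range, so form its address as the PIC
     register plus an @PLTOFF offset instead.  */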
13938 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13939 {
13940 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13941 UNSPEC_PLTOFF);
13942 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13943 gen_rtx_CONST (Pmode, unspec));
13944 }
13945
13946 return ix86_tls_symbol;
13947 }
13948
13949 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13950
13951 static GTY(()) rtx ix86_tls_module_base_symbol;
13952
13953 rtx
13954 ix86_tls_module_base (void)
13955 {
13956 if (!ix86_tls_module_base_symbol)
13957 {
13958 ix86_tls_module_base_symbol
13959 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13960
13961 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13962 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13963 }
13964
13965 return ix86_tls_module_base_symbol;
13966 }
13967
13968 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13969 false if we expect this to be used for a memory address and true if
13970 we expect to load the address into a register. */
13971
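/* A rough sketch of what the models correspond to on x86-64 ELF targets
   (the exact insns come from the patterns this function expands to; this
   is only an illustration, assuming the usual GNU TLS sequences):

     global dynamic:   leaq  x@tlsgd(%rip), %rdi
                       call  __tls_get_addr@PLT
     initial exec:     movq  x@gottpoff(%rip), %rax
                       movq  %fs:(%rax), ...
     local exec:       movq  %fs:0, %rax
                       leaq  x@tpoff(%rax), ...  */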
13972 static rtx
13973 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13974 {
13975 rtx dest, base, off;
13976 rtx pic = NULL_RTX, tp = NULL_RTX;
13977 machine_mode tp_mode = Pmode;
13978 int type;
13979
13980   /* Fall back to the global dynamic model if the toolchain cannot
13981      support local dynamic.  */
13982 if (TARGET_SUN_TLS && !TARGET_64BIT
13983 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13984 && model == TLS_MODEL_LOCAL_DYNAMIC)
13985 model = TLS_MODEL_GLOBAL_DYNAMIC;
13986
13987 switch (model)
13988 {
13989 case TLS_MODEL_GLOBAL_DYNAMIC:
13990 dest = gen_reg_rtx (Pmode);
13991
13992 if (!TARGET_64BIT)
13993 {
13994 if (flag_pic && !TARGET_PECOFF)
13995 pic = pic_offset_table_rtx;
13996 else
13997 {
13998 pic = gen_reg_rtx (Pmode);
13999 emit_insn (gen_set_got (pic));
14000 }
14001 }
14002
14003 if (TARGET_GNU2_TLS)
14004 {
14005 if (TARGET_64BIT)
14006 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
14007 else
14008 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
14009
14010 tp = get_thread_pointer (Pmode, true);
14011 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
14012
14013 if (GET_MODE (x) != Pmode)
14014 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14015
14016 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14017 }
14018 else
14019 {
14020 rtx caddr = ix86_tls_get_addr ();
14021
14022 if (TARGET_64BIT)
14023 {
14024 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14025 rtx_insn *insns;
14026
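	      /* Collect the call to __tls_get_addr in its own insn
		 sequence, mark it as a const call, and let
		 emit_libcall_block attach a REG_EQUAL note equating
		 DEST with the original symbol X.  */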
14027 start_sequence ();
14028 emit_call_insn
14029 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
14030 insns = get_insns ();
14031 end_sequence ();
14032
14033 if (GET_MODE (x) != Pmode)
14034 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14035
14036 RTL_CONST_CALL_P (insns) = 1;
14037 emit_libcall_block (insns, dest, rax, x);
14038 }
14039 else
14040 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
14041 }
14042 break;
14043
14044 case TLS_MODEL_LOCAL_DYNAMIC:
14045 base = gen_reg_rtx (Pmode);
14046
14047 if (!TARGET_64BIT)
14048 {
14049 if (flag_pic)
14050 pic = pic_offset_table_rtx;
14051 else
14052 {
14053 pic = gen_reg_rtx (Pmode);
14054 emit_insn (gen_set_got (pic));
14055 }
14056 }
14057
14058 if (TARGET_GNU2_TLS)
14059 {
14060 rtx tmp = ix86_tls_module_base ();
14061
14062 if (TARGET_64BIT)
14063 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
14064 else
14065 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
14066
14067 tp = get_thread_pointer (Pmode, true);
14068 set_unique_reg_note (get_last_insn (), REG_EQUAL,
14069 gen_rtx_MINUS (Pmode, tmp, tp));
14070 }
14071 else
14072 {
14073 rtx caddr = ix86_tls_get_addr ();
14074
14075 if (TARGET_64BIT)
14076 {
14077 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14078 rtx_insn *insns;
14079 rtx eqv;
14080
14081 start_sequence ();
14082 emit_call_insn
14083 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
14084 insns = get_insns ();
14085 end_sequence ();
14086
14087 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
14088 share the LD_BASE result with other LD model accesses. */
14089 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
14090 UNSPEC_TLS_LD_BASE);
14091
14092 RTL_CONST_CALL_P (insns) = 1;
14093 emit_libcall_block (insns, base, rax, eqv);
14094 }
14095 else
14096 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
14097 }
14098
14099 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
14100 off = gen_rtx_CONST (Pmode, off);
14101
14102 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
14103
14104 if (TARGET_GNU2_TLS)
14105 {
14106 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
14107
14108 if (GET_MODE (x) != Pmode)
14109 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14110
14111 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14112 }
14113 break;
14114
14115 case TLS_MODEL_INITIAL_EXEC:
14116 if (TARGET_64BIT)
14117 {
14118 if (TARGET_SUN_TLS && !TARGET_X32)
14119 {
14120 /* The Sun linker took the AMD64 TLS spec literally
14121 and can only handle %rax as destination of the
14122 initial executable code sequence. */
14123
14124 dest = gen_reg_rtx (DImode);
14125 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14126 return dest;
14127 }
14128
14129 	  /* Generate DImode references to avoid %fs:(%reg32)
14130 	     problems and the linker IE->LE relaxation bug.  */
14131 tp_mode = DImode;
14132 pic = NULL;
14133 type = UNSPEC_GOTNTPOFF;
14134 }
14135 else if (flag_pic)
14136 {
14137 pic = pic_offset_table_rtx;
14138 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14139 }
14140 else if (!TARGET_ANY_GNU_TLS)
14141 {
14142 pic = gen_reg_rtx (Pmode);
14143 emit_insn (gen_set_got (pic));
14144 type = UNSPEC_GOTTPOFF;
14145 }
14146 else
14147 {
14148 pic = NULL;
14149 type = UNSPEC_INDNTPOFF;
14150 }
14151
14152 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14153 off = gen_rtx_CONST (tp_mode, off);
14154 if (pic)
14155 off = gen_rtx_PLUS (tp_mode, pic, off);
14156 off = gen_const_mem (tp_mode, off);
14157 set_mem_alias_set (off, ix86_GOT_alias_set ());
14158
14159 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14160 {
14161 base = get_thread_pointer (tp_mode,
14162 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14163 off = force_reg (tp_mode, off);
14164 return gen_rtx_PLUS (tp_mode, base, off);
14165 }
14166 else
14167 {
14168 base = get_thread_pointer (Pmode, true);
14169 dest = gen_reg_rtx (Pmode);
14170 emit_insn (ix86_gen_sub3 (dest, base, off));
14171 }
14172 break;
14173
14174 case TLS_MODEL_LOCAL_EXEC:
14175 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14176 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14177 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14178 off = gen_rtx_CONST (Pmode, off);
14179
14180 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14181 {
14182 base = get_thread_pointer (Pmode,
14183 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14184 return gen_rtx_PLUS (Pmode, base, off);
14185 }
14186 else
14187 {
14188 base = get_thread_pointer (Pmode, true);
14189 dest = gen_reg_rtx (Pmode);
14190 emit_insn (ix86_gen_sub3 (dest, base, off));
14191 }
14192 break;
14193
14194 default:
14195 gcc_unreachable ();
14196 }
14197
14198 return dest;
14199 }
14200
14201 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14202 to symbol DECL if BEIMPORT is true. Otherwise create or return the
14203 unique refptr-DECL symbol corresponding to symbol DECL. */
14204
14205 struct dllimport_hasher : ggc_cache_hasher<tree_map *>
14206 {
14207 static inline hashval_t hash (tree_map *m) { return m->hash; }
14208 static inline bool
14209 equal (tree_map *a, tree_map *b)
14210 {
14211 return a->base.from == b->base.from;
14212 }
14213
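  /* Called during garbage collection: keep (and mark) a cache entry only
     while its key decl is still live, otherwise delete the slot so the
     cache does not keep the decl alive.  */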
14214 static void
14215 handle_cache_entry (tree_map *&m)
14216 {
14217 extern void gt_ggc_mx (tree_map *&);
14218 if (m == HTAB_EMPTY_ENTRY || m == HTAB_DELETED_ENTRY)
14219 return;
14220 else if (ggc_marked_p (m->base.from))
14221 gt_ggc_mx (m);
14222 else
14223 m = static_cast<tree_map *> (HTAB_DELETED_ENTRY);
14224 }
14225 };
14226
14227 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
14228
14229 static tree
14230 get_dllimport_decl (tree decl, bool beimport)
14231 {
14232 struct tree_map *h, in;
14233 const char *name;
14234 const char *prefix;
14235 size_t namelen, prefixlen;
14236 char *imp_name;
14237 tree to;
14238 rtx rtl;
14239
14240 if (!dllimport_map)
14241 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
14242
14243 in.hash = htab_hash_pointer (decl);
14244 in.base.from = decl;
14245 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
14246 h = *loc;
14247 if (h)
14248 return h->to;
14249
14250 *loc = h = ggc_alloc<tree_map> ();
14251 h->hash = in.hash;
14252 h->base.from = decl;
14253 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14254 VAR_DECL, NULL, ptr_type_node);
14255 DECL_ARTIFICIAL (to) = 1;
14256 DECL_IGNORED_P (to) = 1;
14257 DECL_EXTERNAL (to) = 1;
14258 TREE_READONLY (to) = 1;
14259
14260 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14261 name = targetm.strip_name_encoding (name);
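  /* The leading '*' in the prefixes below makes assemble_name emit the
     string verbatim, without prepending user_label_prefix again.  */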
14262 if (beimport)
14263 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14264 ? "*__imp_" : "*__imp__";
14265 else
14266 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14267 namelen = strlen (name);
14268 prefixlen = strlen (prefix);
14269 imp_name = (char *) alloca (namelen + prefixlen + 1);
14270 memcpy (imp_name, prefix, prefixlen);
14271 memcpy (imp_name + prefixlen, name, namelen + 1);
14272
14273 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14274 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14275 SET_SYMBOL_REF_DECL (rtl, to);
14276 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14277 if (!beimport)
14278 {
14279 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14280 #ifdef SUB_TARGET_RECORD_STUB
14281 SUB_TARGET_RECORD_STUB (name);
14282 #endif
14283 }
14284
14285 rtl = gen_const_mem (Pmode, rtl);
14286 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14287
14288 SET_DECL_RTL (to, rtl);
14289 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14290
14291 return to;
14292 }
14293
14294 /* Expand SYMBOL into its corresponding far-address symbol.
14295 WANT_REG is true if we require the result be a register. */
14296
14297 static rtx
14298 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14299 {
14300 tree imp_decl;
14301 rtx x;
14302
14303 gcc_assert (SYMBOL_REF_DECL (symbol));
14304 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14305
14306 x = DECL_RTL (imp_decl);
14307 if (want_reg)
14308 x = force_reg (Pmode, x);
14309 return x;
14310 }
14311
14312 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14313 true if we require the result be a register. */
14314
14315 static rtx
14316 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14317 {
14318 tree imp_decl;
14319 rtx x;
14320
14321 gcc_assert (SYMBOL_REF_DECL (symbol));
14322 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14323
14324 x = DECL_RTL (imp_decl);
14325 if (want_reg)
14326 x = force_reg (Pmode, x);
14327 return x;
14328 }
14329
14330 /* Expand ADDR into its corresponding dllimport or refptr symbol.  INREG
14331    is true if we require the result be a register.  */
14332
14333 static rtx
14334 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14335 {
14336 if (!TARGET_PECOFF)
14337 return NULL_RTX;
14338
14339 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14340 {
14341 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14342 return legitimize_dllimport_symbol (addr, inreg);
14343 if (GET_CODE (addr) == CONST
14344 && GET_CODE (XEXP (addr, 0)) == PLUS
14345 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14346 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14347 {
14348 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14349 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14350 }
14351 }
14352
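  /* Apart from dllimport references, refptr stubs are only needed for the
     medium and large PIC code models, where external data may not be
     directly reachable.  */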
14353 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14354 return NULL_RTX;
14355 if (GET_CODE (addr) == SYMBOL_REF
14356 && !is_imported_p (addr)
14357 && SYMBOL_REF_EXTERNAL_P (addr)
14358 && SYMBOL_REF_DECL (addr))
14359 return legitimize_pe_coff_extern_decl (addr, inreg);
14360
14361 if (GET_CODE (addr) == CONST
14362 && GET_CODE (XEXP (addr, 0)) == PLUS
14363 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14364 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14365 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14366 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14367 {
14368 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14369 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14370 }
14371 return NULL_RTX;
14372 }
14373
14374 /* Try machine-dependent ways of modifying an illegitimate address
14375 to be legitimate. If we find one, return the new, valid address.
14376 This macro is used in only one place: `memory_address' in explow.c.
14377
14378 OLDX is the address as it was before break_out_memory_refs was called.
14379 In some cases it is useful to look at this to decide what needs to be done.
14380
14381 It is always safe for this macro to do nothing. It exists to recognize
14382 opportunities to optimize the output.
14383
14384 For the 80386, we handle X+REG by loading X into a register R and
14385 using R+REG. R will go in a general reg and indexing will be used.
14386 However, if REG is a broken-out memory address or multiplication,
14387 nothing needs to be done because REG can certainly go in a general reg.
14388
14389 When -fpic is used, special handling is needed for symbolic references.
14390 See comments by legitimize_pic_address in i386.c for details. */
14391
14392 static rtx
14393 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14394 {
14395 bool changed = false;
14396 unsigned log;
14397
14398 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14399 if (log)
14400 return legitimize_tls_address (x, (enum tls_model) log, false);
14401 if (GET_CODE (x) == CONST
14402 && GET_CODE (XEXP (x, 0)) == PLUS
14403 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14404 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14405 {
14406 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14407 (enum tls_model) log, false);
14408 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14409 }
14410
14411 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14412 {
14413 rtx tmp = legitimize_pe_coff_symbol (x, true);
14414 if (tmp)
14415 return tmp;
14416 }
14417
14418 if (flag_pic && SYMBOLIC_CONST (x))
14419 return legitimize_pic_address (x, 0);
14420
14421 #if TARGET_MACHO
14422 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14423 return machopic_indirect_data_reference (x, 0);
14424 #endif
14425
14426 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
14427 if (GET_CODE (x) == ASHIFT
14428 && CONST_INT_P (XEXP (x, 1))
14429 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14430 {
14431 changed = true;
14432 log = INTVAL (XEXP (x, 1));
14433 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14434 GEN_INT (1 << log));
14435 }
14436
14437 if (GET_CODE (x) == PLUS)
14438 {
14439 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14440
14441 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14442 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14443 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14444 {
14445 changed = true;
14446 log = INTVAL (XEXP (XEXP (x, 0), 1));
14447 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14448 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14449 GEN_INT (1 << log));
14450 }
14451
14452 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14453 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14454 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14455 {
14456 changed = true;
14457 log = INTVAL (XEXP (XEXP (x, 1), 1));
14458 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14459 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14460 GEN_INT (1 << log));
14461 }
14462
14463 /* Put multiply first if it isn't already. */
14464 if (GET_CODE (XEXP (x, 1)) == MULT)
14465 {
14466 std::swap (XEXP (x, 0), XEXP (x, 1));
14467 changed = true;
14468 }
14469
14470 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14471 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14472 created by virtual register instantiation, register elimination, and
14473 similar optimizations. */
14474 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14475 {
14476 changed = true;
14477 x = gen_rtx_PLUS (Pmode,
14478 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14479 XEXP (XEXP (x, 1), 0)),
14480 XEXP (XEXP (x, 1), 1));
14481 }
14482
14483 /* Canonicalize
14484 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14485 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14486 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14487 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14488 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14489 && CONSTANT_P (XEXP (x, 1)))
14490 {
14491 rtx constant;
14492 rtx other = NULL_RTX;
14493
14494 if (CONST_INT_P (XEXP (x, 1)))
14495 {
14496 constant = XEXP (x, 1);
14497 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14498 }
14499 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14500 {
14501 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14502 other = XEXP (x, 1);
14503 }
14504 else
14505 constant = 0;
14506
14507 if (constant)
14508 {
14509 changed = true;
14510 x = gen_rtx_PLUS (Pmode,
14511 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14512 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14513 plus_constant (Pmode, other,
14514 INTVAL (constant)));
14515 }
14516 }
14517
14518 if (changed && ix86_legitimate_address_p (mode, x, false))
14519 return x;
14520
14521 if (GET_CODE (XEXP (x, 0)) == MULT)
14522 {
14523 changed = true;
14524 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14525 }
14526
14527 if (GET_CODE (XEXP (x, 1)) == MULT)
14528 {
14529 changed = true;
14530 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14531 }
14532
14533 if (changed
14534 && REG_P (XEXP (x, 1))
14535 && REG_P (XEXP (x, 0)))
14536 return x;
14537
14538 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14539 {
14540 changed = true;
14541 x = legitimize_pic_address (x, 0);
14542 }
14543
14544 if (changed && ix86_legitimate_address_p (mode, x, false))
14545 return x;
14546
14547 if (REG_P (XEXP (x, 0)))
14548 {
14549 rtx temp = gen_reg_rtx (Pmode);
14550 rtx val = force_operand (XEXP (x, 1), temp);
14551 if (val != temp)
14552 {
14553 val = convert_to_mode (Pmode, val, 1);
14554 emit_move_insn (temp, val);
14555 }
14556
14557 XEXP (x, 1) = temp;
14558 return x;
14559 }
14560
14561 else if (REG_P (XEXP (x, 1)))
14562 {
14563 rtx temp = gen_reg_rtx (Pmode);
14564 rtx val = force_operand (XEXP (x, 0), temp);
14565 if (val != temp)
14566 {
14567 val = convert_to_mode (Pmode, val, 1);
14568 emit_move_insn (temp, val);
14569 }
14570
14571 XEXP (x, 0) = temp;
14572 return x;
14573 }
14574 }
14575
14576 return x;
14577 }
14578 \f
14579 /* Print an integer constant expression in assembler syntax. Addition
14580 and subtraction are the only arithmetic that may appear in these
14581 expressions. FILE is the stdio stream to write to, X is the rtx, and
14582 CODE is the operand print code from the output string. */
14583
14584 static void
14585 output_pic_addr_const (FILE *file, rtx x, int code)
14586 {
14587 char buf[256];
14588
14589 switch (GET_CODE (x))
14590 {
14591 case PC:
14592 gcc_assert (flag_pic);
14593 putc ('.', file);
14594 break;
14595
14596 case SYMBOL_REF:
14597 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14598 output_addr_const (file, x);
14599 else
14600 {
14601 const char *name = XSTR (x, 0);
14602
14603 /* Mark the decl as referenced so that cgraph will
14604 output the function. */
14605 if (SYMBOL_REF_DECL (x))
14606 mark_decl_referenced (SYMBOL_REF_DECL (x));
14607
14608 #if TARGET_MACHO
14609 if (MACHOPIC_INDIRECT
14610 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14611 name = machopic_indirection_name (x, /*stub_p=*/true);
14612 #endif
14613 assemble_name (file, name);
14614 }
14615 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14616 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14617 fputs ("@PLT", file);
14618 break;
14619
14620 case LABEL_REF:
14621 x = XEXP (x, 0);
14622 /* FALLTHRU */
14623 case CODE_LABEL:
14624 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14625 assemble_name (asm_out_file, buf);
14626 break;
14627
14628 case CONST_INT:
14629 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14630 break;
14631
14632 case CONST:
14633 /* This used to output parentheses around the expression,
14634 but that does not work on the 386 (either ATT or BSD assembler). */
14635 output_pic_addr_const (file, XEXP (x, 0), code);
14636 break;
14637
14638 case CONST_DOUBLE:
14639 /* We can't handle floating point constants;
14640 TARGET_PRINT_OPERAND must handle them. */
14641 output_operand_lossage ("floating constant misused");
14642 break;
14643
14644 case PLUS:
14645 /* Some assemblers need integer constants to appear first. */
14646 if (CONST_INT_P (XEXP (x, 0)))
14647 {
14648 output_pic_addr_const (file, XEXP (x, 0), code);
14649 putc ('+', file);
14650 output_pic_addr_const (file, XEXP (x, 1), code);
14651 }
14652 else
14653 {
14654 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14655 output_pic_addr_const (file, XEXP (x, 1), code);
14656 putc ('+', file);
14657 output_pic_addr_const (file, XEXP (x, 0), code);
14658 }
14659 break;
14660
14661 case MINUS:
14662 if (!TARGET_MACHO)
14663 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14664 output_pic_addr_const (file, XEXP (x, 0), code);
14665 putc ('-', file);
14666 output_pic_addr_const (file, XEXP (x, 1), code);
14667 if (!TARGET_MACHO)
14668 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14669 break;
14670
14671 case UNSPEC:
14672 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14673 {
14674 bool f = i386_asm_output_addr_const_extra (file, x);
14675 gcc_assert (f);
14676 break;
14677 }
14678
14679 gcc_assert (XVECLEN (x, 0) == 1);
14680 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14681 switch (XINT (x, 1))
14682 {
14683 case UNSPEC_GOT:
14684 fputs ("@GOT", file);
14685 break;
14686 case UNSPEC_GOTOFF:
14687 fputs ("@GOTOFF", file);
14688 break;
14689 case UNSPEC_PLTOFF:
14690 fputs ("@PLTOFF", file);
14691 break;
14692 case UNSPEC_PCREL:
14693 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14694 "(%rip)" : "[rip]", file);
14695 break;
14696 case UNSPEC_GOTPCREL:
14697 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14698 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14699 break;
14700 case UNSPEC_GOTTPOFF:
14701 /* FIXME: This might be @TPOFF in Sun ld too. */
14702 fputs ("@gottpoff", file);
14703 break;
14704 case UNSPEC_TPOFF:
14705 fputs ("@tpoff", file);
14706 break;
14707 case UNSPEC_NTPOFF:
14708 if (TARGET_64BIT)
14709 fputs ("@tpoff", file);
14710 else
14711 fputs ("@ntpoff", file);
14712 break;
14713 case UNSPEC_DTPOFF:
14714 fputs ("@dtpoff", file);
14715 break;
14716 case UNSPEC_GOTNTPOFF:
14717 if (TARGET_64BIT)
14718 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14719 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14720 else
14721 fputs ("@gotntpoff", file);
14722 break;
14723 case UNSPEC_INDNTPOFF:
14724 fputs ("@indntpoff", file);
14725 break;
14726 #if TARGET_MACHO
14727 case UNSPEC_MACHOPIC_OFFSET:
14728 putc ('-', file);
14729 machopic_output_function_base_name (file);
14730 break;
14731 #endif
14732 default:
14733 output_operand_lossage ("invalid UNSPEC as operand");
14734 break;
14735 }
14736 break;
14737
14738 default:
14739 output_operand_lossage ("invalid expression as operand");
14740 }
14741 }
14742
14743 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14744 We need to emit DTP-relative relocations. */
14745
14746 static void ATTRIBUTE_UNUSED
14747 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14748 {
14749 fputs (ASM_LONG, file);
14750 output_addr_const (file, x);
14751 fputs ("@dtpoff", file);
14752 switch (size)
14753 {
14754 case 4:
14755 break;
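    /* For an 8-byte value emit the 32-bit @dtpoff word followed by a zero
       upper word.  */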
14756 case 8:
14757 fputs (", 0", file);
14758 break;
14759 default:
14760 gcc_unreachable ();
14761 }
14762 }
14763
14764 /* Return true if X is a representation of the PIC register. This copes
14765 with calls from ix86_find_base_term, where the register might have
14766 been replaced by a cselib value. */
14767
14768 static bool
14769 ix86_pic_register_p (rtx x)
14770 {
14771 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14772 return (pic_offset_table_rtx
14773 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14774 else if (!REG_P (x))
14775 return false;
14776 else if (pic_offset_table_rtx)
14777 {
14778 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14779 return true;
14780 if (HARD_REGISTER_P (x)
14781 && !HARD_REGISTER_P (pic_offset_table_rtx)
14782 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14783 return true;
14784 return false;
14785 }
14786 else
14787 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14788 }
14789
14790 /* Helper function for ix86_delegitimize_address.
14791 Attempt to delegitimize TLS local-exec accesses. */
14792
14793 static rtx
14794 ix86_delegitimize_tls_address (rtx orig_x)
14795 {
14796 rtx x = orig_x, unspec;
14797 struct ix86_address addr;
14798
14799 if (!TARGET_TLS_DIRECT_SEG_REFS)
14800 return orig_x;
14801 if (MEM_P (x))
14802 x = XEXP (x, 0);
14803 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14804 return orig_x;
14805 if (ix86_decompose_address (x, &addr) == 0
14806 || addr.seg != DEFAULT_TLS_SEG_REG
14807 || addr.disp == NULL_RTX
14808 || GET_CODE (addr.disp) != CONST)
14809 return orig_x;
14810 unspec = XEXP (addr.disp, 0);
14811 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14812 unspec = XEXP (unspec, 0);
14813 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14814 return orig_x;
14815 x = XVECEXP (unspec, 0, 0);
14816 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14817 if (unspec != XEXP (addr.disp, 0))
14818 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14819 if (addr.index)
14820 {
14821 rtx idx = addr.index;
14822 if (addr.scale != 1)
14823 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14824 x = gen_rtx_PLUS (Pmode, idx, x);
14825 }
14826 if (addr.base)
14827 x = gen_rtx_PLUS (Pmode, addr.base, x);
14828 if (MEM_P (orig_x))
14829 x = replace_equiv_address_nv (orig_x, x);
14830 return x;
14831 }
14832
14833 /* In the name of slightly smaller debug output, and to cater to
14834 general assembler lossage, recognize PIC+GOTOFF and turn it back
14835 into a direct symbol reference.
14836
14837 On Darwin, this is necessary to avoid a crash, because Darwin
14838 has a different PIC label for each routine but the DWARF debugging
14839 information is not associated with any particular routine, so it's
14840 necessary to remove references to the PIC label from RTL stored by
14841 the DWARF output code. */
14842
14843 static rtx
14844 ix86_delegitimize_address (rtx x)
14845 {
14846 rtx orig_x = delegitimize_mem_from_attrs (x);
14847 /* addend is NULL or some rtx if x is something+GOTOFF where
14848 something doesn't include the PIC register. */
14849 rtx addend = NULL_RTX;
14850 /* reg_addend is NULL or a multiple of some register. */
14851 rtx reg_addend = NULL_RTX;
14852 /* const_addend is NULL or a const_int. */
14853 rtx const_addend = NULL_RTX;
14854 /* This is the result, or NULL. */
14855 rtx result = NULL_RTX;
14856
14857 x = orig_x;
14858
14859 if (MEM_P (x))
14860 x = XEXP (x, 0);
14861
14862 if (TARGET_64BIT)
14863 {
14864 if (GET_CODE (x) == CONST
14865 && GET_CODE (XEXP (x, 0)) == PLUS
14866 && GET_MODE (XEXP (x, 0)) == Pmode
14867 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14868 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14869 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14870 {
14871 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14872 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14873 if (MEM_P (orig_x))
14874 x = replace_equiv_address_nv (orig_x, x);
14875 return x;
14876 }
14877
14878 if (GET_CODE (x) == CONST
14879 && GET_CODE (XEXP (x, 0)) == UNSPEC
14880 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14881 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14882 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14883 {
14884 x = XVECEXP (XEXP (x, 0), 0, 0);
14885 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14886 {
14887 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14888 GET_MODE (x), 0);
14889 if (x == NULL_RTX)
14890 return orig_x;
14891 }
14892 return x;
14893 }
14894
14895 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14896 return ix86_delegitimize_tls_address (orig_x);
14897
14898 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14899 and -mcmodel=medium -fpic. */
14900 }
14901
14902 if (GET_CODE (x) != PLUS
14903 || GET_CODE (XEXP (x, 1)) != CONST)
14904 return ix86_delegitimize_tls_address (orig_x);
14905
14906 if (ix86_pic_register_p (XEXP (x, 0)))
14907 /* %ebx + GOT/GOTOFF */
14908 ;
14909 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14910 {
14911 /* %ebx + %reg * scale + GOT/GOTOFF */
14912 reg_addend = XEXP (x, 0);
14913 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14914 reg_addend = XEXP (reg_addend, 1);
14915 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14916 reg_addend = XEXP (reg_addend, 0);
14917 else
14918 {
14919 reg_addend = NULL_RTX;
14920 addend = XEXP (x, 0);
14921 }
14922 }
14923 else
14924 addend = XEXP (x, 0);
14925
14926 x = XEXP (XEXP (x, 1), 0);
14927 if (GET_CODE (x) == PLUS
14928 && CONST_INT_P (XEXP (x, 1)))
14929 {
14930 const_addend = XEXP (x, 1);
14931 x = XEXP (x, 0);
14932 }
14933
14934 if (GET_CODE (x) == UNSPEC
14935 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14936 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14937 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14938 && !MEM_P (orig_x) && !addend)))
14939 result = XVECEXP (x, 0, 0);
14940
14941 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14942 && !MEM_P (orig_x))
14943 result = XVECEXP (x, 0, 0);
14944
14945 if (! result)
14946 return ix86_delegitimize_tls_address (orig_x);
14947
14948 if (const_addend)
14949 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14950 if (reg_addend)
14951 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14952 if (addend)
14953 {
14954 	  /* If the rest of the original X doesn't involve the PIC register, add
14955 addend and subtract pic_offset_table_rtx. This can happen e.g.
14956 for code like:
14957 leal (%ebx, %ecx, 4), %ecx
14958 ...
14959 movl foo@GOTOFF(%ecx), %edx
14960 in which case we return (%ecx - %ebx) + foo
14961 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
14962 and reload has completed. */
14963 if (pic_offset_table_rtx
14964 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
14965 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14966 pic_offset_table_rtx),
14967 result);
14968 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
14969 {
14970 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
14971 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
14972 result = gen_rtx_PLUS (Pmode, tmp, result);
14973 }
14974 else
14975 return orig_x;
14976 }
14977 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14978 {
14979 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14980 if (result == NULL_RTX)
14981 return orig_x;
14982 }
14983 return result;
14984 }
14985
14986 /* If X is a machine specific address (i.e. a symbol or label being
14987 referenced as a displacement from the GOT implemented using an
14988 UNSPEC), then return the base term. Otherwise return X. */
14989
14990 rtx
14991 ix86_find_base_term (rtx x)
14992 {
14993 rtx term;
14994
14995 if (TARGET_64BIT)
14996 {
14997 if (GET_CODE (x) != CONST)
14998 return x;
14999 term = XEXP (x, 0);
15000 if (GET_CODE (term) == PLUS
15001 && CONST_INT_P (XEXP (term, 1)))
15002 term = XEXP (term, 0);
15003 if (GET_CODE (term) != UNSPEC
15004 || (XINT (term, 1) != UNSPEC_GOTPCREL
15005 && XINT (term, 1) != UNSPEC_PCREL))
15006 return x;
15007
15008 return XVECEXP (term, 0, 0);
15009 }
15010
15011 return ix86_delegitimize_address (x);
15012 }
15013 \f
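/* Print to FILE the assembler condition-code suffix for comparison CODE in
   mode MODE.  If REVERSE is true, print the suffix for the reversed
   condition.  FP selects the suffix spellings needed after floating-point
   compares (e.g. "nbe" instead of "a").  */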
15014 static void
15015 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
15016 bool fp, FILE *file)
15017 {
15018 const char *suffix;
15019
15020 if (mode == CCFPmode || mode == CCFPUmode)
15021 {
15022 code = ix86_fp_compare_code_to_integer (code);
15023 mode = CCmode;
15024 }
15025 if (reverse)
15026 code = reverse_condition (code);
15027
15028 switch (code)
15029 {
15030 case EQ:
15031 switch (mode)
15032 {
15033 case CCAmode:
15034 suffix = "a";
15035 break;
15036 case CCCmode:
15037 suffix = "c";
15038 break;
15039 case CCOmode:
15040 suffix = "o";
15041 break;
15042 case CCPmode:
15043 suffix = "p";
15044 break;
15045 case CCSmode:
15046 suffix = "s";
15047 break;
15048 default:
15049 suffix = "e";
15050 break;
15051 }
15052 break;
15053 case NE:
15054 switch (mode)
15055 {
15056 case CCAmode:
15057 suffix = "na";
15058 break;
15059 case CCCmode:
15060 suffix = "nc";
15061 break;
15062 case CCOmode:
15063 suffix = "no";
15064 break;
15065 case CCPmode:
15066 suffix = "np";
15067 break;
15068 case CCSmode:
15069 suffix = "ns";
15070 break;
15071 default:
15072 suffix = "ne";
15073 break;
15074 }
15075 break;
15076 case GT:
15077 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
15078 suffix = "g";
15079 break;
15080 case GTU:
15081 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
15082 Those same assemblers have the same but opposite lossage on cmov. */
15083 if (mode == CCmode)
15084 suffix = fp ? "nbe" : "a";
15085 else
15086 gcc_unreachable ();
15087 break;
15088 case LT:
15089 switch (mode)
15090 {
15091 case CCNOmode:
15092 case CCGOCmode:
15093 suffix = "s";
15094 break;
15095
15096 case CCmode:
15097 case CCGCmode:
15098 suffix = "l";
15099 break;
15100
15101 default:
15102 gcc_unreachable ();
15103 }
15104 break;
15105 case LTU:
15106 if (mode == CCmode)
15107 suffix = "b";
15108 else if (mode == CCCmode)
15109 suffix = fp ? "b" : "c";
15110 else
15111 gcc_unreachable ();
15112 break;
15113 case GE:
15114 switch (mode)
15115 {
15116 case CCNOmode:
15117 case CCGOCmode:
15118 suffix = "ns";
15119 break;
15120
15121 case CCmode:
15122 case CCGCmode:
15123 suffix = "ge";
15124 break;
15125
15126 default:
15127 gcc_unreachable ();
15128 }
15129 break;
15130 case GEU:
15131 if (mode == CCmode)
15132 suffix = "nb";
15133 else if (mode == CCCmode)
15134 suffix = fp ? "nb" : "nc";
15135 else
15136 gcc_unreachable ();
15137 break;
15138 case LE:
15139 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15140 suffix = "le";
15141 break;
15142 case LEU:
15143 if (mode == CCmode)
15144 suffix = "be";
15145 else
15146 gcc_unreachable ();
15147 break;
15148 case UNORDERED:
15149 suffix = fp ? "u" : "p";
15150 break;
15151 case ORDERED:
15152 suffix = fp ? "nu" : "np";
15153 break;
15154 default:
15155 gcc_unreachable ();
15156 }
15157 fputs (suffix, file);
15158 }
15159
15160 /* Print the name of register X to FILE based on its machine mode and number.
15161 If CODE is 'w', pretend the mode is HImode.
15162 If CODE is 'b', pretend the mode is QImode.
15163 If CODE is 'k', pretend the mode is SImode.
15164 If CODE is 'q', pretend the mode is DImode.
15165 If CODE is 'x', pretend the mode is V4SFmode.
15166 If CODE is 't', pretend the mode is V8SFmode.
15167 If CODE is 'g', pretend the mode is V16SFmode.
15168 If CODE is 'h', pretend the reg is the 'high' byte register.
15169 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
15170 If CODE is 'd', duplicate the operand for AVX instruction.
15171 */
15172
15173 void
15174 print_reg (rtx x, int code, FILE *file)
15175 {
15176 const char *reg;
15177 int msize;
15178 unsigned int regno;
15179 bool duplicated;
15180
15181 if (ASSEMBLER_DIALECT == ASM_ATT)
15182 putc ('%', file);
15183
15184 if (x == pc_rtx)
15185 {
15186 gcc_assert (TARGET_64BIT);
15187 fputs ("rip", file);
15188 return;
15189 }
15190
15191 if (code == 'y' && STACK_TOP_P (x))
15192 {
15193 fputs ("st(0)", file);
15194 return;
15195 }
15196
15197 if (code == 'w')
15198 msize = 2;
15199 else if (code == 'b')
15200 msize = 1;
15201 else if (code == 'k')
15202 msize = 4;
15203 else if (code == 'q')
15204 msize = 8;
15205 else if (code == 'h')
15206 msize = 0;
15207 else if (code == 'x')
15208 msize = 16;
15209 else if (code == 't')
15210 msize = 32;
15211 else if (code == 'g')
15212 msize = 64;
15213 else
15214 msize = GET_MODE_SIZE (GET_MODE (x));
15215
15216 regno = true_regnum (x);
15217
15218 gcc_assert (regno != ARG_POINTER_REGNUM
15219 && regno != FRAME_POINTER_REGNUM
15220 && regno != FLAGS_REG
15221 && regno != FPSR_REG
15222 && regno != FPCR_REG);
15223
15224 duplicated = code == 'd' && TARGET_AVX;
15225
15226 switch (msize)
15227 {
15228 case 8:
15229 case 4:
15230 if (LEGACY_INT_REGNO_P (regno))
15231 putc (msize == 8 ? 'r' : 'e', file);
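      /* FALLTHRU */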
15232 case 16:
15233 case 12:
15234 case 2:
15235 normal:
15236 reg = hi_reg_name[regno];
15237 break;
15238 case 1:
15239 if (regno >= ARRAY_SIZE (qi_reg_name))
15240 goto normal;
15241 reg = qi_reg_name[regno];
15242 break;
15243 case 0:
15244 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15245 goto normal;
15246 reg = qi_high_reg_name[regno];
15247 break;
15248 case 32:
15249 case 64:
15250 if (SSE_REGNO_P (regno))
15251 {
15252 gcc_assert (!duplicated);
15253 putc (msize == 32 ? 'y' : 'z', file);
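	  /* hi_reg_name holds the "xmmN" spelling; skip its leading 'x' so
	     that, together with the 'y' or 'z' just printed, the output
	     reads "ymmN" or "zmmN".  */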
15254 reg = hi_reg_name[regno] + 1;
15255 break;
15256 }
15257 goto normal;
15258 default:
15259 gcc_unreachable ();
15260 }
15261
15262 fputs (reg, file);
15263
15264   /* Irritatingly, AMD extended registers use a
15265      different naming convention: "r%d[bwd]".  */
15266 if (REX_INT_REGNO_P (regno))
15267 {
15268 gcc_assert (TARGET_64BIT);
15269 switch (msize)
15270 {
15271 case 0:
15272 error ("extended registers have no high halves");
15273 break;
15274 case 1:
15275 putc ('b', file);
15276 break;
15277 case 2:
15278 putc ('w', file);
15279 break;
15280 case 4:
15281 putc ('d', file);
15282 break;
15283 case 8:
15284 /* no suffix */
15285 break;
15286 default:
15287 error ("unsupported operand size for extended register");
15288 break;
15289 }
15290 return;
15291 }
15292
15293 if (duplicated)
15294 {
15295 if (ASSEMBLER_DIALECT == ASM_ATT)
15296 fprintf (file, ", %%%s", reg);
15297 else
15298 fprintf (file, ", %s", reg);
15299 }
15300 }
15301
15302 /* Meaning of CODE:
15303 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15304 C -- print opcode suffix for set/cmov insn.
15305 c -- like C, but print reversed condition
15306 F,f -- likewise, but for floating-point.
15307 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15308 otherwise nothing
15309    R -- print embedded rounding and sae.
15310 r -- print only sae.
15311 z -- print the opcode suffix for the size of the current operand.
15312 Z -- likewise, with special suffixes for x87 instructions.
15313 * -- print a star (in certain assembler syntax)
15314 A -- print an absolute memory reference.
15315 E -- print address with DImode register names if TARGET_64BIT.
15316 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15317    s -- print a shift double count, followed by the assembler's argument
15318 delimiter.
15319 b -- print the QImode name of the register for the indicated operand.
15320 %b0 would print %al if operands[0] is reg 0.
15321 w -- likewise, print the HImode name of the register.
15322 k -- likewise, print the SImode name of the register.
15323 q -- likewise, print the DImode name of the register.
15324 x -- likewise, print the V4SFmode name of the register.
15325 t -- likewise, print the V8SFmode name of the register.
15326 g -- likewise, print the V16SFmode name of the register.
15327 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15328 y -- print "st(0)" instead of "st" as a register.
15329 d -- print duplicated register operand for AVX instruction.
15330 D -- print condition for SSE cmp instruction.
15331 P -- if PIC, print an @PLT suffix.
15332 p -- print raw symbol name.
15333 X -- don't print any sort of PIC '@' suffix for a symbol.
15334 & -- print some in-use local-dynamic symbol name.
15335 H -- print a memory address offset by 8; used for sse high-parts
15336 Y -- print condition for XOP pcom* instruction.
15337 + -- print a branch hint as 'cs' or 'ds' prefix
15338 ; -- print a semicolon (after prefixes due to bug in older gas).
15339 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15340 @ -- print a segment register of thread base pointer load
15341 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15342 ! -- print MPX prefix for jxx/call/ret instructions if required.
15343 */
15344
15345 void
15346 ix86_print_operand (FILE *file, rtx x, int code)
15347 {
15348 if (code)
15349 {
15350 switch (code)
15351 {
15352 case 'A':
15353 switch (ASSEMBLER_DIALECT)
15354 {
15355 case ASM_ATT:
15356 putc ('*', file);
15357 break;
15358
15359 case ASM_INTEL:
15360 	  /* Intel syntax.  For absolute addresses, registers should not
15361 	     be surrounded by brackets.  */
15362 if (!REG_P (x))
15363 {
15364 putc ('[', file);
15365 ix86_print_operand (file, x, 0);
15366 putc (']', file);
15367 return;
15368 }
15369 break;
15370
15371 default:
15372 gcc_unreachable ();
15373 }
15374
15375 ix86_print_operand (file, x, 0);
15376 return;
15377
15378 case 'E':
15379 /* Wrap address in an UNSPEC to declare special handling. */
15380 if (TARGET_64BIT)
15381 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15382
15383 output_address (x);
15384 return;
15385
15386 case 'L':
15387 if (ASSEMBLER_DIALECT == ASM_ATT)
15388 putc ('l', file);
15389 return;
15390
15391 case 'W':
15392 if (ASSEMBLER_DIALECT == ASM_ATT)
15393 putc ('w', file);
15394 return;
15395
15396 case 'B':
15397 if (ASSEMBLER_DIALECT == ASM_ATT)
15398 putc ('b', file);
15399 return;
15400
15401 case 'Q':
15402 if (ASSEMBLER_DIALECT == ASM_ATT)
15403 putc ('l', file);
15404 return;
15405
15406 case 'S':
15407 if (ASSEMBLER_DIALECT == ASM_ATT)
15408 putc ('s', file);
15409 return;
15410
15411 case 'T':
15412 if (ASSEMBLER_DIALECT == ASM_ATT)
15413 putc ('t', file);
15414 return;
15415
15416 case 'O':
15417 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15418 if (ASSEMBLER_DIALECT != ASM_ATT)
15419 return;
15420
15421 switch (GET_MODE_SIZE (GET_MODE (x)))
15422 {
15423 case 2:
15424 putc ('w', file);
15425 break;
15426
15427 case 4:
15428 putc ('l', file);
15429 break;
15430
15431 case 8:
15432 putc ('q', file);
15433 break;
15434
15435 default:
15436 output_operand_lossage
15437 ("invalid operand size for operand code 'O'");
15438 return;
15439 }
15440
15441 putc ('.', file);
15442 #endif
15443 return;
15444
15445 case 'z':
15446 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15447 {
15448 	      /* Opcodes don't get size suffixes if using Intel syntax.  */
15449 if (ASSEMBLER_DIALECT == ASM_INTEL)
15450 return;
15451
15452 switch (GET_MODE_SIZE (GET_MODE (x)))
15453 {
15454 case 1:
15455 putc ('b', file);
15456 return;
15457
15458 case 2:
15459 putc ('w', file);
15460 return;
15461
15462 case 4:
15463 putc ('l', file);
15464 return;
15465
15466 case 8:
15467 putc ('q', file);
15468 return;
15469
15470 default:
15471 output_operand_lossage
15472 ("invalid operand size for operand code 'z'");
15473 return;
15474 }
15475 }
15476
15477 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15478 warning
15479 (0, "non-integer operand used with operand code 'z'");
15480 /* FALLTHRU */
15481
15482 case 'Z':
15483 /* 387 opcodes don't get size suffixes if using Intel syntax. */
15484 if (ASSEMBLER_DIALECT == ASM_INTEL)
15485 return;
15486
15487 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15488 {
15489 switch (GET_MODE_SIZE (GET_MODE (x)))
15490 {
15491 case 2:
15492 #ifdef HAVE_AS_IX86_FILDS
15493 putc ('s', file);
15494 #endif
15495 return;
15496
15497 case 4:
15498 putc ('l', file);
15499 return;
15500
15501 case 8:
15502 #ifdef HAVE_AS_IX86_FILDQ
15503 putc ('q', file);
15504 #else
15505 fputs ("ll", file);
15506 #endif
15507 return;
15508
15509 default:
15510 break;
15511 }
15512 }
15513 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15514 {
15515 /* 387 opcodes don't get size suffixes
15516 if the operands are registers. */
15517 if (STACK_REG_P (x))
15518 return;
15519
15520 switch (GET_MODE_SIZE (GET_MODE (x)))
15521 {
15522 case 4:
15523 putc ('s', file);
15524 return;
15525
15526 case 8:
15527 putc ('l', file);
15528 return;
15529
15530 case 12:
15531 case 16:
15532 putc ('t', file);
15533 return;
15534
15535 default:
15536 break;
15537 }
15538 }
15539 else
15540 {
15541 output_operand_lossage
15542 ("invalid operand type used with operand code 'Z'");
15543 return;
15544 }
15545
15546 output_operand_lossage
15547 ("invalid operand size for operand code 'Z'");
15548 return;
15549
15550 case 'd':
15551 case 'b':
15552 case 'w':
15553 case 'k':
15554 case 'q':
15555 case 'h':
15556 case 't':
15557 case 'g':
15558 case 'y':
15559 case 'x':
15560 case 'X':
15561 case 'P':
15562 case 'p':
15563 break;
15564
15565 case 's':
15566 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15567 {
15568 ix86_print_operand (file, x, 0);
15569 fputs (", ", file);
15570 }
15571 return;
15572
15573 case 'Y':
15574 switch (GET_CODE (x))
15575 {
15576 case NE:
15577 fputs ("neq", file);
15578 break;
15579 case EQ:
15580 fputs ("eq", file);
15581 break;
15582 case GE:
15583 case GEU:
15584 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15585 break;
15586 case GT:
15587 case GTU:
15588 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15589 break;
15590 case LE:
15591 case LEU:
15592 fputs ("le", file);
15593 break;
15594 case LT:
15595 case LTU:
15596 fputs ("lt", file);
15597 break;
15598 case UNORDERED:
15599 fputs ("unord", file);
15600 break;
15601 case ORDERED:
15602 fputs ("ord", file);
15603 break;
15604 case UNEQ:
15605 fputs ("ueq", file);
15606 break;
15607 case UNGE:
15608 fputs ("nlt", file);
15609 break;
15610 case UNGT:
15611 fputs ("nle", file);
15612 break;
15613 case UNLE:
15614 fputs ("ule", file);
15615 break;
15616 case UNLT:
15617 fputs ("ult", file);
15618 break;
15619 case LTGT:
15620 fputs ("une", file);
15621 break;
15622 default:
15623 output_operand_lossage ("operand is not a condition code, "
15624 "invalid operand code 'Y'");
15625 return;
15626 }
15627 return;
15628
15629 case 'D':
15630 /* A little bit of braindamage here. The SSE compare instructions
15631 use completely different names for the comparisons than the
15632 fp conditional moves do. */
15633 switch (GET_CODE (x))
15634 {
15635 case UNEQ:
15636 if (TARGET_AVX)
15637 {
15638 fputs ("eq_us", file);
15639 break;
15640 }
15641 case EQ:
15642 fputs ("eq", file);
15643 break;
15644 case UNLT:
15645 if (TARGET_AVX)
15646 {
15647 fputs ("nge", file);
15648 break;
15649 }
15650 case LT:
15651 fputs ("lt", file);
15652 break;
15653 case UNLE:
15654 if (TARGET_AVX)
15655 {
15656 fputs ("ngt", file);
15657 break;
15658 }
15659 case LE:
15660 fputs ("le", file);
15661 break;
15662 case UNORDERED:
15663 fputs ("unord", file);
15664 break;
15665 case LTGT:
15666 if (TARGET_AVX)
15667 {
15668 fputs ("neq_oq", file);
15669 break;
15670 }
15671 case NE:
15672 fputs ("neq", file);
15673 break;
15674 case GE:
15675 if (TARGET_AVX)
15676 {
15677 fputs ("ge", file);
15678 break;
15679 }
15680 case UNGE:
15681 fputs ("nlt", file);
15682 break;
15683 case GT:
15684 if (TARGET_AVX)
15685 {
15686 fputs ("gt", file);
15687 break;
15688 }
15689 case UNGT:
15690 fputs ("nle", file);
15691 break;
15692 case ORDERED:
15693 fputs ("ord", file);
15694 break;
15695 default:
15696 output_operand_lossage ("operand is not a condition code, "
15697 "invalid operand code 'D'");
15698 return;
15699 }
15700 return;
15701
15702 case 'F':
15703 case 'f':
15704 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15705 if (ASSEMBLER_DIALECT == ASM_ATT)
15706 putc ('.', file);
15707 #endif
15708
15709 case 'C':
15710 case 'c':
15711 if (!COMPARISON_P (x))
15712 {
15713 output_operand_lossage ("operand is not a condition code, "
15714 "invalid operand code '%c'", code);
15715 return;
15716 }
15717 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15718 code == 'c' || code == 'f',
15719 code == 'F' || code == 'f',
15720 file);
15721 return;
15722
15723 case 'H':
15724 if (!offsettable_memref_p (x))
15725 {
15726 output_operand_lossage ("operand is not an offsettable memory "
15727 "reference, invalid operand code 'H'");
15728 return;
15729 }
15730 /* It doesn't actually matter what mode we use here, as we're
15731 only going to use this for printing. */
15732 x = adjust_address_nv (x, DImode, 8);
15733 /* Output 'qword ptr' for intel assembler dialect. */
15734 if (ASSEMBLER_DIALECT == ASM_INTEL)
15735 code = 'q';
15736 break;
15737
15738 case 'K':
15739 gcc_assert (CONST_INT_P (x));
15740
15741 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15742 #ifdef HAVE_AS_IX86_HLE
15743 fputs ("xacquire ", file);
15744 #else
15745 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15746 #endif
15747 else if (INTVAL (x) & IX86_HLE_RELEASE)
15748 #ifdef HAVE_AS_IX86_HLE
15749 fputs ("xrelease ", file);
15750 #else
15751 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15752 #endif
15753 /* We do not want to print the value of the operand. */
15754 return;
15755
15756 case 'N':
15757 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15758 fputs ("{z}", file);
15759 return;
15760
15761 case 'r':
15762 gcc_assert (CONST_INT_P (x));
15763 gcc_assert (INTVAL (x) == ROUND_SAE);
15764
15765 if (ASSEMBLER_DIALECT == ASM_INTEL)
15766 fputs (", ", file);
15767
15768 fputs ("{sae}", file);
15769
15770 if (ASSEMBLER_DIALECT == ASM_ATT)
15771 fputs (", ", file);
15772
15773 return;
15774
15775 case 'R':
15776 gcc_assert (CONST_INT_P (x));
15777
15778 if (ASSEMBLER_DIALECT == ASM_INTEL)
15779 fputs (", ", file);
15780
15781 switch (INTVAL (x))
15782 {
15783 case ROUND_NEAREST_INT | ROUND_SAE:
15784 fputs ("{rn-sae}", file);
15785 break;
15786 case ROUND_NEG_INF | ROUND_SAE:
15787 fputs ("{rd-sae}", file);
15788 break;
15789 case ROUND_POS_INF | ROUND_SAE:
15790 fputs ("{ru-sae}", file);
15791 break;
15792 case ROUND_ZERO | ROUND_SAE:
15793 fputs ("{rz-sae}", file);
15794 break;
15795 default:
15796 gcc_unreachable ();
15797 }
15798
15799 if (ASSEMBLER_DIALECT == ASM_ATT)
15800 fputs (", ", file);
15801
15802 return;
15803
15804 case '*':
15805 if (ASSEMBLER_DIALECT == ASM_ATT)
15806 putc ('*', file);
15807 return;
15808
15809 case '&':
15810 {
15811 const char *name = get_some_local_dynamic_name ();
15812 if (name == NULL)
15813 output_operand_lossage ("'%%&' used without any "
15814 "local dynamic TLS references");
15815 else
15816 assemble_name (file, name);
15817 return;
15818 }
15819
15820 case '+':
15821 {
15822 rtx x;
15823
15824 if (!optimize
15825 || optimize_function_for_size_p (cfun)
15826 || !TARGET_BRANCH_PREDICTION_HINTS)
15827 return;
15828
15829 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15830 if (x)
15831 {
15832 int pred_val = XINT (x, 0);
15833
15834 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15835 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15836 {
15837 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15838 bool cputaken
15839 = final_forward_branch_p (current_output_insn) == 0;
15840
15841 /* Emit hints only when the default branch prediction
15842 heuristics would fail. */
15843 if (taken != cputaken)
15844 {
15845 /* We use 3e (DS) prefix for taken branches and
15846 2e (CS) prefix for not taken branches. */
15847 if (taken)
15848 fputs ("ds ; ", file);
15849 else
15850 fputs ("cs ; ", file);
15851 }
15852 }
15853 }
15854 return;
15855 }
15856
15857 case ';':
15858 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15859 putc (';', file);
15860 #endif
15861 return;
15862
15863 case '@':
15864 if (ASSEMBLER_DIALECT == ASM_ATT)
15865 putc ('%', file);
15866
15867 /* The kernel uses a different segment register for performance
15868 reasons; a system call would not have to trash the userspace
15869 segment register, which would be expensive. */
15870 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15871 fputs ("fs", file);
15872 else
15873 fputs ("gs", file);
15874 return;
15875
15876 case '~':
15877 putc (TARGET_AVX2 ? 'i' : 'f', file);
15878 return;
15879
15880 case '^':
15881 if (TARGET_64BIT && Pmode != word_mode)
15882 fputs ("addr32 ", file);
15883 return;
15884
15885 case '!':
15886 if (ix86_bnd_prefixed_insn_p (current_output_insn))
15887 fputs ("bnd ", file);
15888 return;
15889
15890 default:
15891 output_operand_lossage ("invalid operand code '%c'", code);
15892 }
15893 }
15894
15895 if (REG_P (x))
15896 print_reg (x, code, file);
15897
15898 else if (MEM_P (x))
15899 {
15900 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15901 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15902 && GET_MODE (x) != BLKmode)
15903 {
15904 const char * size;
15905 switch (GET_MODE_SIZE (GET_MODE (x)))
15906 {
15907 case 1: size = "BYTE"; break;
15908 case 2: size = "WORD"; break;
15909 case 4: size = "DWORD"; break;
15910 case 8: size = "QWORD"; break;
15911 case 12: size = "TBYTE"; break;
15912 case 16:
15913 if (GET_MODE (x) == XFmode)
15914 size = "TBYTE";
15915 else
15916 size = "XMMWORD";
15917 break;
15918 case 32: size = "YMMWORD"; break;
15919 case 64: size = "ZMMWORD"; break;
15920 default:
15921 gcc_unreachable ();
15922 }
15923
15924 /* Check for explicit size override (codes 'b', 'w', 'k',
15925 'q' and 'x') */
15926 if (code == 'b')
15927 size = "BYTE";
15928 else if (code == 'w')
15929 size = "WORD";
15930 else if (code == 'k')
15931 size = "DWORD";
15932 else if (code == 'q')
15933 size = "QWORD";
15934 else if (code == 'x')
15935 size = "XMMWORD";
15936
15937 fputs (size, file);
15938 fputs (" PTR ", file);
15939 }
15940
15941 x = XEXP (x, 0);
15942 /* Avoid (%rip) for call operands. */
15943 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15944 && !CONST_INT_P (x))
15945 output_addr_const (file, x);
15946 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15947 output_operand_lossage ("invalid constraints for operand");
15948 else
15949 output_address (x);
15950 }
15951
15952 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
15953 {
15954 REAL_VALUE_TYPE r;
15955 long l;
15956
15957 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15958 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15959
15960 if (ASSEMBLER_DIALECT == ASM_ATT)
15961 putc ('$', file);
15962 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15963 if (code == 'q')
15964 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15965 (unsigned long long) (int) l);
15966 else
15967 fprintf (file, "0x%08x", (unsigned int) l);
15968 }
15969
15970 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
15971 {
15972 REAL_VALUE_TYPE r;
15973 long l[2];
15974
15975 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15976 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15977
15978 if (ASSEMBLER_DIALECT == ASM_ATT)
15979 putc ('$', file);
15980 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15981 }
15982
15983 /* These float cases don't actually occur as immediate operands. */
15984 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
15985 {
15986 char dstr[30];
15987
15988 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15989 fputs (dstr, file);
15990 }
15991
15992 else
15993 {
15994 /* We have patterns that allow zero sets of memory, for instance.
15995 In 64-bit mode, we should probably support all 8-byte vectors,
15996 since we can in fact encode that into an immediate. */
15997 if (GET_CODE (x) == CONST_VECTOR)
15998 {
15999 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
16000 x = const0_rtx;
16001 }
16002
16003 if (code != 'P' && code != 'p')
16004 {
16005 if (CONST_INT_P (x))
16006 {
16007 if (ASSEMBLER_DIALECT == ASM_ATT)
16008 putc ('$', file);
16009 }
16010 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
16011 || GET_CODE (x) == LABEL_REF)
16012 {
16013 if (ASSEMBLER_DIALECT == ASM_ATT)
16014 putc ('$', file);
16015 else
16016 fputs ("OFFSET FLAT:", file);
16017 }
16018 }
16019 if (CONST_INT_P (x))
16020 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16021 else if (flag_pic || MACHOPIC_INDIRECT)
16022 output_pic_addr_const (file, x, code);
16023 else
16024 output_addr_const (file, x);
16025 }
16026 }
16027
16028 static bool
16029 ix86_print_operand_punct_valid_p (unsigned char code)
16030 {
16031 return (code == '@' || code == '*' || code == '+' || code == '&'
16032 || code == ';' || code == '~' || code == '^' || code == '!');
16033 }
16034 \f
16035 /* Print a memory operand whose address is ADDR. */
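/* As a rough illustration of the two dialects handled below: an address
   with base %rbx, index %rcx, scale 4 and displacement 8 comes out as
   "8(%rbx,%rcx,4)" in AT&T syntax and as "[rbx+8+rcx*4]" in Intel
   syntax.  */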
16036
16037 static void
16038 ix86_print_operand_address (FILE *file, rtx addr)
16039 {
16040 struct ix86_address parts;
16041 rtx base, index, disp;
16042 int scale;
16043 int ok;
16044 bool vsib = false;
16045 int code = 0;
16046
16047 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
16048 {
16049 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16050 gcc_assert (parts.index == NULL_RTX);
16051 parts.index = XVECEXP (addr, 0, 1);
16052 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
16053 addr = XVECEXP (addr, 0, 0);
16054 vsib = true;
16055 }
16056 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
16057 {
16058 gcc_assert (TARGET_64BIT);
16059 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16060 code = 'q';
16061 }
16062 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
16063 {
16064 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
16065 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
16066 if (parts.base != NULL_RTX)
16067 {
16068 parts.index = parts.base;
16069 parts.scale = 1;
16070 }
16071 parts.base = XVECEXP (addr, 0, 0);
16072 addr = XVECEXP (addr, 0, 0);
16073 }
16074 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
16075 {
16076 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16077 gcc_assert (parts.index == NULL_RTX);
16078 parts.index = XVECEXP (addr, 0, 1);
16079 addr = XVECEXP (addr, 0, 0);
16080 }
16081 else
16082 ok = ix86_decompose_address (addr, &parts);
16083
16084 gcc_assert (ok);
16085
16086 base = parts.base;
16087 index = parts.index;
16088 disp = parts.disp;
16089 scale = parts.scale;
16090
16091 switch (parts.seg)
16092 {
16093 case SEG_DEFAULT:
16094 break;
16095 case SEG_FS:
16096 case SEG_GS:
16097 if (ASSEMBLER_DIALECT == ASM_ATT)
16098 putc ('%', file);
16099 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16100 break;
16101 default:
16102 gcc_unreachable ();
16103 }
16104
16105 /* Use one byte shorter RIP relative addressing for 64bit mode. */
16106 if (TARGET_64BIT && !base && !index)
16107 {
16108 rtx symbol = disp;
16109
16110 if (GET_CODE (disp) == CONST
16111 && GET_CODE (XEXP (disp, 0)) == PLUS
16112 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16113 symbol = XEXP (XEXP (disp, 0), 0);
16114
16115 if (GET_CODE (symbol) == LABEL_REF
16116 || (GET_CODE (symbol) == SYMBOL_REF
16117 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16118 base = pc_rtx;
16119 }
16120 if (!base && !index)
16121 {
16122 /* A displacement-only address requires special attention. */
16123
16124 if (CONST_INT_P (disp))
16125 {
16126 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16127 fputs ("ds:", file);
16128 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16129 }
16130 else if (flag_pic)
16131 output_pic_addr_const (file, disp, 0);
16132 else
16133 output_addr_const (file, disp);
16134 }
16135 else
16136 {
16137 /* Print SImode register names to force addr32 prefix. */
16138 if (SImode_address_operand (addr, VOIDmode))
16139 {
16140 #ifdef ENABLE_CHECKING
16141 gcc_assert (TARGET_64BIT);
16142 switch (GET_CODE (addr))
16143 {
16144 case SUBREG:
16145 gcc_assert (GET_MODE (addr) == SImode);
16146 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16147 break;
16148 case ZERO_EXTEND:
16149 case AND:
16150 gcc_assert (GET_MODE (addr) == DImode);
16151 break;
16152 default:
16153 gcc_unreachable ();
16154 }
16155 #endif
16156 gcc_assert (!code);
16157 code = 'k';
16158 }
16159 else if (code == 0
16160 && TARGET_X32
16161 && disp
16162 && CONST_INT_P (disp)
16163 && INTVAL (disp) < -16*1024*1024)
16164 {
16165 /* X32 runs in 64-bit mode, where displacement, DISP, in
16166 address DISP(%r64), is encoded as 32-bit immediate sign-
16167 extended from 32-bit to 64-bit. For -0x40000300(%r64),
16168 address is %r64 + 0xffffffffbffffd00. When %r64 <
16169 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16170 which is invalid for x32. The correct address is %r64
16171 - 0x40000300 == 0xf7ffdd64. To properly encode
16172 -0x40000300(%r64) for x32, we zero-extend negative
16173 displacement by forcing addr32 prefix which truncates
16174 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
16175 zero-extend all negative displacements, including -1(%rsp).
16176 However, for small negative displacements, sign-extension
16177 won't cause overflow. We only zero-extend negative
16178 displacements if they are < -16*1024*1024, which is also used
16179 to check legitimate address displacements for PIC. */
16180 code = 'k';
16181 }
16182
16183 if (ASSEMBLER_DIALECT == ASM_ATT)
16184 {
16185 if (disp)
16186 {
16187 if (flag_pic)
16188 output_pic_addr_const (file, disp, 0);
16189 else if (GET_CODE (disp) == LABEL_REF)
16190 output_asm_label (disp);
16191 else
16192 output_addr_const (file, disp);
16193 }
16194
16195 putc ('(', file);
16196 if (base)
16197 print_reg (base, code, file);
16198 if (index)
16199 {
16200 putc (',', file);
16201 print_reg (index, vsib ? 0 : code, file);
16202 if (scale != 1 || vsib)
16203 fprintf (file, ",%d", scale);
16204 }
16205 putc (')', file);
16206 }
16207 else
16208 {
16209 rtx offset = NULL_RTX;
16210
16211 if (disp)
16212 {
16213 /* Pull out the offset of a symbol; print any symbol itself. */
16214 if (GET_CODE (disp) == CONST
16215 && GET_CODE (XEXP (disp, 0)) == PLUS
16216 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16217 {
16218 offset = XEXP (XEXP (disp, 0), 1);
16219 disp = gen_rtx_CONST (VOIDmode,
16220 XEXP (XEXP (disp, 0), 0));
16221 }
16222
16223 if (flag_pic)
16224 output_pic_addr_const (file, disp, 0);
16225 else if (GET_CODE (disp) == LABEL_REF)
16226 output_asm_label (disp);
16227 else if (CONST_INT_P (disp))
16228 offset = disp;
16229 else
16230 output_addr_const (file, disp);
16231 }
16232
16233 putc ('[', file);
16234 if (base)
16235 {
16236 print_reg (base, code, file);
16237 if (offset)
16238 {
16239 if (INTVAL (offset) >= 0)
16240 putc ('+', file);
16241 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16242 }
16243 }
16244 else if (offset)
16245 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16246 else
16247 putc ('0', file);
16248
16249 if (index)
16250 {
16251 putc ('+', file);
16252 print_reg (index, vsib ? 0 : code, file);
16253 if (scale != 1 || vsib)
16254 fprintf (file, "*%d", scale);
16255 }
16256 putc (']', file);
16257 }
16258 }
16259 }
16260
16261 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
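/* For example, (unspec [(symbol_ref "x")] UNSPEC_NTPOFF) is printed as
   "x@tpoff" for 64-bit targets and as "x@ntpoff" for 32-bit targets,
   matching the TLS relocation operators expected by the assembler.  */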
16262
16263 static bool
16264 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16265 {
16266 rtx op;
16267
16268 if (GET_CODE (x) != UNSPEC)
16269 return false;
16270
16271 op = XVECEXP (x, 0, 0);
16272 switch (XINT (x, 1))
16273 {
16274 case UNSPEC_GOTTPOFF:
16275 output_addr_const (file, op);
16276 /* FIXME: This might be @TPOFF in Sun ld. */
16277 fputs ("@gottpoff", file);
16278 break;
16279 case UNSPEC_TPOFF:
16280 output_addr_const (file, op);
16281 fputs ("@tpoff", file);
16282 break;
16283 case UNSPEC_NTPOFF:
16284 output_addr_const (file, op);
16285 if (TARGET_64BIT)
16286 fputs ("@tpoff", file);
16287 else
16288 fputs ("@ntpoff", file);
16289 break;
16290 case UNSPEC_DTPOFF:
16291 output_addr_const (file, op);
16292 fputs ("@dtpoff", file);
16293 break;
16294 case UNSPEC_GOTNTPOFF:
16295 output_addr_const (file, op);
16296 if (TARGET_64BIT)
16297 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16298 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16299 else
16300 fputs ("@gotntpoff", file);
16301 break;
16302 case UNSPEC_INDNTPOFF:
16303 output_addr_const (file, op);
16304 fputs ("@indntpoff", file);
16305 break;
16306 #if TARGET_MACHO
16307 case UNSPEC_MACHOPIC_OFFSET:
16308 output_addr_const (file, op);
16309 putc ('-', file);
16310 machopic_output_function_base_name (file);
16311 break;
16312 #endif
16313
16314 case UNSPEC_STACK_CHECK:
16315 {
16316 int offset;
16317
16318 gcc_assert (flag_split_stack);
16319
16320 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16321 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16322 #else
16323 gcc_unreachable ();
16324 #endif
16325
16326 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16327 }
16328 break;
16329
16330 default:
16331 return false;
16332 }
16333
16334 return true;
16335 }
16336 \f
16337 /* Split one or more double-mode RTL references into pairs of half-mode
16338 references. The RTL can be REG, offsettable MEM, integer constant, or
16339 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16340 split and "num" is its length. lo_half and hi_half are output arrays
16341 that parallel "operands". */
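/* For instance, a DImode MEM split on a 32-bit target yields two SImode
   MEMs at offsets 0 and 4, while a DImode register or constant is split
   via simplify_gen_subreg into its low and high SImode words.  */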
16342
16343 void
16344 split_double_mode (machine_mode mode, rtx operands[],
16345 int num, rtx lo_half[], rtx hi_half[])
16346 {
16347 machine_mode half_mode;
16348 unsigned int byte;
16349
16350 switch (mode)
16351 {
16352 case TImode:
16353 half_mode = DImode;
16354 break;
16355 case DImode:
16356 half_mode = SImode;
16357 break;
16358 default:
16359 gcc_unreachable ();
16360 }
16361
16362 byte = GET_MODE_SIZE (half_mode);
16363
16364 while (num--)
16365 {
16366 rtx op = operands[num];
16367
16368 /* simplify_subreg refuses to split volatile memory addresses,
16369 but we still have to handle them. */
16370 if (MEM_P (op))
16371 {
16372 lo_half[num] = adjust_address (op, half_mode, 0);
16373 hi_half[num] = adjust_address (op, half_mode, byte);
16374 }
16375 else
16376 {
16377 lo_half[num] = simplify_gen_subreg (half_mode, op,
16378 GET_MODE (op) == VOIDmode
16379 ? mode : GET_MODE (op), 0);
16380 hi_half[num] = simplify_gen_subreg (half_mode, op,
16381 GET_MODE (op) == VOIDmode
16382 ? mode : GET_MODE (op), byte);
16383 }
16384 }
16385 }
16386 \f
16387 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16388 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16389 is the expression of the binary operation. The output may either be
16390 emitted here, or returned to the caller, like all output_* functions.
16391
16392 There is no guarantee that the operands are the same mode, as they
16393 might be within FLOAT or FLOAT_EXTEND expressions. */
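/* As a sketch of the common 387 case: for a PLUS with both operands on the
   stack, operands[0] == operands[1] == st(0), and no REG_DEAD note on
   operands[2], the code below returns "fadd\t{%y2, %0|%0, %y2}",
   i.e. st(0) = st(0) + st(r2).  */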
16394
16395 #ifndef SYSV386_COMPAT
16396 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16397 wants to fix the assemblers because that causes incompatibility
16398 with gcc. No-one wants to fix gcc because that causes
16399 incompatibility with assemblers... You can use the option of
16400 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16401 #define SYSV386_COMPAT 1
16402 #endif
16403
16404 const char *
16405 output_387_binary_op (rtx insn, rtx *operands)
16406 {
16407 static char buf[40];
16408 const char *p;
16409 const char *ssep;
16410 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16411
16412 #ifdef ENABLE_CHECKING
16413 /* Even if we do not want to check the inputs, this documents the input
16414 constraints, which helps in understanding the following code. */
16415 if (STACK_REG_P (operands[0])
16416 && ((REG_P (operands[1])
16417 && REGNO (operands[0]) == REGNO (operands[1])
16418 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16419 || (REG_P (operands[2])
16420 && REGNO (operands[0]) == REGNO (operands[2])
16421 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16422 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16423 ; /* ok */
16424 else
16425 gcc_assert (is_sse);
16426 #endif
16427
16428 switch (GET_CODE (operands[3]))
16429 {
16430 case PLUS:
16431 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16432 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16433 p = "fiadd";
16434 else
16435 p = "fadd";
16436 ssep = "vadd";
16437 break;
16438
16439 case MINUS:
16440 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16441 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16442 p = "fisub";
16443 else
16444 p = "fsub";
16445 ssep = "vsub";
16446 break;
16447
16448 case MULT:
16449 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16450 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16451 p = "fimul";
16452 else
16453 p = "fmul";
16454 ssep = "vmul";
16455 break;
16456
16457 case DIV:
16458 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16459 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16460 p = "fidiv";
16461 else
16462 p = "fdiv";
16463 ssep = "vdiv";
16464 break;
16465
16466 default:
16467 gcc_unreachable ();
16468 }
16469
16470 if (is_sse)
16471 {
16472 if (TARGET_AVX)
16473 {
16474 strcpy (buf, ssep);
16475 if (GET_MODE (operands[0]) == SFmode)
16476 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16477 else
16478 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16479 }
16480 else
16481 {
16482 strcpy (buf, ssep + 1);
16483 if (GET_MODE (operands[0]) == SFmode)
16484 strcat (buf, "ss\t{%2, %0|%0, %2}");
16485 else
16486 strcat (buf, "sd\t{%2, %0|%0, %2}");
16487 }
16488 return buf;
16489 }
16490 strcpy (buf, p);
16491
16492 switch (GET_CODE (operands[3]))
16493 {
16494 case MULT:
16495 case PLUS:
16496 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16497 std::swap (operands[1], operands[2]);
16498
16499 /* We know operands[0] == operands[1]. */
16500
16501 if (MEM_P (operands[2]))
16502 {
16503 p = "%Z2\t%2";
16504 break;
16505 }
16506
16507 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16508 {
16509 if (STACK_TOP_P (operands[0]))
16510 /* How is it that we are storing to a dead operand[2]?
16511 Well, presumably operands[1] is dead too. We can't
16512 store the result to st(0) as st(0) gets popped on this
16513 instruction. Instead store to operands[2] (which I
16514 think has to be st(1)). st(1) will be popped later.
16515 gcc <= 2.8.1 didn't have this check and generated
16516 assembly code that the Unixware assembler rejected. */
16517 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16518 else
16519 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16520 break;
16521 }
16522
16523 if (STACK_TOP_P (operands[0]))
16524 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16525 else
16526 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16527 break;
16528
16529 case MINUS:
16530 case DIV:
16531 if (MEM_P (operands[1]))
16532 {
16533 p = "r%Z1\t%1";
16534 break;
16535 }
16536
16537 if (MEM_P (operands[2]))
16538 {
16539 p = "%Z2\t%2";
16540 break;
16541 }
16542
16543 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16544 {
16545 #if SYSV386_COMPAT
16546 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16547 derived assemblers, confusingly reverse the direction of
16548 the operation for fsub{r} and fdiv{r} when the
16549 destination register is not st(0). The Intel assembler
16550 doesn't have this brain damage. Read !SYSV386_COMPAT to
16551 figure out what the hardware really does. */
16552 if (STACK_TOP_P (operands[0]))
16553 p = "{p\t%0, %2|rp\t%2, %0}";
16554 else
16555 p = "{rp\t%2, %0|p\t%0, %2}";
16556 #else
16557 if (STACK_TOP_P (operands[0]))
16558 /* As above for fmul/fadd, we can't store to st(0). */
16559 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16560 else
16561 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16562 #endif
16563 break;
16564 }
16565
16566 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16567 {
16568 #if SYSV386_COMPAT
16569 if (STACK_TOP_P (operands[0]))
16570 p = "{rp\t%0, %1|p\t%1, %0}";
16571 else
16572 p = "{p\t%1, %0|rp\t%0, %1}";
16573 #else
16574 if (STACK_TOP_P (operands[0]))
16575 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16576 else
16577 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16578 #endif
16579 break;
16580 }
16581
16582 if (STACK_TOP_P (operands[0]))
16583 {
16584 if (STACK_TOP_P (operands[1]))
16585 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16586 else
16587 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16588 break;
16589 }
16590 else if (STACK_TOP_P (operands[1]))
16591 {
16592 #if SYSV386_COMPAT
16593 p = "{\t%1, %0|r\t%0, %1}";
16594 #else
16595 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16596 #endif
16597 }
16598 else
16599 {
16600 #if SYSV386_COMPAT
16601 p = "{r\t%2, %0|\t%0, %2}";
16602 #else
16603 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16604 #endif
16605 }
16606 break;
16607
16608 default:
16609 gcc_unreachable ();
16610 }
16611
16612 strcat (buf, p);
16613 return buf;
16614 }
16615
16616 /* Check if a 256bit AVX register is referenced inside of EXP. */
16617
16618 static bool
16619 ix86_check_avx256_register (const_rtx exp)
16620 {
16621 if (GET_CODE (exp) == SUBREG)
16622 exp = SUBREG_REG (exp);
16623
16624 return (REG_P (exp)
16625 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16626 }
16627
16628 /* Return needed mode for entity in optimize_mode_switching pass. */
16629
16630 static int
16631 ix86_avx_u128_mode_needed (rtx_insn *insn)
16632 {
16633 if (CALL_P (insn))
16634 {
16635 rtx link;
16636
16637 /* Needed mode is set to AVX_U128_CLEAN if there are
16638 no 256bit modes used in function arguments. */
16639 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16640 link;
16641 link = XEXP (link, 1))
16642 {
16643 if (GET_CODE (XEXP (link, 0)) == USE)
16644 {
16645 rtx arg = XEXP (XEXP (link, 0), 0);
16646
16647 if (ix86_check_avx256_register (arg))
16648 return AVX_U128_DIRTY;
16649 }
16650 }
16651
16652 return AVX_U128_CLEAN;
16653 }
16654
16655 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16656 changes state only when a 256bit register is written to, but we need
16657 to prevent the compiler from moving the optimal insertion point above
16658 an eventual read from a 256bit register. */
16659 subrtx_iterator::array_type array;
16660 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16661 if (ix86_check_avx256_register (*iter))
16662 return AVX_U128_DIRTY;
16663
16664 return AVX_U128_ANY;
16665 }
16666
16667 /* Return mode that i387 must be switched into
16668 prior to the execution of insn. */
16669
16670 static int
16671 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16672 {
16673 enum attr_i387_cw mode;
16674
16675 /* The mode UNINITIALIZED is used to store the control word after a
16676 function call or ASM pattern. The mode ANY specifies that the function
16677 has no requirements on the control word and makes no changes to the
16678 bits we are interested in. */
16679
16680 if (CALL_P (insn)
16681 || (NONJUMP_INSN_P (insn)
16682 && (asm_noperands (PATTERN (insn)) >= 0
16683 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16684 return I387_CW_UNINITIALIZED;
16685
16686 if (recog_memoized (insn) < 0)
16687 return I387_CW_ANY;
16688
16689 mode = get_attr_i387_cw (insn);
16690
16691 switch (entity)
16692 {
16693 case I387_TRUNC:
16694 if (mode == I387_CW_TRUNC)
16695 return mode;
16696 break;
16697
16698 case I387_FLOOR:
16699 if (mode == I387_CW_FLOOR)
16700 return mode;
16701 break;
16702
16703 case I387_CEIL:
16704 if (mode == I387_CW_CEIL)
16705 return mode;
16706 break;
16707
16708 case I387_MASK_PM:
16709 if (mode == I387_CW_MASK_PM)
16710 return mode;
16711 break;
16712
16713 default:
16714 gcc_unreachable ();
16715 }
16716
16717 return I387_CW_ANY;
16718 }
16719
16720 /* Return mode that entity must be switched into
16721 prior to the execution of insn. */
16722
16723 static int
16724 ix86_mode_needed (int entity, rtx_insn *insn)
16725 {
16726 switch (entity)
16727 {
16728 case AVX_U128:
16729 return ix86_avx_u128_mode_needed (insn);
16730 case I387_TRUNC:
16731 case I387_FLOOR:
16732 case I387_CEIL:
16733 case I387_MASK_PM:
16734 return ix86_i387_mode_needed (entity, insn);
16735 default:
16736 gcc_unreachable ();
16737 }
16738 return 0;
16739 }
16740
16741 /* Check if a 256bit AVX register is referenced in stores. */
16742
16743 static void
16744 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16745 {
16746 if (ix86_check_avx256_register (dest))
16747 {
16748 bool *used = (bool *) data;
16749 *used = true;
16750 }
16751 }
16752
16753 /* Calculate mode of upper 128bit AVX registers after the insn. */
16754
16755 static int
16756 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16757 {
16758 rtx pat = PATTERN (insn);
16759
16760 if (vzeroupper_operation (pat, VOIDmode)
16761 || vzeroall_operation (pat, VOIDmode))
16762 return AVX_U128_CLEAN;
16763
16764 /* We know that the state is clean after a CALL insn if no 256bit
16765 register is used as the function return register. */
16766 if (CALL_P (insn))
16767 {
16768 bool avx_reg256_found = false;
16769 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16770
16771 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16772 }
16773
16774 /* Otherwise, return current mode. Remember that if insn
16775 references AVX 256bit registers, the mode was already changed
16776 to DIRTY from MODE_NEEDED. */
16777 return mode;
16778 }
16779
16780 /* Return the mode that an insn results in. */
16781
16782 static int
16783 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16784 {
16785 switch (entity)
16786 {
16787 case AVX_U128:
16788 return ix86_avx_u128_mode_after (mode, insn);
16789 case I387_TRUNC:
16790 case I387_FLOOR:
16791 case I387_CEIL:
16792 case I387_MASK_PM:
16793 return mode;
16794 default:
16795 gcc_unreachable ();
16796 }
16797 }
16798
16799 static int
16800 ix86_avx_u128_mode_entry (void)
16801 {
16802 tree arg;
16803
16804 /* Entry mode is set to AVX_U128_DIRTY if there are
16805 256bit modes used in function arguments. */
16806 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16807 arg = TREE_CHAIN (arg))
16808 {
16809 rtx incoming = DECL_INCOMING_RTL (arg);
16810
16811 if (incoming && ix86_check_avx256_register (incoming))
16812 return AVX_U128_DIRTY;
16813 }
16814
16815 return AVX_U128_CLEAN;
16816 }
16817
16818 /* Return a mode that ENTITY is assumed to be
16819 switched to at function entry. */
16820
16821 static int
16822 ix86_mode_entry (int entity)
16823 {
16824 switch (entity)
16825 {
16826 case AVX_U128:
16827 return ix86_avx_u128_mode_entry ();
16828 case I387_TRUNC:
16829 case I387_FLOOR:
16830 case I387_CEIL:
16831 case I387_MASK_PM:
16832 return I387_CW_ANY;
16833 default:
16834 gcc_unreachable ();
16835 }
16836 }
16837
16838 static int
16839 ix86_avx_u128_mode_exit (void)
16840 {
16841 rtx reg = crtl->return_rtx;
16842
16843 /* Exit mode is set to AVX_U128_DIRTY if there are
16844 256bit modes used in the function return register. */
16845 if (reg && ix86_check_avx256_register (reg))
16846 return AVX_U128_DIRTY;
16847
16848 return AVX_U128_CLEAN;
16849 }
16850
16851 /* Return a mode that ENTITY is assumed to be
16852 switched to at function exit. */
16853
16854 static int
16855 ix86_mode_exit (int entity)
16856 {
16857 switch (entity)
16858 {
16859 case AVX_U128:
16860 return ix86_avx_u128_mode_exit ();
16861 case I387_TRUNC:
16862 case I387_FLOOR:
16863 case I387_CEIL:
16864 case I387_MASK_PM:
16865 return I387_CW_ANY;
16866 default:
16867 gcc_unreachable ();
16868 }
16869 }
16870
16871 static int
16872 ix86_mode_priority (int, int n)
16873 {
16874 return n;
16875 }
16876
16877 /* Output code to initialize control word copies used by trunc?f?i and
16878 rounding patterns. The current control word is saved, adjusted
16879 according to MODE, and stored in the stack slot selected for MODE. */
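/* For reference, the bits adjusted below live in the x87 control word:
   the rounding-control field is bits 11:10 (0x0400 rounds down, 0x0800
   rounds up, 0x0c00 truncates) and bit 5 (0x0020) masks the precision
   exception used for nearbyint().  */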
16880
16881 static void
16882 emit_i387_cw_initialization (int mode)
16883 {
16884 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16885 rtx new_mode;
16886
16887 enum ix86_stack_slot slot;
16888
16889 rtx reg = gen_reg_rtx (HImode);
16890
16891 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16892 emit_move_insn (reg, copy_rtx (stored_mode));
16893
16894 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16895 || optimize_insn_for_size_p ())
16896 {
16897 switch (mode)
16898 {
16899 case I387_CW_TRUNC:
16900 /* round toward zero (truncate) */
16901 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16902 slot = SLOT_CW_TRUNC;
16903 break;
16904
16905 case I387_CW_FLOOR:
16906 /* round down toward -oo */
16907 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16908 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16909 slot = SLOT_CW_FLOOR;
16910 break;
16911
16912 case I387_CW_CEIL:
16913 /* round up toward +oo */
16914 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16915 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16916 slot = SLOT_CW_CEIL;
16917 break;
16918
16919 case I387_CW_MASK_PM:
16920 /* mask precision exception for nearbyint() */
16921 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16922 slot = SLOT_CW_MASK_PM;
16923 break;
16924
16925 default:
16926 gcc_unreachable ();
16927 }
16928 }
16929 else
16930 {
16931 switch (mode)
16932 {
16933 case I387_CW_TRUNC:
16934 /* round toward zero (truncate) */
16935 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16936 slot = SLOT_CW_TRUNC;
16937 break;
16938
16939 case I387_CW_FLOOR:
16940 /* round down toward -oo */
16941 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16942 slot = SLOT_CW_FLOOR;
16943 break;
16944
16945 case I387_CW_CEIL:
16946 /* round up toward +oo */
16947 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16948 slot = SLOT_CW_CEIL;
16949 break;
16950
16951 case I387_CW_MASK_PM:
16952 /* mask precision exception for nearbyint() */
16953 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16954 slot = SLOT_CW_MASK_PM;
16955 break;
16956
16957 default:
16958 gcc_unreachable ();
16959 }
16960 }
16961
16962 gcc_assert (slot < MAX_386_STACK_LOCALS);
16963
16964 new_mode = assign_386_stack_local (HImode, slot);
16965 emit_move_insn (new_mode, reg);
16966 }
16967
16968 /* Emit vzeroupper. */
16969
16970 void
16971 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16972 {
16973 int i;
16974
16975 /* Cancel automatic vzeroupper insertion if there are
16976 live call-saved SSE registers at the insertion point. */
16977
16978 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16979 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16980 return;
16981
16982 if (TARGET_64BIT)
16983 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16984 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16985 return;
16986
16987 emit_insn (gen_avx_vzeroupper ());
16988 }
16989
16990 /* Generate one or more insns to set ENTITY to MODE. REGS_LIVE
16991 is the set of hard registers live at the point where the insn(s)
16992 are to be inserted. */
16995
16996 static void
16997 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16998 HARD_REG_SET regs_live)
16999 {
17000 switch (entity)
17001 {
17002 case AVX_U128:
17003 if (mode == AVX_U128_CLEAN)
17004 ix86_avx_emit_vzeroupper (regs_live);
17005 break;
17006 case I387_TRUNC:
17007 case I387_FLOOR:
17008 case I387_CEIL:
17009 case I387_MASK_PM:
17010 if (mode != I387_CW_ANY
17011 && mode != I387_CW_UNINITIALIZED)
17012 emit_i387_cw_initialization (mode);
17013 break;
17014 default:
17015 gcc_unreachable ();
17016 }
17017 }
17018
17019 /* Output code for INSN to convert a float to a signed int. OPERANDS
17020 are the insn operands. The output may be [HSD]Imode and the input
17021 operand may be [SDX]Fmode. */
17022
17023 const char *
17024 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
17025 {
17026 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17027 int dimode_p = GET_MODE (operands[0]) == DImode;
17028 int round_mode = get_attr_i387_cw (insn);
17029
17030 /* Jump through a hoop or two for DImode, since the hardware has no
17031 non-popping instruction. We used to do this a different way, but
17032 that was somewhat fragile and broke with post-reload splitters. */
17033 if ((dimode_p || fisttp) && !stack_top_dies)
17034 output_asm_insn ("fld\t%y1", operands);
17035
17036 gcc_assert (STACK_TOP_P (operands[1]));
17037 gcc_assert (MEM_P (operands[0]));
17038 gcc_assert (GET_MODE (operands[1]) != TFmode);
17039
17040 if (fisttp)
17041 output_asm_insn ("fisttp%Z0\t%0", operands);
17042 else
17043 {
17044 if (round_mode != I387_CW_ANY)
17045 output_asm_insn ("fldcw\t%3", operands);
17046 if (stack_top_dies || dimode_p)
17047 output_asm_insn ("fistp%Z0\t%0", operands);
17048 else
17049 output_asm_insn ("fist%Z0\t%0", operands);
17050 if (round_mode != I387_CW_ANY)
17051 output_asm_insn ("fldcw\t%2", operands);
17052 }
17053
17054 return "";
17055 }
17056
17057 /* Output code for x87 ffreep insn. The OPNO argument, which may only
17058 have the values zero or one, indicates the ffreep insn's operand
17059 from the OPERANDS array. */
17060
17061 static const char *
17062 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
17063 {
17064 if (TARGET_USE_FFREEP)
17065 #ifdef HAVE_AS_IX86_FFREEP
17066 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
17067 #else
17068 {
17069 static char retval[32];
17070 int regno = REGNO (operands[opno]);
17071
17072 gcc_assert (STACK_REGNO_P (regno));
17073
17074 regno -= FIRST_STACK_REG;
17075
17076 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
17077 return retval;
17078 }
17079 #endif
17080
17081 return opno ? "fstp\t%y1" : "fstp\t%y0";
17082 }
17083
17084
17085 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
17086 should be used. UNORDERED_P is true when fucom should be used. */
17087
17088 const char *
17089 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
17090 {
17091 int stack_top_dies;
17092 rtx cmp_op0, cmp_op1;
17093 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
17094
17095 if (eflags_p)
17096 {
17097 cmp_op0 = operands[0];
17098 cmp_op1 = operands[1];
17099 }
17100 else
17101 {
17102 cmp_op0 = operands[1];
17103 cmp_op1 = operands[2];
17104 }
17105
17106 if (is_sse)
17107 {
17108 if (GET_MODE (operands[0]) == SFmode)
17109 if (unordered_p)
17110 return "%vucomiss\t{%1, %0|%0, %1}";
17111 else
17112 return "%vcomiss\t{%1, %0|%0, %1}";
17113 else
17114 if (unordered_p)
17115 return "%vucomisd\t{%1, %0|%0, %1}";
17116 else
17117 return "%vcomisd\t{%1, %0|%0, %1}";
17118 }
17119
17120 gcc_assert (STACK_TOP_P (cmp_op0));
17121
17122 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17123
17124 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17125 {
17126 if (stack_top_dies)
17127 {
17128 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17129 return output_387_ffreep (operands, 1);
17130 }
17131 else
17132 return "ftst\n\tfnstsw\t%0";
17133 }
17134
17135 if (STACK_REG_P (cmp_op1)
17136 && stack_top_dies
17137 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17138 && REGNO (cmp_op1) != FIRST_STACK_REG)
17139 {
17140 /* If both the top of the 387 stack and the other operand (also a
17141 stack register) die, then this must be a
17142 `fcompp' float compare. */
17143
17144 if (eflags_p)
17145 {
17146 /* There is no double popping fcomi variant. Fortunately,
17147 eflags is immune from the fstp's cc clobbering. */
17148 if (unordered_p)
17149 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17150 else
17151 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17152 return output_387_ffreep (operands, 0);
17153 }
17154 else
17155 {
17156 if (unordered_p)
17157 return "fucompp\n\tfnstsw\t%0";
17158 else
17159 return "fcompp\n\tfnstsw\t%0";
17160 }
17161 }
17162 else
17163 {
17164 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
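/* E.g. mask 0b1010 (eflags_p and unordered_p) selects "fucomi", while
   mask 0b0011 (unordered_p and stack_top_dies) selects the
   "fucomp%Z2 ... fnstsw" form below.  */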
17165
17166 static const char * const alt[16] =
17167 {
17168 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17169 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17170 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17171 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17172
17173 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17174 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17175 NULL,
17176 NULL,
17177
17178 "fcomi\t{%y1, %0|%0, %y1}",
17179 "fcomip\t{%y1, %0|%0, %y1}",
17180 "fucomi\t{%y1, %0|%0, %y1}",
17181 "fucomip\t{%y1, %0|%0, %y1}",
17182
17183 NULL,
17184 NULL,
17185 NULL,
17186 NULL
17187 };
17188
17189 int mask;
17190 const char *ret;
17191
17192 mask = eflags_p << 3;
17193 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17194 mask |= unordered_p << 1;
17195 mask |= stack_top_dies;
17196
17197 gcc_assert (mask < 16);
17198 ret = alt[mask];
17199 gcc_assert (ret);
17200
17201 return ret;
17202 }
17203 }
17204
17205 void
17206 ix86_output_addr_vec_elt (FILE *file, int value)
17207 {
17208 const char *directive = ASM_LONG;
17209
17210 #ifdef ASM_QUAD
17211 if (TARGET_LP64)
17212 directive = ASM_QUAD;
17213 #else
17214 gcc_assert (!TARGET_64BIT);
17215 #endif
17216
17217 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
17218 }
17219
17220 void
17221 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17222 {
17223 const char *directive = ASM_LONG;
17224
17225 #ifdef ASM_QUAD
17226 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17227 directive = ASM_QUAD;
17228 #else
17229 gcc_assert (!TARGET_64BIT);
17230 #endif
17231 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17232 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17233 fprintf (file, "%s%s%d-%s%d\n",
17234 directive, LPREFIX, value, LPREFIX, rel);
17235 else if (HAVE_AS_GOTOFF_IN_DATA)
17236 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17237 #if TARGET_MACHO
17238 else if (TARGET_MACHO)
17239 {
17240 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17241 machopic_output_function_base_name (file);
17242 putc ('\n', file);
17243 }
17244 #endif
17245 else
17246 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17247 GOT_SYMBOL_NAME, LPREFIX, value);
17248 }
17249 \f
17250 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
17251 for the target. */
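/* For instance, on most tunings clearing %eax becomes "xorl %eax, %eax"
   (the SET is wrapped in a parallel with an explicit FLAGS_REG clobber
   below), while "movl $0, %eax" is kept for TARGET_USE_MOV0 tunings when
   not optimizing for size.  */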
17252
17253 void
17254 ix86_expand_clear (rtx dest)
17255 {
17256 rtx tmp;
17257
17258 /* We play register width games, which are only valid after reload. */
17259 gcc_assert (reload_completed);
17260
17261 /* Avoid HImode and its attendant prefix byte. */
17262 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17263 dest = gen_rtx_REG (SImode, REGNO (dest));
17264 tmp = gen_rtx_SET (dest, const0_rtx);
17265
17266 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17267 {
17268 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17269 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17270 }
17271
17272 emit_insn (tmp);
17273 }
17274
17275 /* X is an unchanging MEM. If it is a constant pool reference, return
17276 the constant pool rtx, else NULL. */
17277
17278 rtx
17279 maybe_get_pool_constant (rtx x)
17280 {
17281 x = ix86_delegitimize_address (XEXP (x, 0));
17282
17283 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17284 return get_pool_constant (x);
17285
17286 return NULL_RTX;
17287 }
17288
17289 void
17290 ix86_expand_move (machine_mode mode, rtx operands[])
17291 {
17292 rtx op0, op1;
17293 enum tls_model model;
17294
17295 op0 = operands[0];
17296 op1 = operands[1];
17297
17298 if (GET_CODE (op1) == SYMBOL_REF)
17299 {
17300 rtx tmp;
17301
17302 model = SYMBOL_REF_TLS_MODEL (op1);
17303 if (model)
17304 {
17305 op1 = legitimize_tls_address (op1, model, true);
17306 op1 = force_operand (op1, op0);
17307 if (op1 == op0)
17308 return;
17309 op1 = convert_to_mode (mode, op1, 1);
17310 }
17311 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
17312 op1 = tmp;
17313 }
17314 else if (GET_CODE (op1) == CONST
17315 && GET_CODE (XEXP (op1, 0)) == PLUS
17316 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17317 {
17318 rtx addend = XEXP (XEXP (op1, 0), 1);
17319 rtx symbol = XEXP (XEXP (op1, 0), 0);
17320 rtx tmp;
17321
17322 model = SYMBOL_REF_TLS_MODEL (symbol);
17323 if (model)
17324 tmp = legitimize_tls_address (symbol, model, true);
17325 else
17326 tmp = legitimize_pe_coff_symbol (symbol, true);
17327
17328 if (tmp)
17329 {
17330 tmp = force_operand (tmp, NULL);
17331 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17332 op0, 1, OPTAB_DIRECT);
17333 if (tmp == op0)
17334 return;
17335 op1 = convert_to_mode (mode, tmp, 1);
17336 }
17337 }
17338
17339 if ((flag_pic || MACHOPIC_INDIRECT)
17340 && symbolic_operand (op1, mode))
17341 {
17342 if (TARGET_MACHO && !TARGET_64BIT)
17343 {
17344 #if TARGET_MACHO
17345 /* dynamic-no-pic */
17346 if (MACHOPIC_INDIRECT)
17347 {
17348 rtx temp = (op0 && REG_P (op0) && mode == Pmode)
17349 ? op0 : gen_reg_rtx (Pmode);
17350 op1 = machopic_indirect_data_reference (op1, temp);
17351 if (MACHOPIC_PURE)
17352 op1 = machopic_legitimize_pic_address (op1, mode,
17353 temp == op1 ? 0 : temp);
17354 }
17355 if (op0 != op1 && GET_CODE (op0) != MEM)
17356 {
17357 rtx insn = gen_rtx_SET (op0, op1);
17358 emit_insn (insn);
17359 return;
17360 }
17361 if (GET_CODE (op0) == MEM)
17362 op1 = force_reg (Pmode, op1);
17363 else
17364 {
17365 rtx temp = op0;
17366 if (GET_CODE (temp) != REG)
17367 temp = gen_reg_rtx (Pmode);
17368 temp = legitimize_pic_address (op1, temp);
17369 if (temp == op0)
17370 return;
17371 op1 = temp;
17372 }
17373 /* dynamic-no-pic */
17374 #endif
17375 }
17376 else
17377 {
17378 if (MEM_P (op0))
17379 op1 = force_reg (mode, op1);
17380 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17381 {
17382 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17383 op1 = legitimize_pic_address (op1, reg);
17384 if (op0 == op1)
17385 return;
17386 op1 = convert_to_mode (mode, op1, 1);
17387 }
17388 }
17389 }
17390 else
17391 {
17392 if (MEM_P (op0)
17393 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17394 || !push_operand (op0, mode))
17395 && MEM_P (op1))
17396 op1 = force_reg (mode, op1);
17397
17398 if (push_operand (op0, mode)
17399 && ! general_no_elim_operand (op1, mode))
17400 op1 = copy_to_mode_reg (mode, op1);
17401
17402 /* Force large constants in 64bit compilation into a register
17403 to get them CSEed. */
17404 if (can_create_pseudo_p ()
17405 && (mode == DImode) && TARGET_64BIT
17406 && immediate_operand (op1, mode)
17407 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17408 && !register_operand (op0, mode)
17409 && optimize)
17410 op1 = copy_to_mode_reg (mode, op1);
17411
17412 if (can_create_pseudo_p ()
17413 && CONST_DOUBLE_P (op1))
17414 {
17415 /* If we are loading a floating point constant to a register,
17416 force the value to memory now, since we'll get better code
17417 out of the back end. */
17418
17419 op1 = validize_mem (force_const_mem (mode, op1));
17420 if (!register_operand (op0, mode))
17421 {
17422 rtx temp = gen_reg_rtx (mode);
17423 emit_insn (gen_rtx_SET (temp, op1));
17424 emit_move_insn (op0, temp);
17425 return;
17426 }
17427 }
17428 }
17429
17430 emit_insn (gen_rtx_SET (op0, op1));
17431 }
17432
17433 void
17434 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17435 {
17436 rtx op0 = operands[0], op1 = operands[1];
17437 unsigned int align = GET_MODE_ALIGNMENT (mode);
17438
17439 if (push_operand (op0, VOIDmode))
17440 op0 = emit_move_resolve_push (mode, op0);
17441
17442 /* Force constants other than zero into memory. We do not know how
17443 the instructions used to build constants modify the upper 64 bits
17444 of the register; once we have that information we may be able
17445 to handle some of them more efficiently. */
17446 if (can_create_pseudo_p ()
17447 && register_operand (op0, mode)
17448 && (CONSTANT_P (op1)
17449 || (GET_CODE (op1) == SUBREG
17450 && CONSTANT_P (SUBREG_REG (op1))))
17451 && !standard_sse_constant_p (op1))
17452 op1 = validize_mem (force_const_mem (mode, op1));
17453
17454 /* We need to check memory alignment for SSE mode since an attribute
17455 can make operands unaligned. */
17456 if (can_create_pseudo_p ()
17457 && SSE_REG_MODE_P (mode)
17458 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17459 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17460 {
17461 rtx tmp[2];
17462
17463 /* ix86_expand_vector_move_misalign() does not like constants ... */
17464 if (CONSTANT_P (op1)
17465 || (GET_CODE (op1) == SUBREG
17466 && CONSTANT_P (SUBREG_REG (op1))))
17467 op1 = validize_mem (force_const_mem (mode, op1));
17468
17469 /* ... nor both arguments in memory. */
17470 if (!register_operand (op0, mode)
17471 && !register_operand (op1, mode))
17472 op1 = force_reg (mode, op1);
17473
17474 tmp[0] = op0; tmp[1] = op1;
17475 ix86_expand_vector_move_misalign (mode, tmp);
17476 return;
17477 }
17478
17479 /* Make operand1 a register if neither operand is a register already. */
17480 if (can_create_pseudo_p ()
17481 && !register_operand (op0, mode)
17482 && !register_operand (op1, mode))
17483 {
17484 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17485 return;
17486 }
17487
17488 emit_insn (gen_rtx_SET (op0, op1));
17489 }
17490
17491 /* Split 32-byte AVX unaligned load and store if needed. */
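/* Roughly: with the corresponding split tuning flag enabled, an unaligned
   256-bit load is emitted as a 128-bit load plus a VEC_CONCAT with the
   upper half (typically assembling to vmovups + vinsertf128), and an
   unaligned 256-bit store as two vextractf128-style 128-bit stores.  */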
17492
17493 static void
17494 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17495 {
17496 rtx m;
17497 rtx (*extract) (rtx, rtx, rtx);
17498 rtx (*load_unaligned) (rtx, rtx);
17499 rtx (*store_unaligned) (rtx, rtx);
17500 machine_mode mode;
17501
17502 switch (GET_MODE (op0))
17503 {
17504 default:
17505 gcc_unreachable ();
17506 case V32QImode:
17507 extract = gen_avx_vextractf128v32qi;
17508 load_unaligned = gen_avx_loaddquv32qi;
17509 store_unaligned = gen_avx_storedquv32qi;
17510 mode = V16QImode;
17511 break;
17512 case V8SFmode:
17513 extract = gen_avx_vextractf128v8sf;
17514 load_unaligned = gen_avx_loadups256;
17515 store_unaligned = gen_avx_storeups256;
17516 mode = V4SFmode;
17517 break;
17518 case V4DFmode:
17519 extract = gen_avx_vextractf128v4df;
17520 load_unaligned = gen_avx_loadupd256;
17521 store_unaligned = gen_avx_storeupd256;
17522 mode = V2DFmode;
17523 break;
17524 }
17525
17526 if (MEM_P (op1))
17527 {
17528 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
17529 && optimize_insn_for_speed_p ())
17530 {
17531 rtx r = gen_reg_rtx (mode);
17532 m = adjust_address (op1, mode, 0);
17533 emit_move_insn (r, m);
17534 m = adjust_address (op1, mode, 16);
17535 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17536 emit_move_insn (op0, r);
17537 }
17538 /* Normal *mov<mode>_internal pattern will handle
17539 unaligned loads just fine if misaligned_operand
17540 is true, and without the UNSPEC it can be combined
17541 with arithmetic instructions. */
17542 else if (misaligned_operand (op1, GET_MODE (op1)))
17543 emit_insn (gen_rtx_SET (op0, op1));
17544 else
17545 emit_insn (load_unaligned (op0, op1));
17546 }
17547 else if (MEM_P (op0))
17548 {
17549 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
17550 && optimize_insn_for_speed_p ())
17551 {
17552 m = adjust_address (op0, mode, 0);
17553 emit_insn (extract (m, op1, const0_rtx));
17554 m = adjust_address (op0, mode, 16);
17555 emit_insn (extract (m, op1, const1_rtx));
17556 }
17557 else
17558 emit_insn (store_unaligned (op0, op1));
17559 }
17560 else
17561 gcc_unreachable ();
17562 }
17563
17564 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17565 straight to ix86_expand_vector_move. */
17566 /* Code generation for scalar reg-reg moves of single and double precision data:
17567 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17568 movaps reg, reg
17569 else
17570 movss reg, reg
17571 if (x86_sse_partial_reg_dependency == true)
17572 movapd reg, reg
17573 else
17574 movsd reg, reg
17575
17576 Code generation for scalar loads of double precision data:
17577 if (x86_sse_split_regs == true)
17578 movlpd mem, reg (gas syntax)
17579 else
17580 movsd mem, reg
17581
17582 Code generation for unaligned packed loads of single precision data
17583 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17584 if (x86_sse_unaligned_move_optimal)
17585 movups mem, reg
17586
17587 if (x86_sse_partial_reg_dependency == true)
17588 {
17589 xorps reg, reg
17590 movlps mem, reg
17591 movhps mem+8, reg
17592 }
17593 else
17594 {
17595 movlps mem, reg
17596 movhps mem+8, reg
17597 }
17598
17599 Code generation for unaligned packed loads of double precision data
17600 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17601 if (x86_sse_unaligned_move_optimal)
17602 movupd mem, reg
17603
17604 if (x86_sse_split_regs == true)
17605 {
17606 movlpd mem, reg
17607 movhpd mem+8, reg
17608 }
17609 else
17610 {
17611 movsd mem, reg
17612 movhpd mem+8, reg
17613 }
17614 */
17615
17616 void
17617 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17618 {
17619 rtx op0, op1, orig_op0 = NULL_RTX, m;
17620 rtx (*load_unaligned) (rtx, rtx);
17621 rtx (*store_unaligned) (rtx, rtx);
17622
17623 op0 = operands[0];
17624 op1 = operands[1];
17625
17626 if (GET_MODE_SIZE (mode) == 64)
17627 {
17628 switch (GET_MODE_CLASS (mode))
17629 {
17630 case MODE_VECTOR_INT:
17631 case MODE_INT:
17632 if (GET_MODE (op0) != V16SImode)
17633 {
17634 if (!MEM_P (op0))
17635 {
17636 orig_op0 = op0;
17637 op0 = gen_reg_rtx (V16SImode);
17638 }
17639 else
17640 op0 = gen_lowpart (V16SImode, op0);
17641 }
17642 op1 = gen_lowpart (V16SImode, op1);
17643 /* FALLTHRU */
17644
17645 case MODE_VECTOR_FLOAT:
17646 switch (GET_MODE (op0))
17647 {
17648 default:
17649 gcc_unreachable ();
17650 case V16SImode:
17651 load_unaligned = gen_avx512f_loaddquv16si;
17652 store_unaligned = gen_avx512f_storedquv16si;
17653 break;
17654 case V16SFmode:
17655 load_unaligned = gen_avx512f_loadups512;
17656 store_unaligned = gen_avx512f_storeups512;
17657 break;
17658 case V8DFmode:
17659 load_unaligned = gen_avx512f_loadupd512;
17660 store_unaligned = gen_avx512f_storeupd512;
17661 break;
17662 }
17663
17664 if (MEM_P (op1))
17665 emit_insn (load_unaligned (op0, op1));
17666 else if (MEM_P (op0))
17667 emit_insn (store_unaligned (op0, op1));
17668 else
17669 gcc_unreachable ();
17670 if (orig_op0)
17671 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17672 break;
17673
17674 default:
17675 gcc_unreachable ();
17676 }
17677
17678 return;
17679 }
17680
17681 if (TARGET_AVX
17682 && GET_MODE_SIZE (mode) == 32)
17683 {
17684 switch (GET_MODE_CLASS (mode))
17685 {
17686 case MODE_VECTOR_INT:
17687 case MODE_INT:
17688 if (GET_MODE (op0) != V32QImode)
17689 {
17690 if (!MEM_P (op0))
17691 {
17692 orig_op0 = op0;
17693 op0 = gen_reg_rtx (V32QImode);
17694 }
17695 else
17696 op0 = gen_lowpart (V32QImode, op0);
17697 }
17698 op1 = gen_lowpart (V32QImode, op1);
17699 /* FALLTHRU */
17700
17701 case MODE_VECTOR_FLOAT:
17702 ix86_avx256_split_vector_move_misalign (op0, op1);
17703 if (orig_op0)
17704 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17705 break;
17706
17707 default:
17708 gcc_unreachable ();
17709 }
17710
17711 return;
17712 }
17713
17714 if (MEM_P (op1))
17715 {
17716 /* Normal *mov<mode>_internal pattern will handle
17717 unaligned loads just fine if misaligned_operand
17718 is true, and without the UNSPEC it can be combined
17719 with arithmetic instructions. */
17720 if (TARGET_AVX
17721 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17722 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17723 && misaligned_operand (op1, GET_MODE (op1)))
17724 emit_insn (gen_rtx_SET (op0, op1));
17725 /* ??? If we have typed data, then it would appear that using
17726 movdqu is the only way to get unaligned data loaded with
17727 integer type. */
17728 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17729 {
17730 if (GET_MODE (op0) != V16QImode)
17731 {
17732 orig_op0 = op0;
17733 op0 = gen_reg_rtx (V16QImode);
17734 }
17735 op1 = gen_lowpart (V16QImode, op1);
17736 /* We will eventually emit movups based on insn attributes. */
17737 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17738 if (orig_op0)
17739 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17740 }
17741 else if (TARGET_SSE2 && mode == V2DFmode)
17742 {
17743 rtx zero;
17744
17745 if (TARGET_AVX
17746 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17747 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17748 || optimize_insn_for_size_p ())
17749 {
17750 /* We will eventually emit movups based on insn attributes. */
17751 emit_insn (gen_sse2_loadupd (op0, op1));
17752 return;
17753 }
17754
17755 /* When SSE registers are split into halves, we can avoid
17756 writing to the top half twice. */
17757 if (TARGET_SSE_SPLIT_REGS)
17758 {
17759 emit_clobber (op0);
17760 zero = op0;
17761 }
17762 else
17763 {
17764 /* ??? Not sure about the best option for the Intel chips.
17765 The following would seem to satisfy; the register is
17766 entirely cleared, breaking the dependency chain. We
17767 then store to the upper half, with a dependency depth
17768 of one. A rumor has it that Intel recommends two movsd
17769 followed by an unpacklpd, but this is unconfirmed. And
17770 given that the dependency depth of the unpacklpd would
17771 still be one, I'm not sure why this would be better. */
17772 zero = CONST0_RTX (V2DFmode);
17773 }
17774
17775 m = adjust_address (op1, DFmode, 0);
17776 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17777 m = adjust_address (op1, DFmode, 8);
17778 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17779 }
17780 else
17781 {
17782 rtx t;
17783
17784 if (TARGET_AVX
17785 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17786 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17787 || optimize_insn_for_size_p ())
17788 {
17789 if (GET_MODE (op0) != V4SFmode)
17790 {
17791 orig_op0 = op0;
17792 op0 = gen_reg_rtx (V4SFmode);
17793 }
17794 op1 = gen_lowpart (V4SFmode, op1);
17795 emit_insn (gen_sse_loadups (op0, op1));
17796 if (orig_op0)
17797 emit_move_insn (orig_op0,
17798 gen_lowpart (GET_MODE (orig_op0), op0));
17799 return;
17800 }
17801
17802 if (mode != V4SFmode)
17803 t = gen_reg_rtx (V4SFmode);
17804 else
17805 t = op0;
17806
17807 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17808 emit_move_insn (t, CONST0_RTX (V4SFmode));
17809 else
17810 emit_clobber (t);
17811
17812 m = adjust_address (op1, V2SFmode, 0);
17813 emit_insn (gen_sse_loadlps (t, t, m));
17814 m = adjust_address (op1, V2SFmode, 8);
17815 emit_insn (gen_sse_loadhps (t, t, m));
17816 if (mode != V4SFmode)
17817 emit_move_insn (op0, gen_lowpart (mode, t));
17818 }
17819 }
17820 else if (MEM_P (op0))
17821 {
17822 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17823 {
17824 op0 = gen_lowpart (V16QImode, op0);
17825 op1 = gen_lowpart (V16QImode, op1);
17826 /* We will eventually emit movups based on insn attributes. */
17827 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17828 }
17829 else if (TARGET_SSE2 && mode == V2DFmode)
17830 {
17831 if (TARGET_AVX
17832 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17833 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17834 || optimize_insn_for_size_p ())
17835 /* We will eventually emit movups based on insn attributes. */
17836 emit_insn (gen_sse2_storeupd (op0, op1));
17837 else
17838 {
17839 m = adjust_address (op0, DFmode, 0);
17840 emit_insn (gen_sse2_storelpd (m, op1));
17841 m = adjust_address (op0, DFmode, 8);
17842 emit_insn (gen_sse2_storehpd (m, op1));
17843 }
17844 }
17845 else
17846 {
17847 if (mode != V4SFmode)
17848 op1 = gen_lowpart (V4SFmode, op1);
17849
17850 if (TARGET_AVX
17851 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17852 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17853 || optimize_insn_for_size_p ())
17854 {
17855 op0 = gen_lowpart (V4SFmode, op0);
17856 emit_insn (gen_sse_storeups (op0, op1));
17857 }
17858 else
17859 {
17860 m = adjust_address (op0, V2SFmode, 0);
17861 emit_insn (gen_sse_storelps (m, op1));
17862 m = adjust_address (op0, V2SFmode, 8);
17863 emit_insn (gen_sse_storehps (m, op1));
17864 }
17865 }
17866 }
17867 else
17868 gcc_unreachable ();
17869 }
17870
17871 /* Helper function of ix86_fixup_binary_operands to canonicalize
17872 operand order. Returns true if the operands should be swapped. */
17873
17874 static bool
17875 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
17876 rtx operands[])
17877 {
17878 rtx dst = operands[0];
17879 rtx src1 = operands[1];
17880 rtx src2 = operands[2];
17881
17882 /* If the operation is not commutative, we can't do anything. */
17883 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17884 return false;
17885
17886 /* Highest priority is that src1 should match dst. */
17887 if (rtx_equal_p (dst, src1))
17888 return false;
17889 if (rtx_equal_p (dst, src2))
17890 return true;
17891
17892 /* Next highest priority is that immediate constants come second. */
17893 if (immediate_operand (src2, mode))
17894 return false;
17895 if (immediate_operand (src1, mode))
17896 return true;
17897
17898 /* Lowest priority is that memory references should come second. */
17899 if (MEM_P (src2))
17900 return false;
17901 if (MEM_P (src1))
17902 return true;
17903
17904 return false;
17905 }
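
/* For instance (a sketch), for a commutative PLUS with
   operands[0] == operands[2] == (reg:SI B) and operands[1] == (mem:SI A),
   the function returns true so that the register matching the
   destination ends up as src1 and the memory reference comes second.  */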
17906
17907
17908 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17909 destination to use for the operation. If different from the true
17910 destination in operands[0], a copy operation will be required. */
17911
17912 rtx
17913 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
17914 rtx operands[])
17915 {
17916 rtx dst = operands[0];
17917 rtx src1 = operands[1];
17918 rtx src2 = operands[2];
17919
17920 /* Canonicalize operand order. */
17921 if (ix86_swap_binary_operands_p (code, mode, operands))
17922 {
17923 /* It is invalid to swap operands of different modes. */
17924 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17925
17926 std::swap (src1, src2);
17927 }
17928
17929 /* Both source operands cannot be in memory. */
17930 if (MEM_P (src1) && MEM_P (src2))
17931 {
17932 /* Optimization: Only read from memory once. */
17933 if (rtx_equal_p (src1, src2))
17934 {
17935 src2 = force_reg (mode, src2);
17936 src1 = src2;
17937 }
17938 else if (rtx_equal_p (dst, src1))
17939 src2 = force_reg (mode, src2);
17940 else
17941 src1 = force_reg (mode, src1);
17942 }
17943
17944 /* If the destination is memory, and we do not have matching source
17945 operands, do things in registers. */
17946 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17947 dst = gen_reg_rtx (mode);
17948
17949 /* Source 1 cannot be a constant. */
17950 if (CONSTANT_P (src1))
17951 src1 = force_reg (mode, src1);
17952
17953 /* Source 1 cannot be a non-matching memory. */
17954 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17955 src1 = force_reg (mode, src1);
17956
17957 /* Improve address combine. */
17958 if (code == PLUS
17959 && GET_MODE_CLASS (mode) == MODE_INT
17960 && MEM_P (src2))
17961 src2 = force_reg (mode, src2);
17962
17963 operands[1] = src1;
17964 operands[2] = src2;
17965 return dst;
17966 }
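
/* Worked example (a sketch): for code == AND in SImode with operands
   { (mem:SI D), (mem:SI A), (mem:SI B) }, all distinct, SRC1 is
   forced into a register, and since the memory destination then no
   longer matches SRC1, a fresh pseudo is returned, which the caller
   copies back into operands[0] afterwards.  */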
17967
17968 /* Similarly, but assume that the destination has already been
17969 set up properly. */
17970
17971 void
17972 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17973 machine_mode mode, rtx operands[])
17974 {
17975 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17976 gcc_assert (dst == operands[0]);
17977 }
17978
17979 /* Attempt to expand a binary operator.  Make the expansion closer to the
17980    actual machine than just general_operand, which would allow 3 separate
17981    memory references (one output, two input) in a single insn.  */
17982
17983 void
17984 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
17985 rtx operands[])
17986 {
17987 rtx src1, src2, dst, op, clob;
17988
17989 dst = ix86_fixup_binary_operands (code, mode, operands);
17990 src1 = operands[1];
17991 src2 = operands[2];
17992
17993 /* Emit the instruction. */
17994
17995 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17996
17997 if (reload_completed
17998 && code == PLUS
17999 && !rtx_equal_p (dst, src1))
18000 {
18001 /* This is going to be an LEA; avoid splitting it later. */
18002 emit_insn (op);
18003 }
18004 else
18005 {
18006 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18007 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18008 }
18009
18010 /* Fix up the destination if needed. */
18011 if (dst != operands[0])
18012 emit_move_insn (operands[0], dst);
18013 }
18014
18015 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
18016 the given OPERANDS. */
18017
18018 void
18019 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
18020 rtx operands[])
18021 {
18022 rtx op1 = NULL_RTX, op2 = NULL_RTX;
18023 if (GET_CODE (operands[1]) == SUBREG)
18024 {
18025 op1 = operands[1];
18026 op2 = operands[2];
18027 }
18028 else if (GET_CODE (operands[2]) == SUBREG)
18029 {
18030 op1 = operands[2];
18031 op2 = operands[1];
18032 }
18033 /* Optimize (__m128i) d | (__m128i) e and similar code
18034 when d and e are float vectors into float vector logical
18035 insn. In C/C++ without using intrinsics there is no other way
18036 to express vector logical operation on float vectors than
18037 to cast them temporarily to integer vectors. */
18038 if (op1
18039 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
18040 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
18041 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
18042 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
18043 && SUBREG_BYTE (op1) == 0
18044 && (GET_CODE (op2) == CONST_VECTOR
18045 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
18046 && SUBREG_BYTE (op2) == 0))
18047 && can_create_pseudo_p ())
18048 {
18049 rtx dst;
18050 switch (GET_MODE (SUBREG_REG (op1)))
18051 {
18052 case V4SFmode:
18053 case V8SFmode:
18054 case V16SFmode:
18055 case V2DFmode:
18056 case V4DFmode:
18057 case V8DFmode:
18058 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
18059 if (GET_CODE (op2) == CONST_VECTOR)
18060 {
18061 op2 = gen_lowpart (GET_MODE (dst), op2);
18062 op2 = force_reg (GET_MODE (dst), op2);
18063 }
18064 else
18065 {
18066 op1 = operands[1];
18067 op2 = SUBREG_REG (operands[2]);
18068 if (!nonimmediate_operand (op2, GET_MODE (dst)))
18069 op2 = force_reg (GET_MODE (dst), op2);
18070 }
18071 op1 = SUBREG_REG (op1);
18072 if (!nonimmediate_operand (op1, GET_MODE (dst)))
18073 op1 = force_reg (GET_MODE (dst), op1);
18074 emit_insn (gen_rtx_SET (dst,
18075 gen_rtx_fmt_ee (code, GET_MODE (dst),
18076 op1, op2)));
18077 emit_move_insn (operands[0], gen_lowpart (mode, dst));
18078 return;
18079 default:
18080 break;
18081 }
18082 }
18083 if (!nonimmediate_operand (operands[1], mode))
18084 operands[1] = force_reg (mode, operands[1]);
18085 if (!nonimmediate_operand (operands[2], mode))
18086 operands[2] = force_reg (mode, operands[2]);
18087 ix86_fixup_binary_operands_no_copy (code, mode, operands);
18088 emit_insn (gen_rtx_SET (operands[0],
18089 gen_rtx_fmt_ee (code, mode, operands[1],
18090 operands[2])));
18091 }
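
/* As an illustration (variable names are made up), code like

     __m128 a, b;
     __m128i x = (__m128i) a & (__m128i) b;

   is expanded here as an AND on V4SFmode pseudos (andps) rather than
   on the integer vector mode (pand), keeping the values in the
   floating point domain when !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL.  */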
18092
18093 /* Return TRUE or FALSE depending on whether the binary operator meets the
18094 appropriate constraints. */
18095
18096 bool
18097 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18098 rtx operands[3])
18099 {
18100 rtx dst = operands[0];
18101 rtx src1 = operands[1];
18102 rtx src2 = operands[2];
18103
18104 /* Both source operands cannot be in memory. */
18105 if (MEM_P (src1) && MEM_P (src2))
18106 return false;
18107
18108 /* Canonicalize operand order for commutative operators. */
18109 if (ix86_swap_binary_operands_p (code, mode, operands))
18110 std::swap (src1, src2);
18111
18112 /* If the destination is memory, we must have a matching source operand. */
18113 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18114 return false;
18115
18116 /* Source 1 cannot be a constant. */
18117 if (CONSTANT_P (src1))
18118 return false;
18119
18120 /* Source 1 cannot be a non-matching memory. */
18121 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18122 /* Support "andhi/andsi/anddi" as a zero-extending move. */
18123 return (code == AND
18124 && (mode == HImode
18125 || mode == SImode
18126 || (TARGET_64BIT && mode == DImode))
18127 && satisfies_constraint_L (src2));
18128
18129 return true;
18130 }
18131
18132 /* Attempt to expand a unary operator.  Make the expansion closer to the
18133    actual machine than just general_operand, which would allow 2 separate
18134    memory references (one output, one input) in a single insn.  */
18135
18136 void
18137 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18138 rtx operands[])
18139 {
18140 bool matching_memory = false;
18141 rtx src, dst, op, clob;
18142
18143 dst = operands[0];
18144 src = operands[1];
18145
18146 /* If the destination is memory, and we do not have matching source
18147 operands, do things in registers. */
18148 if (MEM_P (dst))
18149 {
18150 if (rtx_equal_p (dst, src))
18151 matching_memory = true;
18152 else
18153 dst = gen_reg_rtx (mode);
18154 }
18155
18156 /* When source operand is memory, destination must match. */
18157 if (MEM_P (src) && !matching_memory)
18158 src = force_reg (mode, src);
18159
18160 /* Emit the instruction. */
18161
18162 op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));
18163
18164 if (code == NOT)
18165 emit_insn (op);
18166 else
18167 {
18168 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18169 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18170 }
18171
18172 /* Fix up the destination if needed. */
18173 if (dst != operands[0])
18174 emit_move_insn (operands[0], dst);
18175 }
18176
18177 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18178 divisor are within the range [0-255]. */
18179
18180 void
18181 ix86_split_idivmod (machine_mode mode, rtx operands[],
18182 bool signed_p)
18183 {
18184 rtx_code_label *end_label, *qimode_label;
18185 rtx insn, div, mod;
18186 rtx scratch, tmp0, tmp1, tmp2;
18187 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18188 rtx (*gen_zero_extend) (rtx, rtx);
18189 rtx (*gen_test_ccno_1) (rtx, rtx);
18190
18191 switch (mode)
18192 {
18193 case SImode:
18194 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18195 gen_test_ccno_1 = gen_testsi_ccno_1;
18196 gen_zero_extend = gen_zero_extendqisi2;
18197 break;
18198 case DImode:
18199 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18200 gen_test_ccno_1 = gen_testdi_ccno_1;
18201 gen_zero_extend = gen_zero_extendqidi2;
18202 break;
18203 default:
18204 gcc_unreachable ();
18205 }
18206
18207 end_label = gen_label_rtx ();
18208 qimode_label = gen_label_rtx ();
18209
18210 scratch = gen_reg_rtx (mode);
18211
18212 /* Use 8bit unsigned divmod if dividend and divisor are within
18213 the range [0-255]. */
18214 emit_move_insn (scratch, operands[2]);
18215 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18216 scratch, 1, OPTAB_DIRECT);
18217 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18218 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18219 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18220 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18221 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18222 pc_rtx);
18223 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
18224 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18225 JUMP_LABEL (insn) = qimode_label;
18226
18227 /* Generate original signed/unsigned divmod. */
18228 div = gen_divmod4_1 (operands[0], operands[1],
18229 operands[2], operands[3]);
18230 emit_insn (div);
18231
18232 /* Branch to the end. */
18233 emit_jump_insn (gen_jump (end_label));
18234 emit_barrier ();
18235
18236 /* Generate 8bit unsigned divide. */
18237 emit_label (qimode_label);
18238 /* Don't use operands[0] for result of 8bit divide since not all
18239 registers support QImode ZERO_EXTRACT. */
18240 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18241 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18242 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18243 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
18244
18245 if (signed_p)
18246 {
18247 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18248 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18249 }
18250 else
18251 {
18252 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18253 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18254 }
18255
18256 /* Extract remainder from AH. */
18257 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18258 if (REG_P (operands[1]))
18259 insn = emit_move_insn (operands[1], tmp1);
18260 else
18261 {
18262 /* Need a new scratch register since the old one has result
18263 of 8bit divide. */
18264 scratch = gen_reg_rtx (mode);
18265 emit_move_insn (scratch, tmp1);
18266 insn = emit_move_insn (operands[1], scratch);
18267 }
18268 set_unique_reg_note (insn, REG_EQUAL, mod);
18269
18270 /* Zero extend quotient from AL. */
18271 tmp1 = gen_lowpart (QImode, tmp0);
18272 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18273 set_unique_reg_note (insn, REG_EQUAL, div);
18274
18275 emit_label (end_label);
18276 }
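
/* A rough sketch of the emitted SImode sequence (register names are
   illustrative; the actual registers are chosen by the allocator):

     movl    dividend, scratch
     orl     divisor, scratch
     testl   $-256, scratch
     je      .Lqimode
     <32-bit divmod>                ; quotient -> op0, remainder -> op1
     jmp     .Ldone
   .Lqimode:
     divb    divisor_byte           ; 16-bit by 8-bit unsigned divide
     movzbl  %ah, op1               ; remainder from AH
     movzbl  %al, op0               ; quotient from AL
   .Ldone:                                                            */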
18277
18278 #define LEA_MAX_STALL (3)
18279 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
18280
18281 /* Increase given DISTANCE in half-cycles according to
18282 dependencies between PREV and NEXT instructions.
18283 Add 1 half-cycle if there is no dependency and
18284    go to the next cycle if there is a dependency. */
18285
18286 static unsigned int
18287 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
18288 {
18289 df_ref def, use;
18290
18291 if (!prev || !next)
18292 return distance + (distance & 1) + 2;
18293
18294 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18295 return distance + 1;
18296
18297 FOR_EACH_INSN_USE (use, next)
18298 FOR_EACH_INSN_DEF (def, prev)
18299 if (!DF_REF_IS_ARTIFICIAL (def)
18300 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18301 return distance + (distance & 1) + 2;
18302
18303 return distance + 1;
18304 }
18305
18306 /* Function checks if instruction INSN defines register number
18307 REGNO1 or REGNO2. */
18308
18309 static bool
18310 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18311 rtx_insn *insn)
18312 {
18313 df_ref def;
18314
18315 FOR_EACH_INSN_DEF (def, insn)
18316 if (DF_REF_REG_DEF_P (def)
18317 && !DF_REF_IS_ARTIFICIAL (def)
18318 && (regno1 == DF_REF_REGNO (def)
18319 || regno2 == DF_REF_REGNO (def)))
18320 return true;
18321
18322 return false;
18323 }
18324
18325 /* Function checks if instruction INSN uses register number
18326 REGNO as a part of address expression. */
18327
18328 static bool
18329 insn_uses_reg_mem (unsigned int regno, rtx insn)
18330 {
18331 df_ref use;
18332
18333 FOR_EACH_INSN_USE (use, insn)
18334 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18335 return true;
18336
18337 return false;
18338 }
18339
18340 /* Search backward for non-agu definition of register number REGNO1
18341 or register number REGNO2 in basic block starting from instruction
18342 START up to head of basic block or instruction INSN.
18343
18344    Put true into *FOUND if a definition was found and false
18345    otherwise.
18346
18347    The distance in half-cycles between START and the found instruction,
18348    or the head of the BB, is added to DISTANCE and returned. */
18349
18350 static int
18351 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18352 rtx_insn *insn, int distance,
18353 rtx_insn *start, bool *found)
18354 {
18355 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18356 rtx_insn *prev = start;
18357 rtx_insn *next = NULL;
18358
18359 *found = false;
18360
18361 while (prev
18362 && prev != insn
18363 && distance < LEA_SEARCH_THRESHOLD)
18364 {
18365 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18366 {
18367 distance = increase_distance (prev, next, distance);
18368 if (insn_defines_reg (regno1, regno2, prev))
18369 {
18370 if (recog_memoized (prev) < 0
18371 || get_attr_type (prev) != TYPE_LEA)
18372 {
18373 *found = true;
18374 return distance;
18375 }
18376 }
18377
18378 next = prev;
18379 }
18380 if (prev == BB_HEAD (bb))
18381 break;
18382
18383 prev = PREV_INSN (prev);
18384 }
18385
18386 return distance;
18387 }
18388
18389 /* Search backward for non-agu definition of register number REGNO1
18390 or register number REGNO2 in INSN's basic block until
18391 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18392 2. Reach neighbour BBs boundary, or
18393 3. Reach agu definition.
18394 Returns the distance between the non-agu definition point and INSN.
18395 If no definition point, returns -1. */
18396
18397 static int
18398 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18399 rtx_insn *insn)
18400 {
18401 basic_block bb = BLOCK_FOR_INSN (insn);
18402 int distance = 0;
18403 bool found = false;
18404
18405 if (insn != BB_HEAD (bb))
18406 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18407 distance, PREV_INSN (insn),
18408 &found);
18409
18410 if (!found && distance < LEA_SEARCH_THRESHOLD)
18411 {
18412 edge e;
18413 edge_iterator ei;
18414 bool simple_loop = false;
18415
18416 FOR_EACH_EDGE (e, ei, bb->preds)
18417 if (e->src == bb)
18418 {
18419 simple_loop = true;
18420 break;
18421 }
18422
18423 if (simple_loop)
18424 distance = distance_non_agu_define_in_bb (regno1, regno2,
18425 insn, distance,
18426 BB_END (bb), &found);
18427 else
18428 {
18429 int shortest_dist = -1;
18430 bool found_in_bb = false;
18431
18432 FOR_EACH_EDGE (e, ei, bb->preds)
18433 {
18434 int bb_dist
18435 = distance_non_agu_define_in_bb (regno1, regno2,
18436 insn, distance,
18437 BB_END (e->src),
18438 &found_in_bb);
18439 if (found_in_bb)
18440 {
18441 if (shortest_dist < 0)
18442 shortest_dist = bb_dist;
18443 else if (bb_dist > 0)
18444 shortest_dist = MIN (bb_dist, shortest_dist);
18445
18446 found = true;
18447 }
18448 }
18449
18450 distance = shortest_dist;
18451 }
18452 }
18453
18454 /* get_attr_type may modify recog data. We want to make sure
18455 that recog data is valid for instruction INSN, on which
18456 distance_non_agu_define is called. INSN is unchanged here. */
18457 extract_insn_cached (insn);
18458
18459 if (!found)
18460 return -1;
18461
18462 return distance >> 1;
18463 }
18464
18465 /* Return the distance in half-cycles between INSN and the next
18466    insn that uses register number REGNO in a memory address, added
18467    to DISTANCE.  Return -1 if REGNO is set.
18468
18469 Put true value into *FOUND if register usage was found and
18470 false otherwise.
18471 Put true value into *REDEFINED if register redefinition was
18472 found and false otherwise. */
18473
18474 static int
18475 distance_agu_use_in_bb (unsigned int regno,
18476 rtx_insn *insn, int distance, rtx_insn *start,
18477 bool *found, bool *redefined)
18478 {
18479 basic_block bb = NULL;
18480 rtx_insn *next = start;
18481 rtx_insn *prev = NULL;
18482
18483 *found = false;
18484 *redefined = false;
18485
18486 if (start != NULL_RTX)
18487 {
18488 bb = BLOCK_FOR_INSN (start);
18489 if (start != BB_HEAD (bb))
18490 /* If insn and start belong to the same bb, set prev to insn,
18491 so the call to increase_distance will increase the distance
18492 between insns by 1. */
18493 prev = insn;
18494 }
18495
18496 while (next
18497 && next != insn
18498 && distance < LEA_SEARCH_THRESHOLD)
18499 {
18500 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18501 {
18502 distance = increase_distance(prev, next, distance);
18503 if (insn_uses_reg_mem (regno, next))
18504 {
18505 /* Return DISTANCE if OP0 is used in memory
18506 address in NEXT. */
18507 *found = true;
18508 return distance;
18509 }
18510
18511 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18512 {
18513 /* Return -1 if OP0 is set in NEXT. */
18514 *redefined = true;
18515 return -1;
18516 }
18517
18518 prev = next;
18519 }
18520
18521 if (next == BB_END (bb))
18522 break;
18523
18524 next = NEXT_INSN (next);
18525 }
18526
18527 return distance;
18528 }
18529
18530 /* Return the distance between INSN and the next insn that uses
18531    register number REGNO0 in a memory address.  Return -1 if no such
18532    use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18533
18534 static int
18535 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18536 {
18537 basic_block bb = BLOCK_FOR_INSN (insn);
18538 int distance = 0;
18539 bool found = false;
18540 bool redefined = false;
18541
18542 if (insn != BB_END (bb))
18543 distance = distance_agu_use_in_bb (regno0, insn, distance,
18544 NEXT_INSN (insn),
18545 &found, &redefined);
18546
18547 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18548 {
18549 edge e;
18550 edge_iterator ei;
18551 bool simple_loop = false;
18552
18553 FOR_EACH_EDGE (e, ei, bb->succs)
18554 if (e->dest == bb)
18555 {
18556 simple_loop = true;
18557 break;
18558 }
18559
18560 if (simple_loop)
18561 distance = distance_agu_use_in_bb (regno0, insn,
18562 distance, BB_HEAD (bb),
18563 &found, &redefined);
18564 else
18565 {
18566 int shortest_dist = -1;
18567 bool found_in_bb = false;
18568 bool redefined_in_bb = false;
18569
18570 FOR_EACH_EDGE (e, ei, bb->succs)
18571 {
18572 int bb_dist
18573 = distance_agu_use_in_bb (regno0, insn,
18574 distance, BB_HEAD (e->dest),
18575 &found_in_bb, &redefined_in_bb);
18576 if (found_in_bb)
18577 {
18578 if (shortest_dist < 0)
18579 shortest_dist = bb_dist;
18580 else if (bb_dist > 0)
18581 shortest_dist = MIN (bb_dist, shortest_dist);
18582
18583 found = true;
18584 }
18585 }
18586
18587 distance = shortest_dist;
18588 }
18589 }
18590
18591 if (!found || redefined)
18592 return -1;
18593
18594 return distance >> 1;
18595 }
18596
18597 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18598    there is a dilemma of choosing LEA or ADD.
18599    Negative value: ADD is preferred over LEA.
18600    Zero: Neutral.
18601    Positive value: LEA is preferred over ADD. */
18602 #define IX86_LEA_PRIORITY 0
18603
18604 /* Return true if using the lea INSN has a performance advantage over a
18605    sequence of instructions.  The instruction sequence has SPLIT_COST
18606    cycles higher latency than the lea. */
18607
18608 static bool
18609 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18610 unsigned int regno2, int split_cost, bool has_scale)
18611 {
18612 int dist_define, dist_use;
18613
18614   /* For Silvermont, using a 2-source or 3-source LEA for a
18615      non-destructive destination, or in order to use the SCALE,
18616      justifies the use of LEA. */
18617 if (TARGET_SILVERMONT || TARGET_INTEL)
18618 {
18619 if (has_scale)
18620 return true;
18621 if (split_cost < 1)
18622 return false;
18623 if (regno0 == regno1 || regno0 == regno2)
18624 return false;
18625 return true;
18626 }
18627
18628 dist_define = distance_non_agu_define (regno1, regno2, insn);
18629 dist_use = distance_agu_use (regno0, insn);
18630
18631 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18632 {
18633       /* If there is no non-AGU operand definition, no AGU
18634 	 operand usage and split cost is 0 then both lea
18635 	 and non-lea variants have the same priority.  Currently
18636 	 we prefer lea for 64-bit code and non-lea on 32-bit
18637 	 code. */
18638 if (dist_use < 0 && split_cost == 0)
18639 return TARGET_64BIT || IX86_LEA_PRIORITY;
18640 else
18641 return true;
18642 }
18643
18644   /* The longer the definition distance, the more preferable lea is.
18645      Here we adjust it to take into account the splitting cost and
18646      lea priority. */
18647 dist_define += split_cost + IX86_LEA_PRIORITY;
18648
18649   /* If there is no use in a memory address then we just check
18650      that the split cost exceeds the AGU stall. */
18651 if (dist_use < 0)
18652 return dist_define > LEA_MAX_STALL;
18653
18654 /* If this insn has both backward non-agu dependence and forward
18655 agu dependence, the one with short distance takes effect. */
18656 return dist_define >= dist_use;
18657 }
18658
18659 /* Return true if it is legal to clobber flags by INSN and
18660 false otherwise. */
18661
18662 static bool
18663 ix86_ok_to_clobber_flags (rtx_insn *insn)
18664 {
18665 basic_block bb = BLOCK_FOR_INSN (insn);
18666 df_ref use;
18667 bitmap live;
18668
18669 while (insn)
18670 {
18671 if (NONDEBUG_INSN_P (insn))
18672 {
18673 FOR_EACH_INSN_USE (use, insn)
18674 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18675 return false;
18676
18677 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18678 return true;
18679 }
18680
18681 if (insn == BB_END (bb))
18682 break;
18683
18684 insn = NEXT_INSN (insn);
18685 }
18686
18687 live = df_get_live_out(bb);
18688 return !REGNO_REG_SET_P (live, FLAGS_REG);
18689 }
18690
18691 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18692 move and add to avoid AGU stalls. */
18693
18694 bool
18695 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18696 {
18697 unsigned int regno0, regno1, regno2;
18698
18699 /* Check if we need to optimize. */
18700 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18701 return false;
18702
18703 /* Check it is correct to split here. */
18704 if (!ix86_ok_to_clobber_flags(insn))
18705 return false;
18706
18707 regno0 = true_regnum (operands[0]);
18708 regno1 = true_regnum (operands[1]);
18709 regno2 = true_regnum (operands[2]);
18710
18711   /* We only need to split adds with a non-destructive
18712      destination operand. */
18713 if (regno0 == regno1 || regno0 == regno2)
18714 return false;
18715 else
18716 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18717 }
18718
18719 /* Return true if we should emit lea instruction instead of mov
18720 instruction. */
18721
18722 bool
18723 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18724 {
18725 unsigned int regno0, regno1;
18726
18727 /* Check if we need to optimize. */
18728 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18729 return false;
18730
18731 /* Use lea for reg to reg moves only. */
18732 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18733 return false;
18734
18735 regno0 = true_regnum (operands[0]);
18736 regno1 = true_regnum (operands[1]);
18737
18738 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18739 }
18740
18741 /* Return true if we need to split lea into a sequence of
18742 instructions to avoid AGU stalls. */
18743
18744 bool
18745 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18746 {
18747 unsigned int regno0, regno1, regno2;
18748 int split_cost;
18749 struct ix86_address parts;
18750 int ok;
18751
18752 /* Check we need to optimize. */
18753 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18754 return false;
18755
18756 /* The "at least two components" test below might not catch simple
18757 move or zero extension insns if parts.base is non-NULL and parts.disp
18758 is const0_rtx as the only components in the address, e.g. if the
18759 register is %rbp or %r13. As this test is much cheaper and moves or
18760 zero extensions are the common case, do this check first. */
18761 if (REG_P (operands[1])
18762 || (SImode_address_operand (operands[1], VOIDmode)
18763 && REG_P (XEXP (operands[1], 0))))
18764 return false;
18765
18766 /* Check if it is OK to split here. */
18767 if (!ix86_ok_to_clobber_flags (insn))
18768 return false;
18769
18770 ok = ix86_decompose_address (operands[1], &parts);
18771 gcc_assert (ok);
18772
18773 /* There should be at least two components in the address. */
18774 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18775 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18776 return false;
18777
18778   /* We should not split into an add sequence if a non-legitimate
18779      PIC operand is used as the displacement. */
18780 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18781 return false;
18782
18783   regno0 = true_regnum (operands[0]);
18784 regno1 = INVALID_REGNUM;
18785 regno2 = INVALID_REGNUM;
18786
18787 if (parts.base)
18788 regno1 = true_regnum (parts.base);
18789 if (parts.index)
18790 regno2 = true_regnum (parts.index);
18791
18792 split_cost = 0;
18793
18794   /* Compute how many cycles we will add to the execution time
18795      if we split the lea into a sequence of instructions. */
18796 if (parts.base || parts.index)
18797 {
18798       /* Have to use a mov instruction if the non-destructive
18799 	 destination form is used. */
18800 if (regno1 != regno0 && regno2 != regno0)
18801 split_cost += 1;
18802
18803 /* Have to add index to base if both exist. */
18804 if (parts.base && parts.index)
18805 split_cost += 1;
18806
18807 /* Have to use shift and adds if scale is 2 or greater. */
18808 if (parts.scale > 1)
18809 {
18810 if (regno0 != regno1)
18811 split_cost += 1;
18812 else if (regno2 == regno0)
18813 split_cost += 4;
18814 else
18815 split_cost += parts.scale;
18816 }
18817
18818 /* Have to use add instruction with immediate if
18819 disp is non zero. */
18820 if (parts.disp && parts.disp != const0_rtx)
18821 split_cost += 1;
18822
18823 /* Subtract the price of lea. */
18824 split_cost -= 1;
18825 }
18826
18827 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18828 parts.scale > 1);
18829 }
18830
18831 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18832 matches destination. RTX includes clobber of FLAGS_REG. */
18833
18834 static void
18835 ix86_emit_binop (enum rtx_code code, machine_mode mode,
18836 rtx dst, rtx src)
18837 {
18838 rtx op, clob;
18839
18840 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
18841 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18842
18843 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18844 }
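
/* For example (a sketch), ix86_emit_binop (PLUS, SImode, dst, src)
   emits

     (parallel [(set dst (plus:SI dst src))
                (clobber (reg:CC FLAGS_REG))])

   which is the form the ordinary add patterns expect.  */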
18845
18846 /* Return true if the definition of REGNO1 is nearer to INSN than that of REGNO2. */
18847
18848 static bool
18849 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18850 {
18851 rtx_insn *prev = insn;
18852 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18853
18854 if (insn == start)
18855 return false;
18856 while (prev && prev != start)
18857 {
18858 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18859 {
18860 prev = PREV_INSN (prev);
18861 continue;
18862 }
18863 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18864 return true;
18865 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18866 return false;
18867 prev = PREV_INSN (prev);
18868 }
18869
18870 /* None of the regs is defined in the bb. */
18871 return false;
18872 }
18873
18874 /* Split lea instructions into a sequence of instructions
18875 which are executed on ALU to avoid AGU stalls.
18876 It is assumed that it is allowed to clobber flags register
18877 at lea position. */
18878
18879 void
18880 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
18881 {
18882 unsigned int regno0, regno1, regno2;
18883 struct ix86_address parts;
18884 rtx target, tmp;
18885 int ok, adds;
18886
18887 ok = ix86_decompose_address (operands[1], &parts);
18888 gcc_assert (ok);
18889
18890 target = gen_lowpart (mode, operands[0]);
18891
18892 regno0 = true_regnum (target);
18893 regno1 = INVALID_REGNUM;
18894 regno2 = INVALID_REGNUM;
18895
18896 if (parts.base)
18897 {
18898 parts.base = gen_lowpart (mode, parts.base);
18899 regno1 = true_regnum (parts.base);
18900 }
18901
18902 if (parts.index)
18903 {
18904 parts.index = gen_lowpart (mode, parts.index);
18905 regno2 = true_regnum (parts.index);
18906 }
18907
18908 if (parts.disp)
18909 parts.disp = gen_lowpart (mode, parts.disp);
18910
18911 if (parts.scale > 1)
18912 {
18913 /* Case r1 = r1 + ... */
18914 if (regno1 == regno0)
18915 {
18916 	  /* A case of the form r1 = r1 + C * r2 would require a
18917 	     multiplication, which is very expensive.  Assume the
18918 	     cost model is wrong if we end up with such a case
18919 	     here. */
18920 gcc_assert (regno2 != regno0);
18921
18922 for (adds = parts.scale; adds > 0; adds--)
18923 ix86_emit_binop (PLUS, mode, target, parts.index);
18924 }
18925 else
18926 {
18927 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18928 if (regno0 != regno2)
18929 emit_insn (gen_rtx_SET (target, parts.index));
18930
18931 /* Use shift for scaling. */
18932 ix86_emit_binop (ASHIFT, mode, target,
18933 GEN_INT (exact_log2 (parts.scale)));
18934
18935 if (parts.base)
18936 ix86_emit_binop (PLUS, mode, target, parts.base);
18937
18938 if (parts.disp && parts.disp != const0_rtx)
18939 ix86_emit_binop (PLUS, mode, target, parts.disp);
18940 }
18941 }
18942 else if (!parts.base && !parts.index)
18943 {
18944 gcc_assert(parts.disp);
18945 emit_insn (gen_rtx_SET (target, parts.disp));
18946 }
18947 else
18948 {
18949 if (!parts.base)
18950 {
18951 if (regno0 != regno2)
18952 emit_insn (gen_rtx_SET (target, parts.index));
18953 }
18954 else if (!parts.index)
18955 {
18956 if (regno0 != regno1)
18957 emit_insn (gen_rtx_SET (target, parts.base));
18958 }
18959 else
18960 {
18961 if (regno0 == regno1)
18962 tmp = parts.index;
18963 else if (regno0 == regno2)
18964 tmp = parts.base;
18965 else
18966 {
18967 rtx tmp1;
18968
18969 /* Find better operand for SET instruction, depending
18970 on which definition is farther from the insn. */
18971 if (find_nearest_reg_def (insn, regno1, regno2))
18972 tmp = parts.index, tmp1 = parts.base;
18973 else
18974 tmp = parts.base, tmp1 = parts.index;
18975
18976 emit_insn (gen_rtx_SET (target, tmp));
18977
18978 if (parts.disp && parts.disp != const0_rtx)
18979 ix86_emit_binop (PLUS, mode, target, parts.disp);
18980
18981 ix86_emit_binop (PLUS, mode, target, tmp1);
18982 return;
18983 }
18984
18985 ix86_emit_binop (PLUS, mode, target, tmp);
18986 }
18987
18988 if (parts.disp && parts.disp != const0_rtx)
18989 ix86_emit_binop (PLUS, mode, target, parts.disp);
18990 }
18991 }
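
/* Illustrative split (register choices are only an example): a lea
   such as

     lea 0x4(%rbx,%rcx,2), %rax

   with all three registers distinct is replaced by

     mov %rcx, %rax
     shl $1, %rax
     add %rbx, %rax
     add $0x4, %rax

   so that every step executes on the ALU instead of the AGU.  */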
18992
18993 /* Return true if it is ok to optimize an ADD operation to a LEA
18994    operation to avoid flag register consumption.  For most processors,
18995    ADD is faster than LEA.  For processors like BONNELL, if the
18996    destination register of the LEA holds an actual address which will be
18997    used soon, LEA is better; otherwise ADD is better. */
18998
18999 bool
19000 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
19001 {
19002 unsigned int regno0 = true_regnum (operands[0]);
19003 unsigned int regno1 = true_regnum (operands[1]);
19004 unsigned int regno2 = true_regnum (operands[2]);
19005
19006 /* If a = b + c, (a!=b && a!=c), must use lea form. */
19007 if (regno0 != regno1 && regno0 != regno2)
19008 return true;
19009
19010 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
19011 return false;
19012
19013 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
19014 }
19015
19016 /* Return true if destination reg of SET_BODY is shift count of
19017 USE_BODY. */
19018
19019 static bool
19020 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
19021 {
19022 rtx set_dest;
19023 rtx shift_rtx;
19024 int i;
19025
19026 /* Retrieve destination of SET_BODY. */
19027 switch (GET_CODE (set_body))
19028 {
19029 case SET:
19030 set_dest = SET_DEST (set_body);
19031 if (!set_dest || !REG_P (set_dest))
19032 return false;
19033 break;
19034 case PARALLEL:
19035 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
19036 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
19037 use_body))
19038 return true;
19039 default:
19040 return false;
19041 break;
19042 }
19043
19044 /* Retrieve shift count of USE_BODY. */
19045 switch (GET_CODE (use_body))
19046 {
19047 case SET:
19048 shift_rtx = XEXP (use_body, 1);
19049 break;
19050 case PARALLEL:
19051 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
19052 if (ix86_dep_by_shift_count_body (set_body,
19053 XVECEXP (use_body, 0, i)))
19054 return true;
19055 default:
19056 return false;
19057 break;
19058 }
19059
19060 if (shift_rtx
19061 && (GET_CODE (shift_rtx) == ASHIFT
19062 || GET_CODE (shift_rtx) == LSHIFTRT
19063 || GET_CODE (shift_rtx) == ASHIFTRT
19064 || GET_CODE (shift_rtx) == ROTATE
19065 || GET_CODE (shift_rtx) == ROTATERT))
19066 {
19067 rtx shift_count = XEXP (shift_rtx, 1);
19068
19069 /* Return true if shift count is dest of SET_BODY. */
19070 if (REG_P (shift_count))
19071 {
19072 /* Add check since it can be invoked before register
19073 allocation in pre-reload schedule. */
19074 if (reload_completed
19075 && true_regnum (set_dest) == true_regnum (shift_count))
19076 return true;
19077 else if (REGNO(set_dest) == REGNO(shift_count))
19078 return true;
19079 }
19080 }
19081
19082 return false;
19083 }
19084
19085 /* Return true if destination reg of SET_INSN is shift count of
19086 USE_INSN. */
19087
19088 bool
19089 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19090 {
19091 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19092 PATTERN (use_insn));
19093 }
19094
19095 /* Return TRUE or FALSE depending on whether the unary operator meets the
19096 appropriate constraints. */
19097
19098 bool
19099 ix86_unary_operator_ok (enum rtx_code,
19100 machine_mode,
19101 rtx operands[2])
19102 {
19103 /* If one of operands is memory, source and destination must match. */
19104 if ((MEM_P (operands[0])
19105 || MEM_P (operands[1]))
19106 && ! rtx_equal_p (operands[0], operands[1]))
19107 return false;
19108 return true;
19109 }
19110
19111 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19112 are ok, keeping in mind the possible movddup alternative. */
19113
19114 bool
19115 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
19116 {
19117 if (MEM_P (operands[0]))
19118 return rtx_equal_p (operands[0], operands[1 + high]);
19119 if (MEM_P (operands[1]) && MEM_P (operands[2]))
19120 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19121 return true;
19122 }
19123
19124 /* Post-reload splitter for converting an SF or DFmode value in an
19125 SSE register into an unsigned SImode. */
19126
19127 void
19128 ix86_split_convert_uns_si_sse (rtx operands[])
19129 {
19130 machine_mode vecmode;
19131 rtx value, large, zero_or_two31, input, two31, x;
19132
19133 large = operands[1];
19134 zero_or_two31 = operands[2];
19135 input = operands[3];
19136 two31 = operands[4];
19137 vecmode = GET_MODE (large);
19138 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19139
19140 /* Load up the value into the low element. We must ensure that the other
19141 elements are valid floats -- zero is the easiest such value. */
19142 if (MEM_P (input))
19143 {
19144 if (vecmode == V4SFmode)
19145 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19146 else
19147 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
19148 }
19149 else
19150 {
19151 input = gen_rtx_REG (vecmode, REGNO (input));
19152 emit_move_insn (value, CONST0_RTX (vecmode));
19153 if (vecmode == V4SFmode)
19154 emit_insn (gen_sse_movss (value, value, input));
19155 else
19156 emit_insn (gen_sse2_movsd (value, value, input));
19157 }
19158
19159 emit_move_insn (large, two31);
19160 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
19161
19162 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19163 emit_insn (gen_rtx_SET (large, x));
19164
19165 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19166 emit_insn (gen_rtx_SET (zero_or_two31, x));
19167
19168 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19169 emit_insn (gen_rtx_SET (value, x));
19170
19171 large = gen_rtx_REG (V4SImode, REGNO (large));
19172 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19173
19174 x = gen_rtx_REG (V4SImode, REGNO (value));
19175 if (vecmode == V4SFmode)
19176 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19177 else
19178 emit_insn (gen_sse2_cvttpd2dq (x, value));
19179 value = x;
19180
19181 emit_insn (gen_xorv4si3 (value, value, large));
19182 }
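
/* Net effect per value (a sketch of the math, not the literal insn
   sequence):

     if (x < 0x1p31)
       result = (int) x;
     else
       result = (int) (x - 0x1p31) ^ 0x80000000;

   computed branchlessly from the mask produced by the LE comparison.  */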
19183
19184 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19185 Expects the 64-bit DImode to be supplied in a pair of integral
19186 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19187 -mfpmath=sse, !optimize_size only. */
19188
19189 void
19190 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19191 {
19192 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19193 rtx int_xmm, fp_xmm;
19194 rtx biases, exponents;
19195 rtx x;
19196
19197 int_xmm = gen_reg_rtx (V4SImode);
19198 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19199 emit_insn (gen_movdi_to_sse (int_xmm, input));
19200 else if (TARGET_SSE_SPLIT_REGS)
19201 {
19202 emit_clobber (int_xmm);
19203 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19204 }
19205 else
19206 {
19207 x = gen_reg_rtx (V2DImode);
19208 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19209 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19210 }
19211
19212 x = gen_rtx_CONST_VECTOR (V4SImode,
19213 gen_rtvec (4, GEN_INT (0x43300000UL),
19214 GEN_INT (0x45300000UL),
19215 const0_rtx, const0_rtx));
19216 exponents = validize_mem (force_const_mem (V4SImode, x));
19217
19218 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19219 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19220
19221 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19222 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19223 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19224 (0x1.0p84 + double(fp_value_hi_xmm)).
19225 Note these exponents differ by 32. */
19226
19227 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19228
19229 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19230 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19231 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19232 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19233 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19234 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19235 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19236 biases = validize_mem (force_const_mem (V2DFmode, biases));
19237 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19238
19239 /* Add the upper and lower DFmode values together. */
19240 if (TARGET_SSE3)
19241 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19242 else
19243 {
19244 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19245 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19246 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19247 }
19248
19249 ix86_expand_vector_extract (false, target, fp_xmm, 0);
19250 }
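
/* Net effect (a sketch): with LO and HI the two 32-bit halves of
   INPUT, the interleave creates the doubles 0x1.0p52 + (double) LO
   and 0x1.0p84 + (double) HI * 0x1.0p32; subtracting the biases
   recovers LO and HI * 2^32 exactly, and the final addition rounds
   their sum, which is the wanted (double) INPUT.  */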
19251
19252 /* Not used, but eases macroization of patterns. */
19253 void
19254 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19255 {
19256 gcc_unreachable ();
19257 }
19258
19259 /* Convert an unsigned SImode value into a DFmode. Only currently used
19260 for SSE, but applicable anywhere. */
19261
19262 void
19263 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19264 {
19265 REAL_VALUE_TYPE TWO31r;
19266 rtx x, fp;
19267
19268 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19269 NULL, 1, OPTAB_DIRECT);
19270
19271 fp = gen_reg_rtx (DFmode);
19272 emit_insn (gen_floatsidf2 (fp, x));
19273
19274 real_ldexp (&TWO31r, &dconst1, 31);
19275 x = const_double_from_real_value (TWO31r, DFmode);
19276
19277 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19278 if (x != target)
19279 emit_move_insn (target, x);
19280 }
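
/* The same trick written as plain C (a sketch, relying on the
   wrap-around of the unsigned addition and x86 conversion rules):

     double d = (double) (int) (u + 0x80000000u) + 2147483648.0;

   which equals (double) u for every 32-bit unsigned U.  */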
19281
19282 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19283 32-bit mode; otherwise we have a direct convert instruction. */
19284
19285 void
19286 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19287 {
19288 REAL_VALUE_TYPE TWO32r;
19289 rtx fp_lo, fp_hi, x;
19290
19291 fp_lo = gen_reg_rtx (DFmode);
19292 fp_hi = gen_reg_rtx (DFmode);
19293
19294 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19295
19296 real_ldexp (&TWO32r, &dconst1, 32);
19297 x = const_double_from_real_value (TWO32r, DFmode);
19298 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19299
19300 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19301
19302 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19303 0, OPTAB_DIRECT);
19304 if (x != target)
19305 emit_move_insn (target, x);
19306 }
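
/* In other words (a sketch): (double) S is computed as
   (double) (int) (S >> 32) * 0x1.0p32 + (double) (unsigned) S,
   where both partial conversions and the multiply are exact and only
   the final addition rounds.  */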
19307
19308 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19309 For x86_32, -mfpmath=sse, !optimize_size only. */
19310 void
19311 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19312 {
19313 REAL_VALUE_TYPE ONE16r;
19314 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19315
19316 real_ldexp (&ONE16r, &dconst1, 16);
19317 x = const_double_from_real_value (ONE16r, SFmode);
19318 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
19319 NULL, 0, OPTAB_DIRECT);
19320 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
19321 NULL, 0, OPTAB_DIRECT);
19322 fp_hi = gen_reg_rtx (SFmode);
19323 fp_lo = gen_reg_rtx (SFmode);
19324 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19325 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19326 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19327 0, OPTAB_DIRECT);
19328 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19329 0, OPTAB_DIRECT);
19330 if (!rtx_equal_p (target, fp_hi))
19331 emit_move_insn (target, fp_hi);
19332 }
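
/* As plain C (a sketch): with HI = u >> 16 and LO = u & 0xffff,
   (float) u is computed as (float) HI * 0x1.0p16f + (float) LO;
   both halves fit in a signed int, so the plain cvtsi2ss path can be
   used, and the single rounding happens in the final addition.  */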
19333
19334 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19335 a vector of unsigned ints VAL to vector of floats TARGET. */
19336
19337 void
19338 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19339 {
19340 rtx tmp[8];
19341 REAL_VALUE_TYPE TWO16r;
19342 machine_mode intmode = GET_MODE (val);
19343 machine_mode fltmode = GET_MODE (target);
19344 rtx (*cvt) (rtx, rtx);
19345
19346 if (intmode == V4SImode)
19347 cvt = gen_floatv4siv4sf2;
19348 else
19349 cvt = gen_floatv8siv8sf2;
19350 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19351 tmp[0] = force_reg (intmode, tmp[0]);
19352 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19353 OPTAB_DIRECT);
19354 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19355 NULL_RTX, 1, OPTAB_DIRECT);
19356 tmp[3] = gen_reg_rtx (fltmode);
19357 emit_insn (cvt (tmp[3], tmp[1]));
19358 tmp[4] = gen_reg_rtx (fltmode);
19359 emit_insn (cvt (tmp[4], tmp[2]));
19360 real_ldexp (&TWO16r, &dconst1, 16);
19361 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19362 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19363 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19364 OPTAB_DIRECT);
19365 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19366 OPTAB_DIRECT);
19367 if (tmp[7] != target)
19368 emit_move_insn (target, tmp[7]);
19369 }
19370
19371 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19372 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19373 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19374 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
19375
19376 rtx
19377 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19378 {
19379 REAL_VALUE_TYPE TWO31r;
19380 rtx two31r, tmp[4];
19381 machine_mode mode = GET_MODE (val);
19382 machine_mode scalarmode = GET_MODE_INNER (mode);
19383 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19384 rtx (*cmp) (rtx, rtx, rtx, rtx);
19385 int i;
19386
19387 for (i = 0; i < 3; i++)
19388 tmp[i] = gen_reg_rtx (mode);
19389 real_ldexp (&TWO31r, &dconst1, 31);
19390 two31r = const_double_from_real_value (TWO31r, scalarmode);
19391 two31r = ix86_build_const_vector (mode, 1, two31r);
19392 two31r = force_reg (mode, two31r);
19393 switch (mode)
19394 {
19395 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19396 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19397 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19398 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19399 default: gcc_unreachable ();
19400 }
19401 tmp[3] = gen_rtx_LE (mode, two31r, val);
19402 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19403 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19404 0, OPTAB_DIRECT);
19405 if (intmode == V4SImode || TARGET_AVX2)
19406 *xorp = expand_simple_binop (intmode, ASHIFT,
19407 gen_lowpart (intmode, tmp[0]),
19408 GEN_INT (31), NULL_RTX, 0,
19409 OPTAB_DIRECT);
19410 else
19411 {
19412 rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
19413 two31 = ix86_build_const_vector (intmode, 1, two31);
19414 *xorp = expand_simple_binop (intmode, AND,
19415 gen_lowpart (intmode, tmp[0]),
19416 two31, NULL_RTX, 0,
19417 OPTAB_DIRECT);
19418 }
19419 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19420 0, OPTAB_DIRECT);
19421 }
19422
19423 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19424 then replicate the value for all elements of the vector
19425 register. */
19426
19427 rtx
19428 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19429 {
19430 int i, n_elt;
19431 rtvec v;
19432 machine_mode scalar_mode;
19433
19434 switch (mode)
19435 {
19436 case V64QImode:
19437 case V32QImode:
19438 case V16QImode:
19439 case V32HImode:
19440 case V16HImode:
19441 case V8HImode:
19442 case V16SImode:
19443 case V8SImode:
19444 case V4SImode:
19445 case V8DImode:
19446 case V4DImode:
19447 case V2DImode:
19448 gcc_assert (vect);
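/* FALLTHRU */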
19449 case V16SFmode:
19450 case V8SFmode:
19451 case V4SFmode:
19452 case V8DFmode:
19453 case V4DFmode:
19454 case V2DFmode:
19455 n_elt = GET_MODE_NUNITS (mode);
19456 v = rtvec_alloc (n_elt);
19457 scalar_mode = GET_MODE_INNER (mode);
19458
19459 RTVEC_ELT (v, 0) = value;
19460
19461 for (i = 1; i < n_elt; ++i)
19462 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19463
19464 return gen_rtx_CONST_VECTOR (mode, v);
19465
19466 default:
19467 gcc_unreachable ();
19468 }
19469 }
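
/* For example (illustrative), ix86_build_const_vector (V4SImode, true,
   GEN_INT (0xffff)) yields (const_vector:V4SI [0xffff 0xffff 0xffff 0xffff]),
   while passing false for VECT zeroes every element but the first.  */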
19470
19471 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19472 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19473 for an SSE register. If VECT is true, then replicate the mask for
19474 all elements of the vector register. If INVERT is true, then create
19475 a mask excluding the sign bit. */
19476
19477 rtx
19478 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19479 {
19480 machine_mode vec_mode, imode;
19481 wide_int w;
19482 rtx mask, v;
19483
19484 switch (mode)
19485 {
19486 case V16SImode:
19487 case V16SFmode:
19488 case V8SImode:
19489 case V4SImode:
19490 case V8SFmode:
19491 case V4SFmode:
19492 vec_mode = mode;
19493 mode = GET_MODE_INNER (mode);
19494 imode = SImode;
19495 break;
19496
19497 case V8DImode:
19498 case V4DImode:
19499 case V2DImode:
19500 case V8DFmode:
19501 case V4DFmode:
19502 case V2DFmode:
19503 vec_mode = mode;
19504 mode = GET_MODE_INNER (mode);
19505 imode = DImode;
19506 break;
19507
19508 case TImode:
19509 case TFmode:
19510 vec_mode = VOIDmode;
19511 imode = TImode;
19512 break;
19513
19514 default:
19515 gcc_unreachable ();
19516 }
19517
19518 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (mode) - 1,
19519 GET_MODE_BITSIZE (mode));
19520 if (invert)
19521 w = wi::bit_not (w);
19522
19523 /* Force this value into the low part of a fp vector constant. */
19524 mask = immed_wide_int_const (w, imode);
19525 mask = gen_lowpart (mode, mask);
19526
19527 if (vec_mode == VOIDmode)
19528 return force_reg (mode, mask);
19529
19530 v = ix86_build_const_vector (vec_mode, vect, mask);
19531 return force_reg (vec_mode, v);
19532 }
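
/* For example (illustrative), ix86_build_signbit_mask (V4SFmode, true, false)
   produces a register holding 0x80000000 (i.e. -0.0f) in every 32-bit lane,
   while INVERT produces 0x7fffffff in every lane instead.  */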
19533
19534 /* Generate code for floating point ABS or NEG. */
19535
19536 void
19537 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19538 rtx operands[])
19539 {
19540 rtx mask, set, dst, src;
19541 bool use_sse = false;
19542 bool vector_mode = VECTOR_MODE_P (mode);
19543 machine_mode vmode = mode;
19544
19545 if (vector_mode)
19546 use_sse = true;
19547 else if (mode == TFmode)
19548 use_sse = true;
19549 else if (TARGET_SSE_MATH)
19550 {
19551 use_sse = SSE_FLOAT_MODE_P (mode);
19552 if (mode == SFmode)
19553 vmode = V4SFmode;
19554 else if (mode == DFmode)
19555 vmode = V2DFmode;
19556 }
19557
19558 /* NEG and ABS performed with SSE use bitwise mask operations.
19559 Create the appropriate mask now. */
19560 if (use_sse)
19561 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19562 else
19563 mask = NULL_RTX;
19564
19565 dst = operands[0];
19566 src = operands[1];
19567
19568 set = gen_rtx_fmt_e (code, mode, src);
19569 set = gen_rtx_SET (dst, set);
19570
19571 if (mask)
19572 {
19573 rtx use, clob;
19574 rtvec par;
19575
19576 use = gen_rtx_USE (VOIDmode, mask);
19577 if (vector_mode)
19578 par = gen_rtvec (2, set, use);
19579 else
19580 {
19581 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19582 par = gen_rtvec (3, set, use, clob);
19583 }
19584 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19585 }
19586 else
19587 emit_insn (set);
19588 }
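
/* The masks built above implement the usual bit tricks (illustrative):

     neg (x) == x ^ signbit_mask      (flip the sign bit)
     abs (x) == x & ~signbit_mask     (clear the sign bit)

   which the splitters for these patterns eventually turn into xor/and
   operations on the SSE register.  */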
19589
19590 /* Expand a copysign operation. Special case operand 0 being a constant. */
19591
19592 void
19593 ix86_expand_copysign (rtx operands[])
19594 {
19595 machine_mode mode, vmode;
19596 rtx dest, op0, op1, mask, nmask;
19597
19598 dest = operands[0];
19599 op0 = operands[1];
19600 op1 = operands[2];
19601
19602 mode = GET_MODE (dest);
19603
19604 if (mode == SFmode)
19605 vmode = V4SFmode;
19606 else if (mode == DFmode)
19607 vmode = V2DFmode;
19608 else
19609 vmode = mode;
19610
19611 if (CONST_DOUBLE_P (op0))
19612 {
19613 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19614
19615 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19616 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19617
19618 if (mode == SFmode || mode == DFmode)
19619 {
19620 if (op0 == CONST0_RTX (mode))
19621 op0 = CONST0_RTX (vmode);
19622 else
19623 {
19624 rtx v = ix86_build_const_vector (vmode, false, op0);
19625
19626 op0 = force_reg (vmode, v);
19627 }
19628 }
19629 else if (op0 != CONST0_RTX (mode))
19630 op0 = force_reg (mode, op0);
19631
19632 mask = ix86_build_signbit_mask (vmode, 0, 0);
19633
19634 if (mode == SFmode)
19635 copysign_insn = gen_copysignsf3_const;
19636 else if (mode == DFmode)
19637 copysign_insn = gen_copysigndf3_const;
19638 else
19639 copysign_insn = gen_copysigntf3_const;
19640
19641 emit_insn (copysign_insn (dest, op0, op1, mask));
19642 }
19643 else
19644 {
19645 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19646
19647 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19648 mask = ix86_build_signbit_mask (vmode, 0, 0);
19649
19650 if (mode == SFmode)
19651 copysign_insn = gen_copysignsf3_var;
19652 else if (mode == DFmode)
19653 copysign_insn = gen_copysigndf3_var;
19654 else
19655 copysign_insn = gen_copysigntf3_var;
19656
19657 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19658 }
19659 }
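
/* The underlying identity (illustrative) is

     copysign (x, y) == (x & ~signbit_mask) | (y & signbit_mask)

   where, for a constant X, the first AND is folded at expand time and the
   final OR is dropped altogether when the magnitude of X is zero.  */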
19660
19661 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19662 be a constant, and so has already been expanded into a vector constant. */
19663
19664 void
19665 ix86_split_copysign_const (rtx operands[])
19666 {
19667 machine_mode mode, vmode;
19668 rtx dest, op0, mask, x;
19669
19670 dest = operands[0];
19671 op0 = operands[1];
19672 mask = operands[3];
19673
19674 mode = GET_MODE (dest);
19675 vmode = GET_MODE (mask);
19676
19677 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19678 x = gen_rtx_AND (vmode, dest, mask);
19679 emit_insn (gen_rtx_SET (dest, x));
19680
19681 if (op0 != CONST0_RTX (vmode))
19682 {
19683 x = gen_rtx_IOR (vmode, dest, op0);
19684 emit_insn (gen_rtx_SET (dest, x));
19685 }
19686 }
19687
19688 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19689 so we have to do two masks. */
19690
19691 void
19692 ix86_split_copysign_var (rtx operands[])
19693 {
19694 machine_mode mode, vmode;
19695 rtx dest, scratch, op0, op1, mask, nmask, x;
19696
19697 dest = operands[0];
19698 scratch = operands[1];
19699 op0 = operands[2];
19700 op1 = operands[3];
19701 nmask = operands[4];
19702 mask = operands[5];
19703
19704 mode = GET_MODE (dest);
19705 vmode = GET_MODE (mask);
19706
19707 if (rtx_equal_p (op0, op1))
19708 {
19709 /* Shouldn't happen often (it's useless, obviously), but when it does
19710 we'd generate incorrect code if we continue below. */
19711 emit_move_insn (dest, op0);
19712 return;
19713 }
19714
19715 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19716 {
19717 gcc_assert (REGNO (op1) == REGNO (scratch));
19718
19719 x = gen_rtx_AND (vmode, scratch, mask);
19720 emit_insn (gen_rtx_SET (scratch, x));
19721
19722 dest = mask;
19723 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19724 x = gen_rtx_NOT (vmode, dest);
19725 x = gen_rtx_AND (vmode, x, op0);
19726 emit_insn (gen_rtx_SET (dest, x));
19727 }
19728 else
19729 {
19730 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19731 {
19732 x = gen_rtx_AND (vmode, scratch, mask);
19733 }
19734 else /* alternative 2,4 */
19735 {
19736 gcc_assert (REGNO (mask) == REGNO (scratch));
19737 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19738 x = gen_rtx_AND (vmode, scratch, op1);
19739 }
19740 emit_insn (gen_rtx_SET (scratch, x));
19741
19742 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19743 {
19744 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19745 x = gen_rtx_AND (vmode, dest, nmask);
19746 }
19747 else /* alternative 3,4 */
19748 {
19749 gcc_assert (REGNO (nmask) == REGNO (dest));
19750 dest = nmask;
19751 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19752 x = gen_rtx_AND (vmode, dest, op0);
19753 }
19754 emit_insn (gen_rtx_SET (dest, x));
19755 }
19756
19757 x = gen_rtx_IOR (vmode, dest, scratch);
19758 emit_insn (gen_rtx_SET (dest, x));
19759 }
19760
19761 /* Return TRUE or FALSE depending on whether the first SET in INSN
19762 has a source and destination with matching CC modes, and whether that
19763 CC mode is at least as constrained as REQ_MODE. */
19764
19765 bool
19766 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19767 {
19768 rtx set;
19769 machine_mode set_mode;
19770
19771 set = PATTERN (insn);
19772 if (GET_CODE (set) == PARALLEL)
19773 set = XVECEXP (set, 0, 0);
19774 gcc_assert (GET_CODE (set) == SET);
19775 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19776
19777 set_mode = GET_MODE (SET_DEST (set));
19778 switch (set_mode)
19779 {
19780 case CCNOmode:
19781 if (req_mode != CCNOmode
19782 && (req_mode != CCmode
19783 || XEXP (SET_SRC (set), 1) != const0_rtx))
19784 return false;
19785 break;
19786 case CCmode:
19787 if (req_mode == CCGCmode)
19788 return false;
19789 /* FALLTHRU */
19790 case CCGCmode:
19791 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19792 return false;
19793 /* FALLTHRU */
19794 case CCGOCmode:
19795 if (req_mode == CCZmode)
19796 return false;
19797 /* FALLTHRU */
19798 case CCZmode:
19799 break;
19800
19801 case CCAmode:
19802 case CCCmode:
19803 case CCOmode:
19804 case CCPmode:
19805 case CCSmode:
19806 if (set_mode != req_mode)
19807 return false;
19808 break;
19809
19810 default:
19811 gcc_unreachable ();
19812 }
19813
19814 return GET_MODE (SET_SRC (set)) == set_mode;
19815 }
19816
19817 /* Generate insn patterns to do an integer compare of OPERANDS. */
19818
19819 static rtx
19820 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19821 {
19822 machine_mode cmpmode;
19823 rtx tmp, flags;
19824
19825 cmpmode = SELECT_CC_MODE (code, op0, op1);
19826 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19827
19828 /* This is very simple, but making the interface the same as in the
19829 FP case makes the rest of the code easier. */
19830 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19831 emit_insn (gen_rtx_SET (flags, tmp));
19832
19833 /* Return the test that should be put into the flags user, i.e.
19834 the bcc, scc, or cmov instruction. */
19835 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19836 }
19837
19838 /* Figure out whether to use ordered or unordered fp comparisons.
19839 Return the appropriate mode to use. */
19840
19841 machine_mode
19842 ix86_fp_compare_mode (enum rtx_code)
19843 {
19844 /* ??? In order to make all comparisons reversible, we do all comparisons
19845 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19846 all forms of trapping and nontrapping comparisons, we can make inequality
19847 comparisons trapping again, since that results in better code when using
19848 FCOM based compares. */
19849 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19850 }
19851
19852 machine_mode
19853 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19854 {
19855 machine_mode mode = GET_MODE (op0);
19856
19857 if (SCALAR_FLOAT_MODE_P (mode))
19858 {
19859 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19860 return ix86_fp_compare_mode (code);
19861 }
19862
19863 switch (code)
19864 {
19865 /* Only zero flag is needed. */
19866 case EQ: /* ZF=0 */
19867 case NE: /* ZF!=0 */
19868 return CCZmode;
19869 /* Codes needing carry flag. */
19870 case GEU: /* CF=0 */
19871 case LTU: /* CF=1 */
19872 /* Detect overflow checks. They need just the carry flag. */
19873 if (GET_CODE (op0) == PLUS
19874 && rtx_equal_p (op1, XEXP (op0, 0)))
19875 return CCCmode;
19876 else
19877 return CCmode;
19878 case GTU: /* CF=0 & ZF=0 */
19879 case LEU: /* CF=1 | ZF=1 */
19880 return CCmode;
19881 /* Codes possibly doable only with sign flag when
19882 comparing against zero. */
19883 case GE: /* SF=OF or SF=0 */
19884 case LT: /* SF<>OF or SF=1 */
19885 if (op1 == const0_rtx)
19886 return CCGOCmode;
19887 else
19888 /* For other cases the carry flag is not required. */
19889 return CCGCmode;
19890 /* Codes doable only with the sign flag when comparing
19891 against zero, but we lack a jump instruction for them,
19892 so we need to use relational tests against overflow,
19893 which thus needs to be zero. */
19894 case GT: /* ZF=0 & SF=OF */
19895 case LE: /* ZF=1 | SF<>OF */
19896 if (op1 == const0_rtx)
19897 return CCNOmode;
19898 else
19899 return CCGCmode;
19900 /* The strcmp pattern does (use flags), and combine may ask us for the
19901 proper mode. */
19902 case USE:
19903 return CCmode;
19904 default:
19905 gcc_unreachable ();
19906 }
19907 }
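
/* For instance (illustrative), an overflow check written as a + b < a
   arrives here as LTU with op0 = (plus a b) and op1 = a; only the carry
   flag matters for it, so CCCmode is chosen above.  */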
19908
19909 /* Return the fixed registers used for condition codes. */
19910
19911 static bool
19912 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19913 {
19914 *p1 = FLAGS_REG;
19915 *p2 = FPSR_REG;
19916 return true;
19917 }
19918
19919 /* If two condition code modes are compatible, return a condition code
19920 mode which is compatible with both. Otherwise, return
19921 VOIDmode. */
19922
19923 static machine_mode
19924 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
19925 {
19926 if (m1 == m2)
19927 return m1;
19928
19929 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19930 return VOIDmode;
19931
19932 if ((m1 == CCGCmode && m2 == CCGOCmode)
19933 || (m1 == CCGOCmode && m2 == CCGCmode))
19934 return CCGCmode;
19935
19936 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19937 return m2;
19938 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19939 return m1;
19940
19941 switch (m1)
19942 {
19943 default:
19944 gcc_unreachable ();
19945
19946 case CCmode:
19947 case CCGCmode:
19948 case CCGOCmode:
19949 case CCNOmode:
19950 case CCAmode:
19951 case CCCmode:
19952 case CCOmode:
19953 case CCPmode:
19954 case CCSmode:
19955 case CCZmode:
19956 switch (m2)
19957 {
19958 default:
19959 return VOIDmode;
19960
19961 case CCmode:
19962 case CCGCmode:
19963 case CCGOCmode:
19964 case CCNOmode:
19965 case CCAmode:
19966 case CCCmode:
19967 case CCOmode:
19968 case CCPmode:
19969 case CCSmode:
19970 case CCZmode:
19971 return CCmode;
19972 }
19973
19974 case CCFPmode:
19975 case CCFPUmode:
19976 /* These are only compatible with themselves, which we already
19977 checked above. */
19978 return VOIDmode;
19979 }
19980 }
19981
19982
19983 /* Return a comparison we can do and that it is equivalent to
19984 swap_condition (code) apart possibly from orderedness.
19985 But, never change orderedness if TARGET_IEEE_FP, returning
19986 UNKNOWN in that case if necessary. */
19987
19988 static enum rtx_code
19989 ix86_fp_swap_condition (enum rtx_code code)
19990 {
19991 switch (code)
19992 {
19993 case GT: /* GTU - CF=0 & ZF=0 */
19994 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19995 case GE: /* GEU - CF=0 */
19996 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19997 case UNLT: /* LTU - CF=1 */
19998 return TARGET_IEEE_FP ? UNKNOWN : GT;
19999 case UNLE: /* LEU - CF=1 | ZF=1 */
20000 return TARGET_IEEE_FP ? UNKNOWN : GE;
20001 default:
20002 return swap_condition (code);
20003 }
20004 }
20005
20006 /* Return the cost of comparison CODE using the best strategy for performance.
20007 All of the following functions use the number of instructions as the cost metric.
20008 In the future this should be tweaked to compute bytes for optimize_size and to
20009 take into account the performance of various instructions on various CPUs. */
20010
20011 static int
20012 ix86_fp_comparison_cost (enum rtx_code code)
20013 {
20014 int arith_cost;
20015
20016 /* The cost of code using bit-twiddling on %ah. */
20017 switch (code)
20018 {
20019 case UNLE:
20020 case UNLT:
20021 case LTGT:
20022 case GT:
20023 case GE:
20024 case UNORDERED:
20025 case ORDERED:
20026 case UNEQ:
20027 arith_cost = 4;
20028 break;
20029 case LT:
20030 case NE:
20031 case EQ:
20032 case UNGE:
20033 arith_cost = TARGET_IEEE_FP ? 5 : 4;
20034 break;
20035 case LE:
20036 case UNGT:
20037 arith_cost = TARGET_IEEE_FP ? 6 : 4;
20038 break;
20039 default:
20040 gcc_unreachable ();
20041 }
20042
20043 switch (ix86_fp_comparison_strategy (code))
20044 {
20045 case IX86_FPCMP_COMI:
20046 return arith_cost > 4 ? 3 : 2;
20047 case IX86_FPCMP_SAHF:
20048 return arith_cost > 4 ? 4 : 3;
20049 default:
20050 return arith_cost;
20051 }
20052 }
20053
20054 /* Return the strategy to use for a floating-point comparison.  We assume that
20055 fcomi is always preferable where available, since that is also true when looking
20056 at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
20057
20058 enum ix86_fpcmp_strategy
20059 ix86_fp_comparison_strategy (enum rtx_code)
20060 {
20061 /* Do fcomi/sahf based test when profitable. */
20062
20063 if (TARGET_CMOVE)
20064 return IX86_FPCMP_COMI;
20065
20066 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20067 return IX86_FPCMP_SAHF;
20068
20069 return IX86_FPCMP_ARITH;
20070 }
20071
20072 /* Swap, force into registers, or otherwise massage the two operands
20073 to a fp comparison. The operands are updated in place; the new
20074 comparison code is returned. */
20075
20076 static enum rtx_code
20077 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20078 {
20079 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20080 rtx op0 = *pop0, op1 = *pop1;
20081 machine_mode op_mode = GET_MODE (op0);
20082 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20083
20084 /* All of the unordered compare instructions only work on registers.
20085 The same is true of the fcomi compare instructions. The XFmode
20086 compare instructions require registers except when comparing
20087 against zero or when converting operand 1 from fixed point to
20088 floating point. */
20089
20090 if (!is_sse
20091 && (fpcmp_mode == CCFPUmode
20092 || (op_mode == XFmode
20093 && ! (standard_80387_constant_p (op0) == 1
20094 || standard_80387_constant_p (op1) == 1)
20095 && GET_CODE (op1) != FLOAT)
20096 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
20097 {
20098 op0 = force_reg (op_mode, op0);
20099 op1 = force_reg (op_mode, op1);
20100 }
20101 else
20102 {
20103 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
20104 things around if they appear profitable, otherwise force op0
20105 into a register. */
20106
20107 if (standard_80387_constant_p (op0) == 0
20108 || (MEM_P (op0)
20109 && ! (standard_80387_constant_p (op1) == 0
20110 || MEM_P (op1))))
20111 {
20112 enum rtx_code new_code = ix86_fp_swap_condition (code);
20113 if (new_code != UNKNOWN)
20114 {
20115 std::swap (op0, op1);
20116 code = new_code;
20117 }
20118 }
20119
20120 if (!REG_P (op0))
20121 op0 = force_reg (op_mode, op0);
20122
20123 if (CONSTANT_P (op1))
20124 {
20125 int tmp = standard_80387_constant_p (op1);
20126 if (tmp == 0)
20127 op1 = validize_mem (force_const_mem (op_mode, op1));
20128 else if (tmp == 1)
20129 {
20130 if (TARGET_CMOVE)
20131 op1 = force_reg (op_mode, op1);
20132 }
20133 else
20134 op1 = force_reg (op_mode, op1);
20135 }
20136 }
20137
20138 /* Try to rearrange the comparison to make it cheaper. */
20139 if (ix86_fp_comparison_cost (code)
20140 > ix86_fp_comparison_cost (swap_condition (code))
20141 && (REG_P (op1) || can_create_pseudo_p ()))
20142 {
20143 std::swap (op0, op1);
20144 code = swap_condition (code);
20145 if (!REG_P (op0))
20146 op0 = force_reg (op_mode, op0);
20147 }
20148
20149 *pop0 = op0;
20150 *pop1 = op1;
20151 return code;
20152 }
20153
20154 /* Convert the comparison codes we use to represent an FP comparison to the
20155 integer code that will result in a proper branch. Return UNKNOWN if no such
20156 code is available. */
20157
20158 enum rtx_code
20159 ix86_fp_compare_code_to_integer (enum rtx_code code)
20160 {
20161 switch (code)
20162 {
20163 case GT:
20164 return GTU;
20165 case GE:
20166 return GEU;
20167 case ORDERED:
20168 case UNORDERED:
20169 return code;
20170 break;
20171 case UNEQ:
20172 return EQ;
20173 break;
20174 case UNLT:
20175 return LTU;
20176 break;
20177 case UNLE:
20178 return LEU;
20179 break;
20180 case LTGT:
20181 return NE;
20182 break;
20183 default:
20184 return UNKNOWN;
20185 }
20186 }
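
/* The mapping above relies on fcomi/comiss/ucomiss setting the flags the way
   an unsigned integer comparison would: ZF for equality, CF for less-than,
   and ZF, PF and CF all set for an unordered result; GT therefore becomes
   GTU (CF=0 && ZF=0), UNLT becomes LTU (CF=1), and so on.  */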
20187
20188 /* Generate insn patterns to do a floating point compare of OPERANDS. */
20189
20190 static rtx
20191 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20192 {
20193 machine_mode fpcmp_mode, intcmp_mode;
20194 rtx tmp, tmp2;
20195
20196 fpcmp_mode = ix86_fp_compare_mode (code);
20197 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20198
20199 /* Do fcomi/sahf based test when profitable. */
20200 switch (ix86_fp_comparison_strategy (code))
20201 {
20202 case IX86_FPCMP_COMI:
20203 intcmp_mode = fpcmp_mode;
20204 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20205 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
20206 emit_insn (tmp);
20207 break;
20208
20209 case IX86_FPCMP_SAHF:
20210 intcmp_mode = fpcmp_mode;
20211 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20212 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
20213
20214 if (!scratch)
20215 scratch = gen_reg_rtx (HImode);
20216 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20217 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20218 break;
20219
20220 case IX86_FPCMP_ARITH:
20221 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20222 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20223 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20224 if (!scratch)
20225 scratch = gen_reg_rtx (HImode);
20226 emit_insn (gen_rtx_SET (scratch, tmp2));
20227
20228 /* In the unordered case, we have to check C2 for NaN's, which
20229 doesn't happen to work out to anything nice combination-wise.
20230 So do some bit twiddling on the value we've got in AH to come
20231 up with an appropriate set of condition codes. */
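/* As a reminder of where the magic constants below come from: after fnstsw,
   AH holds the x87 condition bits with C0 at bit 0 (0x01), C2 at bit 2
   (0x04) and C3 at bit 6 (0x40); 0x45 therefore tests C3|C2|C0 and 0x44
   tests C3|C2.  */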
20232
20233 intcmp_mode = CCNOmode;
20234 switch (code)
20235 {
20236 case GT:
20237 case UNGT:
20238 if (code == GT || !TARGET_IEEE_FP)
20239 {
20240 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20241 code = EQ;
20242 }
20243 else
20244 {
20245 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20246 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20247 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20248 intcmp_mode = CCmode;
20249 code = GEU;
20250 }
20251 break;
20252 case LT:
20253 case UNLT:
20254 if (code == LT && TARGET_IEEE_FP)
20255 {
20256 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20257 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20258 intcmp_mode = CCmode;
20259 code = EQ;
20260 }
20261 else
20262 {
20263 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20264 code = NE;
20265 }
20266 break;
20267 case GE:
20268 case UNGE:
20269 if (code == GE || !TARGET_IEEE_FP)
20270 {
20271 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20272 code = EQ;
20273 }
20274 else
20275 {
20276 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20277 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20278 code = NE;
20279 }
20280 break;
20281 case LE:
20282 case UNLE:
20283 if (code == LE && TARGET_IEEE_FP)
20284 {
20285 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20286 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20287 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20288 intcmp_mode = CCmode;
20289 code = LTU;
20290 }
20291 else
20292 {
20293 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20294 code = NE;
20295 }
20296 break;
20297 case EQ:
20298 case UNEQ:
20299 if (code == EQ && TARGET_IEEE_FP)
20300 {
20301 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20302 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20303 intcmp_mode = CCmode;
20304 code = EQ;
20305 }
20306 else
20307 {
20308 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20309 code = NE;
20310 }
20311 break;
20312 case NE:
20313 case LTGT:
20314 if (code == NE && TARGET_IEEE_FP)
20315 {
20316 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20317 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20318 GEN_INT (0x40)));
20319 code = NE;
20320 }
20321 else
20322 {
20323 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20324 code = EQ;
20325 }
20326 break;
20327
20328 case UNORDERED:
20329 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20330 code = NE;
20331 break;
20332 case ORDERED:
20333 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20334 code = EQ;
20335 break;
20336
20337 default:
20338 gcc_unreachable ();
20339 }
20340 break;
20341
20342 default:
20343 gcc_unreachable();
20344 }
20345
20346 /* Return the test that should be put into the flags user, i.e.
20347 the bcc, scc, or cmov instruction. */
20348 return gen_rtx_fmt_ee (code, VOIDmode,
20349 gen_rtx_REG (intcmp_mode, FLAGS_REG),
20350 const0_rtx);
20351 }
20352
20353 static rtx
20354 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20355 {
20356 rtx ret;
20357
20358 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20359 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20360
20361 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20362 {
20363 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20364 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20365 }
20366 else
20367 ret = ix86_expand_int_compare (code, op0, op1);
20368
20369 return ret;
20370 }
20371
20372 void
20373 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20374 {
20375 machine_mode mode = GET_MODE (op0);
20376 rtx tmp;
20377
20378 switch (mode)
20379 {
20380 case SFmode:
20381 case DFmode:
20382 case XFmode:
20383 case QImode:
20384 case HImode:
20385 case SImode:
20386 simple:
20387 tmp = ix86_expand_compare (code, op0, op1);
20388 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20389 gen_rtx_LABEL_REF (VOIDmode, label),
20390 pc_rtx);
20391 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
20392 return;
20393
20394 case DImode:
20395 if (TARGET_64BIT)
20396 goto simple;
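/* FALLTHRU */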
20397 case TImode:
20398 /* Expand DImode branch into multiple compare+branch. */
20399 {
20400 rtx lo[2], hi[2];
20401 rtx_code_label *label2;
20402 enum rtx_code code1, code2, code3;
20403 machine_mode submode;
20404
20405 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20406 {
20407 std::swap (op0, op1);
20408 code = swap_condition (code);
20409 }
20410
20411 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20412 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20413
20414 submode = mode == DImode ? SImode : DImode;
20415
20416 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20417 avoid two branches. This costs one extra insn, so disable when
20418 optimizing for size. */
20419
20420 if ((code == EQ || code == NE)
20421 && (!optimize_insn_for_size_p ()
20422 || hi[1] == const0_rtx || lo[1] == const0_rtx))
20423 {
20424 rtx xor0, xor1;
20425
20426 xor1 = hi[0];
20427 if (hi[1] != const0_rtx)
20428 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20429 NULL_RTX, 0, OPTAB_WIDEN);
20430
20431 xor0 = lo[0];
20432 if (lo[1] != const0_rtx)
20433 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20434 NULL_RTX, 0, OPTAB_WIDEN);
20435
20436 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20437 NULL_RTX, 0, OPTAB_WIDEN);
20438
20439 ix86_expand_branch (code, tmp, const0_rtx, label);
20440 return;
20441 }
20442
20443 /* Otherwise, if we are doing a less-than or greater-or-equal-than
20444 comparison, op1 is a constant, and the low word is zero, then we can
20445 just examine the high word. Similarly for a low word of -1 and
20446 less-or-equal-than or greater-than. */
20447
20448 if (CONST_INT_P (hi[1]))
20449 switch (code)
20450 {
20451 case LT: case LTU: case GE: case GEU:
20452 if (lo[1] == const0_rtx)
20453 {
20454 ix86_expand_branch (code, hi[0], hi[1], label);
20455 return;
20456 }
20457 break;
20458 case LE: case LEU: case GT: case GTU:
20459 if (lo[1] == constm1_rtx)
20460 {
20461 ix86_expand_branch (code, hi[0], hi[1], label);
20462 return;
20463 }
20464 break;
20465 default:
20466 break;
20467 }
20468
20469 /* Otherwise, we need two or three jumps. */
20470
20471 label2 = gen_label_rtx ();
20472
20473 code1 = code;
20474 code2 = swap_condition (code);
20475 code3 = unsigned_condition (code);
20476
20477 switch (code)
20478 {
20479 case LT: case GT: case LTU: case GTU:
20480 break;
20481
20482 case LE: code1 = LT; code2 = GT; break;
20483 case GE: code1 = GT; code2 = LT; break;
20484 case LEU: code1 = LTU; code2 = GTU; break;
20485 case GEU: code1 = GTU; code2 = LTU; break;
20486
20487 case EQ: code1 = UNKNOWN; code2 = NE; break;
20488 case NE: code2 = UNKNOWN; break;
20489
20490 default:
20491 gcc_unreachable ();
20492 }
20493
20494 /*
20495 * a < b =>
20496 * if (hi(a) < hi(b)) goto true;
20497 * if (hi(a) > hi(b)) goto false;
20498 * if (lo(a) < lo(b)) goto true;
20499 * false:
20500 */
20501
20502 if (code1 != UNKNOWN)
20503 ix86_expand_branch (code1, hi[0], hi[1], label);
20504 if (code2 != UNKNOWN)
20505 ix86_expand_branch (code2, hi[0], hi[1], label2);
20506
20507 ix86_expand_branch (code3, lo[0], lo[1], label);
20508
20509 if (code2 != UNKNOWN)
20510 emit_label (label2);
20511 return;
20512 }
20513
20514 default:
20515 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20516 goto simple;
20517 }
20518 }
20519
20520 /* Split branch based on floating point condition. */
20521 void
20522 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20523 rtx target1, rtx target2, rtx tmp)
20524 {
20525 rtx condition;
20526 rtx i;
20527
20528 if (target2 != pc_rtx)
20529 {
20530 std::swap (target1, target2);
20531 code = reverse_condition_maybe_unordered (code);
20532 }
20533
20534 condition = ix86_expand_fp_compare (code, op1, op2,
20535 tmp);
20536
20537 i = emit_jump_insn (gen_rtx_SET
20538 (pc_rtx,
20539 gen_rtx_IF_THEN_ELSE (VOIDmode,
20540 condition, target1, target2)));
20541 if (split_branch_probability >= 0)
20542 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20543 }
20544
20545 void
20546 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20547 {
20548 rtx ret;
20549
20550 gcc_assert (GET_MODE (dest) == QImode);
20551
20552 ret = ix86_expand_compare (code, op0, op1);
20553 PUT_MODE (ret, QImode);
20554 emit_insn (gen_rtx_SET (dest, ret));
20555 }
20556
20557 /* Expand a comparison setting or clearing the carry flag. Return true when
20558 successful and set *POP to the comparison for the operation. */
20559 static bool
20560 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20561 {
20562 machine_mode mode =
20563 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20564
20565 /* Do not handle double-mode compares that go through special path. */
20566 if (mode == (TARGET_64BIT ? TImode : DImode))
20567 return false;
20568
20569 if (SCALAR_FLOAT_MODE_P (mode))
20570 {
20571 rtx compare_op;
20572 rtx_insn *compare_seq;
20573
20574 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20575
20576 /* Shortcut: the following common codes never translate
20577 into carry flag compares. */
20578 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20579 || code == ORDERED || code == UNORDERED)
20580 return false;
20581
20582 /* These comparisons require the zero flag; swap the operands so they won't. */
20583 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20584 && !TARGET_IEEE_FP)
20585 {
20586 std::swap (op0, op1);
20587 code = swap_condition (code);
20588 }
20589
20590 /* Try to expand the comparison and verify that we end up with a
20591 carry flag based comparison. This fails to be true only when
20592 we decide to expand the comparison using arithmetic, which is
20593 not too common a scenario. */
20594 start_sequence ();
20595 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20596 compare_seq = get_insns ();
20597 end_sequence ();
20598
20599 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20600 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20601 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20602 else
20603 code = GET_CODE (compare_op);
20604
20605 if (code != LTU && code != GEU)
20606 return false;
20607
20608 emit_insn (compare_seq);
20609 *pop = compare_op;
20610 return true;
20611 }
20612
20613 if (!INTEGRAL_MODE_P (mode))
20614 return false;
20615
20616 switch (code)
20617 {
20618 case LTU:
20619 case GEU:
20620 break;
20621
20622 /* Convert a==0 into (unsigned)a<1. */
20623 case EQ:
20624 case NE:
20625 if (op1 != const0_rtx)
20626 return false;
20627 op1 = const1_rtx;
20628 code = (code == EQ ? LTU : GEU);
20629 break;
20630
20631 /* Convert a>b into b<a or a>=b+1. */
20632 case GTU:
20633 case LEU:
20634 if (CONST_INT_P (op1))
20635 {
20636 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20637 /* Bail out on overflow. We still can swap operands but that
20638 would force loading of the constant into register. */
20639 if (op1 == const0_rtx
20640 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20641 return false;
20642 code = (code == GTU ? GEU : LTU);
20643 }
20644 else
20645 {
20646 std::swap (op0, op1);
20647 code = (code == GTU ? LTU : GEU);
20648 }
20649 break;
20650
20651 /* Convert a>=0 into (unsigned)a<0x80000000. */
20652 case LT:
20653 case GE:
20654 if (mode == DImode || op1 != const0_rtx)
20655 return false;
20656 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20657 code = (code == LT ? GEU : LTU);
20658 break;
20659 case LE:
20660 case GT:
20661 if (mode == DImode || op1 != constm1_rtx)
20662 return false;
20663 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20664 code = (code == LE ? GEU : LTU);
20665 break;
20666
20667 default:
20668 return false;
20669 }
20670 /* Swapping operands may cause a constant to appear as the first operand. */
20671 if (!nonimmediate_operand (op0, VOIDmode))
20672 {
20673 if (!can_create_pseudo_p ())
20674 return false;
20675 op0 = force_reg (mode, op0);
20676 }
20677 *pop = ix86_expand_compare (code, op0, op1);
20678 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20679 return true;
20680 }
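
/* The LTU/GEU codes returned above are what let callers use the sbb idiom:
   after the compare, "sbb dest,dest" computes dest - dest - CF and so
   materializes 0 or -1 straight from the carry flag (see the size-optimized
   sequences in ix86_expand_int_movcc below).  */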
20681
20682 bool
20683 ix86_expand_int_movcc (rtx operands[])
20684 {
20685 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20686 rtx_insn *compare_seq;
20687 rtx compare_op;
20688 machine_mode mode = GET_MODE (operands[0]);
20689 bool sign_bit_compare_p = false;
20690 rtx op0 = XEXP (operands[1], 0);
20691 rtx op1 = XEXP (operands[1], 1);
20692
20693 if (GET_MODE (op0) == TImode
20694 || (GET_MODE (op0) == DImode
20695 && !TARGET_64BIT))
20696 return false;
20697
20698 start_sequence ();
20699 compare_op = ix86_expand_compare (code, op0, op1);
20700 compare_seq = get_insns ();
20701 end_sequence ();
20702
20703 compare_code = GET_CODE (compare_op);
20704
20705 if ((op1 == const0_rtx && (code == GE || code == LT))
20706 || (op1 == constm1_rtx && (code == GT || code == LE)))
20707 sign_bit_compare_p = true;
20708
20709 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20710 HImode insns, we'd be swallowed in word prefix ops. */
20711
20712 if ((mode != HImode || TARGET_FAST_PREFIX)
20713 && (mode != (TARGET_64BIT ? TImode : DImode))
20714 && CONST_INT_P (operands[2])
20715 && CONST_INT_P (operands[3]))
20716 {
20717 rtx out = operands[0];
20718 HOST_WIDE_INT ct = INTVAL (operands[2]);
20719 HOST_WIDE_INT cf = INTVAL (operands[3]);
20720 HOST_WIDE_INT diff;
20721
20722 diff = ct - cf;
20723 /* Sign bit compares are better done using shifts than by using
20724 sbb. */
20725 if (sign_bit_compare_p
20726 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20727 {
20728 /* Detect overlap between destination and compare sources. */
20729 rtx tmp = out;
20730
20731 if (!sign_bit_compare_p)
20732 {
20733 rtx flags;
20734 bool fpcmp = false;
20735
20736 compare_code = GET_CODE (compare_op);
20737
20738 flags = XEXP (compare_op, 0);
20739
20740 if (GET_MODE (flags) == CCFPmode
20741 || GET_MODE (flags) == CCFPUmode)
20742 {
20743 fpcmp = true;
20744 compare_code
20745 = ix86_fp_compare_code_to_integer (compare_code);
20746 }
20747
20748 /* To simplify the rest of the code, restrict to the GEU case. */
20749 if (compare_code == LTU)
20750 {
20751 std::swap (ct, cf);
20752 compare_code = reverse_condition (compare_code);
20753 code = reverse_condition (code);
20754 }
20755 else
20756 {
20757 if (fpcmp)
20758 PUT_CODE (compare_op,
20759 reverse_condition_maybe_unordered
20760 (GET_CODE (compare_op)));
20761 else
20762 PUT_CODE (compare_op,
20763 reverse_condition (GET_CODE (compare_op)));
20764 }
20765 diff = ct - cf;
20766
20767 if (reg_overlap_mentioned_p (out, op0)
20768 || reg_overlap_mentioned_p (out, op1))
20769 tmp = gen_reg_rtx (mode);
20770
20771 if (mode == DImode)
20772 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20773 else
20774 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20775 flags, compare_op));
20776 }
20777 else
20778 {
20779 if (code == GT || code == GE)
20780 code = reverse_condition (code);
20781 else
20782 {
20783 std::swap (ct, cf);
20784 diff = ct - cf;
20785 }
20786 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20787 }
20788
20789 if (diff == 1)
20790 {
20791 /*
20792 * cmpl op0,op1
20793 * sbbl dest,dest
20794 * [addl dest, ct]
20795 *
20796 * Size 5 - 8.
20797 */
20798 if (ct)
20799 tmp = expand_simple_binop (mode, PLUS,
20800 tmp, GEN_INT (ct),
20801 copy_rtx (tmp), 1, OPTAB_DIRECT);
20802 }
20803 else if (cf == -1)
20804 {
20805 /*
20806 * cmpl op0,op1
20807 * sbbl dest,dest
20808 * orl $ct, dest
20809 *
20810 * Size 8.
20811 */
20812 tmp = expand_simple_binop (mode, IOR,
20813 tmp, GEN_INT (ct),
20814 copy_rtx (tmp), 1, OPTAB_DIRECT);
20815 }
20816 else if (diff == -1 && ct)
20817 {
20818 /*
20819 * cmpl op0,op1
20820 * sbbl dest,dest
20821 * notl dest
20822 * [addl dest, cf]
20823 *
20824 * Size 8 - 11.
20825 */
20826 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20827 if (cf)
20828 tmp = expand_simple_binop (mode, PLUS,
20829 copy_rtx (tmp), GEN_INT (cf),
20830 copy_rtx (tmp), 1, OPTAB_DIRECT);
20831 }
20832 else
20833 {
20834 /*
20835 * cmpl op0,op1
20836 * sbbl dest,dest
20837 * [notl dest]
20838 * andl cf - ct, dest
20839 * [addl dest, ct]
20840 *
20841 * Size 8 - 11.
20842 */
20843
20844 if (cf == 0)
20845 {
20846 cf = ct;
20847 ct = 0;
20848 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20849 }
20850
20851 tmp = expand_simple_binop (mode, AND,
20852 copy_rtx (tmp),
20853 gen_int_mode (cf - ct, mode),
20854 copy_rtx (tmp), 1, OPTAB_DIRECT);
20855 if (ct)
20856 tmp = expand_simple_binop (mode, PLUS,
20857 copy_rtx (tmp), GEN_INT (ct),
20858 copy_rtx (tmp), 1, OPTAB_DIRECT);
20859 }
20860
20861 if (!rtx_equal_p (tmp, out))
20862 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20863
20864 return true;
20865 }
20866
20867 if (diff < 0)
20868 {
20869 machine_mode cmp_mode = GET_MODE (op0);
20870 enum rtx_code new_code;
20871
20872 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20873 {
20874 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20875
20876 /* We may be reversing an unordered compare to a normal compare, which
20877 is not valid in general (we may convert a non-trapping condition
20878 into a trapping one); however, on i386 we currently emit all
20879 comparisons unordered. */
20880 new_code = reverse_condition_maybe_unordered (code);
20881 }
20882 else
20883 new_code = ix86_reverse_condition (code, cmp_mode);
20884 if (new_code != UNKNOWN)
20885 {
20886 std::swap (ct, cf);
20887 diff = -diff;
20888 code = new_code;
20889 }
20890 }
20891
20892 compare_code = UNKNOWN;
20893 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20894 && CONST_INT_P (op1))
20895 {
20896 if (op1 == const0_rtx
20897 && (code == LT || code == GE))
20898 compare_code = code;
20899 else if (op1 == constm1_rtx)
20900 {
20901 if (code == LE)
20902 compare_code = LT;
20903 else if (code == GT)
20904 compare_code = GE;
20905 }
20906 }
20907
20908 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20909 if (compare_code != UNKNOWN
20910 && GET_MODE (op0) == GET_MODE (out)
20911 && (cf == -1 || ct == -1))
20912 {
20913 /* If lea code below could be used, only optimize
20914 if it results in a 2 insn sequence. */
20915
20916 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20917 || diff == 3 || diff == 5 || diff == 9)
20918 || (compare_code == LT && ct == -1)
20919 || (compare_code == GE && cf == -1))
20920 {
20921 /*
20922 * notl op1 (if necessary)
20923 * sarl $31, op1
20924 * orl cf, op1
20925 */
20926 if (ct != -1)
20927 {
20928 cf = ct;
20929 ct = -1;
20930 code = reverse_condition (code);
20931 }
20932
20933 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20934
20935 out = expand_simple_binop (mode, IOR,
20936 out, GEN_INT (cf),
20937 out, 1, OPTAB_DIRECT);
20938 if (out != operands[0])
20939 emit_move_insn (operands[0], out);
20940
20941 return true;
20942 }
20943 }
20944
20945
20946 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20947 || diff == 3 || diff == 5 || diff == 9)
20948 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20949 && (mode != DImode
20950 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20951 {
20952 /*
20953 * xorl dest,dest
20954 * cmpl op1,op2
20955 * setcc dest
20956 * lea cf(dest*(ct-cf)),dest
20957 *
20958 * Size 14.
20959 *
20960 * This also catches the degenerate setcc-only case.
20961 */
20962
20963 rtx tmp;
20964 int nops;
20965
20966 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20967
20968 nops = 0;
20969 /* On x86_64 the lea instruction operates on Pmode, so we need
20970 to get the arithmetic done in the proper mode to match. */
20971 if (diff == 1)
20972 tmp = copy_rtx (out);
20973 else
20974 {
20975 rtx out1;
20976 out1 = copy_rtx (out);
20977 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20978 nops++;
20979 if (diff & 1)
20980 {
20981 tmp = gen_rtx_PLUS (mode, tmp, out1);
20982 nops++;
20983 }
20984 }
20985 if (cf != 0)
20986 {
20987 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20988 nops++;
20989 }
20990 if (!rtx_equal_p (tmp, out))
20991 {
20992 if (nops == 1)
20993 out = force_operand (tmp, copy_rtx (out));
20994 else
20995 emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
20996 }
20997 if (!rtx_equal_p (out, operands[0]))
20998 emit_move_insn (operands[0], copy_rtx (out));
20999
21000 return true;
21001 }
21002
21003 /*
21004 * General case: Jumpful:
21005 * xorl dest,dest cmpl op1, op2
21006 * cmpl op1, op2 movl ct, dest
21007 * setcc dest jcc 1f
21008 * decl dest movl cf, dest
21009 * andl (cf-ct),dest 1:
21010 * addl ct,dest
21011 *
21012 * Size 20. Size 14.
21013 *
21014 * This is reasonably steep, but branch mispredict costs are
21015 * high on modern CPUs, so consider failing only if optimizing
21016 * for space.
21017 */
21018
21019 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21020 && BRANCH_COST (optimize_insn_for_speed_p (),
21021 false) >= 2)
21022 {
21023 if (cf == 0)
21024 {
21025 machine_mode cmp_mode = GET_MODE (op0);
21026 enum rtx_code new_code;
21027
21028 if (SCALAR_FLOAT_MODE_P (cmp_mode))
21029 {
21030 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
21031
21032 /* We may be reversing an unordered compare to a normal compare,
21033 which is not valid in general (we may convert a non-trapping
21034 condition into a trapping one); however, on i386 we currently
21035 emit all comparisons unordered. */
21036 new_code = reverse_condition_maybe_unordered (code);
21037 }
21038 else
21039 {
21040 new_code = ix86_reverse_condition (code, cmp_mode);
21041 if (compare_code != UNKNOWN && new_code != UNKNOWN)
21042 compare_code = reverse_condition (compare_code);
21043 }
21044
21045 if (new_code != UNKNOWN)
21046 {
21047 cf = ct;
21048 ct = 0;
21049 code = new_code;
21050 }
21051 }
21052
21053 if (compare_code != UNKNOWN)
21054 {
21055 /* notl op1 (if needed)
21056 sarl $31, op1
21057 andl (cf-ct), op1
21058 addl ct, op1
21059
21060 For x < 0 (resp. x <= -1) there will be no notl,
21061 so if possible swap the constants to get rid of the
21062 complement.
21063 True/false will be -1/0 while code below (store flag
21064 followed by decrement) is 0/-1, so the constants need
21065 to be exchanged once more. */
21066
21067 if (compare_code == GE || !cf)
21068 {
21069 code = reverse_condition (code);
21070 compare_code = LT;
21071 }
21072 else
21073 std::swap (ct, cf);
21074
21075 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21076 }
21077 else
21078 {
21079 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21080
21081 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21082 constm1_rtx,
21083 copy_rtx (out), 1, OPTAB_DIRECT);
21084 }
21085
21086 out = expand_simple_binop (mode, AND, copy_rtx (out),
21087 gen_int_mode (cf - ct, mode),
21088 copy_rtx (out), 1, OPTAB_DIRECT);
21089 if (ct)
21090 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21091 copy_rtx (out), 1, OPTAB_DIRECT);
21092 if (!rtx_equal_p (out, operands[0]))
21093 emit_move_insn (operands[0], copy_rtx (out));
21094
21095 return true;
21096 }
21097 }
21098
21099 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21100 {
21101 /* Try a few more things with specific constants and a variable. */
21102
21103 optab op;
21104 rtx var, orig_out, out, tmp;
21105
21106 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21107 return false;
21108
21109 /* If one of the two operands is an interesting constant, load a
21110 constant with the above and mask it in with a logical operation. */
21111
21112 if (CONST_INT_P (operands[2]))
21113 {
21114 var = operands[3];
21115 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21116 operands[3] = constm1_rtx, op = and_optab;
21117 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21118 operands[3] = const0_rtx, op = ior_optab;
21119 else
21120 return false;
21121 }
21122 else if (CONST_INT_P (operands[3]))
21123 {
21124 var = operands[2];
21125 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21126 operands[2] = constm1_rtx, op = and_optab;
21127 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
21128 operands[2] = const0_rtx, op = ior_optab;
21129 else
21130 return false;
21131 }
21132 else
21133 return false;
21134
21135 orig_out = operands[0];
21136 tmp = gen_reg_rtx (mode);
21137 operands[0] = tmp;
21138
21139 /* Recurse to get the constant loaded. */
21140 if (ix86_expand_int_movcc (operands) == 0)
21141 return false;
21142
21143 /* Mask in the interesting variable. */
21144 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21145 OPTAB_WIDEN);
21146 if (!rtx_equal_p (out, orig_out))
21147 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21148
21149 return true;
21150 }
21151
21152 /*
21153 * For comparison with above,
21154 *
21155 * movl cf,dest
21156 * movl ct,tmp
21157 * cmpl op1,op2
21158 * cmovcc tmp,dest
21159 *
21160 * Size 15.
21161 */
21162
21163 if (! nonimmediate_operand (operands[2], mode))
21164 operands[2] = force_reg (mode, operands[2]);
21165 if (! nonimmediate_operand (operands[3], mode))
21166 operands[3] = force_reg (mode, operands[3]);
21167
21168 if (! register_operand (operands[2], VOIDmode)
21169 && (mode == QImode
21170 || ! register_operand (operands[3], VOIDmode)))
21171 operands[2] = force_reg (mode, operands[2]);
21172
21173 if (mode == QImode
21174 && ! register_operand (operands[3], VOIDmode))
21175 operands[3] = force_reg (mode, operands[3]);
21176
21177 emit_insn (compare_seq);
21178 emit_insn (gen_rtx_SET (operands[0],
21179 gen_rtx_IF_THEN_ELSE (mode,
21180 compare_op, operands[2],
21181 operands[3])));
21182 return true;
21183 }
21184
21185 /* Swap, force into registers, or otherwise massage the two operands
21186 to an sse comparison with a mask result. Thus we differ a bit from
21187 ix86_prepare_fp_compare_args which expects to produce a flags result.
21188
21189 The DEST operand exists to help determine whether to commute commutative
21190 operators. The POP0/POP1 operands are updated in place. The new
21191 comparison code is returned, or UNKNOWN if not implementable. */
21192
21193 static enum rtx_code
21194 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21195 rtx *pop0, rtx *pop1)
21196 {
21197 switch (code)
21198 {
21199 case LTGT:
21200 case UNEQ:
21201 /* AVX supports all the needed comparisons. */
21202 if (TARGET_AVX)
21203 break;
21204 /* We have no LTGT as an operator. We could implement it with
21205 NE & ORDERED, but this requires an extra temporary. It's
21206 not clear that it's worth it. */
21207 return UNKNOWN;
21208
21209 case LT:
21210 case LE:
21211 case UNGT:
21212 case UNGE:
21213 /* These are supported directly. */
21214 break;
21215
21216 case EQ:
21217 case NE:
21218 case UNORDERED:
21219 case ORDERED:
21220 /* AVX has 3 operand comparisons, no need to swap anything. */
21221 if (TARGET_AVX)
21222 break;
21223 /* For commutative operators, try to canonicalize the destination
21224 operand to be first in the comparison - this helps reload to
21225 avoid extra moves. */
21226 if (!dest || !rtx_equal_p (dest, *pop1))
21227 break;
21228 /* FALLTHRU */
21229
21230 case GE:
21231 case GT:
21232 case UNLE:
21233 case UNLT:
21234 /* These are not supported directly before AVX, and furthermore
21235 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21236 comparison operands to transform into something that is
21237 supported. */
21238 std::swap (*pop0, *pop1);
21239 code = swap_condition (code);
21240 break;
21241
21242 default:
21243 gcc_unreachable ();
21244 }
21245
21246 return code;
21247 }
21248
21249 /* Detect conditional moves that exactly match min/max operational
21250 semantics. Note that this is IEEE safe, as long as we don't
21251 interchange the operands.
21252
21253 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21254 and TRUE if the operation is successful and instructions are emitted. */
21255
21256 static bool
21257 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21258 rtx cmp_op1, rtx if_true, rtx if_false)
21259 {
21260 machine_mode mode;
21261 bool is_min;
21262 rtx tmp;
21263
21264 if (code == LT)
21265 ;
21266 else if (code == UNGE)
21267 std::swap (if_true, if_false);
21268 else
21269 return false;
21270
21271 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21272 is_min = true;
21273 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21274 is_min = false;
21275 else
21276 return false;
21277
21278 mode = GET_MODE (dest);
21279
21280 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21281 but MODE may be a vector mode and thus not appropriate. */
21282 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21283 {
21284 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21285 rtvec v;
21286
21287 if_true = force_reg (mode, if_true);
21288 v = gen_rtvec (2, if_true, if_false);
21289 tmp = gen_rtx_UNSPEC (mode, v, u);
21290 }
21291 else
21292 {
21293 code = is_min ? SMIN : SMAX;
21294 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21295 }
21296
21297 emit_insn (gen_rtx_SET (dest, tmp));
21298 return true;
21299 }
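
/* For example (illustrative), a conditional move written as (a < b) ? a : b
   is recognized here as a minimum and (a < b) ? b : a as a maximum; unless
   both flag_finite_math_only and flag_unsafe_math_optimizations are set, the
   UNSPEC_IEEE_{MIN,MAX} form is used so that the operand order, and with it
   the NaN and signed-zero behavior of minps/maxps, is preserved.  */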
21300
21301 /* Expand an sse vector comparison. Return the register with the result. */
21302
21303 static rtx
21304 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21305 rtx op_true, rtx op_false)
21306 {
21307 machine_mode mode = GET_MODE (dest);
21308 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21309
21310 /* In the general case the result of the comparison can differ from the operands' type. */
21311 machine_mode cmp_mode;
21312
21313 /* In AVX512F the result of comparison is an integer mask. */
21314 bool maskcmp = false;
21315 rtx x;
21316
21317 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21318 {
21319 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21320 gcc_assert (cmp_mode != BLKmode);
21321
21322 maskcmp = true;
21323 }
21324 else
21325 cmp_mode = cmp_ops_mode;
21326
21327
21328 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21329 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21330 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21331
21332 if (optimize
21333 || reg_overlap_mentioned_p (dest, op_true)
21334 || reg_overlap_mentioned_p (dest, op_false))
21335 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21336
21337 /* Compare patterns for int modes are unspec in AVX512F only. */
21338 if (maskcmp && (code == GT || code == EQ))
21339 {
21340 rtx (*gen)(rtx, rtx, rtx);
21341
21342 switch (cmp_ops_mode)
21343 {
21344 case V64QImode:
21345 gcc_assert (TARGET_AVX512BW);
21346 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
21347 break;
21348 case V32HImode:
21349 gcc_assert (TARGET_AVX512BW);
21350 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
21351 break;
21352 case V16SImode:
21353 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21354 break;
21355 case V8DImode:
21356 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21357 break;
21358 default:
21359 gen = NULL;
21360 }
21361
21362 if (gen)
21363 {
21364 emit_insn (gen (dest, cmp_op0, cmp_op1));
21365 return dest;
21366 }
21367 }
21368 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21369
21370 if (cmp_mode != mode && !maskcmp)
21371 {
21372 x = force_reg (cmp_ops_mode, x);
21373 convert_move (dest, x, false);
21374 }
21375 else
21376 emit_insn (gen_rtx_SET (dest, x));
21377
21378 return dest;
21379 }
21380
21381 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21382 operations. This is used for both scalar and vector conditional moves. */
21383
21384 static void
21385 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21386 {
21387 machine_mode mode = GET_MODE (dest);
21388 machine_mode cmpmode = GET_MODE (cmp);
21389
21390 /* In AVX512F the result of comparison is an integer mask. */
21391 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21392
21393 rtx t2, t3, x;
21394
21395 if (vector_all_ones_operand (op_true, mode)
21396 && rtx_equal_p (op_false, CONST0_RTX (mode))
21397 && !maskcmp)
21398 {
21399 emit_insn (gen_rtx_SET (dest, cmp));
21400 }
21401 else if (op_false == CONST0_RTX (mode)
21402 && !maskcmp)
21403 {
21404 op_true = force_reg (mode, op_true);
21405 x = gen_rtx_AND (mode, cmp, op_true);
21406 emit_insn (gen_rtx_SET (dest, x));
21407 }
21408 else if (op_true == CONST0_RTX (mode)
21409 && !maskcmp)
21410 {
21411 op_false = force_reg (mode, op_false);
21412 x = gen_rtx_NOT (mode, cmp);
21413 x = gen_rtx_AND (mode, x, op_false);
21414 emit_insn (gen_rtx_SET (dest, x));
21415 }
21416 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21417 && !maskcmp)
21418 {
21419 op_false = force_reg (mode, op_false);
21420 x = gen_rtx_IOR (mode, cmp, op_false);
21421 emit_insn (gen_rtx_SET (dest, x));
21422 }
21423 else if (TARGET_XOP
21424 && !maskcmp)
21425 {
21426 op_true = force_reg (mode, op_true);
21427
21428 if (!nonimmediate_operand (op_false, mode))
21429 op_false = force_reg (mode, op_false);
21430
21431 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
21432 op_true,
21433 op_false)));
21434 }
21435 else
21436 {
21437 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21438 rtx d = dest;
21439
21440 if (!nonimmediate_operand (op_true, mode))
21441 op_true = force_reg (mode, op_true);
21442
21443 op_false = force_reg (mode, op_false);
21444
21445 switch (mode)
21446 {
21447 case V4SFmode:
21448 if (TARGET_SSE4_1)
21449 gen = gen_sse4_1_blendvps;
21450 break;
21451 case V2DFmode:
21452 if (TARGET_SSE4_1)
21453 gen = gen_sse4_1_blendvpd;
21454 break;
21455 case V16QImode:
21456 case V8HImode:
21457 case V4SImode:
21458 case V2DImode:
21459 if (TARGET_SSE4_1)
21460 {
21461 gen = gen_sse4_1_pblendvb;
21462 if (mode != V16QImode)
21463 d = gen_reg_rtx (V16QImode);
21464 op_false = gen_lowpart (V16QImode, op_false);
21465 op_true = gen_lowpart (V16QImode, op_true);
21466 cmp = gen_lowpart (V16QImode, cmp);
21467 }
21468 break;
21469 case V8SFmode:
21470 if (TARGET_AVX)
21471 gen = gen_avx_blendvps256;
21472 break;
21473 case V4DFmode:
21474 if (TARGET_AVX)
21475 gen = gen_avx_blendvpd256;
21476 break;
21477 case V32QImode:
21478 case V16HImode:
21479 case V8SImode:
21480 case V4DImode:
21481 if (TARGET_AVX2)
21482 {
21483 gen = gen_avx2_pblendvb;
21484 if (mode != V32QImode)
21485 d = gen_reg_rtx (V32QImode);
21486 op_false = gen_lowpart (V32QImode, op_false);
21487 op_true = gen_lowpart (V32QImode, op_true);
21488 cmp = gen_lowpart (V32QImode, cmp);
21489 }
21490 break;
21491
21492 case V64QImode:
21493 gen = gen_avx512bw_blendmv64qi;
21494 break;
21495 case V32HImode:
21496 gen = gen_avx512bw_blendmv32hi;
21497 break;
21498 case V16SImode:
21499 gen = gen_avx512f_blendmv16si;
21500 break;
21501 case V8DImode:
21502 gen = gen_avx512f_blendmv8di;
21503 break;
21504 case V8DFmode:
21505 gen = gen_avx512f_blendmv8df;
21506 break;
21507 case V16SFmode:
21508 gen = gen_avx512f_blendmv16sf;
21509 break;
21510
21511 default:
21512 break;
21513 }
21514
21515 if (gen != NULL)
21516 {
21517 emit_insn (gen (d, op_false, op_true, cmp));
21518 if (d != dest)
21519 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21520 }
21521 else
21522 {
21523 op_true = force_reg (mode, op_true);
21524
21525 t2 = gen_reg_rtx (mode);
21526 if (optimize)
21527 t3 = gen_reg_rtx (mode);
21528 else
21529 t3 = dest;
21530
21531 x = gen_rtx_AND (mode, op_true, cmp);
21532 emit_insn (gen_rtx_SET (t2, x));
21533
21534 x = gen_rtx_NOT (mode, cmp);
21535 x = gen_rtx_AND (mode, x, op_false);
21536 emit_insn (gen_rtx_SET (t3, x));
21537
21538 x = gen_rtx_IOR (mode, t3, t2);
21539 emit_insn (gen_rtx_SET (dest, x));
21540 }
21541 }
21542 }
21543
21544 /* Expand a floating-point conditional move. Return true if successful. */
21545
21546 bool
21547 ix86_expand_fp_movcc (rtx operands[])
21548 {
21549 machine_mode mode = GET_MODE (operands[0]);
21550 enum rtx_code code = GET_CODE (operands[1]);
21551 rtx tmp, compare_op;
21552 rtx op0 = XEXP (operands[1], 0);
21553 rtx op1 = XEXP (operands[1], 1);
21554
21555 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21556 {
21557 machine_mode cmode;
21558
21559 /* Since we've no cmove for sse registers, don't force bad register
21560 allocation just to gain access to it. Deny movcc when the
21561 comparison mode doesn't match the move mode. */
21562 cmode = GET_MODE (op0);
21563 if (cmode == VOIDmode)
21564 cmode = GET_MODE (op1);
21565 if (cmode != mode)
21566 return false;
21567
21568 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21569 if (code == UNKNOWN)
21570 return false;
21571
21572 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21573 operands[2], operands[3]))
21574 return true;
21575
21576 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21577 operands[2], operands[3]);
21578 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21579 return true;
21580 }
21581
21582 if (GET_MODE (op0) == TImode
21583 || (GET_MODE (op0) == DImode
21584 && !TARGET_64BIT))
21585 return false;
21586
21587 /* The floating point conditional move instructions don't directly
21588 support conditions resulting from a signed integer comparison. */
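/* So when the comparison is not usable by fcmov, materialize it first
   with setcc into a QImode register and then cmov on that register
   being nonzero, as done below.  */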
21589
21590 compare_op = ix86_expand_compare (code, op0, op1);
21591 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21592 {
21593 tmp = gen_reg_rtx (QImode);
21594 ix86_expand_setcc (tmp, code, op0, op1);
21595
21596 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21597 }
21598
21599 emit_insn (gen_rtx_SET (operands[0],
21600 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21601 operands[2], operands[3])));
21602
21603 return true;
21604 }
21605
21606 /* Expand a floating-point vector conditional move; a vcond operation
21607 rather than a movcc operation. */
21608
21609 bool
21610 ix86_expand_fp_vcond (rtx operands[])
21611 {
21612 enum rtx_code code = GET_CODE (operands[3]);
21613 rtx cmp;
21614
21615 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21616 &operands[4], &operands[5]);
21617 if (code == UNKNOWN)
21618 {
21619 rtx temp;
21620 switch (GET_CODE (operands[3]))
21621 {
21622 case LTGT:
21623 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21624 operands[5], operands[0], operands[0]);
21625 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21626 operands[5], operands[1], operands[2]);
21627 code = AND;
21628 break;
21629 case UNEQ:
21630 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21631 operands[5], operands[0], operands[0]);
21632 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21633 operands[5], operands[1], operands[2]);
21634 code = IOR;
21635 break;
21636 default:
21637 gcc_unreachable ();
21638 }
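      /* LTGT is equivalent to ORDERED && NE, and UNEQ to UNORDERED || EQ,
	 so the two partial masks computed above are combined with AND or
	 IOR respectively.  */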
21639 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21640 OPTAB_DIRECT);
21641 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21642 return true;
21643 }
21644
21645 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21646 operands[5], operands[1], operands[2]))
21647 return true;
21648
21649 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21650 operands[1], operands[2]);
21651 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21652 return true;
21653 }
21654
21655 /* Expand a signed/unsigned integral vector conditional move. */
21656
21657 bool
21658 ix86_expand_int_vcond (rtx operands[])
21659 {
21660 machine_mode data_mode = GET_MODE (operands[0]);
21661 machine_mode mode = GET_MODE (operands[4]);
21662 enum rtx_code code = GET_CODE (operands[3]);
21663 bool negate = false;
21664 rtx x, cop0, cop1;
21665
21666 cop0 = operands[4];
21667 cop1 = operands[5];
21668
21669 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21670 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
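  /* The GE form is the same with the arms swapped, which is why the
     operands[1 + (code == LT)] test below checks that whichever arm
     must be zero really is zero.  */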
21671 if ((code == LT || code == GE)
21672 && data_mode == mode
21673 && cop1 == CONST0_RTX (mode)
21674 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21675 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21676 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21677 && (GET_MODE_SIZE (data_mode) == 16
21678 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21679 {
21680 rtx negop = operands[2 - (code == LT)];
21681 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21682 if (negop == CONST1_RTX (data_mode))
21683 {
21684 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21685 operands[0], 1, OPTAB_DIRECT);
21686 if (res != operands[0])
21687 emit_move_insn (operands[0], res);
21688 return true;
21689 }
21690 else if (GET_MODE_INNER (data_mode) != DImode
21691 && vector_all_ones_operand (negop, data_mode))
21692 {
21693 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21694 operands[0], 0, OPTAB_DIRECT);
21695 if (res != operands[0])
21696 emit_move_insn (operands[0], res);
21697 return true;
21698 }
21699 }
21700
21701 if (!nonimmediate_operand (cop1, mode))
21702 cop1 = force_reg (mode, cop1);
21703 if (!general_operand (operands[1], data_mode))
21704 operands[1] = force_reg (data_mode, operands[1]);
21705 if (!general_operand (operands[2], data_mode))
21706 operands[2] = force_reg (data_mode, operands[2]);
21707
21708 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21709 if (TARGET_XOP
21710 && (mode == V16QImode || mode == V8HImode
21711 || mode == V4SImode || mode == V2DImode))
21712 ;
21713 else
21714 {
21715 /* Canonicalize the comparison to EQ, GT, GTU. */
21716 switch (code)
21717 {
21718 case EQ:
21719 case GT:
21720 case GTU:
21721 break;
21722
21723 case NE:
21724 case LE:
21725 case LEU:
21726 code = reverse_condition (code);
21727 negate = true;
21728 break;
21729
21730 case GE:
21731 case GEU:
21732 code = reverse_condition (code);
21733 negate = true;
21734 /* FALLTHRU */
21735
21736 case LT:
21737 case LTU:
21738 std::swap (cop0, cop1);
21739 code = swap_condition (code);
21740 break;
21741
21742 default:
21743 gcc_unreachable ();
21744 }
21745
21746 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21747 if (mode == V2DImode)
21748 {
21749 switch (code)
21750 {
21751 case EQ:
21752 /* SSE4.1 supports EQ. */
21753 if (!TARGET_SSE4_1)
21754 return false;
21755 break;
21756
21757 case GT:
21758 case GTU:
21759 /* SSE4.2 supports GT/GTU. */
21760 if (!TARGET_SSE4_2)
21761 return false;
21762 break;
21763
21764 default:
21765 gcc_unreachable ();
21766 }
21767 }
21768
21769 /* Unsigned parallel compare is not supported by the hardware.
21770 Play some tricks to turn this into a signed comparison
21771 against 0. */
21772 if (code == GTU)
21773 {
21774 cop0 = force_reg (mode, cop0);
21775
21776 switch (mode)
21777 {
21778 case V16SImode:
21779 case V8DImode:
21780 case V8SImode:
21781 case V4DImode:
21782 case V4SImode:
21783 case V2DImode:
21784 {
21785 rtx t1, t2, mask;
21786 rtx (*gen_sub3) (rtx, rtx, rtx);
21787
21788 switch (mode)
21789 {
21790 case V16SImode: gen_sub3 = gen_subv16si3; break;
21791 case V8DImode: gen_sub3 = gen_subv8di3; break;
21792 case V8SImode: gen_sub3 = gen_subv8si3; break;
21793 case V4DImode: gen_sub3 = gen_subv4di3; break;
21794 case V4SImode: gen_sub3 = gen_subv4si3; break;
21795 case V2DImode: gen_sub3 = gen_subv2di3; break;
21796 default:
21797 gcc_unreachable ();
21798 }
21799 /* Subtract (-(INT MAX) - 1) from both operands to make
21800 them signed. */
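	      /* That is, flip the sign bit of each element: x >u y holds
		 exactly when (x - 0x80...0) >s (y - 0x80...0), so a signed
		 GT on the biased values implements the unsigned compare.  */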
21801 mask = ix86_build_signbit_mask (mode, true, false);
21802 t1 = gen_reg_rtx (mode);
21803 emit_insn (gen_sub3 (t1, cop0, mask));
21804
21805 t2 = gen_reg_rtx (mode);
21806 emit_insn (gen_sub3 (t2, cop1, mask));
21807
21808 cop0 = t1;
21809 cop1 = t2;
21810 code = GT;
21811 }
21812 break;
21813
21814 case V64QImode:
21815 case V32HImode:
21816 case V32QImode:
21817 case V16HImode:
21818 case V16QImode:
21819 case V8HImode:
21820 /* Perform a parallel unsigned saturating subtraction. */
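	    /* x >u y holds exactly when the saturating difference x -us y
	       is nonzero, so below we compare the result against zero with
	       EQ and flip NEGATE to recover the original sense.  */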
21821 x = gen_reg_rtx (mode);
21822 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0, cop1)));
21823
21824 cop0 = x;
21825 cop1 = CONST0_RTX (mode);
21826 code = EQ;
21827 negate = !negate;
21828 break;
21829
21830 default:
21831 gcc_unreachable ();
21832 }
21833 }
21834 }
21835
21836 /* Allow the comparison to be done in one mode, but the movcc to
21837 happen in another mode. */
21838 if (data_mode == mode)
21839 {
21840 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21841 operands[1+negate], operands[2-negate]);
21842 }
21843 else
21844 {
21845 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21846 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21847 operands[1+negate], operands[2-negate]);
21848 if (GET_MODE (x) == mode)
21849 x = gen_lowpart (data_mode, x);
21850 }
21851
21852 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21853 operands[2-negate]);
21854 return true;
21855 }
21856
21857 /* AVX512F does support 64-byte integer vector operations,
21858 thus the longest vector we are faced with is V64QImode. */
21859 #define MAX_VECT_LEN 64
21860
21861 struct expand_vec_perm_d
21862 {
21863 rtx target, op0, op1;
21864 unsigned char perm[MAX_VECT_LEN];
21865 machine_mode vmode;
21866 unsigned char nelt;
21867 bool one_operand_p;
21868 bool testing_p;
21869 };
21870
21871 static bool
21872 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21873 struct expand_vec_perm_d *d)
21874 {
21875 /* ix86_expand_vec_perm_vpermi2 is called from both the const and non-const
21876 expanders, so the arguments are either in D, or in OP0, OP1, etc. */
21877 machine_mode mode = GET_MODE (d ? d->op0 : op0);
21878 machine_mode maskmode = mode;
21879 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21880
21881 switch (mode)
21882 {
21883 case V8HImode:
21884 if (TARGET_AVX512VL && TARGET_AVX512BW)
21885 gen = gen_avx512vl_vpermi2varv8hi3;
21886 break;
21887 case V16HImode:
21888 if (TARGET_AVX512VL && TARGET_AVX512BW)
21889 gen = gen_avx512vl_vpermi2varv16hi3;
21890 break;
21891 case V64QImode:
21892 if (TARGET_AVX512VBMI)
21893 gen = gen_avx512bw_vpermi2varv64qi3;
21894 break;
21895 case V32HImode:
21896 if (TARGET_AVX512BW)
21897 gen = gen_avx512bw_vpermi2varv32hi3;
21898 break;
21899 case V4SImode:
21900 if (TARGET_AVX512VL)
21901 gen = gen_avx512vl_vpermi2varv4si3;
21902 break;
21903 case V8SImode:
21904 if (TARGET_AVX512VL)
21905 gen = gen_avx512vl_vpermi2varv8si3;
21906 break;
21907 case V16SImode:
21908 if (TARGET_AVX512F)
21909 gen = gen_avx512f_vpermi2varv16si3;
21910 break;
21911 case V4SFmode:
21912 if (TARGET_AVX512VL)
21913 {
21914 gen = gen_avx512vl_vpermi2varv4sf3;
21915 maskmode = V4SImode;
21916 }
21917 break;
21918 case V8SFmode:
21919 if (TARGET_AVX512VL)
21920 {
21921 gen = gen_avx512vl_vpermi2varv8sf3;
21922 maskmode = V8SImode;
21923 }
21924 break;
21925 case V16SFmode:
21926 if (TARGET_AVX512F)
21927 {
21928 gen = gen_avx512f_vpermi2varv16sf3;
21929 maskmode = V16SImode;
21930 }
21931 break;
21932 case V2DImode:
21933 if (TARGET_AVX512VL)
21934 gen = gen_avx512vl_vpermi2varv2di3;
21935 break;
21936 case V4DImode:
21937 if (TARGET_AVX512VL)
21938 gen = gen_avx512vl_vpermi2varv4di3;
21939 break;
21940 case V8DImode:
21941 if (TARGET_AVX512F)
21942 gen = gen_avx512f_vpermi2varv8di3;
21943 break;
21944 case V2DFmode:
21945 if (TARGET_AVX512VL)
21946 {
21947 gen = gen_avx512vl_vpermi2varv2df3;
21948 maskmode = V2DImode;
21949 }
21950 break;
21951 case V4DFmode:
21952 if (TARGET_AVX512VL)
21953 {
21954 gen = gen_avx512vl_vpermi2varv4df3;
21955 maskmode = V4DImode;
21956 }
21957 break;
21958 case V8DFmode:
21959 if (TARGET_AVX512F)
21960 {
21961 gen = gen_avx512f_vpermi2varv8df3;
21962 maskmode = V8DImode;
21963 }
21964 break;
21965 default:
21966 break;
21967 }
21968
21969 if (gen == NULL)
21970 return false;
21971
21972 /* ix86_expand_vec_perm_vpermi2 is called from both the const and non-const
21973 expanders, so the arguments are either in D, or in OP0, OP1, etc. */
21974 if (d)
21975 {
21976 rtx vec[64];
21977 target = d->target;
21978 op0 = d->op0;
21979 op1 = d->op1;
21980 for (int i = 0; i < d->nelt; ++i)
21981 vec[i] = GEN_INT (d->perm[i]);
21982 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
21983 }
21984
21985 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
21986 return true;
21987 }
21988
21989 /* Expand a variable vector permutation. */
21990
21991 void
21992 ix86_expand_vec_perm (rtx operands[])
21993 {
21994 rtx target = operands[0];
21995 rtx op0 = operands[1];
21996 rtx op1 = operands[2];
21997 rtx mask = operands[3];
21998 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21999 machine_mode mode = GET_MODE (op0);
22000 machine_mode maskmode = GET_MODE (mask);
22001 int w, e, i;
22002 bool one_operand_shuffle = rtx_equal_p (op0, op1);
22003
22004 /* Number of elements in the vector. */
22005 w = GET_MODE_NUNITS (mode);
22006 e = GET_MODE_UNIT_SIZE (mode);
22007 gcc_assert (w <= 64);
22008
22009 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
22010 return;
22011
22012 if (TARGET_AVX2)
22013 {
22014 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
22015 {
22016 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
22017 a constant shuffle operand. With a tiny bit of effort we can
22018 use VPERMD instead. A re-interpretation stall for V4DFmode is
22019 unfortunate but there's no avoiding it.
22020 Similarly for V16HImode we don't have instructions for variable
22021 shuffling, while for V32QImode we can, after preparing suitable
22022 masks, use vpshufb; vpshufb; vpermq; vpor. */
22023
22024 if (mode == V16HImode)
22025 {
22026 maskmode = mode = V32QImode;
22027 w = 32;
22028 e = 1;
22029 }
22030 else
22031 {
22032 maskmode = mode = V8SImode;
22033 w = 8;
22034 e = 4;
22035 }
22036 t1 = gen_reg_rtx (maskmode);
22037
22038 /* Replicate the low bits of the V4DImode mask into V8SImode:
22039 mask = { A B C D }
22040 t1 = { A A B B C C D D }. */
22041 for (i = 0; i < w / 2; ++i)
22042 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
22043 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22044 vt = force_reg (maskmode, vt);
22045 mask = gen_lowpart (maskmode, mask);
22046 if (maskmode == V8SImode)
22047 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
22048 else
22049 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
22050
22051 /* Multiply the shuffle indices by two. */
22052 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
22053 OPTAB_DIRECT);
22054
22055 /* Add one to the odd shuffle indices:
22056 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
22057 for (i = 0; i < w / 2; ++i)
22058 {
22059 vec[i * 2] = const0_rtx;
22060 vec[i * 2 + 1] = const1_rtx;
22061 }
22062 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22063 vt = validize_mem (force_const_mem (maskmode, vt));
22064 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22065 OPTAB_DIRECT);
22066
22067 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
22068 operands[3] = mask = t1;
22069 target = gen_reg_rtx (mode);
22070 op0 = gen_lowpart (mode, op0);
22071 op1 = gen_lowpart (mode, op1);
22072 }
22073
22074 switch (mode)
22075 {
22076 case V8SImode:
22077 /* The VPERMD and VPERMPS instructions already properly ignore
22078 the high bits of the shuffle elements. No need for us to
22079 perform an AND ourselves. */
22080 if (one_operand_shuffle)
22081 {
22082 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
22083 if (target != operands[0])
22084 emit_move_insn (operands[0],
22085 gen_lowpart (GET_MODE (operands[0]), target));
22086 }
22087 else
22088 {
22089 t1 = gen_reg_rtx (V8SImode);
22090 t2 = gen_reg_rtx (V8SImode);
22091 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22092 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22093 goto merge_two;
22094 }
22095 return;
22096
22097 case V8SFmode:
22098 mask = gen_lowpart (V8SImode, mask);
22099 if (one_operand_shuffle)
22100 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22101 else
22102 {
22103 t1 = gen_reg_rtx (V8SFmode);
22104 t2 = gen_reg_rtx (V8SFmode);
22105 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22106 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22107 goto merge_two;
22108 }
22109 return;
22110
22111 case V4SImode:
22112 /* By combining the two 128-bit input vectors into one 256-bit
22113 input vector, we can use VPERMD and VPERMPS for the full
22114 two-operand shuffle. */
22115 t1 = gen_reg_rtx (V8SImode);
22116 t2 = gen_reg_rtx (V8SImode);
22117 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22118 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22119 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22120 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22121 return;
22122
22123 case V4SFmode:
22124 t1 = gen_reg_rtx (V8SFmode);
22125 t2 = gen_reg_rtx (V8SImode);
22126 mask = gen_lowpart (V4SImode, mask);
22127 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22128 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22129 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22130 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
22131 return;
22132
22133 case V32QImode:
22134 t1 = gen_reg_rtx (V32QImode);
22135 t2 = gen_reg_rtx (V32QImode);
22136 t3 = gen_reg_rtx (V32QImode);
22137 vt2 = GEN_INT (-128);
22138 for (i = 0; i < 32; i++)
22139 vec[i] = vt2;
22140 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22141 vt = force_reg (V32QImode, vt);
22142 for (i = 0; i < 32; i++)
22143 vec[i] = i < 16 ? vt2 : const0_rtx;
22144 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22145 vt2 = force_reg (V32QImode, vt2);
22146 /* From mask create two adjusted masks, which contain the same
22147 bits as mask in the low 7 bits of each vector element.
22148 The first mask will have the most significant bit clear
22149 if it requests element from the same 128-bit lane
22150 and MSB set if it requests element from the other 128-bit lane.
22151 The second mask will have the opposite values of the MSB,
22152 and additionally will have its 128-bit lanes swapped.
22153 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22154 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22155 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22156 stands for other 12 bytes. */
22157 /* The bit that tells whether an element comes from the same lane or
22158 from the other lane is bit 4, so shift it up by 3 to the MSB position. */
22159 t5 = gen_reg_rtx (V4DImode);
22160 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22161 GEN_INT (3)));
22162 /* Clear MSB bits from the mask just in case it had them set. */
22163 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22164 /* After this t1 will have MSB set for elements from other lane. */
22165 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22166 /* Clear bits other than MSB. */
22167 emit_insn (gen_andv32qi3 (t1, t1, vt));
22168 /* Or in the lower bits from mask into t3. */
22169 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22170 /* And invert MSB bits in t1, so MSB is set for elements from the same
22171 lane. */
22172 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22173 /* Swap 128-bit lanes in t3. */
22174 t6 = gen_reg_rtx (V4DImode);
22175 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22176 const2_rtx, GEN_INT (3),
22177 const0_rtx, const1_rtx));
22178 /* And or in the lower bits from mask into t1. */
22179 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22180 if (one_operand_shuffle)
22181 {
22182 /* Each of these shuffles will put 0s in places where
22183 element from the other 128-bit lane is needed, otherwise
22184 will shuffle in the requested value. */
22185 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22186 gen_lowpart (V32QImode, t6)));
22187 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22188 /* For t3 the 128-bit lanes are swapped again. */
22189 t7 = gen_reg_rtx (V4DImode);
22190 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22191 const2_rtx, GEN_INT (3),
22192 const0_rtx, const1_rtx));
22193 /* And oring both together leads to the result. */
22194 emit_insn (gen_iorv32qi3 (target, t1,
22195 gen_lowpart (V32QImode, t7)));
22196 if (target != operands[0])
22197 emit_move_insn (operands[0],
22198 gen_lowpart (GET_MODE (operands[0]), target));
22199 return;
22200 }
22201
22202 t4 = gen_reg_rtx (V32QImode);
22203 /* Similarly to the above one_operand_shuffle code, just repeated
22204 twice, once for each operand; the code at merge_two: below will
22205 combine the two results together. */
22206 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22207 gen_lowpart (V32QImode, t6)));
22208 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22209 gen_lowpart (V32QImode, t6)));
22210 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22211 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22212 t7 = gen_reg_rtx (V4DImode);
22213 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22214 const2_rtx, GEN_INT (3),
22215 const0_rtx, const1_rtx));
22216 t8 = gen_reg_rtx (V4DImode);
22217 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22218 const2_rtx, GEN_INT (3),
22219 const0_rtx, const1_rtx));
22220 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22221 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
22222 t1 = t4;
22223 t2 = t3;
22224 goto merge_two;
22225
22226 default:
22227 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22228 break;
22229 }
22230 }
22231
22232 if (TARGET_XOP)
22233 {
22234 /* The XOP VPPERM insn supports three inputs. By ignoring the
22235 one_operand_shuffle special case, we avoid creating another
22236 set of constant vectors in memory. */
22237 one_operand_shuffle = false;
22238
22239 /* mask = mask & {2*w-1, ...} */
22240 vt = GEN_INT (2*w - 1);
22241 }
22242 else
22243 {
22244 /* mask = mask & {w-1, ...} */
22245 vt = GEN_INT (w - 1);
22246 }
22247
22248 for (i = 0; i < w; i++)
22249 vec[i] = vt;
22250 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22251 mask = expand_simple_binop (maskmode, AND, mask, vt,
22252 NULL_RTX, 0, OPTAB_DIRECT);
22253
22254 /* For non-QImode operations, convert the word permutation control
22255 into a byte permutation control. */
22256 if (mode != V16QImode)
22257 {
22258 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22259 GEN_INT (exact_log2 (e)),
22260 NULL_RTX, 0, OPTAB_DIRECT);
22261
22262 /* Convert mask to vector of chars. */
22263 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22264
22265 /* Replicate each of the input bytes into byte positions:
22266 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22267 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22268 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
22269 for (i = 0; i < 16; ++i)
22270 vec[i] = GEN_INT (i/e * e);
22271 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22272 vt = validize_mem (force_const_mem (V16QImode, vt));
22273 if (TARGET_XOP)
22274 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22275 else
22276 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22277
22278 /* Convert it into the byte positions by doing
22279 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
22280 for (i = 0; i < 16; ++i)
22281 vec[i] = GEN_INT (i % e);
22282 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22283 vt = validize_mem (force_const_mem (V16QImode, vt));
22284 emit_insn (gen_addv16qi3 (mask, mask, vt));
22285 }
22286
22287 /* The actual shuffle operations all operate on V16QImode. */
22288 op0 = gen_lowpart (V16QImode, op0);
22289 op1 = gen_lowpart (V16QImode, op1);
22290
22291 if (TARGET_XOP)
22292 {
22293 if (GET_MODE (target) != V16QImode)
22294 target = gen_reg_rtx (V16QImode);
22295 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22296 if (target != operands[0])
22297 emit_move_insn (operands[0],
22298 gen_lowpart (GET_MODE (operands[0]), target));
22299 }
22300 else if (one_operand_shuffle)
22301 {
22302 if (GET_MODE (target) != V16QImode)
22303 target = gen_reg_rtx (V16QImode);
22304 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22305 if (target != operands[0])
22306 emit_move_insn (operands[0],
22307 gen_lowpart (GET_MODE (operands[0]), target));
22308 }
22309 else
22310 {
22311 rtx xops[6];
22312 bool ok;
22313
22314 /* Shuffle the two input vectors independently. */
22315 t1 = gen_reg_rtx (V16QImode);
22316 t2 = gen_reg_rtx (V16QImode);
22317 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22318 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22319
22320 merge_two:
22321 /* Then merge them together. The key is whether any given control
22322 element contained a bit set that indicates the second word. */
22323 mask = operands[3];
22324 vt = GEN_INT (w);
22325 if (maskmode == V2DImode && !TARGET_SSE4_1)
22326 {
22327 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
22328 more shuffle to convert the V2DI input mask into a V4SI
22329 input mask. At that point the masking that ix86_expand_int_vcond
22330 performs will work as desired. */
22331 rtx t3 = gen_reg_rtx (V4SImode);
22332 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22333 const0_rtx, const0_rtx,
22334 const2_rtx, const2_rtx));
22335 mask = t3;
22336 maskmode = V4SImode;
22337 e = w = 4;
22338 }
22339
22340 for (i = 0; i < w; i++)
22341 vec[i] = vt;
22342 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22343 vt = force_reg (maskmode, vt);
22344 mask = expand_simple_binop (maskmode, AND, mask, vt,
22345 NULL_RTX, 0, OPTAB_DIRECT);
22346
22347 if (GET_MODE (target) != mode)
22348 target = gen_reg_rtx (mode);
22349 xops[0] = target;
22350 xops[1] = gen_lowpart (mode, t2);
22351 xops[2] = gen_lowpart (mode, t1);
22352 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22353 xops[4] = mask;
22354 xops[5] = vt;
22355 ok = ix86_expand_int_vcond (xops);
22356 gcc_assert (ok);
22357 if (target != operands[0])
22358 emit_move_insn (operands[0],
22359 gen_lowpart (GET_MODE (operands[0]), target));
22360 }
22361 }
22362
22363 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
22364 true if we should do zero extension, else sign extension. HIGH_P is
22365 true if we want the N/2 high elements, else the low elements. */
22366
22367 void
22368 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22369 {
22370 machine_mode imode = GET_MODE (src);
22371 rtx tmp;
22372
22373 if (TARGET_SSE4_1)
22374 {
22375 rtx (*unpack)(rtx, rtx);
22376 rtx (*extract)(rtx, rtx) = NULL;
22377 machine_mode halfmode = BLKmode;
22378
22379 switch (imode)
22380 {
22381 case V64QImode:
22382 if (unsigned_p)
22383 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22384 else
22385 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22386 halfmode = V32QImode;
22387 extract
22388 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22389 break;
22390 case V32QImode:
22391 if (unsigned_p)
22392 unpack = gen_avx2_zero_extendv16qiv16hi2;
22393 else
22394 unpack = gen_avx2_sign_extendv16qiv16hi2;
22395 halfmode = V16QImode;
22396 extract
22397 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22398 break;
22399 case V32HImode:
22400 if (unsigned_p)
22401 unpack = gen_avx512f_zero_extendv16hiv16si2;
22402 else
22403 unpack = gen_avx512f_sign_extendv16hiv16si2;
22404 halfmode = V16HImode;
22405 extract
22406 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22407 break;
22408 case V16HImode:
22409 if (unsigned_p)
22410 unpack = gen_avx2_zero_extendv8hiv8si2;
22411 else
22412 unpack = gen_avx2_sign_extendv8hiv8si2;
22413 halfmode = V8HImode;
22414 extract
22415 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22416 break;
22417 case V16SImode:
22418 if (unsigned_p)
22419 unpack = gen_avx512f_zero_extendv8siv8di2;
22420 else
22421 unpack = gen_avx512f_sign_extendv8siv8di2;
22422 halfmode = V8SImode;
22423 extract
22424 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22425 break;
22426 case V8SImode:
22427 if (unsigned_p)
22428 unpack = gen_avx2_zero_extendv4siv4di2;
22429 else
22430 unpack = gen_avx2_sign_extendv4siv4di2;
22431 halfmode = V4SImode;
22432 extract
22433 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22434 break;
22435 case V16QImode:
22436 if (unsigned_p)
22437 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22438 else
22439 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22440 break;
22441 case V8HImode:
22442 if (unsigned_p)
22443 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22444 else
22445 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22446 break;
22447 case V4SImode:
22448 if (unsigned_p)
22449 unpack = gen_sse4_1_zero_extendv2siv2di2;
22450 else
22451 unpack = gen_sse4_1_sign_extendv2siv2di2;
22452 break;
22453 default:
22454 gcc_unreachable ();
22455 }
22456
22457 if (GET_MODE_SIZE (imode) >= 32)
22458 {
22459 tmp = gen_reg_rtx (halfmode);
22460 emit_insn (extract (tmp, src));
22461 }
22462 else if (high_p)
22463 {
22464 /* Shift higher 8 bytes to lower 8 bytes. */
22465 tmp = gen_reg_rtx (V1TImode);
22466 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22467 GEN_INT (64)));
22468 tmp = gen_lowpart (imode, tmp);
22469 }
22470 else
22471 tmp = src;
22472
22473 emit_insn (unpack (dest, tmp));
22474 }
22475 else
22476 {
22477 rtx (*unpack)(rtx, rtx, rtx);
22478
22479 switch (imode)
22480 {
22481 case V16QImode:
22482 if (high_p)
22483 unpack = gen_vec_interleave_highv16qi;
22484 else
22485 unpack = gen_vec_interleave_lowv16qi;
22486 break;
22487 case V8HImode:
22488 if (high_p)
22489 unpack = gen_vec_interleave_highv8hi;
22490 else
22491 unpack = gen_vec_interleave_lowv8hi;
22492 break;
22493 case V4SImode:
22494 if (high_p)
22495 unpack = gen_vec_interleave_highv4si;
22496 else
22497 unpack = gen_vec_interleave_lowv4si;
22498 break;
22499 default:
22500 gcc_unreachable ();
22501 }
22502
22503 if (unsigned_p)
22504 tmp = force_reg (imode, CONST0_RTX (imode));
22505 else
22506 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22507 src, pc_rtx, pc_rtx);
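      /* TMP is now either zero (for zero extension) or a per-element copy
	 of the sign of SRC (0 > src yields all-ones exactly for the
	 negative elements); interleaving SRC with TMP widens each element.  */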
22508
22509 rtx tmp2 = gen_reg_rtx (imode);
22510 emit_insn (unpack (tmp2, src, tmp));
22511 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22512 }
22513 }
22514
22515 /* Expand conditional increment or decrement using adc/sbb instructions.
22516 The default case using setcc followed by the conditional move can be
22517 done by generic code. */
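/* Roughly, for 'r = x + (a < b ? 1 : 0)' this emits the compare that
   leaves the condition in the flags followed by a single adc/sbb of x
   with a 0 or -1 immediate, so no setcc or cmov is needed.  */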
22518 bool
22519 ix86_expand_int_addcc (rtx operands[])
22520 {
22521 enum rtx_code code = GET_CODE (operands[1]);
22522 rtx flags;
22523 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22524 rtx compare_op;
22525 rtx val = const0_rtx;
22526 bool fpcmp = false;
22527 machine_mode mode;
22528 rtx op0 = XEXP (operands[1], 0);
22529 rtx op1 = XEXP (operands[1], 1);
22530
22531 if (operands[3] != const1_rtx
22532 && operands[3] != constm1_rtx)
22533 return false;
22534 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22535 return false;
22536 code = GET_CODE (compare_op);
22537
22538 flags = XEXP (compare_op, 0);
22539
22540 if (GET_MODE (flags) == CCFPmode
22541 || GET_MODE (flags) == CCFPUmode)
22542 {
22543 fpcmp = true;
22544 code = ix86_fp_compare_code_to_integer (code);
22545 }
22546
22547 if (code != LTU)
22548 {
22549 val = constm1_rtx;
22550 if (fpcmp)
22551 PUT_CODE (compare_op,
22552 reverse_condition_maybe_unordered
22553 (GET_CODE (compare_op)));
22554 else
22555 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22556 }
22557
22558 mode = GET_MODE (operands[0]);
22559
22560 /* Construct either adc or sbb insn. */
22561 if ((code == LTU) == (operands[3] == constm1_rtx))
22562 {
22563 switch (mode)
22564 {
22565 case QImode:
22566 insn = gen_subqi3_carry;
22567 break;
22568 case HImode:
22569 insn = gen_subhi3_carry;
22570 break;
22571 case SImode:
22572 insn = gen_subsi3_carry;
22573 break;
22574 case DImode:
22575 insn = gen_subdi3_carry;
22576 break;
22577 default:
22578 gcc_unreachable ();
22579 }
22580 }
22581 else
22582 {
22583 switch (mode)
22584 {
22585 case QImode:
22586 insn = gen_addqi3_carry;
22587 break;
22588 case HImode:
22589 insn = gen_addhi3_carry;
22590 break;
22591 case SImode:
22592 insn = gen_addsi3_carry;
22593 break;
22594 case DImode:
22595 insn = gen_adddi3_carry;
22596 break;
22597 default:
22598 gcc_unreachable ();
22599 }
22600 }
22601 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22602
22603 return true;
22604 }
22605
22606
22607 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22608 but works for floating point parameters and non-offsettable memories.
22609 For pushes, it returns just stack offsets; the values will be saved
22610 in the right order. At most four parts are generated. */
22611
22612 static int
22613 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
22614 {
22615 int size;
22616
22617 if (!TARGET_64BIT)
22618 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22619 else
22620 size = (GET_MODE_SIZE (mode) + 4) / 8;
22621
22622 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22623 gcc_assert (size >= 2 && size <= 4);
22624
22625 /* Optimize constant pool reference to immediates. This is used by fp
22626 moves, that force all constants to memory to allow combining. */
22627 if (MEM_P (operand) && MEM_READONLY_P (operand))
22628 {
22629 rtx tmp = maybe_get_pool_constant (operand);
22630 if (tmp)
22631 operand = tmp;
22632 }
22633
22634 if (MEM_P (operand) && !offsettable_memref_p (operand))
22635 {
22636 /* The only non-offsettable memories we handle are pushes. */
22637 int ok = push_operand (operand, VOIDmode);
22638
22639 gcc_assert (ok);
22640
22641 operand = copy_rtx (operand);
22642 PUT_MODE (operand, word_mode);
22643 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22644 return size;
22645 }
22646
22647 if (GET_CODE (operand) == CONST_VECTOR)
22648 {
22649 machine_mode imode = int_mode_for_mode (mode);
22650 /* Caution: if we looked through a constant pool memory above,
22651 the operand may actually have a different mode now. That's
22652 ok, since we want to pun this all the way back to an integer. */
22653 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22654 gcc_assert (operand != NULL);
22655 mode = imode;
22656 }
22657
22658 if (!TARGET_64BIT)
22659 {
22660 if (mode == DImode)
22661 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22662 else
22663 {
22664 int i;
22665
22666 if (REG_P (operand))
22667 {
22668 gcc_assert (reload_completed);
22669 for (i = 0; i < size; i++)
22670 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22671 }
22672 else if (offsettable_memref_p (operand))
22673 {
22674 operand = adjust_address (operand, SImode, 0);
22675 parts[0] = operand;
22676 for (i = 1; i < size; i++)
22677 parts[i] = adjust_address (operand, SImode, 4 * i);
22678 }
22679 else if (CONST_DOUBLE_P (operand))
22680 {
22681 REAL_VALUE_TYPE r;
22682 long l[4];
22683
22684 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22685 switch (mode)
22686 {
22687 case TFmode:
22688 real_to_target (l, &r, mode);
22689 parts[3] = gen_int_mode (l[3], SImode);
22690 parts[2] = gen_int_mode (l[2], SImode);
22691 break;
22692 case XFmode:
22693 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22694 long double may not be 80-bit. */
22695 real_to_target (l, &r, mode);
22696 parts[2] = gen_int_mode (l[2], SImode);
22697 break;
22698 case DFmode:
22699 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22700 break;
22701 default:
22702 gcc_unreachable ();
22703 }
22704 parts[1] = gen_int_mode (l[1], SImode);
22705 parts[0] = gen_int_mode (l[0], SImode);
22706 }
22707 else
22708 gcc_unreachable ();
22709 }
22710 }
22711 else
22712 {
22713 if (mode == TImode)
22714 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22715 if (mode == XFmode || mode == TFmode)
22716 {
22717 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22718 if (REG_P (operand))
22719 {
22720 gcc_assert (reload_completed);
22721 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22722 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22723 }
22724 else if (offsettable_memref_p (operand))
22725 {
22726 operand = adjust_address (operand, DImode, 0);
22727 parts[0] = operand;
22728 parts[1] = adjust_address (operand, upper_mode, 8);
22729 }
22730 else if (CONST_DOUBLE_P (operand))
22731 {
22732 REAL_VALUE_TYPE r;
22733 long l[4];
22734
22735 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22736 real_to_target (l, &r, mode);
22737
22738 /* real_to_target puts 32-bit pieces in each long. */
22739 parts[0] =
22740 gen_int_mode
22741 ((l[0] & (HOST_WIDE_INT) 0xffffffff)
22742 | ((l[1] & (HOST_WIDE_INT) 0xffffffff) << 32),
22743 DImode);
22744
22745 if (upper_mode == SImode)
22746 parts[1] = gen_int_mode (l[2], SImode);
22747 else
22748 parts[1] =
22749 gen_int_mode
22750 ((l[2] & (HOST_WIDE_INT) 0xffffffff)
22751 | ((l[3] & (HOST_WIDE_INT) 0xffffffff) << 32),
22752 DImode);
22753 }
22754 else
22755 gcc_unreachable ();
22756 }
22757 }
22758
22759 return size;
22760 }
22761
22762 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22763 The destination parts are placed in operands 2-5 and the source
22764 parts in operands 6-9, in the order in which the moves should be
22765 emitted. */
22766
22767 void
22768 ix86_split_long_move (rtx operands[])
22769 {
22770 rtx part[2][4];
22771 int nparts, i, j;
22772 int push = 0;
22773 int collisions = 0;
22774 machine_mode mode = GET_MODE (operands[0]);
22775 bool collisionparts[4];
22776
22777 /* The DFmode expanders may ask us to move a double.
22778 For a 64-bit target this is a single move. By hiding the fact
22779 here we simplify the i386.md splitters. */
22780 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22781 {
22782 /* Optimize constant pool reference to immediates. This is used by
22783 fp moves, that force all constants to memory to allow combining. */
22784
22785 if (MEM_P (operands[1])
22786 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22787 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22788 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22789 if (push_operand (operands[0], VOIDmode))
22790 {
22791 operands[0] = copy_rtx (operands[0]);
22792 PUT_MODE (operands[0], word_mode);
22793 }
22794 else
22795 operands[0] = gen_lowpart (DImode, operands[0]);
22796 operands[1] = gen_lowpart (DImode, operands[1]);
22797 emit_move_insn (operands[0], operands[1]);
22798 return;
22799 }
22800
22801 /* The only non-offsettable memory we handle is push. */
22802 if (push_operand (operands[0], VOIDmode))
22803 push = 1;
22804 else
22805 gcc_assert (!MEM_P (operands[0])
22806 || offsettable_memref_p (operands[0]));
22807
22808 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22809 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22810
22811 /* When emitting push, take care for source operands on the stack. */
22812 if (push && MEM_P (operands[1])
22813 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22814 {
22815 rtx src_base = XEXP (part[1][nparts - 1], 0);
22816
22817 /* Compensate for the stack decrement by 4. */
22818 if (!TARGET_64BIT && nparts == 3
22819 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22820 src_base = plus_constant (Pmode, src_base, 4);
22821
22822 /* src_base refers to the stack pointer and is
22823 automatically decreased by emitted push. */
22824 for (i = 0; i < nparts; i++)
22825 part[1][i] = change_address (part[1][i],
22826 GET_MODE (part[1][i]), src_base);
22827 }
22828
22829 /* We need to do copy in the right order in case an address register
22830 of the source overlaps the destination. */
22831 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22832 {
22833 rtx tmp;
22834
22835 for (i = 0; i < nparts; i++)
22836 {
22837 collisionparts[i]
22838 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22839 if (collisionparts[i])
22840 collisions++;
22841 }
22842
22843 /* Collision in the middle part can be handled by reordering. */
22844 if (collisions == 1 && nparts == 3 && collisionparts [1])
22845 {
22846 std::swap (part[0][1], part[0][2]);
22847 std::swap (part[1][1], part[1][2]);
22848 }
22849 else if (collisions == 1
22850 && nparts == 4
22851 && (collisionparts [1] || collisionparts [2]))
22852 {
22853 if (collisionparts [1])
22854 {
22855 std::swap (part[0][1], part[0][2]);
22856 std::swap (part[1][1], part[1][2]);
22857 }
22858 else
22859 {
22860 std::swap (part[0][2], part[0][3]);
22861 std::swap (part[1][2], part[1][3]);
22862 }
22863 }
22864
22865 /* If there are more collisions, we can't handle it by reordering.
22866 Do an lea to the last part and use only one colliding move. */
22867 else if (collisions > 1)
22868 {
22869 rtx base;
22870
22871 collisions = 1;
22872
22873 base = part[0][nparts - 1];
22874
22875 /* Handle the case when the last part isn't valid for lea.
22876 Happens in 64-bit mode storing the 12-byte XFmode. */
22877 if (GET_MODE (base) != Pmode)
22878 base = gen_rtx_REG (Pmode, REGNO (base));
22879
22880 emit_insn (gen_rtx_SET (base, XEXP (part[1][0], 0)));
22881 part[1][0] = replace_equiv_address (part[1][0], base);
22882 for (i = 1; i < nparts; i++)
22883 {
22884 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22885 part[1][i] = replace_equiv_address (part[1][i], tmp);
22886 }
22887 }
22888 }
22889
22890 if (push)
22891 {
22892 if (!TARGET_64BIT)
22893 {
22894 if (nparts == 3)
22895 {
22896 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22897 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22898 stack_pointer_rtx, GEN_INT (-4)));
22899 emit_move_insn (part[0][2], part[1][2]);
22900 }
22901 else if (nparts == 4)
22902 {
22903 emit_move_insn (part[0][3], part[1][3]);
22904 emit_move_insn (part[0][2], part[1][2]);
22905 }
22906 }
22907 else
22908 {
22909 /* In 64-bit mode we don't have a 32-bit push available. If the part
22910 is a register, that is OK - we just use the larger counterpart. We also
22911 retype memory - this comes from an attempt to avoid a REX prefix on
22912 moving the second half of a TFmode value. */
22913 if (GET_MODE (part[1][1]) == SImode)
22914 {
22915 switch (GET_CODE (part[1][1]))
22916 {
22917 case MEM:
22918 part[1][1] = adjust_address (part[1][1], DImode, 0);
22919 break;
22920
22921 case REG:
22922 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22923 break;
22924
22925 default:
22926 gcc_unreachable ();
22927 }
22928
22929 if (GET_MODE (part[1][0]) == SImode)
22930 part[1][0] = part[1][1];
22931 }
22932 }
22933 emit_move_insn (part[0][1], part[1][1]);
22934 emit_move_insn (part[0][0], part[1][0]);
22935 return;
22936 }
22937
22938 /* Choose correct order to not overwrite the source before it is copied. */
22939 if ((REG_P (part[0][0])
22940 && REG_P (part[1][1])
22941 && (REGNO (part[0][0]) == REGNO (part[1][1])
22942 || (nparts == 3
22943 && REGNO (part[0][0]) == REGNO (part[1][2]))
22944 || (nparts == 4
22945 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22946 || (collisions > 0
22947 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22948 {
22949 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22950 {
22951 operands[2 + i] = part[0][j];
22952 operands[6 + i] = part[1][j];
22953 }
22954 }
22955 else
22956 {
22957 for (i = 0; i < nparts; i++)
22958 {
22959 operands[2 + i] = part[0][i];
22960 operands[6 + i] = part[1][i];
22961 }
22962 }
22963
22964 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22965 if (optimize_insn_for_size_p ())
22966 {
22967 for (j = 0; j < nparts - 1; j++)
22968 if (CONST_INT_P (operands[6 + j])
22969 && operands[6 + j] != const0_rtx
22970 && REG_P (operands[2 + j]))
22971 for (i = j; i < nparts - 1; i++)
22972 if (CONST_INT_P (operands[7 + i])
22973 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22974 operands[7 + i] = operands[2 + j];
22975 }
22976
22977 for (i = 0; i < nparts; i++)
22978 emit_move_insn (operands[2 + i], operands[6 + i]);
22979
22980 return;
22981 }
22982
22983 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22984 left shift by a constant, either using a single shift or
22985 a sequence of add instructions. */
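/* Adding a register to itself doubles it, i.e. shifts it left by one,
   so COUNT such adds implement the shift when that is cheaper than a
   single shift-by-COUNT according to the cost tables.  */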
22986
22987 static void
22988 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
22989 {
22990 rtx (*insn)(rtx, rtx, rtx);
22991
22992 if (count == 1
22993 || (count * ix86_cost->add <= ix86_cost->shift_const
22994 && !optimize_insn_for_size_p ()))
22995 {
22996 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22997 while (count-- > 0)
22998 emit_insn (insn (operand, operand, operand));
22999 }
23000 else
23001 {
23002 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23003 emit_insn (insn (operand, operand, GEN_INT (count)));
23004 }
23005 }
23006
23007 void
23008 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
23009 {
23010 rtx (*gen_ashl3)(rtx, rtx, rtx);
23011 rtx (*gen_shld)(rtx, rtx, rtx);
23012 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23013
23014 rtx low[2], high[2];
23015 int count;
23016
23017 if (CONST_INT_P (operands[2]))
23018 {
23019 split_double_mode (mode, operands, 2, low, high);
23020 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23021
23022 if (count >= half_width)
23023 {
23024 emit_move_insn (high[0], low[1]);
23025 emit_move_insn (low[0], const0_rtx);
23026
23027 if (count > half_width)
23028 ix86_expand_ashl_const (high[0], count - half_width, mode);
23029 }
23030 else
23031 {
23032 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23033
23034 if (!rtx_equal_p (operands[0], operands[1]))
23035 emit_move_insn (operands[0], operands[1]);
23036
23037 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
23038 ix86_expand_ashl_const (low[0], count, mode);
23039 }
23040 return;
23041 }
23042
23043 split_double_mode (mode, operands, 1, low, high);
23044
23045 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23046
23047 if (operands[1] == const1_rtx)
23048 {
23049 /* Assuming we've chosen QImode-capable registers, 1 << N
23050 can be done with two 32/64-bit shifts, no branches, no cmoves. */
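      /* Both halves are cleared first; the testqi/sete/setne pair then
	 deposits the single 1 bit into whichever half the shift count
	 selects, and the final shifts by the count (modulo the half
	 width) move it into place.  */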
23051 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
23052 {
23053 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
23054
23055 ix86_expand_clear (low[0]);
23056 ix86_expand_clear (high[0]);
23057 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23058
23059 d = gen_lowpart (QImode, low[0]);
23060 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23061 s = gen_rtx_EQ (QImode, flags, const0_rtx);
23062 emit_insn (gen_rtx_SET (d, s));
23063
23064 d = gen_lowpart (QImode, high[0]);
23065 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23066 s = gen_rtx_NE (QImode, flags, const0_rtx);
23067 emit_insn (gen_rtx_SET (d, s));
23068 }
23069
23070 /* Otherwise, we can get the same results by manually performing
23071 a bit extract operation on bit 5/6, and then performing the two
23072 shifts. The two methods of getting 0/1 into low/high are exactly
23073 the same size. Avoiding the shift in the bit extract case helps
23074 pentium4 a bit; no one else seems to care much either way. */
23075 else
23076 {
23077 machine_mode half_mode;
23078 rtx (*gen_lshr3)(rtx, rtx, rtx);
23079 rtx (*gen_and3)(rtx, rtx, rtx);
23080 rtx (*gen_xor3)(rtx, rtx, rtx);
23081 HOST_WIDE_INT bits;
23082 rtx x;
23083
23084 if (mode == DImode)
23085 {
23086 half_mode = SImode;
23087 gen_lshr3 = gen_lshrsi3;
23088 gen_and3 = gen_andsi3;
23089 gen_xor3 = gen_xorsi3;
23090 bits = 5;
23091 }
23092 else
23093 {
23094 half_mode = DImode;
23095 gen_lshr3 = gen_lshrdi3;
23096 gen_and3 = gen_anddi3;
23097 gen_xor3 = gen_xordi3;
23098 bits = 6;
23099 }
23100
23101 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23102 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23103 else
23104 x = gen_lowpart (half_mode, operands[2]);
23105 emit_insn (gen_rtx_SET (high[0], x));
23106
23107 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23108 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23109 emit_move_insn (low[0], high[0]);
23110 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23111 }
23112
23113 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23114 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23115 return;
23116 }
23117
23118 if (operands[1] == constm1_rtx)
23119 {
23120 /* For -1 << N, we can avoid the shld instruction, because we
23121 know that we're shifting 0...31/63 ones into a -1. */
23122 emit_move_insn (low[0], constm1_rtx);
23123 if (optimize_insn_for_size_p ())
23124 emit_move_insn (high[0], low[0]);
23125 else
23126 emit_move_insn (high[0], constm1_rtx);
23127 }
23128 else
23129 {
23130 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23131
23132 if (!rtx_equal_p (operands[0], operands[1]))
23133 emit_move_insn (operands[0], operands[1]);
23134
23135 split_double_mode (mode, operands, 1, low, high);
23136 emit_insn (gen_shld (high[0], low[0], operands[2]));
23137 }
23138
23139 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23140
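  /* The variable count may be >= the half width at run time; the
     x86_shift*_adj_* patterns below fix up that case, using cmov when
     available (together with a scratch register) and a conditional
     branch otherwise.  */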
23141 if (TARGET_CMOVE && scratch)
23142 {
23143 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23144 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23145
23146 ix86_expand_clear (scratch);
23147 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23148 }
23149 else
23150 {
23151 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23152 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23153
23154 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
23155 }
23156 }
23157
23158 void
23159 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23160 {
23161 rtx (*gen_ashr3)(rtx, rtx, rtx)
23162 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23163 rtx (*gen_shrd)(rtx, rtx, rtx);
23164 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23165
23166 rtx low[2], high[2];
23167 int count;
23168
23169 if (CONST_INT_P (operands[2]))
23170 {
23171 split_double_mode (mode, operands, 2, low, high);
23172 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23173
23174 if (count == GET_MODE_BITSIZE (mode) - 1)
23175 {
23176 emit_move_insn (high[0], high[1]);
23177 emit_insn (gen_ashr3 (high[0], high[0],
23178 GEN_INT (half_width - 1)));
23179 emit_move_insn (low[0], high[0]);
23180
23181 }
23182 else if (count >= half_width)
23183 {
23184 emit_move_insn (low[0], high[1]);
23185 emit_move_insn (high[0], low[0]);
23186 emit_insn (gen_ashr3 (high[0], high[0],
23187 GEN_INT (half_width - 1)));
23188
23189 if (count > half_width)
23190 emit_insn (gen_ashr3 (low[0], low[0],
23191 GEN_INT (count - half_width)));
23192 }
23193 else
23194 {
23195 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23196
23197 if (!rtx_equal_p (operands[0], operands[1]))
23198 emit_move_insn (operands[0], operands[1]);
23199
23200 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23201 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
23202 }
23203 }
23204 else
23205 {
23206 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23207
23208 if (!rtx_equal_p (operands[0], operands[1]))
23209 emit_move_insn (operands[0], operands[1]);
23210
23211 split_double_mode (mode, operands, 1, low, high);
23212
23213 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23214 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23215
23216 if (TARGET_CMOVE && scratch)
23217 {
23218 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23219 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23220
23221 emit_move_insn (scratch, high[0]);
23222 emit_insn (gen_ashr3 (scratch, scratch,
23223 GEN_INT (half_width - 1)));
23224 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23225 scratch));
23226 }
23227 else
23228 {
23229 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23230 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23231
23232 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
23233 }
23234 }
23235 }
23236
23237 void
23238 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23239 {
23240 rtx (*gen_lshr3)(rtx, rtx, rtx)
23241 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23242 rtx (*gen_shrd)(rtx, rtx, rtx);
23243 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23244
23245 rtx low[2], high[2];
23246 int count;
23247
23248 if (CONST_INT_P (operands[2]))
23249 {
23250 split_double_mode (mode, operands, 2, low, high);
23251 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23252
23253 if (count >= half_width)
23254 {
23255 emit_move_insn (low[0], high[1]);
23256 ix86_expand_clear (high[0]);
23257
23258 if (count > half_width)
23259 emit_insn (gen_lshr3 (low[0], low[0],
23260 GEN_INT (count - half_width)));
23261 }
23262 else
23263 {
23264 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23265
23266 if (!rtx_equal_p (operands[0], operands[1]))
23267 emit_move_insn (operands[0], operands[1]);
23268
23269 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23270 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
23271 }
23272 }
23273 else
23274 {
23275 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23276
23277 if (!rtx_equal_p (operands[0], operands[1]))
23278 emit_move_insn (operands[0], operands[1]);
23279
23280 split_double_mode (mode, operands, 1, low, high);
23281
23282 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23283 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23284
23285 if (TARGET_CMOVE && scratch)
23286 {
23287 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23288 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23289
23290 ix86_expand_clear (scratch);
23291 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23292 scratch));
23293 }
23294 else
23295 {
23296 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23297 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23298
23299 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23300 }
23301 }
23302 }
23303
23304 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
23305 static void
23306 predict_jump (int prob)
23307 {
23308 rtx insn = get_last_insn ();
23309 gcc_assert (JUMP_P (insn));
23310 add_int_reg_note (insn, REG_BR_PROB, prob);
23311 }
23312
23313 /* Helper function for the string operations below. Test whether VARIABLE
23314 is aligned to VALUE bytes (VARIABLE & VALUE == 0); if so, jump to the returned label. */
23315 static rtx_code_label *
23316 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23317 {
23318 rtx_code_label *label = gen_label_rtx ();
23319 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23320 if (GET_MODE (variable) == DImode)
23321 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23322 else
23323 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23324 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23325 1, label);
23326 if (epilogue)
23327 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23328 else
23329 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23330 return label;
23331 }
23332
23333 /* Decrease COUNTREG by VALUE. */
23334 static void
23335 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23336 {
23337 rtx (*gen_add)(rtx, rtx, rtx)
23338 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23339
23340 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23341 }
23342
23343 /* Zero extend possibly SImode EXP to Pmode register. */
23344 rtx
23345 ix86_zero_extend_to_Pmode (rtx exp)
23346 {
23347 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23348 }
23349
23350 /* Divide COUNTREG by SCALE. */
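/* SCALE is expected to be a power of two here; for a non-constant COUNTREG the
   division is emitted as a logical shift right by exact_log2 (SCALE), e.g.
   scale_counter (count, 4) yields count >> 2.  */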
23351 static rtx
23352 scale_counter (rtx countreg, int scale)
23353 {
23354 rtx sc;
23355
23356 if (scale == 1)
23357 return countreg;
23358 if (CONST_INT_P (countreg))
23359 return GEN_INT (INTVAL (countreg) / scale);
23360 gcc_assert (REG_P (countreg));
23361
23362 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23363 GEN_INT (exact_log2 (scale)),
23364 NULL, 1, OPTAB_DIRECT);
23365 return sc;
23366 }
23367
23368 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23369 DImode for constant loop counts. */
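/* For instance, a constant count that does not fit in 32 bits on a 64-bit
   target yields DImode, smaller constants yield SImode, and a non-constant
   count simply keeps the mode of the expression.  */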
23370
23371 static machine_mode
23372 counter_mode (rtx count_exp)
23373 {
23374 if (GET_MODE (count_exp) != VOIDmode)
23375 return GET_MODE (count_exp);
23376 if (!CONST_INT_P (count_exp))
23377 return Pmode;
23378 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23379 return DImode;
23380 return SImode;
23381 }
23382
23383 /* Copy the address to a Pmode register. This is used for x32 to
23384 truncate DImode TLS address to a SImode register. */
23385
23386 static rtx
23387 ix86_copy_addr_to_reg (rtx addr)
23388 {
23389 rtx reg;
23390 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23391 {
23392 reg = copy_addr_to_reg (addr);
23393 REG_POINTER (reg) = 1;
23394 return reg;
23395 }
23396 else
23397 {
23398 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23399 reg = copy_to_mode_reg (DImode, addr);
23400 REG_POINTER (reg) = 1;
23401 return gen_rtx_SUBREG (SImode, reg, 0);
23402 }
23403 }
23404
23405 /* When ISSETMEM is FALSE, output a simple loop moving memory pointed to by
23406 SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall size
23407 is COUNT bytes. When ISSETMEM is TRUE, output the equivalent loop to set
23408 memory to VALUE (supposed to be in MODE).
23409
23410 The size is rounded down to a whole number of chunks moved at once.
23411 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
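/* A sketch of the emitted code for the copy case with MODE == SImode and
   UNROLL == 4 (16 bytes per iteration):

     size = count & ~15;
     iter = 0;
   top:
     copy 16 bytes from SRCMEM + iter to DESTMEM + iter (via 4 temporaries);
     iter += 16;
     if (iter < size) goto top;
     DESTPTR += iter; SRCPTR += iter;
   out:
 */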
23412
23413
23414 static void
23415 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23416 rtx destptr, rtx srcptr, rtx value,
23417 rtx count, machine_mode mode, int unroll,
23418 int expected_size, bool issetmem)
23419 {
23420 rtx_code_label *out_label, *top_label;
23421 rtx iter, tmp;
23422 machine_mode iter_mode = counter_mode (count);
23423 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23424 rtx piece_size = GEN_INT (piece_size_n);
23425 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23426 rtx size;
23427 int i;
23428
23429 top_label = gen_label_rtx ();
23430 out_label = gen_label_rtx ();
23431 iter = gen_reg_rtx (iter_mode);
23432
23433 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23434 NULL, 1, OPTAB_DIRECT);
23435 /* Those two should combine. */
23436 if (piece_size == const1_rtx)
23437 {
23438 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23439 true, out_label);
23440 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23441 }
23442 emit_move_insn (iter, const0_rtx);
23443
23444 emit_label (top_label);
23445
23446 tmp = convert_modes (Pmode, iter_mode, iter, true);
23447
23448 /* This assert could be relaxed - in this case we'll need to compute
23449 the largest power of two contained in PIECE_SIZE_N and pass it to
23450 offset_address. */
23451 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23452 destmem = offset_address (destmem, tmp, piece_size_n);
23453 destmem = adjust_address (destmem, mode, 0);
23454
23455 if (!issetmem)
23456 {
23457 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23458 srcmem = adjust_address (srcmem, mode, 0);
23459
23460 /* When unrolling for chips that reorder memory reads and writes,
23461 we can save registers by using a single temporary.
23462 Also, using 4 temporaries is overkill in 32-bit mode. */
23463 if (!TARGET_64BIT && 0)
23464 {
23465 for (i = 0; i < unroll; i++)
23466 {
23467 if (i)
23468 {
23469 destmem =
23470 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23471 srcmem =
23472 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23473 }
23474 emit_move_insn (destmem, srcmem);
23475 }
23476 }
23477 else
23478 {
23479 rtx tmpreg[4];
23480 gcc_assert (unroll <= 4);
23481 for (i = 0; i < unroll; i++)
23482 {
23483 tmpreg[i] = gen_reg_rtx (mode);
23484 if (i)
23485 {
23486 srcmem =
23487 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23488 }
23489 emit_move_insn (tmpreg[i], srcmem);
23490 }
23491 for (i = 0; i < unroll; i++)
23492 {
23493 if (i)
23494 {
23495 destmem =
23496 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23497 }
23498 emit_move_insn (destmem, tmpreg[i]);
23499 }
23500 }
23501 }
23502 else
23503 for (i = 0; i < unroll; i++)
23504 {
23505 if (i)
23506 destmem =
23507 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23508 emit_move_insn (destmem, value);
23509 }
23510
23511 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23512 true, OPTAB_LIB_WIDEN);
23513 if (tmp != iter)
23514 emit_move_insn (iter, tmp);
23515
23516 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23517 true, top_label);
23518 if (expected_size != -1)
23519 {
23520 expected_size /= GET_MODE_SIZE (mode) * unroll;
23521 if (expected_size == 0)
23522 predict_jump (0);
23523 else if (expected_size > REG_BR_PROB_BASE)
23524 predict_jump (REG_BR_PROB_BASE - 1);
23525 else
23526 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23527 }
23528 else
23529 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23530 iter = ix86_zero_extend_to_Pmode (iter);
23531 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23532 true, OPTAB_LIB_WIDEN);
23533 if (tmp != destptr)
23534 emit_move_insn (destptr, tmp);
23535 if (!issetmem)
23536 {
23537 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23538 true, OPTAB_LIB_WIDEN);
23539 if (tmp != srcptr)
23540 emit_move_insn (srcptr, tmp);
23541 }
23542 emit_label (out_label);
23543 }
23544
23545 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23546 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23547 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23548 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23549 ORIG_VALUE is the original value passed to memset to fill the memory with.
23550 Other arguments have the same meaning as for the previous function. */
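/* For example, a memset with a zero value and a count divisible by 4 is
   widened from QImode to SImode by this function, so in effect "rep stosl" is
   emitted instead of "rep stosb", with the count scaled down by 4 through
   scale_counter.  */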
23551
23552 static void
23553 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23554 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23555 rtx count,
23556 machine_mode mode, bool issetmem)
23557 {
23558 rtx destexp;
23559 rtx srcexp;
23560 rtx countreg;
23561 HOST_WIDE_INT rounded_count;
23562
23563 /* If possible, it is shorter to use rep movs.
23564 TODO: Maybe it is better to move this logic to decide_alg. */
23565 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23566 && (!issetmem || orig_value == const0_rtx))
23567 mode = SImode;
23568
23569 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23570 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23571
23572 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23573 GET_MODE_SIZE (mode)));
23574 if (mode != QImode)
23575 {
23576 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23577 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23578 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23579 }
23580 else
23581 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23582 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23583 {
23584 rounded_count = (INTVAL (count)
23585 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23586 destmem = shallow_copy_rtx (destmem);
23587 set_mem_size (destmem, rounded_count);
23588 }
23589 else if (MEM_SIZE_KNOWN_P (destmem))
23590 clear_mem_size (destmem);
23591
23592 if (issetmem)
23593 {
23594 value = force_reg (mode, gen_lowpart (mode, value));
23595 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23596 }
23597 else
23598 {
23599 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23600 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23601 if (mode != QImode)
23602 {
23603 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23604 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23605 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23606 }
23607 else
23608 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23609 if (CONST_INT_P (count))
23610 {
23611 rounded_count = (INTVAL (count)
23612 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23613 srcmem = shallow_copy_rtx (srcmem);
23614 set_mem_size (srcmem, rounded_count);
23615 }
23616 else
23617 {
23618 if (MEM_SIZE_KNOWN_P (srcmem))
23619 clear_mem_size (srcmem);
23620 }
23621 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23622 destexp, srcexp));
23623 }
23624 }
23625
23626 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23627 DESTMEM.
23628 SRCMEM is passed by pointer so that it can be updated on return.
23629 The return value is the updated DESTMEM. */
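/* Illustrative example: with SIZE_TO_MOVE == 16 on a 64-bit SSE target the
   widest integer mode with a mov pattern is TImode; since that is wider than
   word_mode it is replaced by the corresponding vector mode (typically
   V2DImode), and a single load/store pair through a temporary register is
   emitted, followed by the pointer adjustments.  */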
23630 static rtx
23631 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23632 HOST_WIDE_INT size_to_move)
23633 {
23634 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23635 enum insn_code code;
23636 machine_mode move_mode;
23637 int piece_size, i;
23638
23639 /* Find the widest mode in which we could perform moves.
23640 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
23641 it until move of such size is supported. */
23642 piece_size = 1 << floor_log2 (size_to_move);
23643 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23644 code = optab_handler (mov_optab, move_mode);
23645 while (code == CODE_FOR_nothing && piece_size > 1)
23646 {
23647 piece_size >>= 1;
23648 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23649 code = optab_handler (mov_optab, move_mode);
23650 }
23651
23652 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23653 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23654 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23655 {
23656 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23657 move_mode = mode_for_vector (word_mode, nunits);
23658 code = optab_handler (mov_optab, move_mode);
23659 if (code == CODE_FOR_nothing)
23660 {
23661 move_mode = word_mode;
23662 piece_size = GET_MODE_SIZE (move_mode);
23663 code = optab_handler (mov_optab, move_mode);
23664 }
23665 }
23666 gcc_assert (code != CODE_FOR_nothing);
23667
23668 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23669 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23670
23671 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23672 gcc_assert (size_to_move % piece_size == 0);
23673 adjust = GEN_INT (piece_size);
23674 for (i = 0; i < size_to_move; i += piece_size)
23675 {
23676 /* We move from memory to memory, so we'll need to do it via
23677 a temporary register. */
23678 tempreg = gen_reg_rtx (move_mode);
23679 emit_insn (GEN_FCN (code) (tempreg, src));
23680 emit_insn (GEN_FCN (code) (dst, tempreg));
23681
23682 emit_move_insn (destptr,
23683 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23684 emit_move_insn (srcptr,
23685 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23686
23687 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23688 piece_size);
23689 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23690 piece_size);
23691 }
23692
23693 /* Update DST and SRC rtx. */
23694 *srcmem = src;
23695 return dst;
23696 }
23697
23698 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
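/* For a constant COUNT of, say, 23 with MAX_SIZE == 16 the epilogue size is
   23 % 16 == 7, so a 4-byte, a 2-byte and a 1-byte move are emitted back to
   back.  For a non-constant COUNT the remainder is handled either by a byte
   loop (MAX_SIZE > 8) or by a chain of alignment tests.  */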
23699 static void
23700 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23701 rtx destptr, rtx srcptr, rtx count, int max_size)
23702 {
23703 rtx src, dest;
23704 if (CONST_INT_P (count))
23705 {
23706 HOST_WIDE_INT countval = INTVAL (count);
23707 HOST_WIDE_INT epilogue_size = countval % max_size;
23708 int i;
23709
23710 /* For now MAX_SIZE should be a power of 2. This assert could be
23711 relaxed, but it'll require a bit more complicated epilogue
23712 expanding. */
23713 gcc_assert ((max_size & (max_size - 1)) == 0);
23714 for (i = max_size; i >= 1; i >>= 1)
23715 {
23716 if (epilogue_size & i)
23717 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23718 }
23719 return;
23720 }
23721 if (max_size > 8)
23722 {
23723 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23724 count, 1, OPTAB_DIRECT);
23725 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23726 count, QImode, 1, 4, false);
23727 return;
23728 }
23729
23730 /* When single string operations are available, we can cheaply increase
23731 dest and src pointers. Otherwise we save code size by maintaining offset
23732 (zero is readily available from the preceding rep operation) and using
23733 x86 addressing modes. */
23734 if (TARGET_SINGLE_STRINGOP)
23735 {
23736 if (max_size > 4)
23737 {
23738 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23739 src = change_address (srcmem, SImode, srcptr);
23740 dest = change_address (destmem, SImode, destptr);
23741 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23742 emit_label (label);
23743 LABEL_NUSES (label) = 1;
23744 }
23745 if (max_size > 2)
23746 {
23747 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23748 src = change_address (srcmem, HImode, srcptr);
23749 dest = change_address (destmem, HImode, destptr);
23750 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23751 emit_label (label);
23752 LABEL_NUSES (label) = 1;
23753 }
23754 if (max_size > 1)
23755 {
23756 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23757 src = change_address (srcmem, QImode, srcptr);
23758 dest = change_address (destmem, QImode, destptr);
23759 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23760 emit_label (label);
23761 LABEL_NUSES (label) = 1;
23762 }
23763 }
23764 else
23765 {
23766 rtx offset = force_reg (Pmode, const0_rtx);
23767 rtx tmp;
23768
23769 if (max_size > 4)
23770 {
23771 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23772 src = change_address (srcmem, SImode, srcptr);
23773 dest = change_address (destmem, SImode, destptr);
23774 emit_move_insn (dest, src);
23775 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23776 true, OPTAB_LIB_WIDEN);
23777 if (tmp != offset)
23778 emit_move_insn (offset, tmp);
23779 emit_label (label);
23780 LABEL_NUSES (label) = 1;
23781 }
23782 if (max_size > 2)
23783 {
23784 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23785 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23786 src = change_address (srcmem, HImode, tmp);
23787 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23788 dest = change_address (destmem, HImode, tmp);
23789 emit_move_insn (dest, src);
23790 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23791 true, OPTAB_LIB_WIDEN);
23792 if (tmp != offset)
23793 emit_move_insn (offset, tmp);
23794 emit_label (label);
23795 LABEL_NUSES (label) = 1;
23796 }
23797 if (max_size > 1)
23798 {
23799 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23800 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23801 src = change_address (srcmem, QImode, tmp);
23802 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23803 dest = change_address (destmem, QImode, tmp);
23804 emit_move_insn (dest, src);
23805 emit_label (label);
23806 LABEL_NUSES (label) = 1;
23807 }
23808 }
23809 }
23810
23811 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23812 with value PROMOTED_VAL.
23813 The return value is the updated DESTMEM. */
23815 static rtx
23816 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23817 HOST_WIDE_INT size_to_move)
23818 {
23819 rtx dst = destmem, adjust;
23820 enum insn_code code;
23821 machine_mode move_mode;
23822 int piece_size, i;
23823
23824 /* Find the widest mode in which we could perform moves.
23825 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
23826 it until move of such size is supported. */
23827 move_mode = GET_MODE (promoted_val);
23828 if (move_mode == VOIDmode)
23829 move_mode = QImode;
23830 if (size_to_move < GET_MODE_SIZE (move_mode))
23831 {
23832 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23833 promoted_val = gen_lowpart (move_mode, promoted_val);
23834 }
23835 piece_size = GET_MODE_SIZE (move_mode);
23836 code = optab_handler (mov_optab, move_mode);
23837 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23838
23839 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23840
23841 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23842 gcc_assert (size_to_move % piece_size == 0);
23843 adjust = GEN_INT (piece_size);
23844 for (i = 0; i < size_to_move; i += piece_size)
23845 {
23846 if (piece_size <= GET_MODE_SIZE (word_mode))
23847 {
23848 emit_insn (gen_strset (destptr, dst, promoted_val));
23849 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23850 piece_size);
23851 continue;
23852 }
23853
23854 emit_insn (GEN_FCN (code) (dst, promoted_val));
23855
23856 emit_move_insn (destptr,
23857 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23858
23859 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23860 piece_size);
23861 }
23862
23863 /* Update DST rtx. */
23864 return dst;
23865 }
23866 /* Output code to set at most count & (max_size - 1) bytes starting at DESTMEM. */
23867 static void
23868 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23869 rtx count, int max_size)
23870 {
23871 count =
23872 expand_simple_binop (counter_mode (count), AND, count,
23873 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23874 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23875 gen_lowpart (QImode, value), count, QImode,
23876 1, max_size / 2, true);
23877 }
23878
23879 /* Output code to set at most count & (max_size - 1) bytes starting at DESTMEM. */
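/* For example, with a non-constant COUNT and MAX_SIZE == 16 this emits a chain
   of alignment tests: if (count & 8) store 8 bytes, if (count & 4) store 4
   bytes, and so on down to a single byte, each store using the (low part of
   the) promoted VALUE.  */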
23880 static void
23881 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23882 rtx count, int max_size)
23883 {
23884 rtx dest;
23885
23886 if (CONST_INT_P (count))
23887 {
23888 HOST_WIDE_INT countval = INTVAL (count);
23889 HOST_WIDE_INT epilogue_size = countval % max_size;
23890 int i;
23891
23892 /* For now MAX_SIZE should be a power of 2. This assert could be
23893 relaxed, but it'll require a bit more complicated epilogue
23894 expanding. */
23895 gcc_assert ((max_size & (max_size - 1)) == 0);
23896 for (i = max_size; i >= 1; i >>= 1)
23897 {
23898 if (epilogue_size & i)
23899 {
23900 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23901 destmem = emit_memset (destmem, destptr, vec_value, i);
23902 else
23903 destmem = emit_memset (destmem, destptr, value, i);
23904 }
23905 }
23906 return;
23907 }
23908 if (max_size > 32)
23909 {
23910 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23911 return;
23912 }
23913 if (max_size > 16)
23914 {
23915 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23916 if (TARGET_64BIT)
23917 {
23918 dest = change_address (destmem, DImode, destptr);
23919 emit_insn (gen_strset (destptr, dest, value));
23920 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23921 emit_insn (gen_strset (destptr, dest, value));
23922 }
23923 else
23924 {
23925 dest = change_address (destmem, SImode, destptr);
23926 emit_insn (gen_strset (destptr, dest, value));
23927 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23928 emit_insn (gen_strset (destptr, dest, value));
23929 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23930 emit_insn (gen_strset (destptr, dest, value));
23931 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23932 emit_insn (gen_strset (destptr, dest, value));
23933 }
23934 emit_label (label);
23935 LABEL_NUSES (label) = 1;
23936 }
23937 if (max_size > 8)
23938 {
23939 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
23940 if (TARGET_64BIT)
23941 {
23942 dest = change_address (destmem, DImode, destptr);
23943 emit_insn (gen_strset (destptr, dest, value));
23944 }
23945 else
23946 {
23947 dest = change_address (destmem, SImode, destptr);
23948 emit_insn (gen_strset (destptr, dest, value));
23949 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23950 emit_insn (gen_strset (destptr, dest, value));
23951 }
23952 emit_label (label);
23953 LABEL_NUSES (label) = 1;
23954 }
23955 if (max_size > 4)
23956 {
23957 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23958 dest = change_address (destmem, SImode, destptr);
23959 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23960 emit_label (label);
23961 LABEL_NUSES (label) = 1;
23962 }
23963 if (max_size > 2)
23964 {
23965 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23966 dest = change_address (destmem, HImode, destptr);
23967 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23968 emit_label (label);
23969 LABEL_NUSES (label) = 1;
23970 }
23971 if (max_size > 1)
23972 {
23973 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23974 dest = change_address (destmem, QImode, destptr);
23975 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23976 emit_label (label);
23977 LABEL_NUSES (label) = 1;
23978 }
23979 }
23980
23981 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
23982 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23983 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23984 ignored.
23985 Return value is updated DESTMEM. */
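/* Illustrative example: with ALIGN == 1 and DESIRED_ALIGNMENT == 8 this emits
   three tests on DESTPTR: if bit 0 is set copy/set 1 byte, if bit 1 is set
   copy/set 2 bytes, if bit 2 is set copy/set 4 bytes, decrementing COUNT
   accordingly, so that DESTPTR is 8-byte aligned afterwards.  */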
23986 static rtx
23987 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23988 rtx destptr, rtx srcptr, rtx value,
23989 rtx vec_value, rtx count, int align,
23990 int desired_alignment, bool issetmem)
23991 {
23992 int i;
23993 for (i = 1; i < desired_alignment; i <<= 1)
23994 {
23995 if (align <= i)
23996 {
23997 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
23998 if (issetmem)
23999 {
24000 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
24001 destmem = emit_memset (destmem, destptr, vec_value, i);
24002 else
24003 destmem = emit_memset (destmem, destptr, value, i);
24004 }
24005 else
24006 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
24007 ix86_adjust_counter (count, i);
24008 emit_label (label);
24009 LABEL_NUSES (label) = 1;
24010 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
24011 }
24012 }
24013 return destmem;
24014 }
24015
24016 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
24017 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
24018 and jump to DONE_LABEL. */
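/* A sketch for the copy case with SIZE == 4: if (COUNT & 4) is nonzero
   (given the caller tests decreasing sizes, COUNT is then in [4, 8)), we copy
   the first 4 bytes from SRCPTR and the last 4 bytes from SRCPTR + COUNT - 4
   (the two copies may overlap) and jump to DONE_LABEL; otherwise the whole
   block is skipped.  */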
24019 static void
24020 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
24021 rtx destptr, rtx srcptr,
24022 rtx value, rtx vec_value,
24023 rtx count, int size,
24024 rtx done_label, bool issetmem)
24025 {
24026 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
24027 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
24028 rtx modesize;
24029 int n;
24030
24031 /* If we do not have vector value to copy, we must reduce size. */
24032 if (issetmem)
24033 {
24034 if (!vec_value)
24035 {
24036 if (GET_MODE (value) == VOIDmode && size > 8)
24037 mode = Pmode;
24038 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
24039 mode = GET_MODE (value);
24040 }
24041 else
24042 mode = GET_MODE (vec_value), value = vec_value;
24043 }
24044 else
24045 {
24046 /* Choose appropriate vector mode. */
24047 if (size >= 32)
24048 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
24049 else if (size >= 16)
24050 mode = TARGET_SSE ? V16QImode : DImode;
24051 srcmem = change_address (srcmem, mode, srcptr);
24052 }
24053 destmem = change_address (destmem, mode, destptr);
24054 modesize = GEN_INT (GET_MODE_SIZE (mode));
24055 gcc_assert (GET_MODE_SIZE (mode) <= size);
24056 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24057 {
24058 if (issetmem)
24059 emit_move_insn (destmem, gen_lowpart (mode, value));
24060 else
24061 {
24062 emit_move_insn (destmem, srcmem);
24063 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24064 }
24065 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24066 }
24067
24068 destmem = offset_address (destmem, count, 1);
24069 destmem = offset_address (destmem, GEN_INT (-2 * size),
24070 GET_MODE_SIZE (mode));
24071 if (!issetmem)
24072 {
24073 srcmem = offset_address (srcmem, count, 1);
24074 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24075 GET_MODE_SIZE (mode));
24076 }
24077 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24078 {
24079 if (issetmem)
24080 emit_move_insn (destmem, gen_lowpart (mode, value));
24081 else
24082 {
24083 emit_move_insn (destmem, srcmem);
24084 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24085 }
24086 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24087 }
24088 emit_jump_insn (gen_jump (done_label));
24089 emit_barrier ();
24090
24091 emit_label (label);
24092 LABEL_NUSES (label) = 1;
24093 }
24094
24095 /* Handle small memcpy (up to SIZE, which is supposed to be a small power of 2)
24096 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
24097 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way we can
24098 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
24099 DONE_LABEL is a label after the whole copying sequence. The label is created
24100 on demand if *DONE_LABEL is NULL.
24101 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
24102 bounds after the initial copies.
24103
24104 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
24105 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
24106 we will dispatch to a library call for large blocks.
24107
24108 In pseudocode we do:
24109
24110 if (COUNT < SIZE)
24111 {
24112 Assume that SIZE is 4. Bigger sizes are handled analogously
24113 if (COUNT & 4)
24114 {
24115 copy 4 bytes from SRCPTR to DESTPTR
24116 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24117 goto done_label
24118 }
24119 if (!COUNT)
24120 goto done_label;
24121 copy 1 byte from SRCPTR to DESTPTR
24122 if (COUNT & 2)
24123 {
24124 copy 2 bytes from SRCPTR to DESTPTR
24125 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24126 }
24127 }
24128 else
24129 {
24130 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24131 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
24132
24133 OLD_DESPTR = DESTPTR;
24134 Align DESTPTR up to DESIRED_ALIGN
24135 SRCPTR += DESTPTR - OLD_DESTPTR
24136 COUNT -= DEST_PTR - OLD_DESTPTR
24137 if (DYNAMIC_CHECK)
24138 Round COUNT down to multiple of SIZE
24139 << optional caller supplied zero size guard is here >>
24140 << optional caller supplied dynamic check is here >>
24141 << caller supplied main copy loop is here >>
24142 }
24143 done_label:
24144 */
24145 static void
24146 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24147 rtx *destptr, rtx *srcptr,
24148 machine_mode mode,
24149 rtx value, rtx vec_value,
24150 rtx *count,
24151 rtx_code_label **done_label,
24152 int size,
24153 int desired_align,
24154 int align,
24155 unsigned HOST_WIDE_INT *min_size,
24156 bool dynamic_check,
24157 bool issetmem)
24158 {
24159 rtx_code_label *loop_label = NULL, *label;
24160 int n;
24161 rtx modesize;
24162 int prolog_size = 0;
24163 rtx mode_value;
24164
24165 /* Choose the proper value to copy. */
24166 if (issetmem && VECTOR_MODE_P (mode))
24167 mode_value = vec_value;
24168 else
24169 mode_value = value;
24170 gcc_assert (GET_MODE_SIZE (mode) <= size);
24171
24172 /* See if block is big or small, handle small blocks. */
24173 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24174 {
24175 int size2 = size;
24176 loop_label = gen_label_rtx ();
24177
24178 if (!*done_label)
24179 *done_label = gen_label_rtx ();
24180
24181 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24182 1, loop_label);
24183 size2 >>= 1;
24184
24185 /* Handle sizes > 3. */
24186 for (;size2 > 2; size2 >>= 1)
24187 expand_small_movmem_or_setmem (destmem, srcmem,
24188 *destptr, *srcptr,
24189 value, vec_value,
24190 *count,
24191 size2, *done_label, issetmem);
24192 /* Nothing to copy? Jump to DONE_LABEL if so */
24193 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24194 1, *done_label);
24195
24196 /* Do a byte copy. */
24197 destmem = change_address (destmem, QImode, *destptr);
24198 if (issetmem)
24199 emit_move_insn (destmem, gen_lowpart (QImode, value));
24200 else
24201 {
24202 srcmem = change_address (srcmem, QImode, *srcptr);
24203 emit_move_insn (destmem, srcmem);
24204 }
24205
24206 /* Handle sizes 2 and 3. */
24207 label = ix86_expand_aligntest (*count, 2, false);
24208 destmem = change_address (destmem, HImode, *destptr);
24209 destmem = offset_address (destmem, *count, 1);
24210 destmem = offset_address (destmem, GEN_INT (-2), 2);
24211 if (issetmem)
24212 emit_move_insn (destmem, gen_lowpart (HImode, value));
24213 else
24214 {
24215 srcmem = change_address (srcmem, HImode, *srcptr);
24216 srcmem = offset_address (srcmem, *count, 1);
24217 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24218 emit_move_insn (destmem, srcmem);
24219 }
24220
24221 emit_label (label);
24222 LABEL_NUSES (label) = 1;
24223 emit_jump_insn (gen_jump (*done_label));
24224 emit_barrier ();
24225 }
24226 else
24227 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24228 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24229
24230 /* Start memcpy for COUNT >= SIZE. */
24231 if (loop_label)
24232 {
24233 emit_label (loop_label);
24234 LABEL_NUSES (loop_label) = 1;
24235 }
24236
24237 /* Copy first desired_align bytes. */
24238 if (!issetmem)
24239 srcmem = change_address (srcmem, mode, *srcptr);
24240 destmem = change_address (destmem, mode, *destptr);
24241 modesize = GEN_INT (GET_MODE_SIZE (mode));
24242 for (n = 0; prolog_size < desired_align - align; n++)
24243 {
24244 if (issetmem)
24245 emit_move_insn (destmem, mode_value);
24246 else
24247 {
24248 emit_move_insn (destmem, srcmem);
24249 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24250 }
24251 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24252 prolog_size += GET_MODE_SIZE (mode);
24253 }
24254
24255
24256 /* Copy last SIZE bytes. */
24257 destmem = offset_address (destmem, *count, 1);
24258 destmem = offset_address (destmem,
24259 GEN_INT (-size - prolog_size),
24260 1);
24261 if (issetmem)
24262 emit_move_insn (destmem, mode_value);
24263 else
24264 {
24265 srcmem = offset_address (srcmem, *count, 1);
24266 srcmem = offset_address (srcmem,
24267 GEN_INT (-size - prolog_size),
24268 1);
24269 emit_move_insn (destmem, srcmem);
24270 }
24271 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24272 {
24273 destmem = offset_address (destmem, modesize, 1);
24274 if (issetmem)
24275 emit_move_insn (destmem, mode_value);
24276 else
24277 {
24278 srcmem = offset_address (srcmem, modesize, 1);
24279 emit_move_insn (destmem, srcmem);
24280 }
24281 }
24282
24283 /* Align destination. */
24284 if (desired_align > 1 && desired_align > align)
24285 {
24286 rtx saveddest = *destptr;
24287
24288 gcc_assert (desired_align <= size);
24289 /* Align destptr up, place it to new register. */
24290 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24291 GEN_INT (prolog_size),
24292 NULL_RTX, 1, OPTAB_DIRECT);
24293 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
24294 REG_POINTER (*destptr) = 1;
24295 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24296 GEN_INT (-desired_align),
24297 *destptr, 1, OPTAB_DIRECT);
24298 /* See how many bytes we skipped. */
24299 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24300 *destptr,
24301 saveddest, 1, OPTAB_DIRECT);
24302 /* Adjust srcptr and count. */
24303 if (!issetmem)
24304 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
24305 saveddest, *srcptr, 1, OPTAB_DIRECT);
24306 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24307 saveddest, *count, 1, OPTAB_DIRECT);
24308 /* We copied at most size + prolog_size. */
24309 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24310 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24311 else
24312 *min_size = 0;
24313
24314 /* Our loops always round down the block size, but for dispatch to the
24315 library we need the precise value. */
24316 if (dynamic_check)
24317 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24318 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24319 }
24320 else
24321 {
24322 gcc_assert (prolog_size == 0);
24323 /* Decrease count, so we won't end up copying last word twice. */
24324 if (!CONST_INT_P (*count))
24325 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24326 constm1_rtx, *count, 1, OPTAB_DIRECT);
24327 else
24328 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24329 if (*min_size)
24330 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24331 }
24332 }
24333
24334
24335 /* This function is like the previous one, except here we know how many bytes
24336 need to be copied. That allows us to update alignment not only of DST, which
24337 is returned, but also of SRC, which is passed as a pointer for that
24338 reason. */
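/* For example, ALIGN_BYTES == 7 with DESIRED_ALIGN == 8 emits a 1-byte, a
   2-byte and a 4-byte copy (or store for the setmem case), after which DST is
   known to be 8-byte aligned; the recorded alignment of SRC is then updated to
   whatever the same 7-byte adjustment guarantees for it.  */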
24339 static rtx
24340 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24341 rtx srcreg, rtx value, rtx vec_value,
24342 int desired_align, int align_bytes,
24343 bool issetmem)
24344 {
24345 rtx src = NULL;
24346 rtx orig_dst = dst;
24347 rtx orig_src = NULL;
24348 int piece_size = 1;
24349 int copied_bytes = 0;
24350
24351 if (!issetmem)
24352 {
24353 gcc_assert (srcp != NULL);
24354 src = *srcp;
24355 orig_src = src;
24356 }
24357
24358 for (piece_size = 1;
24359 piece_size <= desired_align && copied_bytes < align_bytes;
24360 piece_size <<= 1)
24361 {
24362 if (align_bytes & piece_size)
24363 {
24364 if (issetmem)
24365 {
24366 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24367 dst = emit_memset (dst, destreg, vec_value, piece_size);
24368 else
24369 dst = emit_memset (dst, destreg, value, piece_size);
24370 }
24371 else
24372 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24373 copied_bytes += piece_size;
24374 }
24375 }
24376 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24377 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24378 if (MEM_SIZE_KNOWN_P (orig_dst))
24379 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24380
24381 if (!issetmem)
24382 {
24383 int src_align_bytes = get_mem_align_offset (src, desired_align
24384 * BITS_PER_UNIT);
24385 if (src_align_bytes >= 0)
24386 src_align_bytes = desired_align - src_align_bytes;
24387 if (src_align_bytes >= 0)
24388 {
24389 unsigned int src_align;
24390 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24391 {
24392 if ((src_align_bytes & (src_align - 1))
24393 == (align_bytes & (src_align - 1)))
24394 break;
24395 }
24396 if (src_align > (unsigned int) desired_align)
24397 src_align = desired_align;
24398 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24399 set_mem_align (src, src_align * BITS_PER_UNIT);
24400 }
24401 if (MEM_SIZE_KNOWN_P (orig_src))
24402 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24403 *srcp = src;
24404 }
24405
24406 return dst;
24407 }
24408
24409 /* Return true if ALG can be used in current context.
24410 Assume we expand memset if MEMSET is true. */
24411 static bool
24412 alg_usable_p (enum stringop_alg alg, bool memset)
24413 {
24414 if (alg == no_stringop)
24415 return false;
24416 if (alg == vector_loop)
24417 return TARGET_SSE || TARGET_AVX;
24418 /* Algorithms using the rep prefix want at least edi and ecx;
24419 additionally, memset wants eax and memcpy wants esi. Don't
24420 consider such algorithms if the user has appropriated those
24421 registers for their own purposes. */
24422 if (alg == rep_prefix_1_byte
24423 || alg == rep_prefix_4_byte
24424 || alg == rep_prefix_8_byte)
24425 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24426 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24427 return true;
24428 }
24429
24430 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
24431 static enum stringop_alg
24432 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24433 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24434 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24435 {
24436 const struct stringop_algs * algs;
24437 bool optimize_for_speed;
24438 int max = 0;
24439 const struct processor_costs *cost;
24440 int i;
24441 bool any_alg_usable_p = false;
24442
24443 *noalign = false;
24444 *dynamic_check = -1;
24445
24446 /* Even if the string operation call is cold, we still might spend a lot
24447 of time processing large blocks. */
24448 if (optimize_function_for_size_p (cfun)
24449 || (optimize_insn_for_size_p ()
24450 && (max_size < 256
24451 || (expected_size != -1 && expected_size < 256))))
24452 optimize_for_speed = false;
24453 else
24454 optimize_for_speed = true;
24455
24456 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24457 if (memset)
24458 algs = &cost->memset[TARGET_64BIT != 0];
24459 else
24460 algs = &cost->memcpy[TARGET_64BIT != 0];
24461
24462 /* See maximal size for user defined algorithm. */
24463 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24464 {
24465 enum stringop_alg candidate = algs->size[i].alg;
24466 bool usable = alg_usable_p (candidate, memset);
24467 any_alg_usable_p |= usable;
24468
24469 if (candidate != libcall && candidate && usable)
24470 max = algs->size[i].max;
24471 }
24472
24473 /* If the expected size is not known but the max size is small enough
24474 so that the inline version is a win, set the expected size to the
24475 middle of the range. */
24476 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24477 && expected_size == -1)
24478 expected_size = min_size / 2 + max_size / 2;
24479
24480 /* If the user specified the algorithm, honor it if possible. */
24481 if (ix86_stringop_alg != no_stringop
24482 && alg_usable_p (ix86_stringop_alg, memset))
24483 return ix86_stringop_alg;
24484 /* rep; movq or rep; movl is the smallest variant. */
24485 else if (!optimize_for_speed)
24486 {
24487 *noalign = true;
24488 if (!count || (count & 3) || (memset && !zero_memset))
24489 return alg_usable_p (rep_prefix_1_byte, memset)
24490 ? rep_prefix_1_byte : loop_1_byte;
24491 else
24492 return alg_usable_p (rep_prefix_4_byte, memset)
24493 ? rep_prefix_4_byte : loop;
24494 }
24495 /* Very tiny blocks are best handled via the loop, REP is expensive to
24496 setup. */
24497 else if (expected_size != -1 && expected_size < 4)
24498 return loop_1_byte;
24499 else if (expected_size != -1)
24500 {
24501 enum stringop_alg alg = libcall;
24502 bool alg_noalign = false;
24503 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24504 {
24505 /* We get here if the algorithms that were not libcall-based
24506 were rep-prefix based and we are unable to use rep prefixes
24507 based on global register usage. Break out of the loop and
24508 use the heuristic below. */
24509 if (algs->size[i].max == 0)
24510 break;
24511 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24512 {
24513 enum stringop_alg candidate = algs->size[i].alg;
24514
24515 if (candidate != libcall && alg_usable_p (candidate, memset))
24516 {
24517 alg = candidate;
24518 alg_noalign = algs->size[i].noalign;
24519 }
24520 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24521 last non-libcall inline algorithm. */
24522 if (TARGET_INLINE_ALL_STRINGOPS)
24523 {
24524 /* When the current size is best to be copied by a libcall,
24525 but we are still forced to inline, run the heuristic below
24526 that will pick code for medium sized blocks. */
24527 if (alg != libcall)
24528 {
24529 *noalign = alg_noalign;
24530 return alg;
24531 }
24532 else if (!any_alg_usable_p)
24533 break;
24534 }
24535 else if (alg_usable_p (candidate, memset))
24536 {
24537 *noalign = algs->size[i].noalign;
24538 return candidate;
24539 }
24540 }
24541 }
24542 }
24543 /* When asked to inline the call anyway, try to pick a meaningful choice.
24544 We look for the maximal size of a block that is faster to copy by hand and
24545 take blocks of at most that size, guessing that the average size will
24546 be roughly half of the block.
24547
24548 If this turns out to be bad, we might simply specify the preferred
24549 choice in ix86_costs. */
24550 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24551 && (algs->unknown_size == libcall
24552 || !alg_usable_p (algs->unknown_size, memset)))
24553 {
24554 enum stringop_alg alg;
24555
24556 /* If there aren't any usable algorithms, then recursing on
24557 smaller sizes isn't going to find anything. Just return the
24558 simple byte-at-a-time copy loop. */
24559 if (!any_alg_usable_p)
24560 {
24561 /* Pick something reasonable. */
24562 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24563 *dynamic_check = 128;
24564 return loop_1_byte;
24565 }
24566 if (max <= 0)
24567 max = 4096;
24568 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24569 zero_memset, dynamic_check, noalign);
24570 gcc_assert (*dynamic_check == -1);
24571 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24572 *dynamic_check = max;
24573 else
24574 gcc_assert (alg != libcall);
24575 return alg;
24576 }
24577 return (alg_usable_p (algs->unknown_size, memset)
24578 ? algs->unknown_size : libcall);
24579 }
24580
24581 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24582 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24583 static int
24584 decide_alignment (int align,
24585 enum stringop_alg alg,
24586 int expected_size,
24587 machine_mode move_mode)
24588 {
24589 int desired_align = 0;
24590
24591 gcc_assert (alg != no_stringop);
24592
24593 if (alg == libcall)
24594 return 0;
24595 if (move_mode == VOIDmode)
24596 return 0;
24597
24598 desired_align = GET_MODE_SIZE (move_mode);
24599 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
24600 copying a whole cache line at once. */
24601 if (TARGET_PENTIUMPRO
24602 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24603 desired_align = 8;
24604
24605 if (optimize_size)
24606 desired_align = 1;
24607 if (desired_align < align)
24608 desired_align = align;
24609 if (expected_size != -1 && expected_size < 4)
24610 desired_align = align;
24611
24612 return desired_align;
24613 }
24614
24615
24616 /* Helper function for memset. For QImode value 0xXY produce
24617 0xXYXYXYXY of the width specified by MODE. This is essentially
24618 VAL * 0x01010101, but we can do slightly better than
24619 synth_mult by unwinding the sequence by hand on CPUs with
24620 slow multiply. */
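/* Illustrative example: for VAL == 0xAB and MODE == SImode the result holds
   0xABABABAB.  For a non-constant VAL, when the cost model says multiplication
   is cheap, this is computed as VAL * 0x01010101 (or the DImode counterpart);
   otherwise it is built up with shifts and ORs (val |= val << 8;
   val |= val << 16; and, for DImode, val |= val << 32).  */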
24621 static rtx
24622 promote_duplicated_reg (machine_mode mode, rtx val)
24623 {
24624 machine_mode valmode = GET_MODE (val);
24625 rtx tmp;
24626 int nops = mode == DImode ? 3 : 2;
24627
24628 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24629 if (val == const0_rtx)
24630 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24631 if (CONST_INT_P (val))
24632 {
24633 HOST_WIDE_INT v = INTVAL (val) & 255;
24634
24635 v |= v << 8;
24636 v |= v << 16;
24637 if (mode == DImode)
24638 v |= (v << 16) << 16;
24639 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24640 }
24641
24642 if (valmode == VOIDmode)
24643 valmode = QImode;
24644 if (valmode != QImode)
24645 val = gen_lowpart (QImode, val);
24646 if (mode == QImode)
24647 return val;
24648 if (!TARGET_PARTIAL_REG_STALL)
24649 nops--;
24650 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24651 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24652 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24653 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24654 {
24655 rtx reg = convert_modes (mode, QImode, val, true);
24656 tmp = promote_duplicated_reg (mode, const1_rtx);
24657 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24658 OPTAB_DIRECT);
24659 }
24660 else
24661 {
24662 rtx reg = convert_modes (mode, QImode, val, true);
24663
24664 if (!TARGET_PARTIAL_REG_STALL)
24665 if (mode == SImode)
24666 emit_insn (gen_movsi_insv_1 (reg, reg));
24667 else
24668 emit_insn (gen_movdi_insv_1 (reg, reg));
24669 else
24670 {
24671 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24672 NULL, 1, OPTAB_DIRECT);
24673 reg =
24674 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24675 }
24676 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24677 NULL, 1, OPTAB_DIRECT);
24678 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24679 if (mode == SImode)
24680 return reg;
24681 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24682 NULL, 1, OPTAB_DIRECT);
24683 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24684 return reg;
24685 }
24686 }
24687
24688 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
24689 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
24690 getting alignment from ALIGN to DESIRED_ALIGN. */
24691 static rtx
24692 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24693 int align)
24694 {
24695 rtx promoted_val;
24696
24697 if (TARGET_64BIT
24698 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24699 promoted_val = promote_duplicated_reg (DImode, val);
24700 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24701 promoted_val = promote_duplicated_reg (SImode, val);
24702 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24703 promoted_val = promote_duplicated_reg (HImode, val);
24704 else
24705 promoted_val = val;
24706
24707 return promoted_val;
24708 }
24709
24710 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24711 operations when profitable. The code depends upon architecture, block size
24712 and alignment, but always has one of the following overall structures:
24713
24714 Aligned move sequence:
24715
24716 1) Prologue guard: Conditional that jumps up to epilogues for small
24717 blocks that can be handled by epilogue alone. This is faster
24718 but also needed for correctness, since the prologue assumes the block
24719 is larger than the desired alignment.
24720
24721 Optional dynamic check for size and libcall for large
24722 blocks is emitted here too, with -minline-stringops-dynamically.
24723
24724 2) Prologue: copy first few bytes in order to get destination
24725 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24726 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24727 copied. We emit either a jump tree on power of two sized
24728 blocks, or a byte loop.
24729
24730 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24731 with specified algorithm.
24732
24733 4) Epilogue: code copying tail of the block that is too small to be
24734 handled by main body (or up to size guarded by prologue guard).
24735
24736 Misaligned move sequence
24737
24738 1) misaligned move prologue/epilogue containing:
24739 a) Prologue handling small memory blocks and jumping to done_label
24740 (skipped if blocks are known to be large enough)
24741 b) Single move copying first DESIRED_ALIGN-ALIGN bytes if alignment is
24742 needed by single possibly misaligned move
24743 (skipped if alignment is not needed)
24744 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24745
24746 2) Zero size guard dispatching to done_label, if needed
24747
24748 3) dispatch to library call, if needed,
24749
24750 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24751 with the specified algorithm. */
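/* As an illustration, an aligned memcpy expanded with the unrolled_loop
   algorithm on a 64-bit target uses 32-byte chunks (word_mode unrolled 4x):
   the prologue guard branches to the epilogue for blocks smaller than roughly
   32 bytes, the prologue copies up to DESIRED_ALIGN - ALIGN bytes to align the
   destination, the main loop copies 32 bytes per iteration, and the epilogue
   finishes the remaining COUNT & 31 bytes.  */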
24752 bool
24753 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24754 rtx align_exp, rtx expected_align_exp,
24755 rtx expected_size_exp, rtx min_size_exp,
24756 rtx max_size_exp, rtx probable_max_size_exp,
24757 bool issetmem)
24758 {
24759 rtx destreg;
24760 rtx srcreg = NULL;
24761 rtx_code_label *label = NULL;
24762 rtx tmp;
24763 rtx_code_label *jump_around_label = NULL;
24764 HOST_WIDE_INT align = 1;
24765 unsigned HOST_WIDE_INT count = 0;
24766 HOST_WIDE_INT expected_size = -1;
24767 int size_needed = 0, epilogue_size_needed;
24768 int desired_align = 0, align_bytes = 0;
24769 enum stringop_alg alg;
24770 rtx promoted_val = NULL;
24771 rtx vec_promoted_val = NULL;
24772 bool force_loopy_epilogue = false;
24773 int dynamic_check;
24774 bool need_zero_guard = false;
24775 bool noalign;
24776 machine_mode move_mode = VOIDmode;
24777 int unroll_factor = 1;
24778 /* TODO: Once value ranges are available, fill in proper data. */
24779 unsigned HOST_WIDE_INT min_size = 0;
24780 unsigned HOST_WIDE_INT max_size = -1;
24781 unsigned HOST_WIDE_INT probable_max_size = -1;
24782 bool misaligned_prologue_used = false;
24783
24784 if (CONST_INT_P (align_exp))
24785 align = INTVAL (align_exp);
24786 /* i386 can do misaligned access at reasonably increased cost. */
24787 if (CONST_INT_P (expected_align_exp)
24788 && INTVAL (expected_align_exp) > align)
24789 align = INTVAL (expected_align_exp);
24790 /* ALIGN is the minimum of destination and source alignment, but we care here
24791 just about destination alignment. */
24792 else if (!issetmem
24793 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24794 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24795
24796 if (CONST_INT_P (count_exp))
24797 {
24798 min_size = max_size = probable_max_size = count = expected_size
24799 = INTVAL (count_exp);
24800 /* When COUNT is 0, there is nothing to do. */
24801 if (!count)
24802 return true;
24803 }
24804 else
24805 {
24806 if (min_size_exp)
24807 min_size = INTVAL (min_size_exp);
24808 if (max_size_exp)
24809 max_size = INTVAL (max_size_exp);
24810 if (probable_max_size_exp)
24811 probable_max_size = INTVAL (probable_max_size_exp);
24812 if (CONST_INT_P (expected_size_exp))
24813 expected_size = INTVAL (expected_size_exp);
24814 }
24815
24816 /* Make sure we don't need to care about overflow later on. */
24817 if (count > (HOST_WIDE_INT_1U << 30))
24818 return false;
24819
24820 /* Step 0: Decide on preferred algorithm, desired alignment and
24821 size of chunks to be copied by main loop. */
24822 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24823 issetmem,
24824 issetmem && val_exp == const0_rtx,
24825 &dynamic_check, &noalign);
24826 if (alg == libcall)
24827 return false;
24828 gcc_assert (alg != no_stringop);
24829
24830 /* For now the vector version of memset is generated only for memory zeroing, as
24831 creating the promoted vector value is very cheap in this case. */
24832 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24833 alg = unrolled_loop;
24834
24835 if (!count)
24836 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24837 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24838 if (!issetmem)
24839 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24840
24841 unroll_factor = 1;
24842 move_mode = word_mode;
24843 switch (alg)
24844 {
24845 case libcall:
24846 case no_stringop:
24847 case last_alg:
24848 gcc_unreachable ();
24849 case loop_1_byte:
24850 need_zero_guard = true;
24851 move_mode = QImode;
24852 break;
24853 case loop:
24854 need_zero_guard = true;
24855 break;
24856 case unrolled_loop:
24857 need_zero_guard = true;
24858 unroll_factor = (TARGET_64BIT ? 4 : 2);
24859 break;
24860 case vector_loop:
24861 need_zero_guard = true;
24862 unroll_factor = 4;
24863 /* Find the widest supported mode. */
24864 move_mode = word_mode;
24865 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24866 != CODE_FOR_nothing)
24867 move_mode = GET_MODE_WIDER_MODE (move_mode);
24868
24869 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24870 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24871 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24872 {
24873 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24874 move_mode = mode_for_vector (word_mode, nunits);
24875 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24876 move_mode = word_mode;
24877 }
24878 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24879 break;
24880 case rep_prefix_8_byte:
24881 move_mode = DImode;
24882 break;
24883 case rep_prefix_4_byte:
24884 move_mode = SImode;
24885 break;
24886 case rep_prefix_1_byte:
24887 move_mode = QImode;
24888 break;
24889 }
24890 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24891 epilogue_size_needed = size_needed;
24892
24893 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24894 if (!TARGET_ALIGN_STRINGOPS || noalign)
24895 align = desired_align;
24896
24897 /* Step 1: Prologue guard. */
24898
24899 /* Alignment code needs count to be in register. */
24900 if (CONST_INT_P (count_exp) && desired_align > align)
24901 {
24902 if (INTVAL (count_exp) > desired_align
24903 && INTVAL (count_exp) > size_needed)
24904 {
24905 align_bytes
24906 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24907 if (align_bytes <= 0)
24908 align_bytes = 0;
24909 else
24910 align_bytes = desired_align - align_bytes;
24911 }
24912 if (align_bytes == 0)
24913 count_exp = force_reg (counter_mode (count_exp), count_exp);
24914 }
24915 gcc_assert (desired_align >= 1 && align >= 1);
24916
24917 /* Misaligned move sequences handle both prologue and epilogue at once.
24918 Default code generation results in smaller code for large alignments
24919 and also avoids redundant work when sizes are known precisely. */
24920 misaligned_prologue_used
24921 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24922 && MAX (desired_align, epilogue_size_needed) <= 32
24923 && desired_align <= epilogue_size_needed
24924 && ((desired_align > align && !align_bytes)
24925 || (!count && epilogue_size_needed > 1)));
24926
24927 /* Do the cheap promotion to allow better CSE across the
24928 main loop and epilogue (i.e. one load of the big constant in
24929 front of all the code).
24930 For now the misaligned move sequences do not have a fast path
24931 without broadcasting. */
24932 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24933 {
24934 if (alg == vector_loop)
24935 {
24936 gcc_assert (val_exp == const0_rtx);
24937 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24938 promoted_val = promote_duplicated_reg_to_size (val_exp,
24939 GET_MODE_SIZE (word_mode),
24940 desired_align, align);
24941 }
24942 else
24943 {
24944 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24945 desired_align, align);
24946 }
24947 }
24948 /* Misaligned move sequences handle both prologues and epilogues at once.
24949 Default code generation results in smaller code for large alignments and
24950 also avoids redundant work when sizes are known precisely. */
24951 if (misaligned_prologue_used)
24952 {
24953 /* Misaligned move prologue handles small blocks by itself. */
24954 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24955 (dst, src, &destreg, &srcreg,
24956 move_mode, promoted_val, vec_promoted_val,
24957 &count_exp,
24958 &jump_around_label,
24959 desired_align < align
24960 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24961 desired_align, align, &min_size, dynamic_check, issetmem);
24962 if (!issetmem)
24963 src = change_address (src, BLKmode, srcreg);
24964 dst = change_address (dst, BLKmode, destreg);
24965 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24966 epilogue_size_needed = 0;
24967 if (need_zero_guard && !min_size)
24968 {
24969 /* It is possible that we copied enough so the main loop will not
24970 execute. */
24971 gcc_assert (size_needed > 1);
24972 if (jump_around_label == NULL_RTX)
24973 jump_around_label = gen_label_rtx ();
24974 emit_cmp_and_jump_insns (count_exp,
24975 GEN_INT (size_needed),
24976 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24977 if (expected_size == -1
24978 || expected_size < (desired_align - align) / 2 + size_needed)
24979 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24980 else
24981 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24982 }
24983 }
24984 /* Ensure that alignment prologue won't copy past end of block. */
24985 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24986 {
24987 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24988 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24989 Make sure it is a power of 2. */
24990 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
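/* For instance, a value of 15 becomes 16 and a value of 16 becomes 32; the
   result is the smallest power of 2 strictly greater than the previous
   value.  */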
24991
24992 /* To improve performance of small blocks, we jump around the VAL
24993 promoting code. This means that if the promoted VAL is not constant,
24994 we might not use it in the epilogue and have to use the byte
24995 loop variant. */
24996 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24997 force_loopy_epilogue = true;
24998 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24999 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25000 {
25001 /* If main algorithm works on QImode, no epilogue is needed.
25002 For small sizes just don't align anything. */
25003 if (size_needed == 1)
25004 desired_align = align;
25005 else
25006 goto epilogue;
25007 }
25008 else if (!count
25009 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25010 {
25011 label = gen_label_rtx ();
25012 emit_cmp_and_jump_insns (count_exp,
25013 GEN_INT (epilogue_size_needed),
25014 LTU, 0, counter_mode (count_exp), 1, label);
25015 if (expected_size == -1 || expected_size < epilogue_size_needed)
25016 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25017 else
25018 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25019 }
25020 }
25021
25022 /* Emit code to decide at runtime whether a library call or the inline
25023 expansion should be used. */
25024 if (dynamic_check != -1)
25025 {
25026 if (!issetmem && CONST_INT_P (count_exp))
25027 {
25028 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
25029 {
25030 emit_block_move_via_libcall (dst, src, count_exp, false);
25031 count_exp = const0_rtx;
25032 goto epilogue;
25033 }
25034 }
25035 else
25036 {
25037 rtx_code_label *hot_label = gen_label_rtx ();
25038 if (jump_around_label == NULL_RTX)
25039 jump_around_label = gen_label_rtx ();
25040 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
25041 LEU, 0, counter_mode (count_exp),
25042 1, hot_label);
25043 predict_jump (REG_BR_PROB_BASE * 90 / 100);
25044 if (issetmem)
25045 set_storage_via_libcall (dst, count_exp, val_exp, false);
25046 else
25047 emit_block_move_via_libcall (dst, src, count_exp, false);
25048 emit_jump (jump_around_label);
25049 emit_label (hot_label);
25050 }
25051 }
25052
25053 /* Step 2: Alignment prologue. */
25054 /* Do the expensive promotion once we branched off the small blocks. */
25055 if (issetmem && !promoted_val)
25056 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25057 desired_align, align);
25058
25059 if (desired_align > align && !misaligned_prologue_used)
25060 {
25061 if (align_bytes == 0)
25062 {
25063 /* Except for the first move in the prologue, we no longer know
25064 the constant offset in the aliasing info. It doesn't seem worth
25065 the pain to maintain it for the first move, so throw away
25066 the info early. */
25067 dst = change_address (dst, BLKmode, destreg);
25068 if (!issetmem)
25069 src = change_address (src, BLKmode, srcreg);
25070 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25071 promoted_val, vec_promoted_val,
25072 count_exp, align, desired_align,
25073 issetmem);
25074 /* At most desired_align - align bytes are copied. */
25075 if (min_size < (unsigned)(desired_align - align))
25076 min_size = 0;
25077 else
25078 min_size -= desired_align - align;
25079 }
25080 else
25081 {
25082 /* If we know how many bytes need to be stored before dst is
25083 sufficiently aligned, maintain aliasing info accurately. */
25084 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25085 srcreg,
25086 promoted_val,
25087 vec_promoted_val,
25088 desired_align,
25089 align_bytes,
25090 issetmem);
25091
25092 count_exp = plus_constant (counter_mode (count_exp),
25093 count_exp, -align_bytes);
25094 count -= align_bytes;
25095 min_size -= align_bytes;
25096 max_size -= align_bytes;
25097 }
25098 if (need_zero_guard
25099 && !min_size
25100 && (count < (unsigned HOST_WIDE_INT) size_needed
25101 || (align_bytes == 0
25102 && count < ((unsigned HOST_WIDE_INT) size_needed
25103 + desired_align - align))))
25104 {
25105 /* It is possible that we copied enough so the main loop will not
25106 execute. */
25107 gcc_assert (size_needed > 1);
25108 if (label == NULL_RTX)
25109 label = gen_label_rtx ();
25110 emit_cmp_and_jump_insns (count_exp,
25111 GEN_INT (size_needed),
25112 LTU, 0, counter_mode (count_exp), 1, label);
25113 if (expected_size == -1
25114 || expected_size < (desired_align - align) / 2 + size_needed)
25115 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25116 else
25117 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25118 }
25119 }
25120 if (label && size_needed == 1)
25121 {
25122 emit_label (label);
25123 LABEL_NUSES (label) = 1;
25124 label = NULL;
25125 epilogue_size_needed = 1;
25126 if (issetmem)
25127 promoted_val = val_exp;
25128 }
25129 else if (label == NULL_RTX && !misaligned_prologue_used)
25130 epilogue_size_needed = size_needed;
25131
25132 /* Step 3: Main loop. */
25133
25134 switch (alg)
25135 {
25136 case libcall:
25137 case no_stringop:
25138 case last_alg:
25139 gcc_unreachable ();
25140 case loop_1_byte:
25141 case loop:
25142 case unrolled_loop:
25143 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25144 count_exp, move_mode, unroll_factor,
25145 expected_size, issetmem);
25146 break;
25147 case vector_loop:
25148 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25149 vec_promoted_val, count_exp, move_mode,
25150 unroll_factor, expected_size, issetmem);
25151 break;
25152 case rep_prefix_8_byte:
25153 case rep_prefix_4_byte:
25154 case rep_prefix_1_byte:
25155 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25156 val_exp, count_exp, move_mode, issetmem);
25157 break;
25158 }
25159 /* Properly adjust the offset of the src and dest memory for aliasing. */
25160 if (CONST_INT_P (count_exp))
25161 {
25162 if (!issetmem)
25163 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25164 (count / size_needed) * size_needed);
25165 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25166 (count / size_needed) * size_needed);
25167 }
25168 else
25169 {
25170 if (!issetmem)
25171 src = change_address (src, BLKmode, srcreg);
25172 dst = change_address (dst, BLKmode, destreg);
25173 }
25174
25175 /* Step 4: Epilogue to copy the remaining bytes. */
25176 epilogue:
25177 if (label)
25178 {
25179 /* When the main loop is done, COUNT_EXP might hold original count,
25180 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
25181 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
25182 bytes. Compensate if needed. */
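/* For instance, with SIZE_NEEDED == 16 and an original count of 37, the AND
   below leaves 37 & 15 == 5 bytes for the epilogue to handle.  */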
25183
25184 if (size_needed < epilogue_size_needed)
25185 {
25186 tmp =
25187 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25188 GEN_INT (size_needed - 1), count_exp, 1,
25189 OPTAB_DIRECT);
25190 if (tmp != count_exp)
25191 emit_move_insn (count_exp, tmp);
25192 }
25193 emit_label (label);
25194 LABEL_NUSES (label) = 1;
25195 }
25196
25197 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25198 {
25199 if (force_loopy_epilogue)
25200 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25201 epilogue_size_needed);
25202 else
25203 {
25204 if (issetmem)
25205 expand_setmem_epilogue (dst, destreg, promoted_val,
25206 vec_promoted_val, count_exp,
25207 epilogue_size_needed);
25208 else
25209 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25210 epilogue_size_needed);
25211 }
25212 }
25213 if (jump_around_label)
25214 emit_label (jump_around_label);
25215 return true;
25216 }
25217
25218
25219 /* Expand the appropriate insns for doing strlen if not just doing
25220 repnz; scasb
25221
25222 out = result, initialized with the start address
25223 align_rtx = alignment of the address.
25224 scratch = scratch register, initialized with the start address when
25225 not aligned, otherwise undefined
25226
25227 This is just the body. It needs the initializations mentioned above and
25228 some address computation at the end. These things are done in i386.md. */
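/* Roughly, the emitted code first checks up to three leading bytes one at a
   time until OUT is 4-byte aligned, then loops reading 4 bytes per iteration
   and uses the bit trick below to detect a zero byte within the word.  */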
25229
25230 static void
25231 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25232 {
25233 int align;
25234 rtx tmp;
25235 rtx_code_label *align_2_label = NULL;
25236 rtx_code_label *align_3_label = NULL;
25237 rtx_code_label *align_4_label = gen_label_rtx ();
25238 rtx_code_label *end_0_label = gen_label_rtx ();
25239 rtx mem;
25240 rtx tmpreg = gen_reg_rtx (SImode);
25241 rtx scratch = gen_reg_rtx (SImode);
25242 rtx cmp;
25243
25244 align = 0;
25245 if (CONST_INT_P (align_rtx))
25246 align = INTVAL (align_rtx);
25247
25248 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
25249
25250 /* Is there a known alignment and is it less than 4? */
25251 if (align < 4)
25252 {
25253 rtx scratch1 = gen_reg_rtx (Pmode);
25254 emit_move_insn (scratch1, out);
25255 /* Is there a known alignment and is it not 2? */
25256 if (align != 2)
25257 {
25258 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25259 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25260
25261 /* Leave just the 3 lower bits. */
25262 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25263 NULL_RTX, 0, OPTAB_WIDEN);
25264
25265 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25266 Pmode, 1, align_4_label);
25267 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25268 Pmode, 1, align_2_label);
25269 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25270 Pmode, 1, align_3_label);
25271 }
25272 else
25273 {
25274 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25275 check whether it is aligned to 4 bytes. */
25276
25277 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25278 NULL_RTX, 0, OPTAB_WIDEN);
25279
25280 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25281 Pmode, 1, align_4_label);
25282 }
25283
25284 mem = change_address (src, QImode, out);
25285
25286 /* Now compare the bytes. */
25287
25288 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
25289 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25290 QImode, 1, end_0_label);
25291
25292 /* Increment the address. */
25293 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25294
25295 /* Not needed with an alignment of 2 */
25296 if (align != 2)
25297 {
25298 emit_label (align_2_label);
25299
25300 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25301 end_0_label);
25302
25303 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25304
25305 emit_label (align_3_label);
25306 }
25307
25308 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25309 end_0_label);
25310
25311 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25312 }
25313
25314 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
25315 align this loop; doing so only enlarges the program and does not help
25316 speed. */
25317 emit_label (align_4_label);
25318
25319 mem = change_address (src, SImode, out);
25320 emit_move_insn (scratch, mem);
25321 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25322
25323 /* This formula yields a nonzero result iff one of the bytes is zero.
25324 This saves three branches inside the loop and many cycles. */
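/* A worked example: for scratch == 0x41004242 the insns below compute
   (0x41004242 - 0x01010101) & ~0x41004242 & 0x80808080 == 0x00800000,
   which is nonzero and marks the zero byte in the second-highest
   position.  */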
25325
25326 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25327 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25328 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25329 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25330 gen_int_mode (0x80808080, SImode)));
25331 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
25332 align_4_label);
25333
25334 if (TARGET_CMOVE)
25335 {
25336 rtx reg = gen_reg_rtx (SImode);
25337 rtx reg2 = gen_reg_rtx (Pmode);
25338 emit_move_insn (reg, tmpreg);
25339 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25340
25341 /* If zero is not in the first two bytes, move two bytes forward. */
25342 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25343 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25344 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25345 emit_insn (gen_rtx_SET (tmpreg,
25346 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25347 reg,
25348 tmpreg)));
25349 /* Emit lea manually to avoid clobbering of flags. */
25350 emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));
25351
25352 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25353 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25354 emit_insn (gen_rtx_SET (out,
25355 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
25356 reg2,
25357 out)));
25358 }
25359 else
25360 {
25361 rtx_code_label *end_2_label = gen_label_rtx ();
25362 /* Is zero in the first two bytes? */
25363
25364 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25365 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25366 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25367 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25368 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25369 pc_rtx);
25370 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
25371 JUMP_LABEL (tmp) = end_2_label;
25372
25373 /* Not in the first two. Move two bytes forward. */
25374 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25375 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25376
25377 emit_label (end_2_label);
25378
25379 }
25380
25381 /* Avoid branch in fixing the byte. */
25382 tmpreg = gen_lowpart (QImode, tmpreg);
25383 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
25384 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25385 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25386 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25387
25388 emit_label (end_0_label);
25389 }
25390
25391 /* Expand strlen. */
25392
25393 bool
25394 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25395 {
25396 rtx addr, scratch1, scratch2, scratch3, scratch4;
25397
25398 /* The generic case of the strlen expander is long. Avoid expanding
25399 it unless TARGET_INLINE_ALL_STRINGOPS. */
25400
25401 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25402 && !TARGET_INLINE_ALL_STRINGOPS
25403 && !optimize_insn_for_size_p ()
25404 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25405 return false;
25406
25407 addr = force_reg (Pmode, XEXP (src, 0));
25408 scratch1 = gen_reg_rtx (Pmode);
25409
25410 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25411 && !optimize_insn_for_size_p ())
25412 {
25413 /* Well, it seems that some optimizer does not combine a call like
25414 foo(strlen(bar), strlen(bar));
25415 when the move and the subtraction are done here. It does calculate
25416 the length just once when these instructions are done inside of
25417 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
25418 often used and I use one fewer register for the lifetime of
25419 output_strlen_unroll(), this is better. */
25420
25421 emit_move_insn (out, addr);
25422
25423 ix86_expand_strlensi_unroll_1 (out, src, align);
25424
25425 /* strlensi_unroll_1 returns the address of the zero at the end of
25426 the string, like memchr(), so compute the length by subtracting
25427 the start address. */
25428 emit_insn (ix86_gen_sub3 (out, out, addr));
25429 }
25430 else
25431 {
25432 rtx unspec;
25433
25434 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25435 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25436 return false;
25437
25438 scratch2 = gen_reg_rtx (Pmode);
25439 scratch3 = gen_reg_rtx (Pmode);
25440 scratch4 = force_reg (Pmode, constm1_rtx);
25441
25442 emit_move_insn (scratch3, addr);
25443 eoschar = force_reg (QImode, eoschar);
25444
25445 src = replace_equiv_address_nv (src, scratch3);
25446
25447 /* If .md starts supporting :P, this can be done in .md. */
25448 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25449 scratch4), UNSPEC_SCAS);
25450 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25451 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25452 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25453 }
25454 return true;
25455 }
25456
25457 /* For a given symbol (function), construct code to compute the address of
25458 its PLT entry in the large x86-64 PIC model. */
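/* A sketch of the emitted sequence (assuming the PIC base register has
   already been set up by the prologue):
     movabs $symbol@PLTOFF, %tmp
     add %pic_base, %tmp
   leaving %tmp with the address of the symbol's PLT entry.  */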
25459 static rtx
25460 construct_plt_address (rtx symbol)
25461 {
25462 rtx tmp, unspec;
25463
25464 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25465 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25466 gcc_assert (Pmode == DImode);
25467
25468 tmp = gen_reg_rtx (Pmode);
25469 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25470
25471 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25472 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25473 return tmp;
25474 }
25475
25476 rtx
25477 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25478 rtx callarg2,
25479 rtx pop, bool sibcall)
25480 {
25481 rtx vec[3];
25482 rtx use = NULL, call;
25483 unsigned int vec_len = 0;
25484
25485 if (pop == const0_rtx)
25486 pop = NULL;
25487 gcc_assert (!TARGET_64BIT || !pop);
25488
25489 if (TARGET_MACHO && !TARGET_64BIT)
25490 {
25491 #if TARGET_MACHO
25492 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25493 fnaddr = machopic_indirect_call_target (fnaddr);
25494 #endif
25495 }
25496 else
25497 {
25498 /* Static functions and indirect calls don't need the pic register. */
25499 if (flag_pic
25500 && (!TARGET_64BIT
25501 || (ix86_cmodel == CM_LARGE_PIC
25502 && DEFAULT_ABI != MS_ABI))
25503 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25504 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25505 {
25506 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25507 if (ix86_use_pseudo_pic_reg ())
25508 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25509 pic_offset_table_rtx);
25510 }
25511 }
25512
25513 /* Skip setting up RAX register for -mskip-rax-setup when there are no
25514 parameters passed in vector registers. */
25515 if (TARGET_64BIT
25516 && (INTVAL (callarg2) > 0
25517 || (INTVAL (callarg2) == 0
25518 && (TARGET_SSE || !flag_skip_rax_setup))))
25519 {
25520 rtx al = gen_rtx_REG (QImode, AX_REG);
25521 emit_move_insn (al, callarg2);
25522 use_reg (&use, al);
25523 }
25524
25525 if (ix86_cmodel == CM_LARGE_PIC
25526 && !TARGET_PECOFF
25527 && MEM_P (fnaddr)
25528 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25529 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25530 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25531 else if (sibcall
25532 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25533 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25534 {
25535 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25536 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25537 }
25538
25539 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25540
25541 if (retval)
25542 {
25543 /* We should add bounds as a destination register in case
25544 a pointer with bounds may be returned. */
25545 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25546 {
25547 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25548 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25549 if (GET_CODE (retval) == PARALLEL)
25550 {
25551 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
25552 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
25553 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
25554 retval = chkp_join_splitted_slot (retval, par);
25555 }
25556 else
25557 {
25558 retval = gen_rtx_PARALLEL (VOIDmode,
25559 gen_rtvec (3, retval, b0, b1));
25560 chkp_put_regs_to_expr_list (retval);
25561 }
25562 }
25563
25564 call = gen_rtx_SET (retval, call);
25565 }
25566 vec[vec_len++] = call;
25567
25568 if (pop)
25569 {
25570 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25571 pop = gen_rtx_SET (stack_pointer_rtx, pop);
25572 vec[vec_len++] = pop;
25573 }
25574
25575 if (TARGET_64BIT_MS_ABI
25576 && (!callarg2 || INTVAL (callarg2) != -2))
25577 {
25578 int const cregs_size
25579 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25580 int i;
25581
25582 for (i = 0; i < cregs_size; i++)
25583 {
25584 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25585 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25586
25587 clobber_reg (&use, gen_rtx_REG (mode, regno));
25588 }
25589 }
25590
25591 if (vec_len > 1)
25592 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25593 call = emit_call_insn (call);
25594 if (use)
25595 CALL_INSN_FUNCTION_USAGE (call) = use;
25596
25597 return call;
25598 }
25599
25600 /* Output the assembly for a call instruction. */
25601
25602 const char *
25603 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25604 {
25605 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25606 bool seh_nop_p = false;
25607 const char *xasm;
25608
25609 if (SIBLING_CALL_P (insn))
25610 {
25611 if (direct_p)
25612 xasm = "%!jmp\t%P0";
25613 /* SEH epilogue detection requires the indirect branch case
25614 to include REX.W. */
25615 else if (TARGET_SEH)
25616 xasm = "%!rex.W jmp %A0";
25617 else
25618 xasm = "%!jmp\t%A0";
25619
25620 output_asm_insn (xasm, &call_op);
25621 return "";
25622 }
25623
25624 /* SEH unwinding can require an extra nop to be emitted in several
25625 circumstances. Determine if we have one of those. */
25626 if (TARGET_SEH)
25627 {
25628 rtx_insn *i;
25629
25630 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25631 {
25632 /* If we get to another real insn, we don't need the nop. */
25633 if (INSN_P (i))
25634 break;
25635
25636 /* If we get to the epilogue note, prevent a catch region from
25637 being adjacent to the standard epilogue sequence. If non-
25638 call-exceptions, we'll have done this during epilogue emission. */
25639 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25640 && !flag_non_call_exceptions
25641 && !can_throw_internal (insn))
25642 {
25643 seh_nop_p = true;
25644 break;
25645 }
25646 }
25647
25648 /* If we didn't find a real insn following the call, prevent the
25649 unwinder from looking into the next function. */
25650 if (i == NULL)
25651 seh_nop_p = true;
25652 }
25653
25654 if (direct_p)
25655 xasm = "%!call\t%P0";
25656 else
25657 xasm = "%!call\t%A0";
25658
25659 output_asm_insn (xasm, &call_op);
25660
25661 if (seh_nop_p)
25662 return "nop";
25663
25664 return "";
25665 }
25666 \f
25667 /* Clear stack slot assignments remembered from previous functions.
25668 This is called from INIT_EXPANDERS once before RTL is emitted for each
25669 function. */
25670
25671 static struct machine_function *
25672 ix86_init_machine_status (void)
25673 {
25674 struct machine_function *f;
25675
25676 f = ggc_cleared_alloc<machine_function> ();
25677 f->use_fast_prologue_epilogue_nregs = -1;
25678 f->call_abi = ix86_abi;
25679
25680 return f;
25681 }
25682
25683 /* Return a MEM corresponding to a stack slot with mode MODE.
25684 Allocate a new slot if necessary.
25685
25686 The RTL for a function can have several slots available: N is
25687 which slot to use. */
25688
25689 rtx
25690 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25691 {
25692 struct stack_local_entry *s;
25693
25694 gcc_assert (n < MAX_386_STACK_LOCALS);
25695
25696 for (s = ix86_stack_locals; s; s = s->next)
25697 if (s->mode == mode && s->n == n)
25698 return validize_mem (copy_rtx (s->rtl));
25699
25700 s = ggc_alloc<stack_local_entry> ();
25701 s->n = n;
25702 s->mode = mode;
25703 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25704
25705 s->next = ix86_stack_locals;
25706 ix86_stack_locals = s;
25707 return validize_mem (copy_rtx (s->rtl));
25708 }
25709
25710 static void
25711 ix86_instantiate_decls (void)
25712 {
25713 struct stack_local_entry *s;
25714
25715 for (s = ix86_stack_locals; s; s = s->next)
25716 if (s->rtl != NULL_RTX)
25717 instantiate_decl_rtl (s->rtl);
25718 }
25719 \f
25720 /* Check whether x86 address PARTS is a pc-relative address. */
25721
25722 static bool
25723 rip_relative_addr_p (struct ix86_address *parts)
25724 {
25725 rtx base, index, disp;
25726
25727 base = parts->base;
25728 index = parts->index;
25729 disp = parts->disp;
25730
25731 if (disp && !base && !index)
25732 {
25733 if (TARGET_64BIT)
25734 {
25735 rtx symbol = disp;
25736
25737 if (GET_CODE (disp) == CONST)
25738 symbol = XEXP (disp, 0);
25739 if (GET_CODE (symbol) == PLUS
25740 && CONST_INT_P (XEXP (symbol, 1)))
25741 symbol = XEXP (symbol, 0);
25742
25743 if (GET_CODE (symbol) == LABEL_REF
25744 || (GET_CODE (symbol) == SYMBOL_REF
25745 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25746 || (GET_CODE (symbol) == UNSPEC
25747 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25748 || XINT (symbol, 1) == UNSPEC_PCREL
25749 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25750 return true;
25751 }
25752 }
25753 return false;
25754 }
25755
25756 /* Calculate the length of the memory address in the instruction encoding.
25757 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25758 or other prefixes. We never generate addr32 prefix for LEA insn. */
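/* A rough example, assuming 32-bit code: for the operand 4(%esp) this
   returns 2 -- one byte for the 8-bit displacement plus one for the SIB
   byte that an %esp base requires -- not counting the modrm byte itself.  */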
25759
25760 int
25761 memory_address_length (rtx addr, bool lea)
25762 {
25763 struct ix86_address parts;
25764 rtx base, index, disp;
25765 int len;
25766 int ok;
25767
25768 if (GET_CODE (addr) == PRE_DEC
25769 || GET_CODE (addr) == POST_INC
25770 || GET_CODE (addr) == PRE_MODIFY
25771 || GET_CODE (addr) == POST_MODIFY)
25772 return 0;
25773
25774 ok = ix86_decompose_address (addr, &parts);
25775 gcc_assert (ok);
25776
25777 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25778
25779 /* If this is not LEA instruction, add the length of addr32 prefix. */
25780 if (TARGET_64BIT && !lea
25781 && (SImode_address_operand (addr, VOIDmode)
25782 || (parts.base && GET_MODE (parts.base) == SImode)
25783 || (parts.index && GET_MODE (parts.index) == SImode)))
25784 len++;
25785
25786 base = parts.base;
25787 index = parts.index;
25788 disp = parts.disp;
25789
25790 if (base && GET_CODE (base) == SUBREG)
25791 base = SUBREG_REG (base);
25792 if (index && GET_CODE (index) == SUBREG)
25793 index = SUBREG_REG (index);
25794
25795 gcc_assert (base == NULL_RTX || REG_P (base));
25796 gcc_assert (index == NULL_RTX || REG_P (index));
25797
25798 /* Rule of thumb:
25799 - esp as the base always wants an index,
25800 - ebp as the base always wants a displacement,
25801 - r12 as the base always wants an index,
25802 - r13 as the base always wants a displacement. */
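/* An illustrative consequence (a sketch, not tied to a particular insn):
   movl (%esp), %eax needs a SIB byte because r/m = 100 selects the SIB
   form, while movl (%ebp), %eax has no mod = 00 encoding and is emitted
   as movl 0(%ebp), %eax with a one-byte displacement of zero.  */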
25803
25804 /* Register Indirect. */
25805 if (base && !index && !disp)
25806 {
25807 /* esp (for its index) and ebp (for its displacement) need
25808 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25809 code. */
25810 if (base == arg_pointer_rtx
25811 || base == frame_pointer_rtx
25812 || REGNO (base) == SP_REG
25813 || REGNO (base) == BP_REG
25814 || REGNO (base) == R12_REG
25815 || REGNO (base) == R13_REG)
25816 len++;
25817 }
25818
25819 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25820 is not disp32, but disp32(%rip), so for disp32
25821 SIB byte is needed, unless print_operand_address
25822 optimizes it into disp32(%rip) or (%rip) is implied
25823 by UNSPEC. */
25824 else if (disp && !base && !index)
25825 {
25826 len += 4;
25827 if (rip_relative_addr_p (&parts))
25828 len++;
25829 }
25830 else
25831 {
25832 /* Find the length of the displacement constant. */
25833 if (disp)
25834 {
25835 if (base && satisfies_constraint_K (disp))
25836 len += 1;
25837 else
25838 len += 4;
25839 }
25840 /* ebp always wants a displacement. Similarly r13. */
25841 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25842 len++;
25843
25844 /* An index requires the two-byte modrm form.... */
25845 if (index
25846 /* ...like esp (or r12), which always wants an index. */
25847 || base == arg_pointer_rtx
25848 || base == frame_pointer_rtx
25849 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25850 len++;
25851 }
25852
25853 return len;
25854 }
25855
25856 /* Compute the default value for the "length_immediate" attribute. When
25857 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
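/* An illustrative case: with SHORTFORM set, addl $3, %eax can use the
   sign-extended 8-bit immediate encoding, so the attribute value is 1,
   whereas addl $1000, %eax needs a full 4-byte immediate.  */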
25858 int
25859 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25860 {
25861 int len = 0;
25862 int i;
25863 extract_insn_cached (insn);
25864 for (i = recog_data.n_operands - 1; i >= 0; --i)
25865 if (CONSTANT_P (recog_data.operand[i]))
25866 {
25867 enum attr_mode mode = get_attr_mode (insn);
25868
25869 gcc_assert (!len);
25870 if (shortform && CONST_INT_P (recog_data.operand[i]))
25871 {
25872 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25873 switch (mode)
25874 {
25875 case MODE_QI:
25876 len = 1;
25877 continue;
25878 case MODE_HI:
25879 ival = trunc_int_for_mode (ival, HImode);
25880 break;
25881 case MODE_SI:
25882 ival = trunc_int_for_mode (ival, SImode);
25883 break;
25884 default:
25885 break;
25886 }
25887 if (IN_RANGE (ival, -128, 127))
25888 {
25889 len = 1;
25890 continue;
25891 }
25892 }
25893 switch (mode)
25894 {
25895 case MODE_QI:
25896 len = 1;
25897 break;
25898 case MODE_HI:
25899 len = 2;
25900 break;
25901 case MODE_SI:
25902 len = 4;
25903 break;
25904 /* Immediates for DImode instructions are encoded
25905 as 32bit sign extended values. */
25906 case MODE_DI:
25907 len = 4;
25908 break;
25909 default:
25910 fatal_insn ("unknown insn mode", insn);
25911 }
25912 }
25913 return len;
25914 }
25915
25916 /* Compute default value for "length_address" attribute. */
25917 int
25918 ix86_attr_length_address_default (rtx_insn *insn)
25919 {
25920 int i;
25921
25922 if (get_attr_type (insn) == TYPE_LEA)
25923 {
25924 rtx set = PATTERN (insn), addr;
25925
25926 if (GET_CODE (set) == PARALLEL)
25927 set = XVECEXP (set, 0, 0);
25928
25929 gcc_assert (GET_CODE (set) == SET);
25930
25931 addr = SET_SRC (set);
25932
25933 return memory_address_length (addr, true);
25934 }
25935
25936 extract_insn_cached (insn);
25937 for (i = recog_data.n_operands - 1; i >= 0; --i)
25938 if (MEM_P (recog_data.operand[i]))
25939 {
25940 constrain_operands_cached (insn, reload_completed);
25941 if (which_alternative != -1)
25942 {
25943 const char *constraints = recog_data.constraints[i];
25944 int alt = which_alternative;
25945
25946 while (*constraints == '=' || *constraints == '+')
25947 constraints++;
25948 while (alt-- > 0)
25949 while (*constraints++ != ',')
25950 ;
25951 /* Skip ignored operands. */
25952 if (*constraints == 'X')
25953 continue;
25954 }
25955 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25956 }
25957 return 0;
25958 }
25959
25960 /* Compute default value for "length_vex" attribute. It includes
25961 2 or 3 byte VEX prefix and 1 opcode byte. */
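/* Two illustrative cases, assuming 64-bit code: vaddps %xmm1, %xmm2, %xmm3
   fits the 2-byte (C5) VEX prefix, giving 2 + 1, while an insn with a DImode
   general register operand needs REX.W and therefore the 3-byte (C4) prefix,
   giving 3 + 1.  */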
25962
25963 int
25964 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
25965 bool has_vex_w)
25966 {
25967 int i;
25968
25969 /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX W bit
25970 requires the 3-byte VEX prefix. */
25971 if (!has_0f_opcode || has_vex_w)
25972 return 3 + 1;
25973
25974 /* We can always use 2 byte VEX prefix in 32bit. */
25975 if (!TARGET_64BIT)
25976 return 2 + 1;
25977
25978 extract_insn_cached (insn);
25979
25980 for (i = recog_data.n_operands - 1; i >= 0; --i)
25981 if (REG_P (recog_data.operand[i]))
25982 {
25983 /* REX.W bit uses 3 byte VEX prefix. */
25984 if (GET_MODE (recog_data.operand[i]) == DImode
25985 && GENERAL_REG_P (recog_data.operand[i]))
25986 return 3 + 1;
25987 }
25988 else
25989 {
25990 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25991 if (MEM_P (recog_data.operand[i])
25992 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25993 return 3 + 1;
25994 }
25995
25996 return 2 + 1;
25997 }
25998 \f
25999 /* Return the maximum number of instructions a cpu can issue. */
26000
26001 static int
26002 ix86_issue_rate (void)
26003 {
26004 switch (ix86_tune)
26005 {
26006 case PROCESSOR_PENTIUM:
26007 case PROCESSOR_BONNELL:
26008 case PROCESSOR_SILVERMONT:
26009 case PROCESSOR_KNL:
26010 case PROCESSOR_INTEL:
26011 case PROCESSOR_K6:
26012 case PROCESSOR_BTVER2:
26013 case PROCESSOR_PENTIUM4:
26014 case PROCESSOR_NOCONA:
26015 return 2;
26016
26017 case PROCESSOR_PENTIUMPRO:
26018 case PROCESSOR_ATHLON:
26019 case PROCESSOR_K8:
26020 case PROCESSOR_AMDFAM10:
26021 case PROCESSOR_GENERIC:
26022 case PROCESSOR_BTVER1:
26023 return 3;
26024
26025 case PROCESSOR_BDVER1:
26026 case PROCESSOR_BDVER2:
26027 case PROCESSOR_BDVER3:
26028 case PROCESSOR_BDVER4:
26029 case PROCESSOR_CORE2:
26030 case PROCESSOR_NEHALEM:
26031 case PROCESSOR_SANDYBRIDGE:
26032 case PROCESSOR_HASWELL:
26033 return 4;
26034
26035 default:
26036 return 1;
26037 }
26038 }
26039
26040 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags
26041 set by DEP_INSN and nothing else set by DEP_INSN. */
26042
26043 static bool
26044 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
26045 {
26046 rtx set, set2;
26047
26048 /* Simplify the test for uninteresting insns. */
26049 if (insn_type != TYPE_SETCC
26050 && insn_type != TYPE_ICMOV
26051 && insn_type != TYPE_FCMOV
26052 && insn_type != TYPE_IBR)
26053 return false;
26054
26055 if ((set = single_set (dep_insn)) != 0)
26056 {
26057 set = SET_DEST (set);
26058 set2 = NULL_RTX;
26059 }
26060 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
26061 && XVECLEN (PATTERN (dep_insn), 0) == 2
26062 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
26063 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
26064 {
26065 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
26066 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
26067 }
26068 else
26069 return false;
26070
26071 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
26072 return false;
26073
26074 /* This test is true if the dependent insn reads the flags but
26075 not any other potentially set register. */
26076 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
26077 return false;
26078
26079 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
26080 return false;
26081
26082 return true;
26083 }
26084
26085 /* Return true iff USE_INSN has a memory address with operands set by
26086 SET_INSN. */
26087
26088 bool
26089 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26090 {
26091 int i;
26092 extract_insn_cached (use_insn);
26093 for (i = recog_data.n_operands - 1; i >= 0; --i)
26094 if (MEM_P (recog_data.operand[i]))
26095 {
26096 rtx addr = XEXP (recog_data.operand[i], 0);
26097 return modified_in_p (addr, set_insn) != 0;
26098 }
26099 return false;
26100 }
26101
26102 /* Helper function for exact_store_load_dependency.
26103 Return true if addr is found in insn. */
26104 static bool
26105 exact_dependency_1 (rtx addr, rtx insn)
26106 {
26107 enum rtx_code code;
26108 const char *format_ptr;
26109 int i, j;
26110
26111 code = GET_CODE (insn);
26112 switch (code)
26113 {
26114 case MEM:
26115 if (rtx_equal_p (addr, insn))
26116 return true;
26117 break;
26118 case REG:
26119 CASE_CONST_ANY:
26120 case SYMBOL_REF:
26121 case CODE_LABEL:
26122 case PC:
26123 case CC0:
26124 case EXPR_LIST:
26125 return false;
26126 default:
26127 break;
26128 }
26129
26130 format_ptr = GET_RTX_FORMAT (code);
26131 for (i = 0; i < GET_RTX_LENGTH (code); i++)
26132 {
26133 switch (*format_ptr++)
26134 {
26135 case 'e':
26136 if (exact_dependency_1 (addr, XEXP (insn, i)))
26137 return true;
26138 break;
26139 case 'E':
26140 for (j = 0; j < XVECLEN (insn, i); j++)
26141 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
26142 return true;
26143 break;
26144 }
26145 }
26146 return false;
26147 }
26148
26149 /* Return true if there exists an exact dependency between the store and
26150 the load, i.e. the same memory address is used in both. */
26151 static bool
26152 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26153 {
26154 rtx set1, set2;
26155
26156 set1 = single_set (store);
26157 if (!set1)
26158 return false;
26159 if (!MEM_P (SET_DEST (set1)))
26160 return false;
26161 set2 = single_set (load);
26162 if (!set2)
26163 return false;
26164 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
26165 return true;
26166 return false;
26167 }
26168
26169 static int
26170 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26171 {
26172 enum attr_type insn_type, dep_insn_type;
26173 enum attr_memory memory;
26174 rtx set, set2;
26175 int dep_insn_code_number;
26176
26177 /* Anti and output dependencies have zero cost on all CPUs. */
26178 if (REG_NOTE_KIND (link) != 0)
26179 return 0;
26180
26181 dep_insn_code_number = recog_memoized (dep_insn);
26182
26183 /* If we can't recognize the insns, we can't really do anything. */
26184 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26185 return cost;
26186
26187 insn_type = get_attr_type (insn);
26188 dep_insn_type = get_attr_type (dep_insn);
26189
26190 switch (ix86_tune)
26191 {
26192 case PROCESSOR_PENTIUM:
26193 /* Address Generation Interlock adds a cycle of latency. */
26194 if (insn_type == TYPE_LEA)
26195 {
26196 rtx addr = PATTERN (insn);
26197
26198 if (GET_CODE (addr) == PARALLEL)
26199 addr = XVECEXP (addr, 0, 0);
26200
26201 gcc_assert (GET_CODE (addr) == SET);
26202
26203 addr = SET_SRC (addr);
26204 if (modified_in_p (addr, dep_insn))
26205 cost += 1;
26206 }
26207 else if (ix86_agi_dependent (dep_insn, insn))
26208 cost += 1;
26209
26210 /* ??? Compares pair with jump/setcc. */
26211 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26212 cost = 0;
26213
26214 /* Floating point stores require value to be ready one cycle earlier. */
26215 if (insn_type == TYPE_FMOV
26216 && get_attr_memory (insn) == MEMORY_STORE
26217 && !ix86_agi_dependent (dep_insn, insn))
26218 cost += 1;
26219 break;
26220
26221 case PROCESSOR_PENTIUMPRO:
26222 /* INT->FP conversion is expensive. */
26223 if (get_attr_fp_int_src (dep_insn))
26224 cost += 5;
26225
26226 /* There is one cycle extra latency between an FP op and a store. */
26227 if (insn_type == TYPE_FMOV
26228 && (set = single_set (dep_insn)) != NULL_RTX
26229 && (set2 = single_set (insn)) != NULL_RTX
26230 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26231 && MEM_P (SET_DEST (set2)))
26232 cost += 1;
26233
26234 memory = get_attr_memory (insn);
26235
26236 /* Show ability of reorder buffer to hide latency of load by executing
26237 in parallel with previous instruction in case
26238 previous instruction is not needed to compute the address. */
26239 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26240 && !ix86_agi_dependent (dep_insn, insn))
26241 {
26242 /* Claim moves to take one cycle, as the core can issue one load
26243 at a time and the next load can start a cycle later. */
26244 if (dep_insn_type == TYPE_IMOV
26245 || dep_insn_type == TYPE_FMOV)
26246 cost = 1;
26247 else if (cost > 1)
26248 cost--;
26249 }
26250 break;
26251
26252 case PROCESSOR_K6:
26253 /* The esp dependency is resolved before
26254 the instruction is really finished. */
26255 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26256 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26257 return 1;
26258
26259 /* INT->FP conversion is expensive. */
26260 if (get_attr_fp_int_src (dep_insn))
26261 cost += 5;
26262
26263 memory = get_attr_memory (insn);
26264
26265 /* Show ability of reorder buffer to hide latency of load by executing
26266 in parallel with previous instruction in case
26267 previous instruction is not needed to compute the address. */
26268 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26269 && !ix86_agi_dependent (dep_insn, insn))
26270 {
26271 /* Claim moves to take one cycle, as the core can issue one load
26272 at a time and the next load can start a cycle later. */
26273 if (dep_insn_type == TYPE_IMOV
26274 || dep_insn_type == TYPE_FMOV)
26275 cost = 1;
26276 else if (cost > 2)
26277 cost -= 2;
26278 else
26279 cost = 1;
26280 }
26281 break;
26282
26283 case PROCESSOR_AMDFAM10:
26284 case PROCESSOR_BDVER1:
26285 case PROCESSOR_BDVER2:
26286 case PROCESSOR_BDVER3:
26287 case PROCESSOR_BDVER4:
26288 case PROCESSOR_BTVER1:
26289 case PROCESSOR_BTVER2:
26290 case PROCESSOR_GENERIC:
26291 /* The stack engine allows push and pop instructions to execute in parallel. */
26292 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26293 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26294 return 0;
26295 /* FALLTHRU */
26296
26297 case PROCESSOR_ATHLON:
26298 case PROCESSOR_K8:
26299 memory = get_attr_memory (insn);
26300
26301 /* Show ability of reorder buffer to hide latency of load by executing
26302 in parallel with previous instruction in case
26303 previous instruction is not needed to compute the address. */
26304 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26305 && !ix86_agi_dependent (dep_insn, insn))
26306 {
26307 enum attr_unit unit = get_attr_unit (insn);
26308 int loadcost = 3;
26309
26310 /* Because of the difference between the length of integer and
26311 floating unit pipeline preparation stages, the memory operands
26312 for floating point are cheaper.
26313
26314 ??? For Athlon the difference is most probably 2. */
26315 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26316 loadcost = 3;
26317 else
26318 loadcost = TARGET_ATHLON ? 2 : 0;
26319
26320 if (cost >= loadcost)
26321 cost -= loadcost;
26322 else
26323 cost = 0;
26324 }
26325 break;
26326
26327 case PROCESSOR_CORE2:
26328 case PROCESSOR_NEHALEM:
26329 case PROCESSOR_SANDYBRIDGE:
26330 case PROCESSOR_HASWELL:
26331 /* The stack engine allows push and pop instructions to execute in parallel. */
26332 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26333 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26334 return 0;
26335
26336 memory = get_attr_memory (insn);
26337
26338 /* Show ability of reorder buffer to hide latency of load by executing
26339 in parallel with previous instruction in case
26340 previous instruction is not needed to compute the address. */
26341 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26342 && !ix86_agi_dependent (dep_insn, insn))
26343 {
26344 if (cost >= 4)
26345 cost -= 4;
26346 else
26347 cost = 0;
26348 }
26349 break;
26350
26351 case PROCESSOR_SILVERMONT:
26352 case PROCESSOR_KNL:
26353 case PROCESSOR_INTEL:
26354 if (!reload_completed)
26355 return cost;
26356
26357 /* Increase cost of integer loads. */
26358 memory = get_attr_memory (dep_insn);
26359 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26360 {
26361 enum attr_unit unit = get_attr_unit (dep_insn);
26362 if (unit == UNIT_INTEGER && cost == 1)
26363 {
26364 if (memory == MEMORY_LOAD)
26365 cost = 3;
26366 else
26367 {
26368 /* Increase cost of ld/st for short int types only
26369 because of store forwarding issue. */
26370 rtx set = single_set (dep_insn);
26371 if (set && (GET_MODE (SET_DEST (set)) == QImode
26372 || GET_MODE (SET_DEST (set)) == HImode))
26373 {
26374 /* Increase cost of store/load insn if exact
26375 dependence exists and it is load insn. */
26376 enum attr_memory insn_memory = get_attr_memory (insn);
26377 if (insn_memory == MEMORY_LOAD
26378 && exact_store_load_dependency (dep_insn, insn))
26379 cost = 3;
26380 }
26381 }
26382 }
26383 }
26384
26385 default:
26386 break;
26387 }
26388
26389 return cost;
26390 }
26391
26392 /* How many alternative schedules to try. This should be as wide as the
26393 scheduling freedom in the DFA, but no wider. Making this value too
26394 large results in extra work for the scheduler. */
26395
26396 static int
26397 ia32_multipass_dfa_lookahead (void)
26398 {
26399 switch (ix86_tune)
26400 {
26401 case PROCESSOR_PENTIUM:
26402 return 2;
26403
26404 case PROCESSOR_PENTIUMPRO:
26405 case PROCESSOR_K6:
26406 return 1;
26407
26408 case PROCESSOR_BDVER1:
26409 case PROCESSOR_BDVER2:
26410 case PROCESSOR_BDVER3:
26411 case PROCESSOR_BDVER4:
26412 /* We use lookahead value 4 for BD both before and after reload
26413 schedules. Plan is to have value 8 included for O3. */
26414 return 4;
26415
26416 case PROCESSOR_CORE2:
26417 case PROCESSOR_NEHALEM:
26418 case PROCESSOR_SANDYBRIDGE:
26419 case PROCESSOR_HASWELL:
26420 case PROCESSOR_BONNELL:
26421 case PROCESSOR_SILVERMONT:
26422 case PROCESSOR_KNL:
26423 case PROCESSOR_INTEL:
26424 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26425 as the number of instructions that can be executed in a cycle, i.e.,
26426 issue_rate. I wonder why tuning for many CPUs does not do this. */
26427 if (reload_completed)
26428 return ix86_issue_rate ();
26429 /* Don't use lookahead for pre-reload schedule to save compile time. */
26430 return 0;
26431
26432 default:
26433 return 0;
26434 }
26435 }
26436
26437 /* Return true if target platform supports macro-fusion. */
26438
26439 static bool
26440 ix86_macro_fusion_p ()
26441 {
26442 return TARGET_FUSE_CMP_AND_BRANCH;
26443 }
26444
26445 /* Check whether the current microarchitecture supports macro fusion
26446 for the insn pair "CONDGEN + CONDJMP". Refer to the
26447 "Intel Architectures Optimization Reference Manual". */
26448
26449 static bool
26450 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26451 {
26452 rtx src, dest;
26453 enum rtx_code ccode;
26454 rtx compare_set = NULL_RTX, test_if, cond;
26455 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26456
26457 if (!any_condjump_p (condjmp))
26458 return false;
26459
26460 if (get_attr_type (condgen) != TYPE_TEST
26461 && get_attr_type (condgen) != TYPE_ICMP
26462 && get_attr_type (condgen) != TYPE_INCDEC
26463 && get_attr_type (condgen) != TYPE_ALU)
26464 return false;
26465
26466 compare_set = single_set (condgen);
26467 if (compare_set == NULL_RTX
26468 && !TARGET_FUSE_ALU_AND_BRANCH)
26469 return false;
26470
26471 if (compare_set == NULL_RTX)
26472 {
26473 int i;
26474 rtx pat = PATTERN (condgen);
26475 for (i = 0; i < XVECLEN (pat, 0); i++)
26476 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26477 {
26478 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26479 if (GET_CODE (set_src) == COMPARE)
26480 compare_set = XVECEXP (pat, 0, i);
26481 else
26482 alu_set = XVECEXP (pat, 0, i);
26483 }
26484 }
26485 if (compare_set == NULL_RTX)
26486 return false;
26487 src = SET_SRC (compare_set);
26488 if (GET_CODE (src) != COMPARE)
26489 return false;
26490
26491 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26492 supported. */
26493 if ((MEM_P (XEXP (src, 0))
26494 && CONST_INT_P (XEXP (src, 1)))
26495 || (MEM_P (XEXP (src, 1))
26496 && CONST_INT_P (XEXP (src, 0))))
26497 return false;
26498
26499 /* No fusion for RIP-relative address. */
26500 if (MEM_P (XEXP (src, 0)))
26501 addr = XEXP (XEXP (src, 0), 0);
26502 else if (MEM_P (XEXP (src, 1)))
26503 addr = XEXP (XEXP (src, 1), 0);
26504
26505 if (addr) {
26506 ix86_address parts;
26507 int ok = ix86_decompose_address (addr, &parts);
26508 gcc_assert (ok);
26509
26510 if (rip_relative_addr_p (&parts))
26511 return false;
26512 }
26513
26514 test_if = SET_SRC (pc_set (condjmp));
26515 cond = XEXP (test_if, 0);
26516 ccode = GET_CODE (cond);
26517 /* Check whether the conditional jump uses the Sign or Overflow flags. */
26518 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26519 && (ccode == GE
26520 || ccode == GT
26521 || ccode == LE
26522 || ccode == LT))
26523 return false;
26524
26525 /* Return true for TYPE_TEST and TYPE_ICMP. */
26526 if (get_attr_type (condgen) == TYPE_TEST
26527 || get_attr_type (condgen) == TYPE_ICMP)
26528 return true;
26529
26530 /* The following handles the case of macro-fusion for alu + jmp. */
26531 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26532 return false;
26533
26534 /* No fusion for alu op with memory destination operand. */
26535 dest = SET_DEST (alu_set);
26536 if (MEM_P (dest))
26537 return false;
26538
26539 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26540 supported. */
26541 if (get_attr_type (condgen) == TYPE_INCDEC
26542 && (ccode == GEU
26543 || ccode == GTU
26544 || ccode == LEU
26545 || ccode == LTU))
26546 return false;
26547
26548 return true;
26549 }
26550
26551 /* Try to reorder the ready list to take advantage of Atom pipelined IMUL
26552 execution. It is applied if
26553 (1) an IMUL instruction is on the top of the list;
26554 (2) there exists exactly one producer of an independent IMUL instruction
26555 in the ready list.
26556 Return the index of the IMUL producer if it was found and -1 otherwise. */
26557 static int
26558 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26559 {
26560 rtx_insn *insn;
26561 rtx set, insn1, insn2;
26562 sd_iterator_def sd_it;
26563 dep_t dep;
26564 int index = -1;
26565 int i;
26566
26567 if (!TARGET_BONNELL)
26568 return index;
26569
26570 /* Check that IMUL instruction is on the top of ready list. */
26571 insn = ready[n_ready - 1];
26572 set = single_set (insn);
26573 if (!set)
26574 return index;
26575 if (!(GET_CODE (SET_SRC (set)) == MULT
26576 && GET_MODE (SET_SRC (set)) == SImode))
26577 return index;
26578
26579 /* Search for producer of independent IMUL instruction. */
26580 for (i = n_ready - 2; i >= 0; i--)
26581 {
26582 insn = ready[i];
26583 if (!NONDEBUG_INSN_P (insn))
26584 continue;
26585 /* Skip IMUL instruction. */
26586 insn2 = PATTERN (insn);
26587 if (GET_CODE (insn2) == PARALLEL)
26588 insn2 = XVECEXP (insn2, 0, 0);
26589 if (GET_CODE (insn2) == SET
26590 && GET_CODE (SET_SRC (insn2)) == MULT
26591 && GET_MODE (SET_SRC (insn2)) == SImode)
26592 continue;
26593
26594 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26595 {
26596 rtx con;
26597 con = DEP_CON (dep);
26598 if (!NONDEBUG_INSN_P (con))
26599 continue;
26600 insn1 = PATTERN (con);
26601 if (GET_CODE (insn1) == PARALLEL)
26602 insn1 = XVECEXP (insn1, 0, 0);
26603
26604 if (GET_CODE (insn1) == SET
26605 && GET_CODE (SET_SRC (insn1)) == MULT
26606 && GET_MODE (SET_SRC (insn1)) == SImode)
26607 {
26608 sd_iterator_def sd_it1;
26609 dep_t dep1;
26610 /* Check if there is no other dependee for IMUL. */
26611 index = i;
26612 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26613 {
26614 rtx pro;
26615 pro = DEP_PRO (dep1);
26616 if (!NONDEBUG_INSN_P (pro))
26617 continue;
26618 if (pro != insn)
26619 index = -1;
26620 }
26621 if (index >= 0)
26622 break;
26623 }
26624 }
26625 if (index >= 0)
26626 break;
26627 }
26628 return index;
26629 }
26630
26631 /* Try to find the best candidate for the top of the ready list if two insns
26632 have the same priority - a candidate is best if its dependees were
26633 scheduled earlier. Applied for Silvermont only.
26634 Return true if the top 2 insns must be interchanged. */
26635 static bool
26636 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26637 {
26638 rtx_insn *top = ready[n_ready - 1];
26639 rtx_insn *next = ready[n_ready - 2];
26640 rtx set;
26641 sd_iterator_def sd_it;
26642 dep_t dep;
26643 int clock1 = -1;
26644 int clock2 = -1;
26645 #define INSN_TICK(INSN) (HID (INSN)->tick)
26646
26647 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26648 return false;
26649
26650 if (!NONDEBUG_INSN_P (top))
26651 return false;
26652 if (!NONJUMP_INSN_P (top))
26653 return false;
26654 if (!NONDEBUG_INSN_P (next))
26655 return false;
26656 if (!NONJUMP_INSN_P (next))
26657 return false;
26658 set = single_set (top);
26659 if (!set)
26660 return false;
26661 set = single_set (next);
26662 if (!set)
26663 return false;
26664
26665 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26666 {
26667 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26668 return false;
26669 /* Determine the winner more precisely. */
26670 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26671 {
26672 rtx pro;
26673 pro = DEP_PRO (dep);
26674 if (!NONDEBUG_INSN_P (pro))
26675 continue;
26676 if (INSN_TICK (pro) > clock1)
26677 clock1 = INSN_TICK (pro);
26678 }
26679 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26680 {
26681 rtx pro;
26682 pro = DEP_PRO (dep);
26683 if (!NONDEBUG_INSN_P (pro))
26684 continue;
26685 if (INSN_TICK (pro) > clock2)
26686 clock2 = INSN_TICK (pro);
26687 }
26688
26689 if (clock1 == clock2)
26690 {
26691 /* Determine winner - load must win. */
26692 enum attr_memory memory1, memory2;
26693 memory1 = get_attr_memory (top);
26694 memory2 = get_attr_memory (next);
26695 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26696 return true;
26697 }
26698 return (bool) (clock2 < clock1);
26699 }
26700 return false;
26701 #undef INSN_TICK
26702 }
26703
26704 /* Perform possible reordering of the ready list, for Atom/Silvermont only.
26705 Return the issue rate. */
26706 static int
26707 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26708 int *pn_ready, int clock_var)
26709 {
26710 int issue_rate = -1;
26711 int n_ready = *pn_ready;
26712 int i;
26713 rtx_insn *insn;
26714 int index = -1;
26715
26716 /* Set up issue rate. */
26717 issue_rate = ix86_issue_rate ();
26718
26719 /* Do reordering for BONNELL/SILVERMONT only. */
26720 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26721 return issue_rate;
26722
26723 /* Nothing to do if ready list contains only 1 instruction. */
26724 if (n_ready <= 1)
26725 return issue_rate;
26726
26727 /* Do reordering for the post-reload scheduler only. */
26728 if (!reload_completed)
26729 return issue_rate;
26730
26731 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26732 {
26733 if (sched_verbose > 1)
26734 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26735 INSN_UID (ready[index]));
26736
26737 /* Put IMUL producer (ready[index]) at the top of ready list. */
26738 insn = ready[index];
26739 for (i = index; i < n_ready - 1; i++)
26740 ready[i] = ready[i + 1];
26741 ready[n_ready - 1] = insn;
26742 return issue_rate;
26743 }
26744
26745 /* Skip selective scheduling since HID is not populated in it. */
26746 if (clock_var != 0
26747 && !sel_sched_p ()
26748 && swap_top_of_ready_list (ready, n_ready))
26749 {
26750 if (sched_verbose > 1)
26751 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26752 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26753 /* Swap 2 top elements of ready list. */
26754 insn = ready[n_ready - 1];
26755 ready[n_ready - 1] = ready[n_ready - 2];
26756 ready[n_ready - 2] = insn;
26757 }
26758 return issue_rate;
26759 }
26760
26761 static bool
26762 ix86_class_likely_spilled_p (reg_class_t);
26763
26764 /* Return true if the lhs of INSN is a HW function argument register and set
26765 is_spilled to true if it is a likely-spilled HW register. */
26766 static bool
26767 insn_is_function_arg (rtx insn, bool* is_spilled)
26768 {
26769 rtx dst;
26770
26771 if (!NONDEBUG_INSN_P (insn))
26772 return false;
26773 /* Call instructions are not movable; ignore them. */
26774 if (CALL_P (insn))
26775 return false;
26776 insn = PATTERN (insn);
26777 if (GET_CODE (insn) == PARALLEL)
26778 insn = XVECEXP (insn, 0, 0);
26779 if (GET_CODE (insn) != SET)
26780 return false;
26781 dst = SET_DEST (insn);
26782 if (REG_P (dst) && HARD_REGISTER_P (dst)
26783 && ix86_function_arg_regno_p (REGNO (dst)))
26784 {
26785 /* Is it likely spilled HW register? */
26786 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26787 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26788 *is_spilled = true;
26789 return true;
26790 }
26791 return false;
26792 }
26793
26794 /* Add output dependencies for a chain of adjacent function arguments, but
26795 only if there is a move to a likely-spilled HW register. Return the first
26796 argument if at least one dependence was added, or NULL otherwise. */
26797 static rtx_insn *
26798 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26799 {
26800 rtx_insn *insn;
26801 rtx_insn *last = call;
26802 rtx_insn *first_arg = NULL;
26803 bool is_spilled = false;
26804
26805 head = PREV_INSN (head);
26806
26807 /* Find the argument-passing instruction nearest to the call. */
26808 while (true)
26809 {
26810 last = PREV_INSN (last);
26811 if (last == head)
26812 return NULL;
26813 if (!NONDEBUG_INSN_P (last))
26814 continue;
26815 if (insn_is_function_arg (last, &is_spilled))
26816 break;
26817 return NULL;
26818 }
26819
26820 first_arg = last;
26821 while (true)
26822 {
26823 insn = PREV_INSN (last);
26824 if (!INSN_P (insn))
26825 break;
26826 if (insn == head)
26827 break;
26828 if (!NONDEBUG_INSN_P (insn))
26829 {
26830 last = insn;
26831 continue;
26832 }
26833 if (insn_is_function_arg (insn, &is_spilled))
26834 {
26835 /* Add an output dependence between two function arguments if the chain
26836 of output arguments contains likely-spilled HW registers. */
26837 if (is_spilled)
26838 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26839 first_arg = last = insn;
26840 }
26841 else
26842 break;
26843 }
26844 if (!is_spilled)
26845 return NULL;
26846 return first_arg;
26847 }
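/* For illustration only -- a hypothetical insn stream, not taken from any
   particular test case: given a sequence such as

       (set (reg:SI di) ...)   <- argument in a likely spilled hard register
       (set (reg:SI si) ...)   <- argument in a likely spilled hard register
       (call ...)

   the loop above walks backwards from the call, recognizes both sets as
   argument set-ups, and chains them with output dependencies so that the
   pre-reload scheduler cannot reorder the argument set-ups; the first set
   is returned so that ix86_dependencies_evaluation_hook can also restrict
   motion of unrelated insns across it.  */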
26848
26849 /* Add output or anti dependency from insn to first_arg to restrict its code
26850 motion. */
26851 static void
26852 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26853 {
26854 rtx set;
26855 rtx tmp;
26856
26857 /* Add anti dependencies for bounds stores. */
26858 if (INSN_P (insn)
26859 && GET_CODE (PATTERN (insn)) == PARALLEL
26860 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
26861 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
26862 {
26863 add_dependence (first_arg, insn, REG_DEP_ANTI);
26864 return;
26865 }
26866
26867 set = single_set (insn);
26868 if (!set)
26869 return;
26870 tmp = SET_DEST (set);
26871 if (REG_P (tmp))
26872 {
26873 /* Add output dependency to the first function argument. */
26874 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26875 return;
26876 }
26877 /* Add anti dependency. */
26878 add_dependence (first_arg, insn, REG_DEP_ANTI);
26879 }
26880
26881 /* Avoid cross-block motion of a function argument by adding a dependency
26882 from the first non-jump instruction in BB. */
26883 static void
26884 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26885 {
26886 rtx_insn *insn = BB_END (bb);
26887
26888 while (insn)
26889 {
26890 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26891 {
26892 rtx set = single_set (insn);
26893 if (set)
26894 {
26895 avoid_func_arg_motion (arg, insn);
26896 return;
26897 }
26898 }
26899 if (insn == BB_HEAD (bb))
26900 return;
26901 insn = PREV_INSN (insn);
26902 }
26903 }
26904
26905 /* Hook for pre-reload schedule - avoid motion of function arguments
26906 passed in likely spilled HW registers. */
26907 static void
26908 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26909 {
26910 rtx_insn *insn;
26911 rtx_insn *first_arg = NULL;
26912 if (reload_completed)
26913 return;
26914 while (head != tail && DEBUG_INSN_P (head))
26915 head = NEXT_INSN (head);
26916 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26917 if (INSN_P (insn) && CALL_P (insn))
26918 {
26919 first_arg = add_parameter_dependencies (insn, head);
26920 if (first_arg)
26921 {
26922 /* Add a dependee for the first argument to predecessors, but only
26923 if the region contains more than one block. */
26924 basic_block bb = BLOCK_FOR_INSN (insn);
26925 int rgn = CONTAINING_RGN (bb->index);
26926 int nr_blks = RGN_NR_BLOCKS (rgn);
26927 /* Skip trivial regions and region head blocks that can have
26928 predecessors outside of the region. */
26929 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26930 {
26931 edge e;
26932 edge_iterator ei;
26933
26934 /* Regions are SCCs with the exception of selective
26935 scheduling with pipelining of outer blocks enabled.
26936 So also check that immediate predecessors of a non-head
26937 block are in the same region. */
26938 FOR_EACH_EDGE (e, ei, bb->preds)
26939 {
26940 /* Avoid creating loop-carried dependencies by using the
26941 topological ordering of the region. */
26942 if (rgn == CONTAINING_RGN (e->src->index)
26943 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26944 add_dependee_for_func_arg (first_arg, e->src);
26945 }
26946 }
26947 insn = first_arg;
26948 if (insn == head)
26949 break;
26950 }
26951 }
26952 else if (first_arg)
26953 avoid_func_arg_motion (first_arg, insn);
26954 }
26955
26956 /* Hook for pre-reload schedule - set priority of moves from likely spilled
26957 HW registers to maximum, to schedule them as soon as possible. These are
26958 moves from function argument registers at the top of the function entry
26959 and moves from function return value registers after a call. */
26960 static int
26961 ix86_adjust_priority (rtx_insn *insn, int priority)
26962 {
26963 rtx set;
26964
26965 if (reload_completed)
26966 return priority;
26967
26968 if (!NONDEBUG_INSN_P (insn))
26969 return priority;
26970
26971 set = single_set (insn);
26972 if (set)
26973 {
26974 rtx tmp = SET_SRC (set);
26975 if (REG_P (tmp)
26976 && HARD_REGISTER_P (tmp)
26977 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26978 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26979 return current_sched_info->sched_max_insns_priority;
26980 }
26981
26982 return priority;
26983 }
26984
26985 /* Model decoder of Core 2/i7.
26986 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
26987 track the instruction fetch block boundaries and make sure that long
26988 (9+ bytes) instructions are assigned to D0. */
26989
26990 /* Maximum length of an insn that can be handled by
26991 a secondary decoder unit. '8' for Core 2/i7. */
26992 static int core2i7_secondary_decoder_max_insn_size;
26993
26994 /* Ifetch block size, i.e., the number of bytes the decoder reads per cycle.
26995 '16' for Core 2/i7. */
26996 static int core2i7_ifetch_block_size;
26997
26998 /* Maximum number of instructions the decoder can handle per cycle.
26999 '6' for Core 2/i7. */
27000 static int core2i7_ifetch_block_max_insns;
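/* A short worked example of the bookkeeping below, using the Core 2/i7
   parameters installed by ix86_sched_init_global (16-byte ifetch block,
   at most 6 insns per block, secondary decoders limited to 8-byte insns):
   if the insns already issued this cycle occupy 10 + 3 = 13 bytes of the
   ifetch block, another 3-byte insn still fits (16 bytes, 3 insns), but a
   4-byte insn would overflow the block and is masked out of ready_try by
   core2i7_first_cycle_multipass_filter_ready_try.  Likewise an insn longer
   than 8 bytes is only acceptable as the first insn of the cycle.  */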
27001
27002 typedef struct ix86_first_cycle_multipass_data_ *
27003 ix86_first_cycle_multipass_data_t;
27004 typedef const struct ix86_first_cycle_multipass_data_ *
27005 const_ix86_first_cycle_multipass_data_t;
27006
27007 /* A variable to store target state across calls to max_issue within
27008 one cycle. */
27009 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
27010 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
27011
27012 /* Initialize DATA. */
27013 static void
27014 core2i7_first_cycle_multipass_init (void *_data)
27015 {
27016 ix86_first_cycle_multipass_data_t data
27017 = (ix86_first_cycle_multipass_data_t) _data;
27018
27019 data->ifetch_block_len = 0;
27020 data->ifetch_block_n_insns = 0;
27021 data->ready_try_change = NULL;
27022 data->ready_try_change_size = 0;
27023 }
27024
27025 /* Advancing the cycle; reset ifetch block counts. */
27026 static void
27027 core2i7_dfa_post_advance_cycle (void)
27028 {
27029 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
27030
27031 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27032
27033 data->ifetch_block_len = 0;
27034 data->ifetch_block_n_insns = 0;
27035 }
27036
27037 static int min_insn_size (rtx_insn *);
27038
27039 /* Filter out insns from ready_try that the core will not be able to issue
27040 on the current cycle due to decoder restrictions. */
27041 static void
27042 core2i7_first_cycle_multipass_filter_ready_try
27043 (const_ix86_first_cycle_multipass_data_t data,
27044 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
27045 {
27046 while (n_ready--)
27047 {
27048 rtx_insn *insn;
27049 int insn_size;
27050
27051 if (ready_try[n_ready])
27052 continue;
27053
27054 insn = get_ready_element (n_ready);
27055 insn_size = min_insn_size (insn);
27056
27057 if (/* If this insn is too long for a secondary decoder ... */
27058 (!first_cycle_insn_p
27059 && insn_size > core2i7_secondary_decoder_max_insn_size)
27060 /* ... or it would not fit into the ifetch block ... */
27061 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
27062 /* ... or the decoder is full already ... */
27063 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
27064 /* ... mask the insn out. */
27065 {
27066 ready_try[n_ready] = 1;
27067
27068 if (data->ready_try_change)
27069 bitmap_set_bit (data->ready_try_change, n_ready);
27070 }
27071 }
27072 }
27073
27074 /* Prepare for a new round of multipass lookahead scheduling. */
27075 static void
27076 core2i7_first_cycle_multipass_begin (void *_data,
27077 signed char *ready_try, int n_ready,
27078 bool first_cycle_insn_p)
27079 {
27080 ix86_first_cycle_multipass_data_t data
27081 = (ix86_first_cycle_multipass_data_t) _data;
27082 const_ix86_first_cycle_multipass_data_t prev_data
27083 = ix86_first_cycle_multipass_data;
27084
27085 /* Restore the state from the end of the previous round. */
27086 data->ifetch_block_len = prev_data->ifetch_block_len;
27087 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
27088
27089 /* Filter instructions that cannot be issued on current cycle due to
27090 decoder restrictions. */
27091 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27092 first_cycle_insn_p);
27093 }
27094
27095 /* INSN is being issued in the current solution. Account for its impact on
27096 the decoder model. */
27097 static void
27098 core2i7_first_cycle_multipass_issue (void *_data,
27099 signed char *ready_try, int n_ready,
27100 rtx_insn *insn, const void *_prev_data)
27101 {
27102 ix86_first_cycle_multipass_data_t data
27103 = (ix86_first_cycle_multipass_data_t) _data;
27104 const_ix86_first_cycle_multipass_data_t prev_data
27105 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27106
27107 int insn_size = min_insn_size (insn);
27108
27109 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27110 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27111 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27112 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27113
27114 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
27115 if (!data->ready_try_change)
27116 {
27117 data->ready_try_change = sbitmap_alloc (n_ready);
27118 data->ready_try_change_size = n_ready;
27119 }
27120 else if (data->ready_try_change_size < n_ready)
27121 {
27122 data->ready_try_change = sbitmap_resize (data->ready_try_change,
27123 n_ready, 0);
27124 data->ready_try_change_size = n_ready;
27125 }
27126 bitmap_clear (data->ready_try_change);
27127
27128 /* Filter out insns from ready_try that the core will not be able to issue
27129 on the current cycle due to decoder restrictions. */
27130 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27131 false);
27132 }
27133
27134 /* Revert the effect on ready_try. */
27135 static void
27136 core2i7_first_cycle_multipass_backtrack (const void *_data,
27137 signed char *ready_try,
27138 int n_ready ATTRIBUTE_UNUSED)
27139 {
27140 const_ix86_first_cycle_multipass_data_t data
27141 = (const_ix86_first_cycle_multipass_data_t) _data;
27142 unsigned int i = 0;
27143 sbitmap_iterator sbi;
27144
27145 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27146 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27147 {
27148 ready_try[i] = 0;
27149 }
27150 }
27151
27152 /* Save the result of multipass lookahead scheduling for the next round. */
27153 static void
27154 core2i7_first_cycle_multipass_end (const void *_data)
27155 {
27156 const_ix86_first_cycle_multipass_data_t data
27157 = (const_ix86_first_cycle_multipass_data_t) _data;
27158 ix86_first_cycle_multipass_data_t next_data
27159 = ix86_first_cycle_multipass_data;
27160
27161 if (data != NULL)
27162 {
27163 next_data->ifetch_block_len = data->ifetch_block_len;
27164 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27165 }
27166 }
27167
27168 /* Deallocate target data. */
27169 static void
27170 core2i7_first_cycle_multipass_fini (void *_data)
27171 {
27172 ix86_first_cycle_multipass_data_t data
27173 = (ix86_first_cycle_multipass_data_t) _data;
27174
27175 if (data->ready_try_change)
27176 {
27177 sbitmap_free (data->ready_try_change);
27178 data->ready_try_change = NULL;
27179 data->ready_try_change_size = 0;
27180 }
27181 }
27182
27183 /* Prepare for scheduling pass. */
27184 static void
27185 ix86_sched_init_global (FILE *, int, int)
27186 {
27187 /* Install scheduling hooks for the current CPU. Some of these hooks are used
27188 in time-critical parts of the scheduler, so we only set them up when
27189 they are actually used. */
27190 switch (ix86_tune)
27191 {
27192 case PROCESSOR_CORE2:
27193 case PROCESSOR_NEHALEM:
27194 case PROCESSOR_SANDYBRIDGE:
27195 case PROCESSOR_HASWELL:
27196 /* Do not perform multipass scheduling for pre-reload schedule
27197 to save compile time. */
27198 if (reload_completed)
27199 {
27200 targetm.sched.dfa_post_advance_cycle
27201 = core2i7_dfa_post_advance_cycle;
27202 targetm.sched.first_cycle_multipass_init
27203 = core2i7_first_cycle_multipass_init;
27204 targetm.sched.first_cycle_multipass_begin
27205 = core2i7_first_cycle_multipass_begin;
27206 targetm.sched.first_cycle_multipass_issue
27207 = core2i7_first_cycle_multipass_issue;
27208 targetm.sched.first_cycle_multipass_backtrack
27209 = core2i7_first_cycle_multipass_backtrack;
27210 targetm.sched.first_cycle_multipass_end
27211 = core2i7_first_cycle_multipass_end;
27212 targetm.sched.first_cycle_multipass_fini
27213 = core2i7_first_cycle_multipass_fini;
27214
27215 /* Set decoder parameters. */
27216 core2i7_secondary_decoder_max_insn_size = 8;
27217 core2i7_ifetch_block_size = 16;
27218 core2i7_ifetch_block_max_insns = 6;
27219 break;
27220 }
27221 /* ... Fall through ... */
27222 default:
27223 targetm.sched.dfa_post_advance_cycle = NULL;
27224 targetm.sched.first_cycle_multipass_init = NULL;
27225 targetm.sched.first_cycle_multipass_begin = NULL;
27226 targetm.sched.first_cycle_multipass_issue = NULL;
27227 targetm.sched.first_cycle_multipass_backtrack = NULL;
27228 targetm.sched.first_cycle_multipass_end = NULL;
27229 targetm.sched.first_cycle_multipass_fini = NULL;
27230 break;
27231 }
27232 }
27233
27234 \f
27235 /* Compute the alignment given to a constant that is being placed in memory.
27236 EXP is the constant and ALIGN is the alignment that the object would
27237 ordinarily have.
27238 The value of this function is used instead of that alignment to align
27239 the object. */
27240
27241 int
27242 ix86_constant_alignment (tree exp, int align)
27243 {
27244 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27245 || TREE_CODE (exp) == INTEGER_CST)
27246 {
27247 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27248 return 64;
27249 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
27250 return 128;
27251 }
27252 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27253 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27254 return BITS_PER_WORD;
27255
27256 return align;
27257 }
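/* Two illustrative cases of the function above: a DFmode REAL_CST that
   would ordinarily get 32-bit alignment is bumped to 64 bits, and, when
   not optimizing for size, a STRING_CST of 31 or more characters is given
   BITS_PER_WORD alignment.  Shorter strings and constants that are already
   sufficiently aligned simply keep ALIGN.  */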
27258
27259 /* Compute the alignment for a static variable.
27260 TYPE is the data type, and ALIGN is the alignment that
27261 the object would ordinarily have. The value of this function is used
27262 instead of that alignment to align the object. */
27263
27264 int
27265 ix86_data_alignment (tree type, int align, bool opt)
27266 {
27267 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27268 for symbols from other compilation units or symbols that don't need
27269 to bind locally. In order to preserve some ABI compatibility with
27270 those compilers, ensure we don't decrease alignment from what we
27271 used to assume. */
27272
27273 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
27274
27275 /* A data structure equal to or greater than the size of a cache line
27276 (64 bytes in the Pentium 4 and other recent Intel processors, including
27277 processors based on the Intel Core microarchitecture) should be aligned
27278 so that its base address is a multiple of the cache line size. */
27279
27280 int max_align
27281 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27282
27283 if (max_align < BITS_PER_WORD)
27284 max_align = BITS_PER_WORD;
27285
27286 switch (ix86_align_data_type)
27287 {
27288 case ix86_align_data_type_abi: opt = false; break;
27289 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
27290 case ix86_align_data_type_cacheline: break;
27291 }
27292
27293 if (opt
27294 && AGGREGATE_TYPE_P (type)
27295 && TYPE_SIZE (type)
27296 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27297 {
27298 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27299 && align < max_align_compat)
27300 align = max_align_compat;
27301 if (wi::geu_p (TYPE_SIZE (type), max_align)
27302 && align < max_align)
27303 align = max_align;
27304 }
27305
27306 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
27307 to a 16-byte boundary. */
27308 if (TARGET_64BIT)
27309 {
27310 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27311 && TYPE_SIZE (type)
27312 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27313 && wi::geu_p (TYPE_SIZE (type), 128)
27314 && align < 128)
27315 return 128;
27316 }
27317
27318 if (!opt)
27319 return align;
27320
27321 if (TREE_CODE (type) == ARRAY_TYPE)
27322 {
27323 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27324 return 64;
27325 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27326 return 128;
27327 }
27328 else if (TREE_CODE (type) == COMPLEX_TYPE)
27329 {
27330
27331 if (TYPE_MODE (type) == DCmode && align < 64)
27332 return 64;
27333 if ((TYPE_MODE (type) == XCmode
27334 || TYPE_MODE (type) == TCmode) && align < 128)
27335 return 128;
27336 }
27337 else if ((TREE_CODE (type) == RECORD_TYPE
27338 || TREE_CODE (type) == UNION_TYPE
27339 || TREE_CODE (type) == QUAL_UNION_TYPE)
27340 && TYPE_FIELDS (type))
27341 {
27342 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27343 return 64;
27344 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27345 return 128;
27346 }
27347 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27348 || TREE_CODE (type) == INTEGER_TYPE)
27349 {
27350 if (TYPE_MODE (type) == DFmode && align < 64)
27351 return 64;
27352 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27353 return 128;
27354 }
27355
27356 return align;
27357 }
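/* Worked example for the function above (x86-64, OPT true, and assuming
   the usual 64-byte prefetch block, so max_align is 512 bits): a global
   "double a[2]" is exactly 16 bytes, so the ABI clause returns 128-bit
   alignment; a 32-byte aggregate is raised to max_align_compat (256 bits)
   to stay compatible with what GCC 4.8 assumed; and a 64-byte or larger
   aggregate is padded up to the full cache-line alignment of 512 bits.
   With OPT false only the ABI-mandated 128-bit array rule applies.  */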
27358
27359 /* Compute the alignment for a local variable or a stack slot. EXP is
27360 the data type or decl itself, MODE is the widest mode available and
27361 ALIGN is the alignment that the object would ordinarily have. The
27362 value of this macro is used instead of that alignment to align the
27363 object. */
27364
27365 unsigned int
27366 ix86_local_alignment (tree exp, machine_mode mode,
27367 unsigned int align)
27368 {
27369 tree type, decl;
27370
27371 if (exp && DECL_P (exp))
27372 {
27373 type = TREE_TYPE (exp);
27374 decl = exp;
27375 }
27376 else
27377 {
27378 type = exp;
27379 decl = NULL;
27380 }
27381
27382 /* Don't do dynamic stack realignment for long long objects with
27383 -mpreferred-stack-boundary=2. */
27384 if (!TARGET_64BIT
27385 && align == 64
27386 && ix86_preferred_stack_boundary < 64
27387 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27388 && (!type || !TYPE_USER_ALIGN (type))
27389 && (!decl || !DECL_USER_ALIGN (decl)))
27390 align = 32;
27391
27392 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
27393 register in MODE. We will return the largest alignment of XF
27394 and DF. */
27395 if (!type)
27396 {
27397 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27398 align = GET_MODE_ALIGNMENT (DFmode);
27399 return align;
27400 }
27401
27402 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
27403 to a 16-byte boundary. The exact wording is:
27404
27405 An array uses the same alignment as its elements, except that a local or
27406 global array variable of length at least 16 bytes or
27407 a C99 variable-length array variable always has alignment of at least 16 bytes.
27408
27409 This was added to allow use of aligned SSE instructions on arrays. The
27410 rule is meant for static storage (where the compiler cannot do the analysis
27411 by itself). We follow it for automatic variables only when convenient.
27412 We fully control everything in the function being compiled, and functions
27413 from other units cannot rely on the alignment.
27414
27415 Exclude the va_list type. It is the common case of a local array where
27416 we cannot benefit from the alignment.
27417
27418 TODO: Probably one should optimize for size only when the variable is not escaping. */
27419 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27420 && TARGET_SSE)
27421 {
27422 if (AGGREGATE_TYPE_P (type)
27423 && (va_list_type_node == NULL_TREE
27424 || (TYPE_MAIN_VARIANT (type)
27425 != TYPE_MAIN_VARIANT (va_list_type_node)))
27426 && TYPE_SIZE (type)
27427 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27428 && wi::geu_p (TYPE_SIZE (type), 16)
27429 && align < 128)
27430 return 128;
27431 }
27432 if (TREE_CODE (type) == ARRAY_TYPE)
27433 {
27434 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27435 return 64;
27436 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27437 return 128;
27438 }
27439 else if (TREE_CODE (type) == COMPLEX_TYPE)
27440 {
27441 if (TYPE_MODE (type) == DCmode && align < 64)
27442 return 64;
27443 if ((TYPE_MODE (type) == XCmode
27444 || TYPE_MODE (type) == TCmode) && align < 128)
27445 return 128;
27446 }
27447 else if ((TREE_CODE (type) == RECORD_TYPE
27448 || TREE_CODE (type) == UNION_TYPE
27449 || TREE_CODE (type) == QUAL_UNION_TYPE)
27450 && TYPE_FIELDS (type))
27451 {
27452 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27453 return 64;
27454 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27455 return 128;
27456 }
27457 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27458 || TREE_CODE (type) == INTEGER_TYPE)
27459 {
27460
27461 if (TYPE_MODE (type) == DFmode && align < 64)
27462 return 64;
27463 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27464 return 128;
27465 }
27466 return align;
27467 }
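/* Worked example for the automatic-variable rules above (x86-64, SSE
   enabled, optimizing for speed): a local "char buf[32]" is an aggregate
   with a constant size that passes the size check, and it is not a
   va_list, so it is given 128-bit alignment suitable for aligned SSE
   accesses; a __builtin_va_list local keeps its ordinary alignment.  On
   ia32 with -mpreferred-stack-boundary=2, a plain "long long" local
   without user-specified alignment is dropped back to 32-bit alignment so
   that it does not force dynamic stack realignment.  */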
27468
27469 /* Compute the minimum required alignment for dynamic stack realignment
27470 purposes for a local variable, parameter or a stack slot. EXP is
27471 the data type or decl itself, MODE is its mode and ALIGN is the
27472 alignment that the object would ordinarily have. */
27473
27474 unsigned int
27475 ix86_minimum_alignment (tree exp, machine_mode mode,
27476 unsigned int align)
27477 {
27478 tree type, decl;
27479
27480 if (exp && DECL_P (exp))
27481 {
27482 type = TREE_TYPE (exp);
27483 decl = exp;
27484 }
27485 else
27486 {
27487 type = exp;
27488 decl = NULL;
27489 }
27490
27491 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27492 return align;
27493
27494 /* Don't do dynamic stack realignment for long long objects with
27495 -mpreferred-stack-boundary=2. */
27496 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27497 && (!type || !TYPE_USER_ALIGN (type))
27498 && (!decl || !DECL_USER_ALIGN (decl)))
27499 return 32;
27500
27501 return align;
27502 }
27503 \f
27504 /* Find a location for the static chain incoming to a nested function.
27505 This is a register, unless all free registers are used by arguments. */
27506
27507 static rtx
27508 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27509 {
27510 unsigned regno;
27511
27512 /* While this function won't be called by the middle-end when a static
27513 chain isn't needed, it's also used throughout the backend so it's
27514 easiest to keep this check centralized. */
27515 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27516 return NULL;
27517
27518 if (TARGET_64BIT)
27519 {
27520 /* We always use R10 in 64-bit mode. */
27521 regno = R10_REG;
27522 }
27523 else
27524 {
27525 const_tree fntype, fndecl;
27526 unsigned int ccvt;
27527
27528 /* By default in 32-bit mode we use ECX to pass the static chain. */
27529 regno = CX_REG;
27530
27531 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27532 {
27533 fntype = TREE_TYPE (fndecl_or_type);
27534 fndecl = fndecl_or_type;
27535 }
27536 else
27537 {
27538 fntype = fndecl_or_type;
27539 fndecl = NULL;
27540 }
27541
27542 ccvt = ix86_get_callcvt (fntype);
27543 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27544 {
27545 /* Fastcall functions use ecx/edx for arguments, which leaves
27546 us with EAX for the static chain.
27547 Thiscall functions use ecx for arguments, which also
27548 leaves us with EAX for the static chain. */
27549 regno = AX_REG;
27550 }
27551 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27552 {
27553 /* Thiscall functions use ecx for arguments, which leaves
27554 us with EAX and EDX for the static chain.
27555 For ABI compatibility we use EAX. */
27556 regno = AX_REG;
27557 }
27558 else if (ix86_function_regparm (fntype, fndecl) == 3)
27559 {
27560 /* For regparm 3, we have no free call-clobbered registers in
27561 which to store the static chain. In order to implement this,
27562 we have the trampoline push the static chain to the stack.
27563 However, we can't push a value below the return address when
27564 we call the nested function directly, so we have to use an
27565 alternate entry point. For this we use ESI, and have the
27566 alternate entry point push ESI, so that things appear the
27567 same once we're executing the nested function. */
27568 if (incoming_p)
27569 {
27570 if (fndecl == current_function_decl)
27571 ix86_static_chain_on_stack = true;
27572 return gen_frame_mem (SImode,
27573 plus_constant (Pmode,
27574 arg_pointer_rtx, -8));
27575 }
27576 regno = SI_REG;
27577 }
27578 }
27579
27580 return gen_rtx_REG (Pmode, regno);
27581 }
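/* Summary of the choices made above, for reference: on 64-bit targets the
   static chain is always R10.  On 32-bit targets a plain nested function
   receives it in ECX; fastcall and thiscall functions, whose argument
   registers include ECX, receive it in EAX; and with regparm(3) no
   call-clobbered register is free, so the incoming chain is the stack
   slot at arg_pointer - 8 written by the trampoline, while outgoing calls
   pass it in ESI via the alternate entry point described above.  */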
27582
27583 /* Emit RTL insns to initialize the variable parts of a trampoline.
27584 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27585 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27586 to be passed to the target function. */
27587
27588 static void
27589 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27590 {
27591 rtx mem, fnaddr;
27592 int opcode;
27593 int offset = 0;
27594
27595 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27596
27597 if (TARGET_64BIT)
27598 {
27599 int size;
27600
27601 /* Load the function address into r11. Try to load the address using
27602 the shorter movl instead of movabs. We may want to support
27603 movq for kernel mode, but kernel does not use trampolines at
27604 the moment. FNADDR is a 32-bit address and may not be in
27605 DImode when ptr_mode == SImode. Always use movl in this
27606 case. */
27607 if (ptr_mode == SImode
27608 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27609 {
27610 fnaddr = copy_addr_to_reg (fnaddr);
27611
27612 mem = adjust_address (m_tramp, HImode, offset);
27613 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27614
27615 mem = adjust_address (m_tramp, SImode, offset + 2);
27616 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27617 offset += 6;
27618 }
27619 else
27620 {
27621 mem = adjust_address (m_tramp, HImode, offset);
27622 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27623
27624 mem = adjust_address (m_tramp, DImode, offset + 2);
27625 emit_move_insn (mem, fnaddr);
27626 offset += 10;
27627 }
27628
27629 /* Load static chain using movabs to r10. Use the shorter movl
27630 instead of movabs when ptr_mode == SImode. */
27631 if (ptr_mode == SImode)
27632 {
27633 opcode = 0xba41;
27634 size = 6;
27635 }
27636 else
27637 {
27638 opcode = 0xba49;
27639 size = 10;
27640 }
27641
27642 mem = adjust_address (m_tramp, HImode, offset);
27643 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27644
27645 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27646 emit_move_insn (mem, chain_value);
27647 offset += size;
27648
27649 /* Jump to r11; the last (unused) byte is a nop, only there to
27650 pad the write out to a single 32-bit store. */
27651 mem = adjust_address (m_tramp, SImode, offset);
27652 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27653 offset += 4;
27654 }
27655 else
27656 {
27657 rtx disp, chain;
27658
27659 /* Depending on the static chain location, either load a register
27660 with a constant, or push the constant to the stack. All of the
27661 instructions are the same size. */
27662 chain = ix86_static_chain (fndecl, true);
27663 if (REG_P (chain))
27664 {
27665 switch (REGNO (chain))
27666 {
27667 case AX_REG:
27668 opcode = 0xb8; break;
27669 case CX_REG:
27670 opcode = 0xb9; break;
27671 default:
27672 gcc_unreachable ();
27673 }
27674 }
27675 else
27676 opcode = 0x68;
27677
27678 mem = adjust_address (m_tramp, QImode, offset);
27679 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27680
27681 mem = adjust_address (m_tramp, SImode, offset + 1);
27682 emit_move_insn (mem, chain_value);
27683 offset += 5;
27684
27685 mem = adjust_address (m_tramp, QImode, offset);
27686 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27687
27688 mem = adjust_address (m_tramp, SImode, offset + 1);
27689
27690 /* Compute offset from the end of the jmp to the target function.
27691 In the case in which the trampoline stores the static chain on
27692 the stack, we need to skip the first insn, which pushes the
27693 (call-saved) static chain register; this push is 1 byte. */
27694 offset += 5;
27695 disp = expand_binop (SImode, sub_optab, fnaddr,
27696 plus_constant (Pmode, XEXP (m_tramp, 0),
27697 offset - (MEM_P (chain) ? 1 : 0)),
27698 NULL_RTX, 1, OPTAB_DIRECT);
27699 emit_move_insn (mem, disp);
27700 }
27701
27702 gcc_assert (offset <= TRAMPOLINE_SIZE);
27703
27704 #ifdef HAVE_ENABLE_EXECUTE_STACK
27705 #ifdef CHECK_EXECUTE_STACK_ENABLED
27706 if (CHECK_EXECUTE_STACK_ENABLED)
27707 #endif
27708 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27709 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27710 #endif
27711 }
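/* For reference, the byte image built above, assuming the standard x86-64
   and ia32 instruction encodings:

     64-bit, using the movabs forms (24 bytes):
       49 bb <fnaddr:8>   movabs $fnaddr, %r11
       49 ba <chain:8>    movabs $chain,  %r10
       49 ff e3 90        rex.WB jmp *%r11; nop (pads the final 32-bit store)

     The conditions above instead select the shorter movl encodings
     41 bb <imm32> for the address when it fits a zero-extended 32-bit
     immediate or when ptr_mode == SImode, and 41 ba <imm32> for the
     chain when ptr_mode == SImode.

     32-bit, static chain in ECX (10 bytes):
       b9 <chain:4>       movl $chain, %ecx
       e9 <disp:4>        jmp  <fnaddr>

   When the static chain lives on the stack, the first opcode is 68
   (push imm32) and the jmp displacement is adjusted by one byte so the
   jump skips the push at the target's entry, as described above.  */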
27712 \f
27713 /* The following file contains several enumerations and data structures
27714 built from the definitions in i386-builtin-types.def. */
27715
27716 #include "i386-builtin-types.inc"
27717
27718 /* Table for the ix86 builtin non-function types. */
27719 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27720
27721 /* Retrieve an element from the above table, building some of
27722 the types lazily. */
27723
27724 static tree
27725 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27726 {
27727 unsigned int index;
27728 tree type, itype;
27729
27730 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27731
27732 type = ix86_builtin_type_tab[(int) tcode];
27733 if (type != NULL)
27734 return type;
27735
27736 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27737 if (tcode <= IX86_BT_LAST_VECT)
27738 {
27739 machine_mode mode;
27740
27741 index = tcode - IX86_BT_LAST_PRIM - 1;
27742 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27743 mode = ix86_builtin_type_vect_mode[index];
27744
27745 type = build_vector_type_for_mode (itype, mode);
27746 }
27747 else
27748 {
27749 int quals;
27750
27751 index = tcode - IX86_BT_LAST_VECT - 1;
27752 if (tcode <= IX86_BT_LAST_PTR)
27753 quals = TYPE_UNQUALIFIED;
27754 else
27755 quals = TYPE_QUAL_CONST;
27756
27757 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27758 if (quals != TYPE_UNQUALIFIED)
27759 itype = build_qualified_type (itype, quals);
27760
27761 type = build_pointer_type (itype);
27762 }
27763
27764 ix86_builtin_type_tab[(int) tcode] = type;
27765 return type;
27766 }
27767
27768 /* Table for the ix86 builtin function types. */
27769 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27770
27771 /* Retrieve an element from the above table, building some of
27772 the types lazily. */
27773
27774 static tree
27775 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27776 {
27777 tree type;
27778
27779 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27780
27781 type = ix86_builtin_func_type_tab[(int) tcode];
27782 if (type != NULL)
27783 return type;
27784
27785 if (tcode <= IX86_BT_LAST_FUNC)
27786 {
27787 unsigned start = ix86_builtin_func_start[(int) tcode];
27788 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27789 tree rtype, atype, args = void_list_node;
27790 unsigned i;
27791
27792 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27793 for (i = after - 1; i > start; --i)
27794 {
27795 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27796 args = tree_cons (NULL, atype, args);
27797 }
27798
27799 type = build_function_type (rtype, args);
27800 }
27801 else
27802 {
27803 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27804 enum ix86_builtin_func_type icode;
27805
27806 icode = ix86_builtin_func_alias_base[index];
27807 type = ix86_get_builtin_func_type (icode);
27808 }
27809
27810 ix86_builtin_func_type_tab[(int) tcode] = type;
27811 return type;
27812 }
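/* A sketch of how the lazy builders above put a function type together
   (the argument codes here are only illustrative; the real entries come
   from i386-builtin-types.inc): for a code whose slice of
   ix86_builtin_func_args is { V4SF, V4SF, V4SF }, the first entry becomes
   the return type and the remaining entries are consed onto
   void_list_node in reverse, yielding the tree for "V4SF (V4SF, V4SF)".
   The result is cached in ix86_builtin_func_type_tab, so later lookups
   are a single table load, and alias codes past IX86_BT_LAST_FUNC simply
   reuse the type of their ix86_builtin_func_alias_base entry.  */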
27813
27814
27815 /* Codes for all the SSE/MMX builtins. */
27816 enum ix86_builtins
27817 {
27818 IX86_BUILTIN_ADDPS,
27819 IX86_BUILTIN_ADDSS,
27820 IX86_BUILTIN_DIVPS,
27821 IX86_BUILTIN_DIVSS,
27822 IX86_BUILTIN_MULPS,
27823 IX86_BUILTIN_MULSS,
27824 IX86_BUILTIN_SUBPS,
27825 IX86_BUILTIN_SUBSS,
27826
27827 IX86_BUILTIN_CMPEQPS,
27828 IX86_BUILTIN_CMPLTPS,
27829 IX86_BUILTIN_CMPLEPS,
27830 IX86_BUILTIN_CMPGTPS,
27831 IX86_BUILTIN_CMPGEPS,
27832 IX86_BUILTIN_CMPNEQPS,
27833 IX86_BUILTIN_CMPNLTPS,
27834 IX86_BUILTIN_CMPNLEPS,
27835 IX86_BUILTIN_CMPNGTPS,
27836 IX86_BUILTIN_CMPNGEPS,
27837 IX86_BUILTIN_CMPORDPS,
27838 IX86_BUILTIN_CMPUNORDPS,
27839 IX86_BUILTIN_CMPEQSS,
27840 IX86_BUILTIN_CMPLTSS,
27841 IX86_BUILTIN_CMPLESS,
27842 IX86_BUILTIN_CMPNEQSS,
27843 IX86_BUILTIN_CMPNLTSS,
27844 IX86_BUILTIN_CMPNLESS,
27845 IX86_BUILTIN_CMPORDSS,
27846 IX86_BUILTIN_CMPUNORDSS,
27847
27848 IX86_BUILTIN_COMIEQSS,
27849 IX86_BUILTIN_COMILTSS,
27850 IX86_BUILTIN_COMILESS,
27851 IX86_BUILTIN_COMIGTSS,
27852 IX86_BUILTIN_COMIGESS,
27853 IX86_BUILTIN_COMINEQSS,
27854 IX86_BUILTIN_UCOMIEQSS,
27855 IX86_BUILTIN_UCOMILTSS,
27856 IX86_BUILTIN_UCOMILESS,
27857 IX86_BUILTIN_UCOMIGTSS,
27858 IX86_BUILTIN_UCOMIGESS,
27859 IX86_BUILTIN_UCOMINEQSS,
27860
27861 IX86_BUILTIN_CVTPI2PS,
27862 IX86_BUILTIN_CVTPS2PI,
27863 IX86_BUILTIN_CVTSI2SS,
27864 IX86_BUILTIN_CVTSI642SS,
27865 IX86_BUILTIN_CVTSS2SI,
27866 IX86_BUILTIN_CVTSS2SI64,
27867 IX86_BUILTIN_CVTTPS2PI,
27868 IX86_BUILTIN_CVTTSS2SI,
27869 IX86_BUILTIN_CVTTSS2SI64,
27870
27871 IX86_BUILTIN_MAXPS,
27872 IX86_BUILTIN_MAXSS,
27873 IX86_BUILTIN_MINPS,
27874 IX86_BUILTIN_MINSS,
27875
27876 IX86_BUILTIN_LOADUPS,
27877 IX86_BUILTIN_STOREUPS,
27878 IX86_BUILTIN_MOVSS,
27879
27880 IX86_BUILTIN_MOVHLPS,
27881 IX86_BUILTIN_MOVLHPS,
27882 IX86_BUILTIN_LOADHPS,
27883 IX86_BUILTIN_LOADLPS,
27884 IX86_BUILTIN_STOREHPS,
27885 IX86_BUILTIN_STORELPS,
27886
27887 IX86_BUILTIN_MASKMOVQ,
27888 IX86_BUILTIN_MOVMSKPS,
27889 IX86_BUILTIN_PMOVMSKB,
27890
27891 IX86_BUILTIN_MOVNTPS,
27892 IX86_BUILTIN_MOVNTQ,
27893
27894 IX86_BUILTIN_LOADDQU,
27895 IX86_BUILTIN_STOREDQU,
27896
27897 IX86_BUILTIN_PACKSSWB,
27898 IX86_BUILTIN_PACKSSDW,
27899 IX86_BUILTIN_PACKUSWB,
27900
27901 IX86_BUILTIN_PADDB,
27902 IX86_BUILTIN_PADDW,
27903 IX86_BUILTIN_PADDD,
27904 IX86_BUILTIN_PADDQ,
27905 IX86_BUILTIN_PADDSB,
27906 IX86_BUILTIN_PADDSW,
27907 IX86_BUILTIN_PADDUSB,
27908 IX86_BUILTIN_PADDUSW,
27909 IX86_BUILTIN_PSUBB,
27910 IX86_BUILTIN_PSUBW,
27911 IX86_BUILTIN_PSUBD,
27912 IX86_BUILTIN_PSUBQ,
27913 IX86_BUILTIN_PSUBSB,
27914 IX86_BUILTIN_PSUBSW,
27915 IX86_BUILTIN_PSUBUSB,
27916 IX86_BUILTIN_PSUBUSW,
27917
27918 IX86_BUILTIN_PAND,
27919 IX86_BUILTIN_PANDN,
27920 IX86_BUILTIN_POR,
27921 IX86_BUILTIN_PXOR,
27922
27923 IX86_BUILTIN_PAVGB,
27924 IX86_BUILTIN_PAVGW,
27925
27926 IX86_BUILTIN_PCMPEQB,
27927 IX86_BUILTIN_PCMPEQW,
27928 IX86_BUILTIN_PCMPEQD,
27929 IX86_BUILTIN_PCMPGTB,
27930 IX86_BUILTIN_PCMPGTW,
27931 IX86_BUILTIN_PCMPGTD,
27932
27933 IX86_BUILTIN_PMADDWD,
27934
27935 IX86_BUILTIN_PMAXSW,
27936 IX86_BUILTIN_PMAXUB,
27937 IX86_BUILTIN_PMINSW,
27938 IX86_BUILTIN_PMINUB,
27939
27940 IX86_BUILTIN_PMULHUW,
27941 IX86_BUILTIN_PMULHW,
27942 IX86_BUILTIN_PMULLW,
27943
27944 IX86_BUILTIN_PSADBW,
27945 IX86_BUILTIN_PSHUFW,
27946
27947 IX86_BUILTIN_PSLLW,
27948 IX86_BUILTIN_PSLLD,
27949 IX86_BUILTIN_PSLLQ,
27950 IX86_BUILTIN_PSRAW,
27951 IX86_BUILTIN_PSRAD,
27952 IX86_BUILTIN_PSRLW,
27953 IX86_BUILTIN_PSRLD,
27954 IX86_BUILTIN_PSRLQ,
27955 IX86_BUILTIN_PSLLWI,
27956 IX86_BUILTIN_PSLLDI,
27957 IX86_BUILTIN_PSLLQI,
27958 IX86_BUILTIN_PSRAWI,
27959 IX86_BUILTIN_PSRADI,
27960 IX86_BUILTIN_PSRLWI,
27961 IX86_BUILTIN_PSRLDI,
27962 IX86_BUILTIN_PSRLQI,
27963
27964 IX86_BUILTIN_PUNPCKHBW,
27965 IX86_BUILTIN_PUNPCKHWD,
27966 IX86_BUILTIN_PUNPCKHDQ,
27967 IX86_BUILTIN_PUNPCKLBW,
27968 IX86_BUILTIN_PUNPCKLWD,
27969 IX86_BUILTIN_PUNPCKLDQ,
27970
27971 IX86_BUILTIN_SHUFPS,
27972
27973 IX86_BUILTIN_RCPPS,
27974 IX86_BUILTIN_RCPSS,
27975 IX86_BUILTIN_RSQRTPS,
27976 IX86_BUILTIN_RSQRTPS_NR,
27977 IX86_BUILTIN_RSQRTSS,
27978 IX86_BUILTIN_RSQRTF,
27979 IX86_BUILTIN_SQRTPS,
27980 IX86_BUILTIN_SQRTPS_NR,
27981 IX86_BUILTIN_SQRTSS,
27982
27983 IX86_BUILTIN_UNPCKHPS,
27984 IX86_BUILTIN_UNPCKLPS,
27985
27986 IX86_BUILTIN_ANDPS,
27987 IX86_BUILTIN_ANDNPS,
27988 IX86_BUILTIN_ORPS,
27989 IX86_BUILTIN_XORPS,
27990
27991 IX86_BUILTIN_EMMS,
27992 IX86_BUILTIN_LDMXCSR,
27993 IX86_BUILTIN_STMXCSR,
27994 IX86_BUILTIN_SFENCE,
27995
27996 IX86_BUILTIN_FXSAVE,
27997 IX86_BUILTIN_FXRSTOR,
27998 IX86_BUILTIN_FXSAVE64,
27999 IX86_BUILTIN_FXRSTOR64,
28000
28001 IX86_BUILTIN_XSAVE,
28002 IX86_BUILTIN_XRSTOR,
28003 IX86_BUILTIN_XSAVE64,
28004 IX86_BUILTIN_XRSTOR64,
28005
28006 IX86_BUILTIN_XSAVEOPT,
28007 IX86_BUILTIN_XSAVEOPT64,
28008
28009 IX86_BUILTIN_XSAVEC,
28010 IX86_BUILTIN_XSAVEC64,
28011
28012 IX86_BUILTIN_XSAVES,
28013 IX86_BUILTIN_XRSTORS,
28014 IX86_BUILTIN_XSAVES64,
28015 IX86_BUILTIN_XRSTORS64,
28016
28017 /* 3DNow! Original */
28018 IX86_BUILTIN_FEMMS,
28019 IX86_BUILTIN_PAVGUSB,
28020 IX86_BUILTIN_PF2ID,
28021 IX86_BUILTIN_PFACC,
28022 IX86_BUILTIN_PFADD,
28023 IX86_BUILTIN_PFCMPEQ,
28024 IX86_BUILTIN_PFCMPGE,
28025 IX86_BUILTIN_PFCMPGT,
28026 IX86_BUILTIN_PFMAX,
28027 IX86_BUILTIN_PFMIN,
28028 IX86_BUILTIN_PFMUL,
28029 IX86_BUILTIN_PFRCP,
28030 IX86_BUILTIN_PFRCPIT1,
28031 IX86_BUILTIN_PFRCPIT2,
28032 IX86_BUILTIN_PFRSQIT1,
28033 IX86_BUILTIN_PFRSQRT,
28034 IX86_BUILTIN_PFSUB,
28035 IX86_BUILTIN_PFSUBR,
28036 IX86_BUILTIN_PI2FD,
28037 IX86_BUILTIN_PMULHRW,
28038
28039 /* 3DNow! Athlon Extensions */
28040 IX86_BUILTIN_PF2IW,
28041 IX86_BUILTIN_PFNACC,
28042 IX86_BUILTIN_PFPNACC,
28043 IX86_BUILTIN_PI2FW,
28044 IX86_BUILTIN_PSWAPDSI,
28045 IX86_BUILTIN_PSWAPDSF,
28046
28047 /* SSE2 */
28048 IX86_BUILTIN_ADDPD,
28049 IX86_BUILTIN_ADDSD,
28050 IX86_BUILTIN_DIVPD,
28051 IX86_BUILTIN_DIVSD,
28052 IX86_BUILTIN_MULPD,
28053 IX86_BUILTIN_MULSD,
28054 IX86_BUILTIN_SUBPD,
28055 IX86_BUILTIN_SUBSD,
28056
28057 IX86_BUILTIN_CMPEQPD,
28058 IX86_BUILTIN_CMPLTPD,
28059 IX86_BUILTIN_CMPLEPD,
28060 IX86_BUILTIN_CMPGTPD,
28061 IX86_BUILTIN_CMPGEPD,
28062 IX86_BUILTIN_CMPNEQPD,
28063 IX86_BUILTIN_CMPNLTPD,
28064 IX86_BUILTIN_CMPNLEPD,
28065 IX86_BUILTIN_CMPNGTPD,
28066 IX86_BUILTIN_CMPNGEPD,
28067 IX86_BUILTIN_CMPORDPD,
28068 IX86_BUILTIN_CMPUNORDPD,
28069 IX86_BUILTIN_CMPEQSD,
28070 IX86_BUILTIN_CMPLTSD,
28071 IX86_BUILTIN_CMPLESD,
28072 IX86_BUILTIN_CMPNEQSD,
28073 IX86_BUILTIN_CMPNLTSD,
28074 IX86_BUILTIN_CMPNLESD,
28075 IX86_BUILTIN_CMPORDSD,
28076 IX86_BUILTIN_CMPUNORDSD,
28077
28078 IX86_BUILTIN_COMIEQSD,
28079 IX86_BUILTIN_COMILTSD,
28080 IX86_BUILTIN_COMILESD,
28081 IX86_BUILTIN_COMIGTSD,
28082 IX86_BUILTIN_COMIGESD,
28083 IX86_BUILTIN_COMINEQSD,
28084 IX86_BUILTIN_UCOMIEQSD,
28085 IX86_BUILTIN_UCOMILTSD,
28086 IX86_BUILTIN_UCOMILESD,
28087 IX86_BUILTIN_UCOMIGTSD,
28088 IX86_BUILTIN_UCOMIGESD,
28089 IX86_BUILTIN_UCOMINEQSD,
28090
28091 IX86_BUILTIN_MAXPD,
28092 IX86_BUILTIN_MAXSD,
28093 IX86_BUILTIN_MINPD,
28094 IX86_BUILTIN_MINSD,
28095
28096 IX86_BUILTIN_ANDPD,
28097 IX86_BUILTIN_ANDNPD,
28098 IX86_BUILTIN_ORPD,
28099 IX86_BUILTIN_XORPD,
28100
28101 IX86_BUILTIN_SQRTPD,
28102 IX86_BUILTIN_SQRTSD,
28103
28104 IX86_BUILTIN_UNPCKHPD,
28105 IX86_BUILTIN_UNPCKLPD,
28106
28107 IX86_BUILTIN_SHUFPD,
28108
28109 IX86_BUILTIN_LOADUPD,
28110 IX86_BUILTIN_STOREUPD,
28111 IX86_BUILTIN_MOVSD,
28112
28113 IX86_BUILTIN_LOADHPD,
28114 IX86_BUILTIN_LOADLPD,
28115
28116 IX86_BUILTIN_CVTDQ2PD,
28117 IX86_BUILTIN_CVTDQ2PS,
28118
28119 IX86_BUILTIN_CVTPD2DQ,
28120 IX86_BUILTIN_CVTPD2PI,
28121 IX86_BUILTIN_CVTPD2PS,
28122 IX86_BUILTIN_CVTTPD2DQ,
28123 IX86_BUILTIN_CVTTPD2PI,
28124
28125 IX86_BUILTIN_CVTPI2PD,
28126 IX86_BUILTIN_CVTSI2SD,
28127 IX86_BUILTIN_CVTSI642SD,
28128
28129 IX86_BUILTIN_CVTSD2SI,
28130 IX86_BUILTIN_CVTSD2SI64,
28131 IX86_BUILTIN_CVTSD2SS,
28132 IX86_BUILTIN_CVTSS2SD,
28133 IX86_BUILTIN_CVTTSD2SI,
28134 IX86_BUILTIN_CVTTSD2SI64,
28135
28136 IX86_BUILTIN_CVTPS2DQ,
28137 IX86_BUILTIN_CVTPS2PD,
28138 IX86_BUILTIN_CVTTPS2DQ,
28139
28140 IX86_BUILTIN_MOVNTI,
28141 IX86_BUILTIN_MOVNTI64,
28142 IX86_BUILTIN_MOVNTPD,
28143 IX86_BUILTIN_MOVNTDQ,
28144
28145 IX86_BUILTIN_MOVQ128,
28146
28147 /* SSE2 MMX */
28148 IX86_BUILTIN_MASKMOVDQU,
28149 IX86_BUILTIN_MOVMSKPD,
28150 IX86_BUILTIN_PMOVMSKB128,
28151
28152 IX86_BUILTIN_PACKSSWB128,
28153 IX86_BUILTIN_PACKSSDW128,
28154 IX86_BUILTIN_PACKUSWB128,
28155
28156 IX86_BUILTIN_PADDB128,
28157 IX86_BUILTIN_PADDW128,
28158 IX86_BUILTIN_PADDD128,
28159 IX86_BUILTIN_PADDQ128,
28160 IX86_BUILTIN_PADDSB128,
28161 IX86_BUILTIN_PADDSW128,
28162 IX86_BUILTIN_PADDUSB128,
28163 IX86_BUILTIN_PADDUSW128,
28164 IX86_BUILTIN_PSUBB128,
28165 IX86_BUILTIN_PSUBW128,
28166 IX86_BUILTIN_PSUBD128,
28167 IX86_BUILTIN_PSUBQ128,
28168 IX86_BUILTIN_PSUBSB128,
28169 IX86_BUILTIN_PSUBSW128,
28170 IX86_BUILTIN_PSUBUSB128,
28171 IX86_BUILTIN_PSUBUSW128,
28172
28173 IX86_BUILTIN_PAND128,
28174 IX86_BUILTIN_PANDN128,
28175 IX86_BUILTIN_POR128,
28176 IX86_BUILTIN_PXOR128,
28177
28178 IX86_BUILTIN_PAVGB128,
28179 IX86_BUILTIN_PAVGW128,
28180
28181 IX86_BUILTIN_PCMPEQB128,
28182 IX86_BUILTIN_PCMPEQW128,
28183 IX86_BUILTIN_PCMPEQD128,
28184 IX86_BUILTIN_PCMPGTB128,
28185 IX86_BUILTIN_PCMPGTW128,
28186 IX86_BUILTIN_PCMPGTD128,
28187
28188 IX86_BUILTIN_PMADDWD128,
28189
28190 IX86_BUILTIN_PMAXSW128,
28191 IX86_BUILTIN_PMAXUB128,
28192 IX86_BUILTIN_PMINSW128,
28193 IX86_BUILTIN_PMINUB128,
28194
28195 IX86_BUILTIN_PMULUDQ,
28196 IX86_BUILTIN_PMULUDQ128,
28197 IX86_BUILTIN_PMULHUW128,
28198 IX86_BUILTIN_PMULHW128,
28199 IX86_BUILTIN_PMULLW128,
28200
28201 IX86_BUILTIN_PSADBW128,
28202 IX86_BUILTIN_PSHUFHW,
28203 IX86_BUILTIN_PSHUFLW,
28204 IX86_BUILTIN_PSHUFD,
28205
28206 IX86_BUILTIN_PSLLDQI128,
28207 IX86_BUILTIN_PSLLWI128,
28208 IX86_BUILTIN_PSLLDI128,
28209 IX86_BUILTIN_PSLLQI128,
28210 IX86_BUILTIN_PSRAWI128,
28211 IX86_BUILTIN_PSRADI128,
28212 IX86_BUILTIN_PSRLDQI128,
28213 IX86_BUILTIN_PSRLWI128,
28214 IX86_BUILTIN_PSRLDI128,
28215 IX86_BUILTIN_PSRLQI128,
28216
28217 IX86_BUILTIN_PSLLDQ128,
28218 IX86_BUILTIN_PSLLW128,
28219 IX86_BUILTIN_PSLLD128,
28220 IX86_BUILTIN_PSLLQ128,
28221 IX86_BUILTIN_PSRAW128,
28222 IX86_BUILTIN_PSRAD128,
28223 IX86_BUILTIN_PSRLW128,
28224 IX86_BUILTIN_PSRLD128,
28225 IX86_BUILTIN_PSRLQ128,
28226
28227 IX86_BUILTIN_PUNPCKHBW128,
28228 IX86_BUILTIN_PUNPCKHWD128,
28229 IX86_BUILTIN_PUNPCKHDQ128,
28230 IX86_BUILTIN_PUNPCKHQDQ128,
28231 IX86_BUILTIN_PUNPCKLBW128,
28232 IX86_BUILTIN_PUNPCKLWD128,
28233 IX86_BUILTIN_PUNPCKLDQ128,
28234 IX86_BUILTIN_PUNPCKLQDQ128,
28235
28236 IX86_BUILTIN_CLFLUSH,
28237 IX86_BUILTIN_MFENCE,
28238 IX86_BUILTIN_LFENCE,
28239 IX86_BUILTIN_PAUSE,
28240
28241 IX86_BUILTIN_FNSTENV,
28242 IX86_BUILTIN_FLDENV,
28243 IX86_BUILTIN_FNSTSW,
28244 IX86_BUILTIN_FNCLEX,
28245
28246 IX86_BUILTIN_BSRSI,
28247 IX86_BUILTIN_BSRDI,
28248 IX86_BUILTIN_RDPMC,
28249 IX86_BUILTIN_RDTSC,
28250 IX86_BUILTIN_RDTSCP,
28251 IX86_BUILTIN_ROLQI,
28252 IX86_BUILTIN_ROLHI,
28253 IX86_BUILTIN_RORQI,
28254 IX86_BUILTIN_RORHI,
28255
28256 /* SSE3. */
28257 IX86_BUILTIN_ADDSUBPS,
28258 IX86_BUILTIN_HADDPS,
28259 IX86_BUILTIN_HSUBPS,
28260 IX86_BUILTIN_MOVSHDUP,
28261 IX86_BUILTIN_MOVSLDUP,
28262 IX86_BUILTIN_ADDSUBPD,
28263 IX86_BUILTIN_HADDPD,
28264 IX86_BUILTIN_HSUBPD,
28265 IX86_BUILTIN_LDDQU,
28266
28267 IX86_BUILTIN_MONITOR,
28268 IX86_BUILTIN_MWAIT,
28269
28270 /* SSSE3. */
28271 IX86_BUILTIN_PHADDW,
28272 IX86_BUILTIN_PHADDD,
28273 IX86_BUILTIN_PHADDSW,
28274 IX86_BUILTIN_PHSUBW,
28275 IX86_BUILTIN_PHSUBD,
28276 IX86_BUILTIN_PHSUBSW,
28277 IX86_BUILTIN_PMADDUBSW,
28278 IX86_BUILTIN_PMULHRSW,
28279 IX86_BUILTIN_PSHUFB,
28280 IX86_BUILTIN_PSIGNB,
28281 IX86_BUILTIN_PSIGNW,
28282 IX86_BUILTIN_PSIGND,
28283 IX86_BUILTIN_PALIGNR,
28284 IX86_BUILTIN_PABSB,
28285 IX86_BUILTIN_PABSW,
28286 IX86_BUILTIN_PABSD,
28287
28288 IX86_BUILTIN_PHADDW128,
28289 IX86_BUILTIN_PHADDD128,
28290 IX86_BUILTIN_PHADDSW128,
28291 IX86_BUILTIN_PHSUBW128,
28292 IX86_BUILTIN_PHSUBD128,
28293 IX86_BUILTIN_PHSUBSW128,
28294 IX86_BUILTIN_PMADDUBSW128,
28295 IX86_BUILTIN_PMULHRSW128,
28296 IX86_BUILTIN_PSHUFB128,
28297 IX86_BUILTIN_PSIGNB128,
28298 IX86_BUILTIN_PSIGNW128,
28299 IX86_BUILTIN_PSIGND128,
28300 IX86_BUILTIN_PALIGNR128,
28301 IX86_BUILTIN_PABSB128,
28302 IX86_BUILTIN_PABSW128,
28303 IX86_BUILTIN_PABSD128,
28304
28305 /* AMDFAM10 - SSE4A New Instructions. */
28306 IX86_BUILTIN_MOVNTSD,
28307 IX86_BUILTIN_MOVNTSS,
28308 IX86_BUILTIN_EXTRQI,
28309 IX86_BUILTIN_EXTRQ,
28310 IX86_BUILTIN_INSERTQI,
28311 IX86_BUILTIN_INSERTQ,
28312
28313 /* SSE4.1. */
28314 IX86_BUILTIN_BLENDPD,
28315 IX86_BUILTIN_BLENDPS,
28316 IX86_BUILTIN_BLENDVPD,
28317 IX86_BUILTIN_BLENDVPS,
28318 IX86_BUILTIN_PBLENDVB128,
28319 IX86_BUILTIN_PBLENDW128,
28320
28321 IX86_BUILTIN_DPPD,
28322 IX86_BUILTIN_DPPS,
28323
28324 IX86_BUILTIN_INSERTPS128,
28325
28326 IX86_BUILTIN_MOVNTDQA,
28327 IX86_BUILTIN_MPSADBW128,
28328 IX86_BUILTIN_PACKUSDW128,
28329 IX86_BUILTIN_PCMPEQQ,
28330 IX86_BUILTIN_PHMINPOSUW128,
28331
28332 IX86_BUILTIN_PMAXSB128,
28333 IX86_BUILTIN_PMAXSD128,
28334 IX86_BUILTIN_PMAXUD128,
28335 IX86_BUILTIN_PMAXUW128,
28336
28337 IX86_BUILTIN_PMINSB128,
28338 IX86_BUILTIN_PMINSD128,
28339 IX86_BUILTIN_PMINUD128,
28340 IX86_BUILTIN_PMINUW128,
28341
28342 IX86_BUILTIN_PMOVSXBW128,
28343 IX86_BUILTIN_PMOVSXBD128,
28344 IX86_BUILTIN_PMOVSXBQ128,
28345 IX86_BUILTIN_PMOVSXWD128,
28346 IX86_BUILTIN_PMOVSXWQ128,
28347 IX86_BUILTIN_PMOVSXDQ128,
28348
28349 IX86_BUILTIN_PMOVZXBW128,
28350 IX86_BUILTIN_PMOVZXBD128,
28351 IX86_BUILTIN_PMOVZXBQ128,
28352 IX86_BUILTIN_PMOVZXWD128,
28353 IX86_BUILTIN_PMOVZXWQ128,
28354 IX86_BUILTIN_PMOVZXDQ128,
28355
28356 IX86_BUILTIN_PMULDQ128,
28357 IX86_BUILTIN_PMULLD128,
28358
28359 IX86_BUILTIN_ROUNDSD,
28360 IX86_BUILTIN_ROUNDSS,
28361
28362 IX86_BUILTIN_ROUNDPD,
28363 IX86_BUILTIN_ROUNDPS,
28364
28365 IX86_BUILTIN_FLOORPD,
28366 IX86_BUILTIN_CEILPD,
28367 IX86_BUILTIN_TRUNCPD,
28368 IX86_BUILTIN_RINTPD,
28369 IX86_BUILTIN_ROUNDPD_AZ,
28370
28371 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28372 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28373 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28374
28375 IX86_BUILTIN_FLOORPS,
28376 IX86_BUILTIN_CEILPS,
28377 IX86_BUILTIN_TRUNCPS,
28378 IX86_BUILTIN_RINTPS,
28379 IX86_BUILTIN_ROUNDPS_AZ,
28380
28381 IX86_BUILTIN_FLOORPS_SFIX,
28382 IX86_BUILTIN_CEILPS_SFIX,
28383 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28384
28385 IX86_BUILTIN_PTESTZ,
28386 IX86_BUILTIN_PTESTC,
28387 IX86_BUILTIN_PTESTNZC,
28388
28389 IX86_BUILTIN_VEC_INIT_V2SI,
28390 IX86_BUILTIN_VEC_INIT_V4HI,
28391 IX86_BUILTIN_VEC_INIT_V8QI,
28392 IX86_BUILTIN_VEC_EXT_V2DF,
28393 IX86_BUILTIN_VEC_EXT_V2DI,
28394 IX86_BUILTIN_VEC_EXT_V4SF,
28395 IX86_BUILTIN_VEC_EXT_V4SI,
28396 IX86_BUILTIN_VEC_EXT_V8HI,
28397 IX86_BUILTIN_VEC_EXT_V2SI,
28398 IX86_BUILTIN_VEC_EXT_V4HI,
28399 IX86_BUILTIN_VEC_EXT_V16QI,
28400 IX86_BUILTIN_VEC_SET_V2DI,
28401 IX86_BUILTIN_VEC_SET_V4SF,
28402 IX86_BUILTIN_VEC_SET_V4SI,
28403 IX86_BUILTIN_VEC_SET_V8HI,
28404 IX86_BUILTIN_VEC_SET_V4HI,
28405 IX86_BUILTIN_VEC_SET_V16QI,
28406
28407 IX86_BUILTIN_VEC_PACK_SFIX,
28408 IX86_BUILTIN_VEC_PACK_SFIX256,
28409
28410 /* SSE4.2. */
28411 IX86_BUILTIN_CRC32QI,
28412 IX86_BUILTIN_CRC32HI,
28413 IX86_BUILTIN_CRC32SI,
28414 IX86_BUILTIN_CRC32DI,
28415
28416 IX86_BUILTIN_PCMPESTRI128,
28417 IX86_BUILTIN_PCMPESTRM128,
28418 IX86_BUILTIN_PCMPESTRA128,
28419 IX86_BUILTIN_PCMPESTRC128,
28420 IX86_BUILTIN_PCMPESTRO128,
28421 IX86_BUILTIN_PCMPESTRS128,
28422 IX86_BUILTIN_PCMPESTRZ128,
28423 IX86_BUILTIN_PCMPISTRI128,
28424 IX86_BUILTIN_PCMPISTRM128,
28425 IX86_BUILTIN_PCMPISTRA128,
28426 IX86_BUILTIN_PCMPISTRC128,
28427 IX86_BUILTIN_PCMPISTRO128,
28428 IX86_BUILTIN_PCMPISTRS128,
28429 IX86_BUILTIN_PCMPISTRZ128,
28430
28431 IX86_BUILTIN_PCMPGTQ,
28432
28433 /* AES instructions */
28434 IX86_BUILTIN_AESENC128,
28435 IX86_BUILTIN_AESENCLAST128,
28436 IX86_BUILTIN_AESDEC128,
28437 IX86_BUILTIN_AESDECLAST128,
28438 IX86_BUILTIN_AESIMC128,
28439 IX86_BUILTIN_AESKEYGENASSIST128,
28440
28441 /* PCLMUL instruction */
28442 IX86_BUILTIN_PCLMULQDQ128,
28443
28444 /* AVX */
28445 IX86_BUILTIN_ADDPD256,
28446 IX86_BUILTIN_ADDPS256,
28447 IX86_BUILTIN_ADDSUBPD256,
28448 IX86_BUILTIN_ADDSUBPS256,
28449 IX86_BUILTIN_ANDPD256,
28450 IX86_BUILTIN_ANDPS256,
28451 IX86_BUILTIN_ANDNPD256,
28452 IX86_BUILTIN_ANDNPS256,
28453 IX86_BUILTIN_BLENDPD256,
28454 IX86_BUILTIN_BLENDPS256,
28455 IX86_BUILTIN_BLENDVPD256,
28456 IX86_BUILTIN_BLENDVPS256,
28457 IX86_BUILTIN_DIVPD256,
28458 IX86_BUILTIN_DIVPS256,
28459 IX86_BUILTIN_DPPS256,
28460 IX86_BUILTIN_HADDPD256,
28461 IX86_BUILTIN_HADDPS256,
28462 IX86_BUILTIN_HSUBPD256,
28463 IX86_BUILTIN_HSUBPS256,
28464 IX86_BUILTIN_MAXPD256,
28465 IX86_BUILTIN_MAXPS256,
28466 IX86_BUILTIN_MINPD256,
28467 IX86_BUILTIN_MINPS256,
28468 IX86_BUILTIN_MULPD256,
28469 IX86_BUILTIN_MULPS256,
28470 IX86_BUILTIN_ORPD256,
28471 IX86_BUILTIN_ORPS256,
28472 IX86_BUILTIN_SHUFPD256,
28473 IX86_BUILTIN_SHUFPS256,
28474 IX86_BUILTIN_SUBPD256,
28475 IX86_BUILTIN_SUBPS256,
28476 IX86_BUILTIN_XORPD256,
28477 IX86_BUILTIN_XORPS256,
28478 IX86_BUILTIN_CMPSD,
28479 IX86_BUILTIN_CMPSS,
28480 IX86_BUILTIN_CMPPD,
28481 IX86_BUILTIN_CMPPS,
28482 IX86_BUILTIN_CMPPD256,
28483 IX86_BUILTIN_CMPPS256,
28484 IX86_BUILTIN_CVTDQ2PD256,
28485 IX86_BUILTIN_CVTDQ2PS256,
28486 IX86_BUILTIN_CVTPD2PS256,
28487 IX86_BUILTIN_CVTPS2DQ256,
28488 IX86_BUILTIN_CVTPS2PD256,
28489 IX86_BUILTIN_CVTTPD2DQ256,
28490 IX86_BUILTIN_CVTPD2DQ256,
28491 IX86_BUILTIN_CVTTPS2DQ256,
28492 IX86_BUILTIN_EXTRACTF128PD256,
28493 IX86_BUILTIN_EXTRACTF128PS256,
28494 IX86_BUILTIN_EXTRACTF128SI256,
28495 IX86_BUILTIN_VZEROALL,
28496 IX86_BUILTIN_VZEROUPPER,
28497 IX86_BUILTIN_VPERMILVARPD,
28498 IX86_BUILTIN_VPERMILVARPS,
28499 IX86_BUILTIN_VPERMILVARPD256,
28500 IX86_BUILTIN_VPERMILVARPS256,
28501 IX86_BUILTIN_VPERMILPD,
28502 IX86_BUILTIN_VPERMILPS,
28503 IX86_BUILTIN_VPERMILPD256,
28504 IX86_BUILTIN_VPERMILPS256,
28505 IX86_BUILTIN_VPERMIL2PD,
28506 IX86_BUILTIN_VPERMIL2PS,
28507 IX86_BUILTIN_VPERMIL2PD256,
28508 IX86_BUILTIN_VPERMIL2PS256,
28509 IX86_BUILTIN_VPERM2F128PD256,
28510 IX86_BUILTIN_VPERM2F128PS256,
28511 IX86_BUILTIN_VPERM2F128SI256,
28512 IX86_BUILTIN_VBROADCASTSS,
28513 IX86_BUILTIN_VBROADCASTSD256,
28514 IX86_BUILTIN_VBROADCASTSS256,
28515 IX86_BUILTIN_VBROADCASTPD256,
28516 IX86_BUILTIN_VBROADCASTPS256,
28517 IX86_BUILTIN_VINSERTF128PD256,
28518 IX86_BUILTIN_VINSERTF128PS256,
28519 IX86_BUILTIN_VINSERTF128SI256,
28520 IX86_BUILTIN_LOADUPD256,
28521 IX86_BUILTIN_LOADUPS256,
28522 IX86_BUILTIN_STOREUPD256,
28523 IX86_BUILTIN_STOREUPS256,
28524 IX86_BUILTIN_LDDQU256,
28525 IX86_BUILTIN_MOVNTDQ256,
28526 IX86_BUILTIN_MOVNTPD256,
28527 IX86_BUILTIN_MOVNTPS256,
28528 IX86_BUILTIN_LOADDQU256,
28529 IX86_BUILTIN_STOREDQU256,
28530 IX86_BUILTIN_MASKLOADPD,
28531 IX86_BUILTIN_MASKLOADPS,
28532 IX86_BUILTIN_MASKSTOREPD,
28533 IX86_BUILTIN_MASKSTOREPS,
28534 IX86_BUILTIN_MASKLOADPD256,
28535 IX86_BUILTIN_MASKLOADPS256,
28536 IX86_BUILTIN_MASKSTOREPD256,
28537 IX86_BUILTIN_MASKSTOREPS256,
28538 IX86_BUILTIN_MOVSHDUP256,
28539 IX86_BUILTIN_MOVSLDUP256,
28540 IX86_BUILTIN_MOVDDUP256,
28541
28542 IX86_BUILTIN_SQRTPD256,
28543 IX86_BUILTIN_SQRTPS256,
28544 IX86_BUILTIN_SQRTPS_NR256,
28545 IX86_BUILTIN_RSQRTPS256,
28546 IX86_BUILTIN_RSQRTPS_NR256,
28547
28548 IX86_BUILTIN_RCPPS256,
28549
28550 IX86_BUILTIN_ROUNDPD256,
28551 IX86_BUILTIN_ROUNDPS256,
28552
28553 IX86_BUILTIN_FLOORPD256,
28554 IX86_BUILTIN_CEILPD256,
28555 IX86_BUILTIN_TRUNCPD256,
28556 IX86_BUILTIN_RINTPD256,
28557 IX86_BUILTIN_ROUNDPD_AZ256,
28558
28559 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28560 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28561 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28562
28563 IX86_BUILTIN_FLOORPS256,
28564 IX86_BUILTIN_CEILPS256,
28565 IX86_BUILTIN_TRUNCPS256,
28566 IX86_BUILTIN_RINTPS256,
28567 IX86_BUILTIN_ROUNDPS_AZ256,
28568
28569 IX86_BUILTIN_FLOORPS_SFIX256,
28570 IX86_BUILTIN_CEILPS_SFIX256,
28571 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28572
28573 IX86_BUILTIN_UNPCKHPD256,
28574 IX86_BUILTIN_UNPCKLPD256,
28575 IX86_BUILTIN_UNPCKHPS256,
28576 IX86_BUILTIN_UNPCKLPS256,
28577
28578 IX86_BUILTIN_SI256_SI,
28579 IX86_BUILTIN_PS256_PS,
28580 IX86_BUILTIN_PD256_PD,
28581 IX86_BUILTIN_SI_SI256,
28582 IX86_BUILTIN_PS_PS256,
28583 IX86_BUILTIN_PD_PD256,
28584
28585 IX86_BUILTIN_VTESTZPD,
28586 IX86_BUILTIN_VTESTCPD,
28587 IX86_BUILTIN_VTESTNZCPD,
28588 IX86_BUILTIN_VTESTZPS,
28589 IX86_BUILTIN_VTESTCPS,
28590 IX86_BUILTIN_VTESTNZCPS,
28591 IX86_BUILTIN_VTESTZPD256,
28592 IX86_BUILTIN_VTESTCPD256,
28593 IX86_BUILTIN_VTESTNZCPD256,
28594 IX86_BUILTIN_VTESTZPS256,
28595 IX86_BUILTIN_VTESTCPS256,
28596 IX86_BUILTIN_VTESTNZCPS256,
28597 IX86_BUILTIN_PTESTZ256,
28598 IX86_BUILTIN_PTESTC256,
28599 IX86_BUILTIN_PTESTNZC256,
28600
28601 IX86_BUILTIN_MOVMSKPD256,
28602 IX86_BUILTIN_MOVMSKPS256,
28603
28604 /* AVX2 */
28605 IX86_BUILTIN_MPSADBW256,
28606 IX86_BUILTIN_PABSB256,
28607 IX86_BUILTIN_PABSW256,
28608 IX86_BUILTIN_PABSD256,
28609 IX86_BUILTIN_PACKSSDW256,
28610 IX86_BUILTIN_PACKSSWB256,
28611 IX86_BUILTIN_PACKUSDW256,
28612 IX86_BUILTIN_PACKUSWB256,
28613 IX86_BUILTIN_PADDB256,
28614 IX86_BUILTIN_PADDW256,
28615 IX86_BUILTIN_PADDD256,
28616 IX86_BUILTIN_PADDQ256,
28617 IX86_BUILTIN_PADDSB256,
28618 IX86_BUILTIN_PADDSW256,
28619 IX86_BUILTIN_PADDUSB256,
28620 IX86_BUILTIN_PADDUSW256,
28621 IX86_BUILTIN_PALIGNR256,
28622 IX86_BUILTIN_AND256I,
28623 IX86_BUILTIN_ANDNOT256I,
28624 IX86_BUILTIN_PAVGB256,
28625 IX86_BUILTIN_PAVGW256,
28626 IX86_BUILTIN_PBLENDVB256,
28627 IX86_BUILTIN_PBLENDVW256,
28628 IX86_BUILTIN_PCMPEQB256,
28629 IX86_BUILTIN_PCMPEQW256,
28630 IX86_BUILTIN_PCMPEQD256,
28631 IX86_BUILTIN_PCMPEQQ256,
28632 IX86_BUILTIN_PCMPGTB256,
28633 IX86_BUILTIN_PCMPGTW256,
28634 IX86_BUILTIN_PCMPGTD256,
28635 IX86_BUILTIN_PCMPGTQ256,
28636 IX86_BUILTIN_PHADDW256,
28637 IX86_BUILTIN_PHADDD256,
28638 IX86_BUILTIN_PHADDSW256,
28639 IX86_BUILTIN_PHSUBW256,
28640 IX86_BUILTIN_PHSUBD256,
28641 IX86_BUILTIN_PHSUBSW256,
28642 IX86_BUILTIN_PMADDUBSW256,
28643 IX86_BUILTIN_PMADDWD256,
28644 IX86_BUILTIN_PMAXSB256,
28645 IX86_BUILTIN_PMAXSW256,
28646 IX86_BUILTIN_PMAXSD256,
28647 IX86_BUILTIN_PMAXUB256,
28648 IX86_BUILTIN_PMAXUW256,
28649 IX86_BUILTIN_PMAXUD256,
28650 IX86_BUILTIN_PMINSB256,
28651 IX86_BUILTIN_PMINSW256,
28652 IX86_BUILTIN_PMINSD256,
28653 IX86_BUILTIN_PMINUB256,
28654 IX86_BUILTIN_PMINUW256,
28655 IX86_BUILTIN_PMINUD256,
28656 IX86_BUILTIN_PMOVMSKB256,
28657 IX86_BUILTIN_PMOVSXBW256,
28658 IX86_BUILTIN_PMOVSXBD256,
28659 IX86_BUILTIN_PMOVSXBQ256,
28660 IX86_BUILTIN_PMOVSXWD256,
28661 IX86_BUILTIN_PMOVSXWQ256,
28662 IX86_BUILTIN_PMOVSXDQ256,
28663 IX86_BUILTIN_PMOVZXBW256,
28664 IX86_BUILTIN_PMOVZXBD256,
28665 IX86_BUILTIN_PMOVZXBQ256,
28666 IX86_BUILTIN_PMOVZXWD256,
28667 IX86_BUILTIN_PMOVZXWQ256,
28668 IX86_BUILTIN_PMOVZXDQ256,
28669 IX86_BUILTIN_PMULDQ256,
28670 IX86_BUILTIN_PMULHRSW256,
28671 IX86_BUILTIN_PMULHUW256,
28672 IX86_BUILTIN_PMULHW256,
28673 IX86_BUILTIN_PMULLW256,
28674 IX86_BUILTIN_PMULLD256,
28675 IX86_BUILTIN_PMULUDQ256,
28676 IX86_BUILTIN_POR256,
28677 IX86_BUILTIN_PSADBW256,
28678 IX86_BUILTIN_PSHUFB256,
28679 IX86_BUILTIN_PSHUFD256,
28680 IX86_BUILTIN_PSHUFHW256,
28681 IX86_BUILTIN_PSHUFLW256,
28682 IX86_BUILTIN_PSIGNB256,
28683 IX86_BUILTIN_PSIGNW256,
28684 IX86_BUILTIN_PSIGND256,
28685 IX86_BUILTIN_PSLLDQI256,
28686 IX86_BUILTIN_PSLLWI256,
28687 IX86_BUILTIN_PSLLW256,
28688 IX86_BUILTIN_PSLLDI256,
28689 IX86_BUILTIN_PSLLD256,
28690 IX86_BUILTIN_PSLLQI256,
28691 IX86_BUILTIN_PSLLQ256,
28692 IX86_BUILTIN_PSRAWI256,
28693 IX86_BUILTIN_PSRAW256,
28694 IX86_BUILTIN_PSRADI256,
28695 IX86_BUILTIN_PSRAD256,
28696 IX86_BUILTIN_PSRLDQI256,
28697 IX86_BUILTIN_PSRLWI256,
28698 IX86_BUILTIN_PSRLW256,
28699 IX86_BUILTIN_PSRLDI256,
28700 IX86_BUILTIN_PSRLD256,
28701 IX86_BUILTIN_PSRLQI256,
28702 IX86_BUILTIN_PSRLQ256,
28703 IX86_BUILTIN_PSUBB256,
28704 IX86_BUILTIN_PSUBW256,
28705 IX86_BUILTIN_PSUBD256,
28706 IX86_BUILTIN_PSUBQ256,
28707 IX86_BUILTIN_PSUBSB256,
28708 IX86_BUILTIN_PSUBSW256,
28709 IX86_BUILTIN_PSUBUSB256,
28710 IX86_BUILTIN_PSUBUSW256,
28711 IX86_BUILTIN_PUNPCKHBW256,
28712 IX86_BUILTIN_PUNPCKHWD256,
28713 IX86_BUILTIN_PUNPCKHDQ256,
28714 IX86_BUILTIN_PUNPCKHQDQ256,
28715 IX86_BUILTIN_PUNPCKLBW256,
28716 IX86_BUILTIN_PUNPCKLWD256,
28717 IX86_BUILTIN_PUNPCKLDQ256,
28718 IX86_BUILTIN_PUNPCKLQDQ256,
28719 IX86_BUILTIN_PXOR256,
28720 IX86_BUILTIN_MOVNTDQA256,
28721 IX86_BUILTIN_VBROADCASTSS_PS,
28722 IX86_BUILTIN_VBROADCASTSS_PS256,
28723 IX86_BUILTIN_VBROADCASTSD_PD256,
28724 IX86_BUILTIN_VBROADCASTSI256,
28725 IX86_BUILTIN_PBLENDD256,
28726 IX86_BUILTIN_PBLENDD128,
28727 IX86_BUILTIN_PBROADCASTB256,
28728 IX86_BUILTIN_PBROADCASTW256,
28729 IX86_BUILTIN_PBROADCASTD256,
28730 IX86_BUILTIN_PBROADCASTQ256,
28731 IX86_BUILTIN_PBROADCASTB128,
28732 IX86_BUILTIN_PBROADCASTW128,
28733 IX86_BUILTIN_PBROADCASTD128,
28734 IX86_BUILTIN_PBROADCASTQ128,
28735 IX86_BUILTIN_VPERMVARSI256,
28736 IX86_BUILTIN_VPERMDF256,
28737 IX86_BUILTIN_VPERMVARSF256,
28738 IX86_BUILTIN_VPERMDI256,
28739 IX86_BUILTIN_VPERMTI256,
28740 IX86_BUILTIN_VEXTRACT128I256,
28741 IX86_BUILTIN_VINSERT128I256,
28742 IX86_BUILTIN_MASKLOADD,
28743 IX86_BUILTIN_MASKLOADQ,
28744 IX86_BUILTIN_MASKLOADD256,
28745 IX86_BUILTIN_MASKLOADQ256,
28746 IX86_BUILTIN_MASKSTORED,
28747 IX86_BUILTIN_MASKSTOREQ,
28748 IX86_BUILTIN_MASKSTORED256,
28749 IX86_BUILTIN_MASKSTOREQ256,
28750 IX86_BUILTIN_PSLLVV4DI,
28751 IX86_BUILTIN_PSLLVV2DI,
28752 IX86_BUILTIN_PSLLVV8SI,
28753 IX86_BUILTIN_PSLLVV4SI,
28754 IX86_BUILTIN_PSRAVV8SI,
28755 IX86_BUILTIN_PSRAVV4SI,
28756 IX86_BUILTIN_PSRLVV4DI,
28757 IX86_BUILTIN_PSRLVV2DI,
28758 IX86_BUILTIN_PSRLVV8SI,
28759 IX86_BUILTIN_PSRLVV4SI,
28760
28761 IX86_BUILTIN_GATHERSIV2DF,
28762 IX86_BUILTIN_GATHERSIV4DF,
28763 IX86_BUILTIN_GATHERDIV2DF,
28764 IX86_BUILTIN_GATHERDIV4DF,
28765 IX86_BUILTIN_GATHERSIV4SF,
28766 IX86_BUILTIN_GATHERSIV8SF,
28767 IX86_BUILTIN_GATHERDIV4SF,
28768 IX86_BUILTIN_GATHERDIV8SF,
28769 IX86_BUILTIN_GATHERSIV2DI,
28770 IX86_BUILTIN_GATHERSIV4DI,
28771 IX86_BUILTIN_GATHERDIV2DI,
28772 IX86_BUILTIN_GATHERDIV4DI,
28773 IX86_BUILTIN_GATHERSIV4SI,
28774 IX86_BUILTIN_GATHERSIV8SI,
28775 IX86_BUILTIN_GATHERDIV4SI,
28776 IX86_BUILTIN_GATHERDIV8SI,
28777
28778 /* AVX512F */
28779 IX86_BUILTIN_SI512_SI256,
28780 IX86_BUILTIN_PD512_PD256,
28781 IX86_BUILTIN_PS512_PS256,
28782 IX86_BUILTIN_SI512_SI,
28783 IX86_BUILTIN_PD512_PD,
28784 IX86_BUILTIN_PS512_PS,
28785 IX86_BUILTIN_ADDPD512,
28786 IX86_BUILTIN_ADDPS512,
28787 IX86_BUILTIN_ADDSD_ROUND,
28788 IX86_BUILTIN_ADDSS_ROUND,
28789 IX86_BUILTIN_ALIGND512,
28790 IX86_BUILTIN_ALIGNQ512,
28791 IX86_BUILTIN_BLENDMD512,
28792 IX86_BUILTIN_BLENDMPD512,
28793 IX86_BUILTIN_BLENDMPS512,
28794 IX86_BUILTIN_BLENDMQ512,
28795 IX86_BUILTIN_BROADCASTF32X4_512,
28796 IX86_BUILTIN_BROADCASTF64X4_512,
28797 IX86_BUILTIN_BROADCASTI32X4_512,
28798 IX86_BUILTIN_BROADCASTI64X4_512,
28799 IX86_BUILTIN_BROADCASTSD512,
28800 IX86_BUILTIN_BROADCASTSS512,
28801 IX86_BUILTIN_CMPD512,
28802 IX86_BUILTIN_CMPPD512,
28803 IX86_BUILTIN_CMPPS512,
28804 IX86_BUILTIN_CMPQ512,
28805 IX86_BUILTIN_CMPSD_MASK,
28806 IX86_BUILTIN_CMPSS_MASK,
28807 IX86_BUILTIN_COMIDF,
28808 IX86_BUILTIN_COMISF,
28809 IX86_BUILTIN_COMPRESSPD512,
28810 IX86_BUILTIN_COMPRESSPDSTORE512,
28811 IX86_BUILTIN_COMPRESSPS512,
28812 IX86_BUILTIN_COMPRESSPSSTORE512,
28813 IX86_BUILTIN_CVTDQ2PD512,
28814 IX86_BUILTIN_CVTDQ2PS512,
28815 IX86_BUILTIN_CVTPD2DQ512,
28816 IX86_BUILTIN_CVTPD2PS512,
28817 IX86_BUILTIN_CVTPD2UDQ512,
28818 IX86_BUILTIN_CVTPH2PS512,
28819 IX86_BUILTIN_CVTPS2DQ512,
28820 IX86_BUILTIN_CVTPS2PD512,
28821 IX86_BUILTIN_CVTPS2PH512,
28822 IX86_BUILTIN_CVTPS2UDQ512,
28823 IX86_BUILTIN_CVTSD2SS_ROUND,
28824 IX86_BUILTIN_CVTSI2SD64,
28825 IX86_BUILTIN_CVTSI2SS32,
28826 IX86_BUILTIN_CVTSI2SS64,
28827 IX86_BUILTIN_CVTSS2SD_ROUND,
28828 IX86_BUILTIN_CVTTPD2DQ512,
28829 IX86_BUILTIN_CVTTPD2UDQ512,
28830 IX86_BUILTIN_CVTTPS2DQ512,
28831 IX86_BUILTIN_CVTTPS2UDQ512,
28832 IX86_BUILTIN_CVTUDQ2PD512,
28833 IX86_BUILTIN_CVTUDQ2PS512,
28834 IX86_BUILTIN_CVTUSI2SD32,
28835 IX86_BUILTIN_CVTUSI2SD64,
28836 IX86_BUILTIN_CVTUSI2SS32,
28837 IX86_BUILTIN_CVTUSI2SS64,
28838 IX86_BUILTIN_DIVPD512,
28839 IX86_BUILTIN_DIVPS512,
28840 IX86_BUILTIN_DIVSD_ROUND,
28841 IX86_BUILTIN_DIVSS_ROUND,
28842 IX86_BUILTIN_EXPANDPD512,
28843 IX86_BUILTIN_EXPANDPD512Z,
28844 IX86_BUILTIN_EXPANDPDLOAD512,
28845 IX86_BUILTIN_EXPANDPDLOAD512Z,
28846 IX86_BUILTIN_EXPANDPS512,
28847 IX86_BUILTIN_EXPANDPS512Z,
28848 IX86_BUILTIN_EXPANDPSLOAD512,
28849 IX86_BUILTIN_EXPANDPSLOAD512Z,
28850 IX86_BUILTIN_EXTRACTF32X4,
28851 IX86_BUILTIN_EXTRACTF64X4,
28852 IX86_BUILTIN_EXTRACTI32X4,
28853 IX86_BUILTIN_EXTRACTI64X4,
28854 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28855 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28856 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28857 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28858 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28859 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28860 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28861 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28862 IX86_BUILTIN_GETEXPPD512,
28863 IX86_BUILTIN_GETEXPPS512,
28864 IX86_BUILTIN_GETEXPSD128,
28865 IX86_BUILTIN_GETEXPSS128,
28866 IX86_BUILTIN_GETMANTPD512,
28867 IX86_BUILTIN_GETMANTPS512,
28868 IX86_BUILTIN_GETMANTSD128,
28869 IX86_BUILTIN_GETMANTSS128,
28870 IX86_BUILTIN_INSERTF32X4,
28871 IX86_BUILTIN_INSERTF64X4,
28872 IX86_BUILTIN_INSERTI32X4,
28873 IX86_BUILTIN_INSERTI64X4,
28874 IX86_BUILTIN_LOADAPD512,
28875 IX86_BUILTIN_LOADAPS512,
28876 IX86_BUILTIN_LOADDQUDI512,
28877 IX86_BUILTIN_LOADDQUSI512,
28878 IX86_BUILTIN_LOADUPD512,
28879 IX86_BUILTIN_LOADUPS512,
28880 IX86_BUILTIN_MAXPD512,
28881 IX86_BUILTIN_MAXPS512,
28882 IX86_BUILTIN_MAXSD_ROUND,
28883 IX86_BUILTIN_MAXSS_ROUND,
28884 IX86_BUILTIN_MINPD512,
28885 IX86_BUILTIN_MINPS512,
28886 IX86_BUILTIN_MINSD_ROUND,
28887 IX86_BUILTIN_MINSS_ROUND,
28888 IX86_BUILTIN_MOVAPD512,
28889 IX86_BUILTIN_MOVAPS512,
28890 IX86_BUILTIN_MOVDDUP512,
28891 IX86_BUILTIN_MOVDQA32LOAD512,
28892 IX86_BUILTIN_MOVDQA32STORE512,
28893 IX86_BUILTIN_MOVDQA32_512,
28894 IX86_BUILTIN_MOVDQA64LOAD512,
28895 IX86_BUILTIN_MOVDQA64STORE512,
28896 IX86_BUILTIN_MOVDQA64_512,
28897 IX86_BUILTIN_MOVNTDQ512,
28898 IX86_BUILTIN_MOVNTDQA512,
28899 IX86_BUILTIN_MOVNTPD512,
28900 IX86_BUILTIN_MOVNTPS512,
28901 IX86_BUILTIN_MOVSHDUP512,
28902 IX86_BUILTIN_MOVSLDUP512,
28903 IX86_BUILTIN_MULPD512,
28904 IX86_BUILTIN_MULPS512,
28905 IX86_BUILTIN_MULSD_ROUND,
28906 IX86_BUILTIN_MULSS_ROUND,
28907 IX86_BUILTIN_PABSD512,
28908 IX86_BUILTIN_PABSQ512,
28909 IX86_BUILTIN_PADDD512,
28910 IX86_BUILTIN_PADDQ512,
28911 IX86_BUILTIN_PANDD512,
28912 IX86_BUILTIN_PANDND512,
28913 IX86_BUILTIN_PANDNQ512,
28914 IX86_BUILTIN_PANDQ512,
28915 IX86_BUILTIN_PBROADCASTD512,
28916 IX86_BUILTIN_PBROADCASTD512_GPR,
28917 IX86_BUILTIN_PBROADCASTMB512,
28918 IX86_BUILTIN_PBROADCASTMW512,
28919 IX86_BUILTIN_PBROADCASTQ512,
28920 IX86_BUILTIN_PBROADCASTQ512_GPR,
28921 IX86_BUILTIN_PCMPEQD512_MASK,
28922 IX86_BUILTIN_PCMPEQQ512_MASK,
28923 IX86_BUILTIN_PCMPGTD512_MASK,
28924 IX86_BUILTIN_PCMPGTQ512_MASK,
28925 IX86_BUILTIN_PCOMPRESSD512,
28926 IX86_BUILTIN_PCOMPRESSDSTORE512,
28927 IX86_BUILTIN_PCOMPRESSQ512,
28928 IX86_BUILTIN_PCOMPRESSQSTORE512,
28929 IX86_BUILTIN_PEXPANDD512,
28930 IX86_BUILTIN_PEXPANDD512Z,
28931 IX86_BUILTIN_PEXPANDDLOAD512,
28932 IX86_BUILTIN_PEXPANDDLOAD512Z,
28933 IX86_BUILTIN_PEXPANDQ512,
28934 IX86_BUILTIN_PEXPANDQ512Z,
28935 IX86_BUILTIN_PEXPANDQLOAD512,
28936 IX86_BUILTIN_PEXPANDQLOAD512Z,
28937 IX86_BUILTIN_PMAXSD512,
28938 IX86_BUILTIN_PMAXSQ512,
28939 IX86_BUILTIN_PMAXUD512,
28940 IX86_BUILTIN_PMAXUQ512,
28941 IX86_BUILTIN_PMINSD512,
28942 IX86_BUILTIN_PMINSQ512,
28943 IX86_BUILTIN_PMINUD512,
28944 IX86_BUILTIN_PMINUQ512,
28945 IX86_BUILTIN_PMOVDB512,
28946 IX86_BUILTIN_PMOVDB512_MEM,
28947 IX86_BUILTIN_PMOVDW512,
28948 IX86_BUILTIN_PMOVDW512_MEM,
28949 IX86_BUILTIN_PMOVQB512,
28950 IX86_BUILTIN_PMOVQB512_MEM,
28951 IX86_BUILTIN_PMOVQD512,
28952 IX86_BUILTIN_PMOVQD512_MEM,
28953 IX86_BUILTIN_PMOVQW512,
28954 IX86_BUILTIN_PMOVQW512_MEM,
28955 IX86_BUILTIN_PMOVSDB512,
28956 IX86_BUILTIN_PMOVSDB512_MEM,
28957 IX86_BUILTIN_PMOVSDW512,
28958 IX86_BUILTIN_PMOVSDW512_MEM,
28959 IX86_BUILTIN_PMOVSQB512,
28960 IX86_BUILTIN_PMOVSQB512_MEM,
28961 IX86_BUILTIN_PMOVSQD512,
28962 IX86_BUILTIN_PMOVSQD512_MEM,
28963 IX86_BUILTIN_PMOVSQW512,
28964 IX86_BUILTIN_PMOVSQW512_MEM,
28965 IX86_BUILTIN_PMOVSXBD512,
28966 IX86_BUILTIN_PMOVSXBQ512,
28967 IX86_BUILTIN_PMOVSXDQ512,
28968 IX86_BUILTIN_PMOVSXWD512,
28969 IX86_BUILTIN_PMOVSXWQ512,
28970 IX86_BUILTIN_PMOVUSDB512,
28971 IX86_BUILTIN_PMOVUSDB512_MEM,
28972 IX86_BUILTIN_PMOVUSDW512,
28973 IX86_BUILTIN_PMOVUSDW512_MEM,
28974 IX86_BUILTIN_PMOVUSQB512,
28975 IX86_BUILTIN_PMOVUSQB512_MEM,
28976 IX86_BUILTIN_PMOVUSQD512,
28977 IX86_BUILTIN_PMOVUSQD512_MEM,
28978 IX86_BUILTIN_PMOVUSQW512,
28979 IX86_BUILTIN_PMOVUSQW512_MEM,
28980 IX86_BUILTIN_PMOVZXBD512,
28981 IX86_BUILTIN_PMOVZXBQ512,
28982 IX86_BUILTIN_PMOVZXDQ512,
28983 IX86_BUILTIN_PMOVZXWD512,
28984 IX86_BUILTIN_PMOVZXWQ512,
28985 IX86_BUILTIN_PMULDQ512,
28986 IX86_BUILTIN_PMULLD512,
28987 IX86_BUILTIN_PMULUDQ512,
28988 IX86_BUILTIN_PORD512,
28989 IX86_BUILTIN_PORQ512,
28990 IX86_BUILTIN_PROLD512,
28991 IX86_BUILTIN_PROLQ512,
28992 IX86_BUILTIN_PROLVD512,
28993 IX86_BUILTIN_PROLVQ512,
28994 IX86_BUILTIN_PRORD512,
28995 IX86_BUILTIN_PRORQ512,
28996 IX86_BUILTIN_PRORVD512,
28997 IX86_BUILTIN_PRORVQ512,
28998 IX86_BUILTIN_PSHUFD512,
28999 IX86_BUILTIN_PSLLD512,
29000 IX86_BUILTIN_PSLLDI512,
29001 IX86_BUILTIN_PSLLQ512,
29002 IX86_BUILTIN_PSLLQI512,
29003 IX86_BUILTIN_PSLLVV16SI,
29004 IX86_BUILTIN_PSLLVV8DI,
29005 IX86_BUILTIN_PSRAD512,
29006 IX86_BUILTIN_PSRADI512,
29007 IX86_BUILTIN_PSRAQ512,
29008 IX86_BUILTIN_PSRAQI512,
29009 IX86_BUILTIN_PSRAVV16SI,
29010 IX86_BUILTIN_PSRAVV8DI,
29011 IX86_BUILTIN_PSRLD512,
29012 IX86_BUILTIN_PSRLDI512,
29013 IX86_BUILTIN_PSRLQ512,
29014 IX86_BUILTIN_PSRLQI512,
29015 IX86_BUILTIN_PSRLVV16SI,
29016 IX86_BUILTIN_PSRLVV8DI,
29017 IX86_BUILTIN_PSUBD512,
29018 IX86_BUILTIN_PSUBQ512,
29019 IX86_BUILTIN_PTESTMD512,
29020 IX86_BUILTIN_PTESTMQ512,
29021 IX86_BUILTIN_PTESTNMD512,
29022 IX86_BUILTIN_PTESTNMQ512,
29023 IX86_BUILTIN_PUNPCKHDQ512,
29024 IX86_BUILTIN_PUNPCKHQDQ512,
29025 IX86_BUILTIN_PUNPCKLDQ512,
29026 IX86_BUILTIN_PUNPCKLQDQ512,
29027 IX86_BUILTIN_PXORD512,
29028 IX86_BUILTIN_PXORQ512,
29029 IX86_BUILTIN_RCP14PD512,
29030 IX86_BUILTIN_RCP14PS512,
29031 IX86_BUILTIN_RCP14SD,
29032 IX86_BUILTIN_RCP14SS,
29033 IX86_BUILTIN_RNDSCALEPD,
29034 IX86_BUILTIN_RNDSCALEPS,
29035 IX86_BUILTIN_RNDSCALESD,
29036 IX86_BUILTIN_RNDSCALESS,
29037 IX86_BUILTIN_RSQRT14PD512,
29038 IX86_BUILTIN_RSQRT14PS512,
29039 IX86_BUILTIN_RSQRT14SD,
29040 IX86_BUILTIN_RSQRT14SS,
29041 IX86_BUILTIN_SCALEFPD512,
29042 IX86_BUILTIN_SCALEFPS512,
29043 IX86_BUILTIN_SCALEFSD,
29044 IX86_BUILTIN_SCALEFSS,
29045 IX86_BUILTIN_SHUFPD512,
29046 IX86_BUILTIN_SHUFPS512,
29047 IX86_BUILTIN_SHUF_F32x4,
29048 IX86_BUILTIN_SHUF_F64x2,
29049 IX86_BUILTIN_SHUF_I32x4,
29050 IX86_BUILTIN_SHUF_I64x2,
29051 IX86_BUILTIN_SQRTPD512,
29052 IX86_BUILTIN_SQRTPD512_MASK,
29053 IX86_BUILTIN_SQRTPS512_MASK,
29054 IX86_BUILTIN_SQRTPS_NR512,
29055 IX86_BUILTIN_SQRTSD_ROUND,
29056 IX86_BUILTIN_SQRTSS_ROUND,
29057 IX86_BUILTIN_STOREAPD512,
29058 IX86_BUILTIN_STOREAPS512,
29059 IX86_BUILTIN_STOREDQUDI512,
29060 IX86_BUILTIN_STOREDQUSI512,
29061 IX86_BUILTIN_STOREUPD512,
29062 IX86_BUILTIN_STOREUPS512,
29063 IX86_BUILTIN_SUBPD512,
29064 IX86_BUILTIN_SUBPS512,
29065 IX86_BUILTIN_SUBSD_ROUND,
29066 IX86_BUILTIN_SUBSS_ROUND,
29067 IX86_BUILTIN_UCMPD512,
29068 IX86_BUILTIN_UCMPQ512,
29069 IX86_BUILTIN_UNPCKHPD512,
29070 IX86_BUILTIN_UNPCKHPS512,
29071 IX86_BUILTIN_UNPCKLPD512,
29072 IX86_BUILTIN_UNPCKLPS512,
29073 IX86_BUILTIN_VCVTSD2SI32,
29074 IX86_BUILTIN_VCVTSD2SI64,
29075 IX86_BUILTIN_VCVTSD2USI32,
29076 IX86_BUILTIN_VCVTSD2USI64,
29077 IX86_BUILTIN_VCVTSS2SI32,
29078 IX86_BUILTIN_VCVTSS2SI64,
29079 IX86_BUILTIN_VCVTSS2USI32,
29080 IX86_BUILTIN_VCVTSS2USI64,
29081 IX86_BUILTIN_VCVTTSD2SI32,
29082 IX86_BUILTIN_VCVTTSD2SI64,
29083 IX86_BUILTIN_VCVTTSD2USI32,
29084 IX86_BUILTIN_VCVTTSD2USI64,
29085 IX86_BUILTIN_VCVTTSS2SI32,
29086 IX86_BUILTIN_VCVTTSS2SI64,
29087 IX86_BUILTIN_VCVTTSS2USI32,
29088 IX86_BUILTIN_VCVTTSS2USI64,
29089 IX86_BUILTIN_VFMADDPD512_MASK,
29090 IX86_BUILTIN_VFMADDPD512_MASK3,
29091 IX86_BUILTIN_VFMADDPD512_MASKZ,
29092 IX86_BUILTIN_VFMADDPS512_MASK,
29093 IX86_BUILTIN_VFMADDPS512_MASK3,
29094 IX86_BUILTIN_VFMADDPS512_MASKZ,
29095 IX86_BUILTIN_VFMADDSD3_ROUND,
29096 IX86_BUILTIN_VFMADDSS3_ROUND,
29097 IX86_BUILTIN_VFMADDSUBPD512_MASK,
29098 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
29099 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
29100 IX86_BUILTIN_VFMADDSUBPS512_MASK,
29101 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
29102 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29103 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29104 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29105 IX86_BUILTIN_VFMSUBPD512_MASK3,
29106 IX86_BUILTIN_VFMSUBPS512_MASK3,
29107 IX86_BUILTIN_VFMSUBSD3_MASK3,
29108 IX86_BUILTIN_VFMSUBSS3_MASK3,
29109 IX86_BUILTIN_VFNMADDPD512_MASK,
29110 IX86_BUILTIN_VFNMADDPS512_MASK,
29111 IX86_BUILTIN_VFNMSUBPD512_MASK,
29112 IX86_BUILTIN_VFNMSUBPD512_MASK3,
29113 IX86_BUILTIN_VFNMSUBPS512_MASK,
29114 IX86_BUILTIN_VFNMSUBPS512_MASK3,
29115 IX86_BUILTIN_VPCLZCNTD512,
29116 IX86_BUILTIN_VPCLZCNTQ512,
29117 IX86_BUILTIN_VPCONFLICTD512,
29118 IX86_BUILTIN_VPCONFLICTQ512,
29119 IX86_BUILTIN_VPERMDF512,
29120 IX86_BUILTIN_VPERMDI512,
29121 IX86_BUILTIN_VPERMI2VARD512,
29122 IX86_BUILTIN_VPERMI2VARPD512,
29123 IX86_BUILTIN_VPERMI2VARPS512,
29124 IX86_BUILTIN_VPERMI2VARQ512,
29125 IX86_BUILTIN_VPERMILPD512,
29126 IX86_BUILTIN_VPERMILPS512,
29127 IX86_BUILTIN_VPERMILVARPD512,
29128 IX86_BUILTIN_VPERMILVARPS512,
29129 IX86_BUILTIN_VPERMT2VARD512,
29130 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29131 IX86_BUILTIN_VPERMT2VARPD512,
29132 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29133 IX86_BUILTIN_VPERMT2VARPS512,
29134 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29135 IX86_BUILTIN_VPERMT2VARQ512,
29136 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29137 IX86_BUILTIN_VPERMVARDF512,
29138 IX86_BUILTIN_VPERMVARDI512,
29139 IX86_BUILTIN_VPERMVARSF512,
29140 IX86_BUILTIN_VPERMVARSI512,
29141 IX86_BUILTIN_VTERNLOGD512_MASK,
29142 IX86_BUILTIN_VTERNLOGD512_MASKZ,
29143 IX86_BUILTIN_VTERNLOGQ512_MASK,
29144 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29145
29146 /* Mask arithmetic operations */
29147 IX86_BUILTIN_KAND16,
29148 IX86_BUILTIN_KANDN16,
29149 IX86_BUILTIN_KNOT16,
29150 IX86_BUILTIN_KOR16,
29151 IX86_BUILTIN_KORTESTC16,
29152 IX86_BUILTIN_KORTESTZ16,
29153 IX86_BUILTIN_KUNPCKBW,
29154 IX86_BUILTIN_KXNOR16,
29155 IX86_BUILTIN_KXOR16,
29156 IX86_BUILTIN_KMOV16,
29157
29158 /* AVX512VL. */
29159 IX86_BUILTIN_PMOVUSQD256_MEM,
29160 IX86_BUILTIN_PMOVUSQD128_MEM,
29161 IX86_BUILTIN_PMOVSQD256_MEM,
29162 IX86_BUILTIN_PMOVSQD128_MEM,
29163 IX86_BUILTIN_PMOVQD256_MEM,
29164 IX86_BUILTIN_PMOVQD128_MEM,
29165 IX86_BUILTIN_PMOVUSQW256_MEM,
29166 IX86_BUILTIN_PMOVUSQW128_MEM,
29167 IX86_BUILTIN_PMOVSQW256_MEM,
29168 IX86_BUILTIN_PMOVSQW128_MEM,
29169 IX86_BUILTIN_PMOVQW256_MEM,
29170 IX86_BUILTIN_PMOVQW128_MEM,
29171 IX86_BUILTIN_PMOVUSQB256_MEM,
29172 IX86_BUILTIN_PMOVUSQB128_MEM,
29173 IX86_BUILTIN_PMOVSQB256_MEM,
29174 IX86_BUILTIN_PMOVSQB128_MEM,
29175 IX86_BUILTIN_PMOVQB256_MEM,
29176 IX86_BUILTIN_PMOVQB128_MEM,
29177 IX86_BUILTIN_PMOVUSDW256_MEM,
29178 IX86_BUILTIN_PMOVUSDW128_MEM,
29179 IX86_BUILTIN_PMOVSDW256_MEM,
29180 IX86_BUILTIN_PMOVSDW128_MEM,
29181 IX86_BUILTIN_PMOVDW256_MEM,
29182 IX86_BUILTIN_PMOVDW128_MEM,
29183 IX86_BUILTIN_PMOVUSDB256_MEM,
29184 IX86_BUILTIN_PMOVUSDB128_MEM,
29185 IX86_BUILTIN_PMOVSDB256_MEM,
29186 IX86_BUILTIN_PMOVSDB128_MEM,
29187 IX86_BUILTIN_PMOVDB256_MEM,
29188 IX86_BUILTIN_PMOVDB128_MEM,
29189 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29190 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29191 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29192 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29193 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29194 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29195 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29196 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29197 IX86_BUILTIN_LOADAPD256_MASK,
29198 IX86_BUILTIN_LOADAPD128_MASK,
29199 IX86_BUILTIN_LOADAPS256_MASK,
29200 IX86_BUILTIN_LOADAPS128_MASK,
29201 IX86_BUILTIN_STOREAPD256_MASK,
29202 IX86_BUILTIN_STOREAPD128_MASK,
29203 IX86_BUILTIN_STOREAPS256_MASK,
29204 IX86_BUILTIN_STOREAPS128_MASK,
29205 IX86_BUILTIN_LOADUPD256_MASK,
29206 IX86_BUILTIN_LOADUPD128_MASK,
29207 IX86_BUILTIN_LOADUPS256_MASK,
29208 IX86_BUILTIN_LOADUPS128_MASK,
29209 IX86_BUILTIN_STOREUPD256_MASK,
29210 IX86_BUILTIN_STOREUPD128_MASK,
29211 IX86_BUILTIN_STOREUPS256_MASK,
29212 IX86_BUILTIN_STOREUPS128_MASK,
29213 IX86_BUILTIN_LOADDQUDI256_MASK,
29214 IX86_BUILTIN_LOADDQUDI128_MASK,
29215 IX86_BUILTIN_LOADDQUSI256_MASK,
29216 IX86_BUILTIN_LOADDQUSI128_MASK,
29217 IX86_BUILTIN_LOADDQUHI256_MASK,
29218 IX86_BUILTIN_LOADDQUHI128_MASK,
29219 IX86_BUILTIN_LOADDQUQI256_MASK,
29220 IX86_BUILTIN_LOADDQUQI128_MASK,
29221 IX86_BUILTIN_STOREDQUDI256_MASK,
29222 IX86_BUILTIN_STOREDQUDI128_MASK,
29223 IX86_BUILTIN_STOREDQUSI256_MASK,
29224 IX86_BUILTIN_STOREDQUSI128_MASK,
29225 IX86_BUILTIN_STOREDQUHI256_MASK,
29226 IX86_BUILTIN_STOREDQUHI128_MASK,
29227 IX86_BUILTIN_STOREDQUQI256_MASK,
29228 IX86_BUILTIN_STOREDQUQI128_MASK,
29229 IX86_BUILTIN_COMPRESSPDSTORE256,
29230 IX86_BUILTIN_COMPRESSPDSTORE128,
29231 IX86_BUILTIN_COMPRESSPSSTORE256,
29232 IX86_BUILTIN_COMPRESSPSSTORE128,
29233 IX86_BUILTIN_PCOMPRESSQSTORE256,
29234 IX86_BUILTIN_PCOMPRESSQSTORE128,
29235 IX86_BUILTIN_PCOMPRESSDSTORE256,
29236 IX86_BUILTIN_PCOMPRESSDSTORE128,
29237 IX86_BUILTIN_EXPANDPDLOAD256,
29238 IX86_BUILTIN_EXPANDPDLOAD128,
29239 IX86_BUILTIN_EXPANDPSLOAD256,
29240 IX86_BUILTIN_EXPANDPSLOAD128,
29241 IX86_BUILTIN_PEXPANDQLOAD256,
29242 IX86_BUILTIN_PEXPANDQLOAD128,
29243 IX86_BUILTIN_PEXPANDDLOAD256,
29244 IX86_BUILTIN_PEXPANDDLOAD128,
29245 IX86_BUILTIN_EXPANDPDLOAD256Z,
29246 IX86_BUILTIN_EXPANDPDLOAD128Z,
29247 IX86_BUILTIN_EXPANDPSLOAD256Z,
29248 IX86_BUILTIN_EXPANDPSLOAD128Z,
29249 IX86_BUILTIN_PEXPANDQLOAD256Z,
29250 IX86_BUILTIN_PEXPANDQLOAD128Z,
29251 IX86_BUILTIN_PEXPANDDLOAD256Z,
29252 IX86_BUILTIN_PEXPANDDLOAD128Z,
29253 IX86_BUILTIN_PALIGNR256_MASK,
29254 IX86_BUILTIN_PALIGNR128_MASK,
29255 IX86_BUILTIN_MOVDQA64_256_MASK,
29256 IX86_BUILTIN_MOVDQA64_128_MASK,
29257 IX86_BUILTIN_MOVDQA32_256_MASK,
29258 IX86_BUILTIN_MOVDQA32_128_MASK,
29259 IX86_BUILTIN_MOVAPD256_MASK,
29260 IX86_BUILTIN_MOVAPD128_MASK,
29261 IX86_BUILTIN_MOVAPS256_MASK,
29262 IX86_BUILTIN_MOVAPS128_MASK,
29263 IX86_BUILTIN_MOVDQUHI256_MASK,
29264 IX86_BUILTIN_MOVDQUHI128_MASK,
29265 IX86_BUILTIN_MOVDQUQI256_MASK,
29266 IX86_BUILTIN_MOVDQUQI128_MASK,
29267 IX86_BUILTIN_MINPS128_MASK,
29268 IX86_BUILTIN_MAXPS128_MASK,
29269 IX86_BUILTIN_MINPD128_MASK,
29270 IX86_BUILTIN_MAXPD128_MASK,
29271 IX86_BUILTIN_MAXPD256_MASK,
29272 IX86_BUILTIN_MAXPS256_MASK,
29273 IX86_BUILTIN_MINPD256_MASK,
29274 IX86_BUILTIN_MINPS256_MASK,
29275 IX86_BUILTIN_MULPS128_MASK,
29276 IX86_BUILTIN_DIVPS128_MASK,
29277 IX86_BUILTIN_MULPD128_MASK,
29278 IX86_BUILTIN_DIVPD128_MASK,
29279 IX86_BUILTIN_DIVPD256_MASK,
29280 IX86_BUILTIN_DIVPS256_MASK,
29281 IX86_BUILTIN_MULPD256_MASK,
29282 IX86_BUILTIN_MULPS256_MASK,
29283 IX86_BUILTIN_ADDPD128_MASK,
29284 IX86_BUILTIN_ADDPD256_MASK,
29285 IX86_BUILTIN_ADDPS128_MASK,
29286 IX86_BUILTIN_ADDPS256_MASK,
29287 IX86_BUILTIN_SUBPD128_MASK,
29288 IX86_BUILTIN_SUBPD256_MASK,
29289 IX86_BUILTIN_SUBPS128_MASK,
29290 IX86_BUILTIN_SUBPS256_MASK,
29291 IX86_BUILTIN_XORPD256_MASK,
29292 IX86_BUILTIN_XORPD128_MASK,
29293 IX86_BUILTIN_XORPS256_MASK,
29294 IX86_BUILTIN_XORPS128_MASK,
29295 IX86_BUILTIN_ORPD256_MASK,
29296 IX86_BUILTIN_ORPD128_MASK,
29297 IX86_BUILTIN_ORPS256_MASK,
29298 IX86_BUILTIN_ORPS128_MASK,
29299 IX86_BUILTIN_BROADCASTF32x2_256,
29300 IX86_BUILTIN_BROADCASTI32x2_256,
29301 IX86_BUILTIN_BROADCASTI32x2_128,
29302 IX86_BUILTIN_BROADCASTF64X2_256,
29303 IX86_BUILTIN_BROADCASTI64X2_256,
29304 IX86_BUILTIN_BROADCASTF32X4_256,
29305 IX86_BUILTIN_BROADCASTI32X4_256,
29306 IX86_BUILTIN_EXTRACTF32X4_256,
29307 IX86_BUILTIN_EXTRACTI32X4_256,
29308 IX86_BUILTIN_DBPSADBW256,
29309 IX86_BUILTIN_DBPSADBW128,
29310 IX86_BUILTIN_CVTTPD2QQ256,
29311 IX86_BUILTIN_CVTTPD2QQ128,
29312 IX86_BUILTIN_CVTTPD2UQQ256,
29313 IX86_BUILTIN_CVTTPD2UQQ128,
29314 IX86_BUILTIN_CVTPD2QQ256,
29315 IX86_BUILTIN_CVTPD2QQ128,
29316 IX86_BUILTIN_CVTPD2UQQ256,
29317 IX86_BUILTIN_CVTPD2UQQ128,
29318 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29319 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29320 IX86_BUILTIN_CVTTPS2QQ256,
29321 IX86_BUILTIN_CVTTPS2QQ128,
29322 IX86_BUILTIN_CVTTPS2UQQ256,
29323 IX86_BUILTIN_CVTTPS2UQQ128,
29324 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29325 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29326 IX86_BUILTIN_CVTTPS2UDQ256,
29327 IX86_BUILTIN_CVTTPS2UDQ128,
29328 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29329 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29330 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29331 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29332 IX86_BUILTIN_CVTPD2DQ256_MASK,
29333 IX86_BUILTIN_CVTPD2DQ128_MASK,
29334 IX86_BUILTIN_CVTDQ2PD256_MASK,
29335 IX86_BUILTIN_CVTDQ2PD128_MASK,
29336 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29337 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29338 IX86_BUILTIN_CVTDQ2PS256_MASK,
29339 IX86_BUILTIN_CVTDQ2PS128_MASK,
29340 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29341 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29342 IX86_BUILTIN_CVTPS2PD256_MASK,
29343 IX86_BUILTIN_CVTPS2PD128_MASK,
29344 IX86_BUILTIN_PBROADCASTB256_MASK,
29345 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29346 IX86_BUILTIN_PBROADCASTB128_MASK,
29347 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29348 IX86_BUILTIN_PBROADCASTW256_MASK,
29349 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29350 IX86_BUILTIN_PBROADCASTW128_MASK,
29351 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29352 IX86_BUILTIN_PBROADCASTD256_MASK,
29353 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29354 IX86_BUILTIN_PBROADCASTD128_MASK,
29355 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29356 IX86_BUILTIN_PBROADCASTQ256_MASK,
29357 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29358 IX86_BUILTIN_PBROADCASTQ128_MASK,
29359 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29360 IX86_BUILTIN_BROADCASTSS256,
29361 IX86_BUILTIN_BROADCASTSS128,
29362 IX86_BUILTIN_BROADCASTSD256,
29363 IX86_BUILTIN_EXTRACTF64X2_256,
29364 IX86_BUILTIN_EXTRACTI64X2_256,
29365 IX86_BUILTIN_INSERTF32X4_256,
29366 IX86_BUILTIN_INSERTI32X4_256,
29367 IX86_BUILTIN_PMOVSXBW256_MASK,
29368 IX86_BUILTIN_PMOVSXBW128_MASK,
29369 IX86_BUILTIN_PMOVSXBD256_MASK,
29370 IX86_BUILTIN_PMOVSXBD128_MASK,
29371 IX86_BUILTIN_PMOVSXBQ256_MASK,
29372 IX86_BUILTIN_PMOVSXBQ128_MASK,
29373 IX86_BUILTIN_PMOVSXWD256_MASK,
29374 IX86_BUILTIN_PMOVSXWD128_MASK,
29375 IX86_BUILTIN_PMOVSXWQ256_MASK,
29376 IX86_BUILTIN_PMOVSXWQ128_MASK,
29377 IX86_BUILTIN_PMOVSXDQ256_MASK,
29378 IX86_BUILTIN_PMOVSXDQ128_MASK,
29379 IX86_BUILTIN_PMOVZXBW256_MASK,
29380 IX86_BUILTIN_PMOVZXBW128_MASK,
29381 IX86_BUILTIN_PMOVZXBD256_MASK,
29382 IX86_BUILTIN_PMOVZXBD128_MASK,
29383 IX86_BUILTIN_PMOVZXBQ256_MASK,
29384 IX86_BUILTIN_PMOVZXBQ128_MASK,
29385 IX86_BUILTIN_PMOVZXWD256_MASK,
29386 IX86_BUILTIN_PMOVZXWD128_MASK,
29387 IX86_BUILTIN_PMOVZXWQ256_MASK,
29388 IX86_BUILTIN_PMOVZXWQ128_MASK,
29389 IX86_BUILTIN_PMOVZXDQ256_MASK,
29390 IX86_BUILTIN_PMOVZXDQ128_MASK,
29391 IX86_BUILTIN_REDUCEPD256_MASK,
29392 IX86_BUILTIN_REDUCEPD128_MASK,
29393 IX86_BUILTIN_REDUCEPS256_MASK,
29394 IX86_BUILTIN_REDUCEPS128_MASK,
29395 IX86_BUILTIN_REDUCESD_MASK,
29396 IX86_BUILTIN_REDUCESS_MASK,
29397 IX86_BUILTIN_VPERMVARHI256_MASK,
29398 IX86_BUILTIN_VPERMVARHI128_MASK,
29399 IX86_BUILTIN_VPERMT2VARHI256,
29400 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29401 IX86_BUILTIN_VPERMT2VARHI128,
29402 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29403 IX86_BUILTIN_VPERMI2VARHI256,
29404 IX86_BUILTIN_VPERMI2VARHI128,
29405 IX86_BUILTIN_RCP14PD256,
29406 IX86_BUILTIN_RCP14PD128,
29407 IX86_BUILTIN_RCP14PS256,
29408 IX86_BUILTIN_RCP14PS128,
29409 IX86_BUILTIN_RSQRT14PD256_MASK,
29410 IX86_BUILTIN_RSQRT14PD128_MASK,
29411 IX86_BUILTIN_RSQRT14PS256_MASK,
29412 IX86_BUILTIN_RSQRT14PS128_MASK,
29413 IX86_BUILTIN_SQRTPD256_MASK,
29414 IX86_BUILTIN_SQRTPD128_MASK,
29415 IX86_BUILTIN_SQRTPS256_MASK,
29416 IX86_BUILTIN_SQRTPS128_MASK,
29417 IX86_BUILTIN_PADDB128_MASK,
29418 IX86_BUILTIN_PADDW128_MASK,
29419 IX86_BUILTIN_PADDD128_MASK,
29420 IX86_BUILTIN_PADDQ128_MASK,
29421 IX86_BUILTIN_PSUBB128_MASK,
29422 IX86_BUILTIN_PSUBW128_MASK,
29423 IX86_BUILTIN_PSUBD128_MASK,
29424 IX86_BUILTIN_PSUBQ128_MASK,
29425 IX86_BUILTIN_PADDSB128_MASK,
29426 IX86_BUILTIN_PADDSW128_MASK,
29427 IX86_BUILTIN_PSUBSB128_MASK,
29428 IX86_BUILTIN_PSUBSW128_MASK,
29429 IX86_BUILTIN_PADDUSB128_MASK,
29430 IX86_BUILTIN_PADDUSW128_MASK,
29431 IX86_BUILTIN_PSUBUSB128_MASK,
29432 IX86_BUILTIN_PSUBUSW128_MASK,
29433 IX86_BUILTIN_PADDB256_MASK,
29434 IX86_BUILTIN_PADDW256_MASK,
29435 IX86_BUILTIN_PADDD256_MASK,
29436 IX86_BUILTIN_PADDQ256_MASK,
29437 IX86_BUILTIN_PADDSB256_MASK,
29438 IX86_BUILTIN_PADDSW256_MASK,
29439 IX86_BUILTIN_PADDUSB256_MASK,
29440 IX86_BUILTIN_PADDUSW256_MASK,
29441 IX86_BUILTIN_PSUBB256_MASK,
29442 IX86_BUILTIN_PSUBW256_MASK,
29443 IX86_BUILTIN_PSUBD256_MASK,
29444 IX86_BUILTIN_PSUBQ256_MASK,
29445 IX86_BUILTIN_PSUBSB256_MASK,
29446 IX86_BUILTIN_PSUBSW256_MASK,
29447 IX86_BUILTIN_PSUBUSB256_MASK,
29448 IX86_BUILTIN_PSUBUSW256_MASK,
29449 IX86_BUILTIN_SHUF_F64x2_256,
29450 IX86_BUILTIN_SHUF_I64x2_256,
29451 IX86_BUILTIN_SHUF_I32x4_256,
29452 IX86_BUILTIN_SHUF_F32x4_256,
29453 IX86_BUILTIN_PMOVWB128,
29454 IX86_BUILTIN_PMOVWB256,
29455 IX86_BUILTIN_PMOVSWB128,
29456 IX86_BUILTIN_PMOVSWB256,
29457 IX86_BUILTIN_PMOVUSWB128,
29458 IX86_BUILTIN_PMOVUSWB256,
29459 IX86_BUILTIN_PMOVDB128,
29460 IX86_BUILTIN_PMOVDB256,
29461 IX86_BUILTIN_PMOVSDB128,
29462 IX86_BUILTIN_PMOVSDB256,
29463 IX86_BUILTIN_PMOVUSDB128,
29464 IX86_BUILTIN_PMOVUSDB256,
29465 IX86_BUILTIN_PMOVDW128,
29466 IX86_BUILTIN_PMOVDW256,
29467 IX86_BUILTIN_PMOVSDW128,
29468 IX86_BUILTIN_PMOVSDW256,
29469 IX86_BUILTIN_PMOVUSDW128,
29470 IX86_BUILTIN_PMOVUSDW256,
29471 IX86_BUILTIN_PMOVQB128,
29472 IX86_BUILTIN_PMOVQB256,
29473 IX86_BUILTIN_PMOVSQB128,
29474 IX86_BUILTIN_PMOVSQB256,
29475 IX86_BUILTIN_PMOVUSQB128,
29476 IX86_BUILTIN_PMOVUSQB256,
29477 IX86_BUILTIN_PMOVQW128,
29478 IX86_BUILTIN_PMOVQW256,
29479 IX86_BUILTIN_PMOVSQW128,
29480 IX86_BUILTIN_PMOVSQW256,
29481 IX86_BUILTIN_PMOVUSQW128,
29482 IX86_BUILTIN_PMOVUSQW256,
29483 IX86_BUILTIN_PMOVQD128,
29484 IX86_BUILTIN_PMOVQD256,
29485 IX86_BUILTIN_PMOVSQD128,
29486 IX86_BUILTIN_PMOVSQD256,
29487 IX86_BUILTIN_PMOVUSQD128,
29488 IX86_BUILTIN_PMOVUSQD256,
29489 IX86_BUILTIN_RANGEPD256,
29490 IX86_BUILTIN_RANGEPD128,
29491 IX86_BUILTIN_RANGEPS256,
29492 IX86_BUILTIN_RANGEPS128,
29493 IX86_BUILTIN_GETEXPPS256,
29494 IX86_BUILTIN_GETEXPPD256,
29495 IX86_BUILTIN_GETEXPPS128,
29496 IX86_BUILTIN_GETEXPPD128,
29497 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29498 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29499 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29500 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29501 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29502 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29503 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29504 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29505 IX86_BUILTIN_PABSQ256,
29506 IX86_BUILTIN_PABSQ128,
29507 IX86_BUILTIN_PABSD256_MASK,
29508 IX86_BUILTIN_PABSD128_MASK,
29509 IX86_BUILTIN_PMULHRSW256_MASK,
29510 IX86_BUILTIN_PMULHRSW128_MASK,
29511 IX86_BUILTIN_PMULHUW128_MASK,
29512 IX86_BUILTIN_PMULHUW256_MASK,
29513 IX86_BUILTIN_PMULHW256_MASK,
29514 IX86_BUILTIN_PMULHW128_MASK,
29515 IX86_BUILTIN_PMULLW256_MASK,
29516 IX86_BUILTIN_PMULLW128_MASK,
29517 IX86_BUILTIN_PMULLQ256,
29518 IX86_BUILTIN_PMULLQ128,
29519 IX86_BUILTIN_ANDPD256_MASK,
29520 IX86_BUILTIN_ANDPD128_MASK,
29521 IX86_BUILTIN_ANDPS256_MASK,
29522 IX86_BUILTIN_ANDPS128_MASK,
29523 IX86_BUILTIN_ANDNPD256_MASK,
29524 IX86_BUILTIN_ANDNPD128_MASK,
29525 IX86_BUILTIN_ANDNPS256_MASK,
29526 IX86_BUILTIN_ANDNPS128_MASK,
29527 IX86_BUILTIN_PSLLWI128_MASK,
29528 IX86_BUILTIN_PSLLDI128_MASK,
29529 IX86_BUILTIN_PSLLQI128_MASK,
29530 IX86_BUILTIN_PSLLW128_MASK,
29531 IX86_BUILTIN_PSLLD128_MASK,
29532 IX86_BUILTIN_PSLLQ128_MASK,
29533   IX86_BUILTIN_PSLLWI256_MASK,
29534 IX86_BUILTIN_PSLLW256_MASK,
29535 IX86_BUILTIN_PSLLDI256_MASK,
29536 IX86_BUILTIN_PSLLD256_MASK,
29537 IX86_BUILTIN_PSLLQI256_MASK,
29538 IX86_BUILTIN_PSLLQ256_MASK,
29539 IX86_BUILTIN_PSRADI128_MASK,
29540 IX86_BUILTIN_PSRAD128_MASK,
29541 IX86_BUILTIN_PSRADI256_MASK,
29542 IX86_BUILTIN_PSRAD256_MASK,
29543 IX86_BUILTIN_PSRAQI128_MASK,
29544 IX86_BUILTIN_PSRAQ128_MASK,
29545 IX86_BUILTIN_PSRAQI256_MASK,
29546 IX86_BUILTIN_PSRAQ256_MASK,
29547 IX86_BUILTIN_PANDD256,
29548 IX86_BUILTIN_PANDD128,
29549 IX86_BUILTIN_PSRLDI128_MASK,
29550 IX86_BUILTIN_PSRLD128_MASK,
29551 IX86_BUILTIN_PSRLDI256_MASK,
29552 IX86_BUILTIN_PSRLD256_MASK,
29553 IX86_BUILTIN_PSRLQI128_MASK,
29554 IX86_BUILTIN_PSRLQ128_MASK,
29555 IX86_BUILTIN_PSRLQI256_MASK,
29556 IX86_BUILTIN_PSRLQ256_MASK,
29557 IX86_BUILTIN_PANDQ256,
29558 IX86_BUILTIN_PANDQ128,
29559 IX86_BUILTIN_PANDND256,
29560 IX86_BUILTIN_PANDND128,
29561 IX86_BUILTIN_PANDNQ256,
29562 IX86_BUILTIN_PANDNQ128,
29563 IX86_BUILTIN_PORD256,
29564 IX86_BUILTIN_PORD128,
29565 IX86_BUILTIN_PORQ256,
29566 IX86_BUILTIN_PORQ128,
29567 IX86_BUILTIN_PXORD256,
29568 IX86_BUILTIN_PXORD128,
29569 IX86_BUILTIN_PXORQ256,
29570 IX86_BUILTIN_PXORQ128,
29571 IX86_BUILTIN_PACKSSWB256_MASK,
29572 IX86_BUILTIN_PACKSSWB128_MASK,
29573 IX86_BUILTIN_PACKUSWB256_MASK,
29574 IX86_BUILTIN_PACKUSWB128_MASK,
29575 IX86_BUILTIN_RNDSCALEPS256,
29576 IX86_BUILTIN_RNDSCALEPD256,
29577 IX86_BUILTIN_RNDSCALEPS128,
29578 IX86_BUILTIN_RNDSCALEPD128,
29579 IX86_BUILTIN_VTERNLOGQ256_MASK,
29580 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29581 IX86_BUILTIN_VTERNLOGD256_MASK,
29582 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29583 IX86_BUILTIN_VTERNLOGQ128_MASK,
29584 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29585 IX86_BUILTIN_VTERNLOGD128_MASK,
29586 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29587 IX86_BUILTIN_SCALEFPD256,
29588 IX86_BUILTIN_SCALEFPS256,
29589 IX86_BUILTIN_SCALEFPD128,
29590 IX86_BUILTIN_SCALEFPS128,
29591 IX86_BUILTIN_VFMADDPD256_MASK,
29592 IX86_BUILTIN_VFMADDPD256_MASK3,
29593 IX86_BUILTIN_VFMADDPD256_MASKZ,
29594 IX86_BUILTIN_VFMADDPD128_MASK,
29595 IX86_BUILTIN_VFMADDPD128_MASK3,
29596 IX86_BUILTIN_VFMADDPD128_MASKZ,
29597 IX86_BUILTIN_VFMADDPS256_MASK,
29598 IX86_BUILTIN_VFMADDPS256_MASK3,
29599 IX86_BUILTIN_VFMADDPS256_MASKZ,
29600 IX86_BUILTIN_VFMADDPS128_MASK,
29601 IX86_BUILTIN_VFMADDPS128_MASK3,
29602 IX86_BUILTIN_VFMADDPS128_MASKZ,
29603 IX86_BUILTIN_VFMSUBPD256_MASK3,
29604 IX86_BUILTIN_VFMSUBPD128_MASK3,
29605 IX86_BUILTIN_VFMSUBPS256_MASK3,
29606 IX86_BUILTIN_VFMSUBPS128_MASK3,
29607 IX86_BUILTIN_VFNMADDPD256_MASK,
29608 IX86_BUILTIN_VFNMADDPD128_MASK,
29609 IX86_BUILTIN_VFNMADDPS256_MASK,
29610 IX86_BUILTIN_VFNMADDPS128_MASK,
29611 IX86_BUILTIN_VFNMSUBPD256_MASK,
29612 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29613 IX86_BUILTIN_VFNMSUBPD128_MASK,
29614 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29615 IX86_BUILTIN_VFNMSUBPS256_MASK,
29616 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29617 IX86_BUILTIN_VFNMSUBPS128_MASK,
29618 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29619 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29620 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29621 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29622 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29623 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29624 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29625 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29626 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29627 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29628 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29629 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29630 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29631 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29632 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29633 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29634 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29635 IX86_BUILTIN_INSERTF64X2_256,
29636 IX86_BUILTIN_INSERTI64X2_256,
29637 IX86_BUILTIN_PSRAVV16HI,
29638 IX86_BUILTIN_PSRAVV8HI,
29639 IX86_BUILTIN_PMADDUBSW256_MASK,
29640 IX86_BUILTIN_PMADDUBSW128_MASK,
29641 IX86_BUILTIN_PMADDWD256_MASK,
29642 IX86_BUILTIN_PMADDWD128_MASK,
29643 IX86_BUILTIN_PSRLVV16HI,
29644 IX86_BUILTIN_PSRLVV8HI,
29645 IX86_BUILTIN_CVTPS2DQ256_MASK,
29646 IX86_BUILTIN_CVTPS2DQ128_MASK,
29647 IX86_BUILTIN_CVTPS2UDQ256,
29648 IX86_BUILTIN_CVTPS2UDQ128,
29649 IX86_BUILTIN_CVTPS2QQ256,
29650 IX86_BUILTIN_CVTPS2QQ128,
29651 IX86_BUILTIN_CVTPS2UQQ256,
29652 IX86_BUILTIN_CVTPS2UQQ128,
29653 IX86_BUILTIN_GETMANTPS256,
29654 IX86_BUILTIN_GETMANTPS128,
29655 IX86_BUILTIN_GETMANTPD256,
29656 IX86_BUILTIN_GETMANTPD128,
29657 IX86_BUILTIN_MOVDDUP256_MASK,
29658 IX86_BUILTIN_MOVDDUP128_MASK,
29659 IX86_BUILTIN_MOVSHDUP256_MASK,
29660 IX86_BUILTIN_MOVSHDUP128_MASK,
29661 IX86_BUILTIN_MOVSLDUP256_MASK,
29662 IX86_BUILTIN_MOVSLDUP128_MASK,
29663 IX86_BUILTIN_CVTQQ2PS256,
29664 IX86_BUILTIN_CVTQQ2PS128,
29665 IX86_BUILTIN_CVTUQQ2PS256,
29666 IX86_BUILTIN_CVTUQQ2PS128,
29667 IX86_BUILTIN_CVTQQ2PD256,
29668 IX86_BUILTIN_CVTQQ2PD128,
29669 IX86_BUILTIN_CVTUQQ2PD256,
29670 IX86_BUILTIN_CVTUQQ2PD128,
29671 IX86_BUILTIN_VPERMT2VARQ256,
29672 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29673 IX86_BUILTIN_VPERMT2VARD256,
29674 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29675 IX86_BUILTIN_VPERMI2VARQ256,
29676 IX86_BUILTIN_VPERMI2VARD256,
29677 IX86_BUILTIN_VPERMT2VARPD256,
29678 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29679 IX86_BUILTIN_VPERMT2VARPS256,
29680 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29681 IX86_BUILTIN_VPERMI2VARPD256,
29682 IX86_BUILTIN_VPERMI2VARPS256,
29683 IX86_BUILTIN_VPERMT2VARQ128,
29684 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29685 IX86_BUILTIN_VPERMT2VARD128,
29686 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29687 IX86_BUILTIN_VPERMI2VARQ128,
29688 IX86_BUILTIN_VPERMI2VARD128,
29689 IX86_BUILTIN_VPERMT2VARPD128,
29690 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29691 IX86_BUILTIN_VPERMT2VARPS128,
29692 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29693 IX86_BUILTIN_VPERMI2VARPD128,
29694 IX86_BUILTIN_VPERMI2VARPS128,
29695 IX86_BUILTIN_PSHUFB256_MASK,
29696 IX86_BUILTIN_PSHUFB128_MASK,
29697 IX86_BUILTIN_PSHUFHW256_MASK,
29698 IX86_BUILTIN_PSHUFHW128_MASK,
29699 IX86_BUILTIN_PSHUFLW256_MASK,
29700 IX86_BUILTIN_PSHUFLW128_MASK,
29701 IX86_BUILTIN_PSHUFD256_MASK,
29702 IX86_BUILTIN_PSHUFD128_MASK,
29703 IX86_BUILTIN_SHUFPD256_MASK,
29704 IX86_BUILTIN_SHUFPD128_MASK,
29705 IX86_BUILTIN_SHUFPS256_MASK,
29706 IX86_BUILTIN_SHUFPS128_MASK,
29707 IX86_BUILTIN_PROLVQ256,
29708 IX86_BUILTIN_PROLVQ128,
29709 IX86_BUILTIN_PROLQ256,
29710 IX86_BUILTIN_PROLQ128,
29711 IX86_BUILTIN_PRORVQ256,
29712 IX86_BUILTIN_PRORVQ128,
29713 IX86_BUILTIN_PRORQ256,
29714 IX86_BUILTIN_PRORQ128,
29715 IX86_BUILTIN_PSRAVQ128,
29716 IX86_BUILTIN_PSRAVQ256,
29717 IX86_BUILTIN_PSLLVV4DI_MASK,
29718 IX86_BUILTIN_PSLLVV2DI_MASK,
29719 IX86_BUILTIN_PSLLVV8SI_MASK,
29720 IX86_BUILTIN_PSLLVV4SI_MASK,
29721 IX86_BUILTIN_PSRAVV8SI_MASK,
29722 IX86_BUILTIN_PSRAVV4SI_MASK,
29723 IX86_BUILTIN_PSRLVV4DI_MASK,
29724 IX86_BUILTIN_PSRLVV2DI_MASK,
29725 IX86_BUILTIN_PSRLVV8SI_MASK,
29726 IX86_BUILTIN_PSRLVV4SI_MASK,
29727 IX86_BUILTIN_PSRAWI256_MASK,
29728 IX86_BUILTIN_PSRAW256_MASK,
29729 IX86_BUILTIN_PSRAWI128_MASK,
29730 IX86_BUILTIN_PSRAW128_MASK,
29731 IX86_BUILTIN_PSRLWI256_MASK,
29732 IX86_BUILTIN_PSRLW256_MASK,
29733 IX86_BUILTIN_PSRLWI128_MASK,
29734 IX86_BUILTIN_PSRLW128_MASK,
29735 IX86_BUILTIN_PRORVD256,
29736 IX86_BUILTIN_PROLVD256,
29737 IX86_BUILTIN_PRORD256,
29738 IX86_BUILTIN_PROLD256,
29739 IX86_BUILTIN_PRORVD128,
29740 IX86_BUILTIN_PROLVD128,
29741 IX86_BUILTIN_PRORD128,
29742 IX86_BUILTIN_PROLD128,
29743 IX86_BUILTIN_FPCLASSPD256,
29744 IX86_BUILTIN_FPCLASSPD128,
29745 IX86_BUILTIN_FPCLASSSD,
29746 IX86_BUILTIN_FPCLASSPS256,
29747 IX86_BUILTIN_FPCLASSPS128,
29748 IX86_BUILTIN_FPCLASSSS,
29749 IX86_BUILTIN_CVTB2MASK128,
29750 IX86_BUILTIN_CVTB2MASK256,
29751 IX86_BUILTIN_CVTW2MASK128,
29752 IX86_BUILTIN_CVTW2MASK256,
29753 IX86_BUILTIN_CVTD2MASK128,
29754 IX86_BUILTIN_CVTD2MASK256,
29755 IX86_BUILTIN_CVTQ2MASK128,
29756 IX86_BUILTIN_CVTQ2MASK256,
29757 IX86_BUILTIN_CVTMASK2B128,
29758 IX86_BUILTIN_CVTMASK2B256,
29759 IX86_BUILTIN_CVTMASK2W128,
29760 IX86_BUILTIN_CVTMASK2W256,
29761 IX86_BUILTIN_CVTMASK2D128,
29762 IX86_BUILTIN_CVTMASK2D256,
29763 IX86_BUILTIN_CVTMASK2Q128,
29764 IX86_BUILTIN_CVTMASK2Q256,
29765 IX86_BUILTIN_PCMPEQB128_MASK,
29766 IX86_BUILTIN_PCMPEQB256_MASK,
29767 IX86_BUILTIN_PCMPEQW128_MASK,
29768 IX86_BUILTIN_PCMPEQW256_MASK,
29769 IX86_BUILTIN_PCMPEQD128_MASK,
29770 IX86_BUILTIN_PCMPEQD256_MASK,
29771 IX86_BUILTIN_PCMPEQQ128_MASK,
29772 IX86_BUILTIN_PCMPEQQ256_MASK,
29773 IX86_BUILTIN_PCMPGTB128_MASK,
29774 IX86_BUILTIN_PCMPGTB256_MASK,
29775 IX86_BUILTIN_PCMPGTW128_MASK,
29776 IX86_BUILTIN_PCMPGTW256_MASK,
29777 IX86_BUILTIN_PCMPGTD128_MASK,
29778 IX86_BUILTIN_PCMPGTD256_MASK,
29779 IX86_BUILTIN_PCMPGTQ128_MASK,
29780 IX86_BUILTIN_PCMPGTQ256_MASK,
29781 IX86_BUILTIN_PTESTMB128,
29782 IX86_BUILTIN_PTESTMB256,
29783 IX86_BUILTIN_PTESTMW128,
29784 IX86_BUILTIN_PTESTMW256,
29785 IX86_BUILTIN_PTESTMD128,
29786 IX86_BUILTIN_PTESTMD256,
29787 IX86_BUILTIN_PTESTMQ128,
29788 IX86_BUILTIN_PTESTMQ256,
29789 IX86_BUILTIN_PTESTNMB128,
29790 IX86_BUILTIN_PTESTNMB256,
29791 IX86_BUILTIN_PTESTNMW128,
29792 IX86_BUILTIN_PTESTNMW256,
29793 IX86_BUILTIN_PTESTNMD128,
29794 IX86_BUILTIN_PTESTNMD256,
29795 IX86_BUILTIN_PTESTNMQ128,
29796 IX86_BUILTIN_PTESTNMQ256,
29797 IX86_BUILTIN_PBROADCASTMB128,
29798 IX86_BUILTIN_PBROADCASTMB256,
29799 IX86_BUILTIN_PBROADCASTMW128,
29800 IX86_BUILTIN_PBROADCASTMW256,
29801 IX86_BUILTIN_COMPRESSPD256,
29802 IX86_BUILTIN_COMPRESSPD128,
29803 IX86_BUILTIN_COMPRESSPS256,
29804 IX86_BUILTIN_COMPRESSPS128,
29805 IX86_BUILTIN_PCOMPRESSQ256,
29806 IX86_BUILTIN_PCOMPRESSQ128,
29807 IX86_BUILTIN_PCOMPRESSD256,
29808 IX86_BUILTIN_PCOMPRESSD128,
29809 IX86_BUILTIN_EXPANDPD256,
29810 IX86_BUILTIN_EXPANDPD128,
29811 IX86_BUILTIN_EXPANDPS256,
29812 IX86_BUILTIN_EXPANDPS128,
29813 IX86_BUILTIN_PEXPANDQ256,
29814 IX86_BUILTIN_PEXPANDQ128,
29815 IX86_BUILTIN_PEXPANDD256,
29816 IX86_BUILTIN_PEXPANDD128,
29817 IX86_BUILTIN_EXPANDPD256Z,
29818 IX86_BUILTIN_EXPANDPD128Z,
29819 IX86_BUILTIN_EXPANDPS256Z,
29820 IX86_BUILTIN_EXPANDPS128Z,
29821 IX86_BUILTIN_PEXPANDQ256Z,
29822 IX86_BUILTIN_PEXPANDQ128Z,
29823 IX86_BUILTIN_PEXPANDD256Z,
29824 IX86_BUILTIN_PEXPANDD128Z,
29825 IX86_BUILTIN_PMAXSD256_MASK,
29826 IX86_BUILTIN_PMINSD256_MASK,
29827 IX86_BUILTIN_PMAXUD256_MASK,
29828 IX86_BUILTIN_PMINUD256_MASK,
29829 IX86_BUILTIN_PMAXSD128_MASK,
29830 IX86_BUILTIN_PMINSD128_MASK,
29831 IX86_BUILTIN_PMAXUD128_MASK,
29832 IX86_BUILTIN_PMINUD128_MASK,
29833 IX86_BUILTIN_PMAXSQ256_MASK,
29834 IX86_BUILTIN_PMINSQ256_MASK,
29835 IX86_BUILTIN_PMAXUQ256_MASK,
29836 IX86_BUILTIN_PMINUQ256_MASK,
29837 IX86_BUILTIN_PMAXSQ128_MASK,
29838 IX86_BUILTIN_PMINSQ128_MASK,
29839 IX86_BUILTIN_PMAXUQ128_MASK,
29840 IX86_BUILTIN_PMINUQ128_MASK,
29841 IX86_BUILTIN_PMINSB256_MASK,
29842 IX86_BUILTIN_PMINUB256_MASK,
29843 IX86_BUILTIN_PMAXSB256_MASK,
29844 IX86_BUILTIN_PMAXUB256_MASK,
29845 IX86_BUILTIN_PMINSB128_MASK,
29846 IX86_BUILTIN_PMINUB128_MASK,
29847 IX86_BUILTIN_PMAXSB128_MASK,
29848 IX86_BUILTIN_PMAXUB128_MASK,
29849 IX86_BUILTIN_PMINSW256_MASK,
29850 IX86_BUILTIN_PMINUW256_MASK,
29851 IX86_BUILTIN_PMAXSW256_MASK,
29852 IX86_BUILTIN_PMAXUW256_MASK,
29853 IX86_BUILTIN_PMINSW128_MASK,
29854 IX86_BUILTIN_PMINUW128_MASK,
29855 IX86_BUILTIN_PMAXSW128_MASK,
29856 IX86_BUILTIN_PMAXUW128_MASK,
29857 IX86_BUILTIN_VPCONFLICTQ256,
29858 IX86_BUILTIN_VPCONFLICTD256,
29859 IX86_BUILTIN_VPCLZCNTQ256,
29860 IX86_BUILTIN_VPCLZCNTD256,
29861 IX86_BUILTIN_UNPCKHPD256_MASK,
29862 IX86_BUILTIN_UNPCKHPD128_MASK,
29863 IX86_BUILTIN_UNPCKHPS256_MASK,
29864 IX86_BUILTIN_UNPCKHPS128_MASK,
29865 IX86_BUILTIN_UNPCKLPD256_MASK,
29866 IX86_BUILTIN_UNPCKLPD128_MASK,
29867 IX86_BUILTIN_UNPCKLPS256_MASK,
29868 IX86_BUILTIN_VPCONFLICTQ128,
29869 IX86_BUILTIN_VPCONFLICTD128,
29870 IX86_BUILTIN_VPCLZCNTQ128,
29871 IX86_BUILTIN_VPCLZCNTD128,
29872 IX86_BUILTIN_UNPCKLPS128_MASK,
29873 IX86_BUILTIN_ALIGND256,
29874 IX86_BUILTIN_ALIGNQ256,
29875 IX86_BUILTIN_ALIGND128,
29876 IX86_BUILTIN_ALIGNQ128,
29877 IX86_BUILTIN_CVTPS2PH256_MASK,
29878 IX86_BUILTIN_CVTPS2PH_MASK,
29879 IX86_BUILTIN_CVTPH2PS_MASK,
29880 IX86_BUILTIN_CVTPH2PS256_MASK,
29881 IX86_BUILTIN_PUNPCKHDQ128_MASK,
29882 IX86_BUILTIN_PUNPCKHDQ256_MASK,
29883 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
29884 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
29885 IX86_BUILTIN_PUNPCKLDQ128_MASK,
29886 IX86_BUILTIN_PUNPCKLDQ256_MASK,
29887 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
29888 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
29889 IX86_BUILTIN_PUNPCKHBW128_MASK,
29890 IX86_BUILTIN_PUNPCKHBW256_MASK,
29891 IX86_BUILTIN_PUNPCKHWD128_MASK,
29892 IX86_BUILTIN_PUNPCKHWD256_MASK,
29893 IX86_BUILTIN_PUNPCKLBW128_MASK,
29894 IX86_BUILTIN_PUNPCKLBW256_MASK,
29895 IX86_BUILTIN_PUNPCKLWD128_MASK,
29896 IX86_BUILTIN_PUNPCKLWD256_MASK,
29897 IX86_BUILTIN_PSLLVV16HI,
29898 IX86_BUILTIN_PSLLVV8HI,
29899 IX86_BUILTIN_PACKSSDW256_MASK,
29900 IX86_BUILTIN_PACKSSDW128_MASK,
29901 IX86_BUILTIN_PACKUSDW256_MASK,
29902 IX86_BUILTIN_PACKUSDW128_MASK,
29903 IX86_BUILTIN_PAVGB256_MASK,
29904 IX86_BUILTIN_PAVGW256_MASK,
29905 IX86_BUILTIN_PAVGB128_MASK,
29906 IX86_BUILTIN_PAVGW128_MASK,
29907 IX86_BUILTIN_VPERMVARSF256_MASK,
29908 IX86_BUILTIN_VPERMVARDF256_MASK,
29909 IX86_BUILTIN_VPERMDF256_MASK,
29910 IX86_BUILTIN_PABSB256_MASK,
29911 IX86_BUILTIN_PABSB128_MASK,
29912 IX86_BUILTIN_PABSW256_MASK,
29913 IX86_BUILTIN_PABSW128_MASK,
29914 IX86_BUILTIN_VPERMILVARPD_MASK,
29915 IX86_BUILTIN_VPERMILVARPS_MASK,
29916 IX86_BUILTIN_VPERMILVARPD256_MASK,
29917 IX86_BUILTIN_VPERMILVARPS256_MASK,
29918 IX86_BUILTIN_VPERMILPD_MASK,
29919 IX86_BUILTIN_VPERMILPS_MASK,
29920 IX86_BUILTIN_VPERMILPD256_MASK,
29921 IX86_BUILTIN_VPERMILPS256_MASK,
29922 IX86_BUILTIN_BLENDMQ256,
29923 IX86_BUILTIN_BLENDMD256,
29924 IX86_BUILTIN_BLENDMPD256,
29925 IX86_BUILTIN_BLENDMPS256,
29926 IX86_BUILTIN_BLENDMQ128,
29927 IX86_BUILTIN_BLENDMD128,
29928 IX86_BUILTIN_BLENDMPD128,
29929 IX86_BUILTIN_BLENDMPS128,
29930 IX86_BUILTIN_BLENDMW256,
29931 IX86_BUILTIN_BLENDMB256,
29932 IX86_BUILTIN_BLENDMW128,
29933 IX86_BUILTIN_BLENDMB128,
29934 IX86_BUILTIN_PMULLD256_MASK,
29935 IX86_BUILTIN_PMULLD128_MASK,
29936 IX86_BUILTIN_PMULUDQ256_MASK,
29937 IX86_BUILTIN_PMULDQ256_MASK,
29938 IX86_BUILTIN_PMULDQ128_MASK,
29939 IX86_BUILTIN_PMULUDQ128_MASK,
29940 IX86_BUILTIN_CVTPD2PS256_MASK,
29941 IX86_BUILTIN_CVTPD2PS_MASK,
29942 IX86_BUILTIN_VPERMVARSI256_MASK,
29943 IX86_BUILTIN_VPERMVARDI256_MASK,
29944 IX86_BUILTIN_VPERMDI256_MASK,
29945 IX86_BUILTIN_CMPQ256,
29946 IX86_BUILTIN_CMPD256,
29947 IX86_BUILTIN_UCMPQ256,
29948 IX86_BUILTIN_UCMPD256,
29949 IX86_BUILTIN_CMPB256,
29950 IX86_BUILTIN_CMPW256,
29951 IX86_BUILTIN_UCMPB256,
29952 IX86_BUILTIN_UCMPW256,
29953 IX86_BUILTIN_CMPPD256_MASK,
29954 IX86_BUILTIN_CMPPS256_MASK,
29955 IX86_BUILTIN_CMPQ128,
29956 IX86_BUILTIN_CMPD128,
29957 IX86_BUILTIN_UCMPQ128,
29958 IX86_BUILTIN_UCMPD128,
29959 IX86_BUILTIN_CMPB128,
29960 IX86_BUILTIN_CMPW128,
29961 IX86_BUILTIN_UCMPB128,
29962 IX86_BUILTIN_UCMPW128,
29963 IX86_BUILTIN_CMPPD128_MASK,
29964 IX86_BUILTIN_CMPPS128_MASK,
29965
29966 IX86_BUILTIN_GATHER3SIV8SF,
29967 IX86_BUILTIN_GATHER3SIV4SF,
29968 IX86_BUILTIN_GATHER3SIV4DF,
29969 IX86_BUILTIN_GATHER3SIV2DF,
29970 IX86_BUILTIN_GATHER3DIV8SF,
29971 IX86_BUILTIN_GATHER3DIV4SF,
29972 IX86_BUILTIN_GATHER3DIV4DF,
29973 IX86_BUILTIN_GATHER3DIV2DF,
29974 IX86_BUILTIN_GATHER3SIV8SI,
29975 IX86_BUILTIN_GATHER3SIV4SI,
29976 IX86_BUILTIN_GATHER3SIV4DI,
29977 IX86_BUILTIN_GATHER3SIV2DI,
29978 IX86_BUILTIN_GATHER3DIV8SI,
29979 IX86_BUILTIN_GATHER3DIV4SI,
29980 IX86_BUILTIN_GATHER3DIV4DI,
29981 IX86_BUILTIN_GATHER3DIV2DI,
29982 IX86_BUILTIN_SCATTERSIV8SF,
29983 IX86_BUILTIN_SCATTERSIV4SF,
29984 IX86_BUILTIN_SCATTERSIV4DF,
29985 IX86_BUILTIN_SCATTERSIV2DF,
29986 IX86_BUILTIN_SCATTERDIV8SF,
29987 IX86_BUILTIN_SCATTERDIV4SF,
29988 IX86_BUILTIN_SCATTERDIV4DF,
29989 IX86_BUILTIN_SCATTERDIV2DF,
29990 IX86_BUILTIN_SCATTERSIV8SI,
29991 IX86_BUILTIN_SCATTERSIV4SI,
29992 IX86_BUILTIN_SCATTERSIV4DI,
29993 IX86_BUILTIN_SCATTERSIV2DI,
29994 IX86_BUILTIN_SCATTERDIV8SI,
29995 IX86_BUILTIN_SCATTERDIV4SI,
29996 IX86_BUILTIN_SCATTERDIV4DI,
29997 IX86_BUILTIN_SCATTERDIV2DI,
29998
29999 /* AVX512DQ. */
30000 IX86_BUILTIN_RANGESD128,
30001 IX86_BUILTIN_RANGESS128,
30002 IX86_BUILTIN_KUNPCKWD,
30003 IX86_BUILTIN_KUNPCKDQ,
30004 IX86_BUILTIN_BROADCASTF32x2_512,
30005 IX86_BUILTIN_BROADCASTI32x2_512,
30006 IX86_BUILTIN_BROADCASTF64X2_512,
30007 IX86_BUILTIN_BROADCASTI64X2_512,
30008 IX86_BUILTIN_BROADCASTF32X8_512,
30009 IX86_BUILTIN_BROADCASTI32X8_512,
30010 IX86_BUILTIN_EXTRACTF64X2_512,
30011 IX86_BUILTIN_EXTRACTF32X8,
30012 IX86_BUILTIN_EXTRACTI64X2_512,
30013 IX86_BUILTIN_EXTRACTI32X8,
30014 IX86_BUILTIN_REDUCEPD512_MASK,
30015 IX86_BUILTIN_REDUCEPS512_MASK,
30016 IX86_BUILTIN_PMULLQ512,
30017 IX86_BUILTIN_XORPD512,
30018 IX86_BUILTIN_XORPS512,
30019 IX86_BUILTIN_ORPD512,
30020 IX86_BUILTIN_ORPS512,
30021 IX86_BUILTIN_ANDPD512,
30022 IX86_BUILTIN_ANDPS512,
30023 IX86_BUILTIN_ANDNPD512,
30024 IX86_BUILTIN_ANDNPS512,
30025 IX86_BUILTIN_INSERTF32X8,
30026 IX86_BUILTIN_INSERTI32X8,
30027 IX86_BUILTIN_INSERTF64X2_512,
30028 IX86_BUILTIN_INSERTI64X2_512,
30029 IX86_BUILTIN_FPCLASSPD512,
30030 IX86_BUILTIN_FPCLASSPS512,
30031 IX86_BUILTIN_CVTD2MASK512,
30032 IX86_BUILTIN_CVTQ2MASK512,
30033 IX86_BUILTIN_CVTMASK2D512,
30034 IX86_BUILTIN_CVTMASK2Q512,
30035 IX86_BUILTIN_CVTPD2QQ512,
30036 IX86_BUILTIN_CVTPS2QQ512,
30037 IX86_BUILTIN_CVTPD2UQQ512,
30038 IX86_BUILTIN_CVTPS2UQQ512,
30039 IX86_BUILTIN_CVTQQ2PS512,
30040 IX86_BUILTIN_CVTUQQ2PS512,
30041 IX86_BUILTIN_CVTQQ2PD512,
30042 IX86_BUILTIN_CVTUQQ2PD512,
30043 IX86_BUILTIN_CVTTPS2QQ512,
30044 IX86_BUILTIN_CVTTPS2UQQ512,
30045 IX86_BUILTIN_CVTTPD2QQ512,
30046 IX86_BUILTIN_CVTTPD2UQQ512,
30047 IX86_BUILTIN_RANGEPS512,
30048 IX86_BUILTIN_RANGEPD512,
30049
30050 /* AVX512BW. */
30051 IX86_BUILTIN_PACKUSDW512,
30052 IX86_BUILTIN_PACKSSDW512,
30053 IX86_BUILTIN_LOADDQUHI512_MASK,
30054 IX86_BUILTIN_LOADDQUQI512_MASK,
30055 IX86_BUILTIN_PSLLDQ512,
30056 IX86_BUILTIN_PSRLDQ512,
30057 IX86_BUILTIN_STOREDQUHI512_MASK,
30058 IX86_BUILTIN_STOREDQUQI512_MASK,
30059 IX86_BUILTIN_PALIGNR512,
30060 IX86_BUILTIN_PALIGNR512_MASK,
30061 IX86_BUILTIN_MOVDQUHI512_MASK,
30062 IX86_BUILTIN_MOVDQUQI512_MASK,
30063 IX86_BUILTIN_PSADBW512,
30064 IX86_BUILTIN_DBPSADBW512,
30065 IX86_BUILTIN_PBROADCASTB512,
30066 IX86_BUILTIN_PBROADCASTB512_GPR,
30067 IX86_BUILTIN_PBROADCASTW512,
30068 IX86_BUILTIN_PBROADCASTW512_GPR,
30069 IX86_BUILTIN_PMOVSXBW512_MASK,
30070 IX86_BUILTIN_PMOVZXBW512_MASK,
30071 IX86_BUILTIN_VPERMVARHI512_MASK,
30072 IX86_BUILTIN_VPERMT2VARHI512,
30073 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
30074 IX86_BUILTIN_VPERMI2VARHI512,
30075 IX86_BUILTIN_PAVGB512,
30076 IX86_BUILTIN_PAVGW512,
30077 IX86_BUILTIN_PADDB512,
30078 IX86_BUILTIN_PSUBB512,
30079 IX86_BUILTIN_PSUBSB512,
30080 IX86_BUILTIN_PADDSB512,
30081 IX86_BUILTIN_PSUBUSB512,
30082 IX86_BUILTIN_PADDUSB512,
30083 IX86_BUILTIN_PSUBW512,
30084 IX86_BUILTIN_PADDW512,
30085 IX86_BUILTIN_PSUBSW512,
30086 IX86_BUILTIN_PADDSW512,
30087 IX86_BUILTIN_PSUBUSW512,
30088 IX86_BUILTIN_PADDUSW512,
30089 IX86_BUILTIN_PMAXUW512,
30090 IX86_BUILTIN_PMAXSW512,
30091 IX86_BUILTIN_PMINUW512,
30092 IX86_BUILTIN_PMINSW512,
30093 IX86_BUILTIN_PMAXUB512,
30094 IX86_BUILTIN_PMAXSB512,
30095 IX86_BUILTIN_PMINUB512,
30096 IX86_BUILTIN_PMINSB512,
30097 IX86_BUILTIN_PMOVWB512,
30098 IX86_BUILTIN_PMOVSWB512,
30099 IX86_BUILTIN_PMOVUSWB512,
30100 IX86_BUILTIN_PMULHRSW512_MASK,
30101 IX86_BUILTIN_PMULHUW512_MASK,
30102 IX86_BUILTIN_PMULHW512_MASK,
30103 IX86_BUILTIN_PMULLW512_MASK,
30104 IX86_BUILTIN_PSLLWI512_MASK,
30105 IX86_BUILTIN_PSLLW512_MASK,
30106 IX86_BUILTIN_PACKSSWB512,
30107 IX86_BUILTIN_PACKUSWB512,
30108 IX86_BUILTIN_PSRAVV32HI,
30109 IX86_BUILTIN_PMADDUBSW512_MASK,
30110 IX86_BUILTIN_PMADDWD512_MASK,
30111 IX86_BUILTIN_PSRLVV32HI,
30112 IX86_BUILTIN_PUNPCKHBW512,
30113 IX86_BUILTIN_PUNPCKHWD512,
30114 IX86_BUILTIN_PUNPCKLBW512,
30115 IX86_BUILTIN_PUNPCKLWD512,
30116 IX86_BUILTIN_PSHUFB512,
30117 IX86_BUILTIN_PSHUFHW512,
30118 IX86_BUILTIN_PSHUFLW512,
30119 IX86_BUILTIN_PSRAWI512,
30120 IX86_BUILTIN_PSRAW512,
30121 IX86_BUILTIN_PSRLWI512,
30122 IX86_BUILTIN_PSRLW512,
30123 IX86_BUILTIN_CVTB2MASK512,
30124 IX86_BUILTIN_CVTW2MASK512,
30125 IX86_BUILTIN_CVTMASK2B512,
30126 IX86_BUILTIN_CVTMASK2W512,
30127 IX86_BUILTIN_PCMPEQB512_MASK,
30128 IX86_BUILTIN_PCMPEQW512_MASK,
30129 IX86_BUILTIN_PCMPGTB512_MASK,
30130 IX86_BUILTIN_PCMPGTW512_MASK,
30131 IX86_BUILTIN_PTESTMB512,
30132 IX86_BUILTIN_PTESTMW512,
30133 IX86_BUILTIN_PTESTNMB512,
30134 IX86_BUILTIN_PTESTNMW512,
30135 IX86_BUILTIN_PSLLVV32HI,
30136 IX86_BUILTIN_PABSB512,
30137 IX86_BUILTIN_PABSW512,
30138 IX86_BUILTIN_BLENDMW512,
30139 IX86_BUILTIN_BLENDMB512,
30140 IX86_BUILTIN_CMPB512,
30141 IX86_BUILTIN_CMPW512,
30142 IX86_BUILTIN_UCMPB512,
30143 IX86_BUILTIN_UCMPW512,
30144
30145   /* Alternate 4- and 8-element gather/scatter for the vectorizer,
30146      where all operands are 32 or 64 bytes wide respectively.  */
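  /* Roughly, and only as a hedged illustration rather than the exact builtin
     prototypes: the plain SIV4DF gather takes a V4SI index for V4DF data,
     while the ALTSIV4DF form takes a V8SI index of the same 32-byte width,
     of which only the low four lanes are used.  The ALT forms exist for the
     case where index elements and data elements have different widths (for
     example a 32-bit index used with 64-bit data), so the index vector and
     the data vector have different element counts.  */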
30147 IX86_BUILTIN_GATHERALTSIV4DF,
30148 IX86_BUILTIN_GATHERALTDIV8SF,
30149 IX86_BUILTIN_GATHERALTSIV4DI,
30150 IX86_BUILTIN_GATHERALTDIV8SI,
30151 IX86_BUILTIN_GATHER3ALTDIV16SF,
30152 IX86_BUILTIN_GATHER3ALTDIV16SI,
30153 IX86_BUILTIN_GATHER3ALTSIV4DF,
30154 IX86_BUILTIN_GATHER3ALTDIV8SF,
30155 IX86_BUILTIN_GATHER3ALTSIV4DI,
30156 IX86_BUILTIN_GATHER3ALTDIV8SI,
30157 IX86_BUILTIN_GATHER3ALTSIV8DF,
30158 IX86_BUILTIN_GATHER3ALTSIV8DI,
30159 IX86_BUILTIN_GATHER3DIV16SF,
30160 IX86_BUILTIN_GATHER3DIV16SI,
30161 IX86_BUILTIN_GATHER3DIV8DF,
30162 IX86_BUILTIN_GATHER3DIV8DI,
30163 IX86_BUILTIN_GATHER3SIV16SF,
30164 IX86_BUILTIN_GATHER3SIV16SI,
30165 IX86_BUILTIN_GATHER3SIV8DF,
30166 IX86_BUILTIN_GATHER3SIV8DI,
30167 IX86_BUILTIN_SCATTERDIV16SF,
30168 IX86_BUILTIN_SCATTERDIV16SI,
30169 IX86_BUILTIN_SCATTERDIV8DF,
30170 IX86_BUILTIN_SCATTERDIV8DI,
30171 IX86_BUILTIN_SCATTERSIV16SF,
30172 IX86_BUILTIN_SCATTERSIV16SI,
30173 IX86_BUILTIN_SCATTERSIV8DF,
30174 IX86_BUILTIN_SCATTERSIV8DI,
30175
30176 /* AVX512PF */
30177 IX86_BUILTIN_GATHERPFQPD,
30178 IX86_BUILTIN_GATHERPFDPS,
30179 IX86_BUILTIN_GATHERPFDPD,
30180 IX86_BUILTIN_GATHERPFQPS,
30181 IX86_BUILTIN_SCATTERPFDPD,
30182 IX86_BUILTIN_SCATTERPFDPS,
30183 IX86_BUILTIN_SCATTERPFQPD,
30184 IX86_BUILTIN_SCATTERPFQPS,
30185
30186 /* AVX-512ER */
30187 IX86_BUILTIN_EXP2PD_MASK,
30188 IX86_BUILTIN_EXP2PS_MASK,
30189 IX86_BUILTIN_EXP2PS,
30190 IX86_BUILTIN_RCP28PD,
30191 IX86_BUILTIN_RCP28PS,
30192 IX86_BUILTIN_RCP28SD,
30193 IX86_BUILTIN_RCP28SS,
30194 IX86_BUILTIN_RSQRT28PD,
30195 IX86_BUILTIN_RSQRT28PS,
30196 IX86_BUILTIN_RSQRT28SD,
30197 IX86_BUILTIN_RSQRT28SS,
30198
30199 /* AVX-512IFMA */
30200 IX86_BUILTIN_VPMADD52LUQ512,
30201 IX86_BUILTIN_VPMADD52HUQ512,
30202 IX86_BUILTIN_VPMADD52LUQ256,
30203 IX86_BUILTIN_VPMADD52HUQ256,
30204 IX86_BUILTIN_VPMADD52LUQ128,
30205 IX86_BUILTIN_VPMADD52HUQ128,
30206 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30207 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30208 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30209 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30210 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30211 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30212
30213 /* AVX-512VBMI */
30214 IX86_BUILTIN_VPMULTISHIFTQB512,
30215 IX86_BUILTIN_VPMULTISHIFTQB256,
30216 IX86_BUILTIN_VPMULTISHIFTQB128,
30217 IX86_BUILTIN_VPERMVARQI512_MASK,
30218 IX86_BUILTIN_VPERMT2VARQI512,
30219 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30220 IX86_BUILTIN_VPERMI2VARQI512,
30221 IX86_BUILTIN_VPERMVARQI256_MASK,
30222 IX86_BUILTIN_VPERMVARQI128_MASK,
30223 IX86_BUILTIN_VPERMT2VARQI256,
30224 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30225 IX86_BUILTIN_VPERMT2VARQI128,
30226 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30227 IX86_BUILTIN_VPERMI2VARQI256,
30228 IX86_BUILTIN_VPERMI2VARQI128,
30229
30230 /* SHA builtins. */
30231 IX86_BUILTIN_SHA1MSG1,
30232 IX86_BUILTIN_SHA1MSG2,
30233 IX86_BUILTIN_SHA1NEXTE,
30234 IX86_BUILTIN_SHA1RNDS4,
30235 IX86_BUILTIN_SHA256MSG1,
30236 IX86_BUILTIN_SHA256MSG2,
30237 IX86_BUILTIN_SHA256RNDS2,
30238
30239 /* CLWB instructions. */
30240 IX86_BUILTIN_CLWB,
30241
30242 /* PCOMMIT instructions. */
30243 IX86_BUILTIN_PCOMMIT,
30244
30245 /* CLFLUSHOPT instructions. */
30246 IX86_BUILTIN_CLFLUSHOPT,
30247
30248 /* TFmode support builtins. */
30249 IX86_BUILTIN_INFQ,
30250 IX86_BUILTIN_HUGE_VALQ,
30251 IX86_BUILTIN_FABSQ,
30252 IX86_BUILTIN_COPYSIGNQ,
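  /* These back the user-visible __float128 helpers __builtin_infq,
     __builtin_huge_valq, __builtin_fabsq and __builtin_copysignq.
     An illustrative use only (the 'q' literal suffix is the GCC
     __float128 extension):

	 __float128 x = __builtin_copysignq (__builtin_infq (), -1.0q);
	 __float128 y = __builtin_fabsq (x);
  */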
30253
30254 /* Vectorizer support builtins. */
30255 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30256 IX86_BUILTIN_CPYSGNPS,
30257 IX86_BUILTIN_CPYSGNPD,
30258 IX86_BUILTIN_CPYSGNPS256,
30259 IX86_BUILTIN_CPYSGNPS512,
30260 IX86_BUILTIN_CPYSGNPD256,
30261 IX86_BUILTIN_CPYSGNPD512,
30262 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30263 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30264
30265
30266 /* FMA4 instructions. */
30267 IX86_BUILTIN_VFMADDSS,
30268 IX86_BUILTIN_VFMADDSD,
30269 IX86_BUILTIN_VFMADDPS,
30270 IX86_BUILTIN_VFMADDPD,
30271 IX86_BUILTIN_VFMADDPS256,
30272 IX86_BUILTIN_VFMADDPD256,
30273 IX86_BUILTIN_VFMADDSUBPS,
30274 IX86_BUILTIN_VFMADDSUBPD,
30275 IX86_BUILTIN_VFMADDSUBPS256,
30276 IX86_BUILTIN_VFMADDSUBPD256,
30277
30278 /* FMA3 instructions. */
30279 IX86_BUILTIN_VFMADDSS3,
30280 IX86_BUILTIN_VFMADDSD3,
30281
30282 /* XOP instructions. */
30283 IX86_BUILTIN_VPCMOV,
30284 IX86_BUILTIN_VPCMOV_V2DI,
30285 IX86_BUILTIN_VPCMOV_V4SI,
30286 IX86_BUILTIN_VPCMOV_V8HI,
30287 IX86_BUILTIN_VPCMOV_V16QI,
30288 IX86_BUILTIN_VPCMOV_V4SF,
30289 IX86_BUILTIN_VPCMOV_V2DF,
30290 IX86_BUILTIN_VPCMOV256,
30291 IX86_BUILTIN_VPCMOV_V4DI256,
30292 IX86_BUILTIN_VPCMOV_V8SI256,
30293 IX86_BUILTIN_VPCMOV_V16HI256,
30294 IX86_BUILTIN_VPCMOV_V32QI256,
30295 IX86_BUILTIN_VPCMOV_V8SF256,
30296 IX86_BUILTIN_VPCMOV_V4DF256,
30297
30298 IX86_BUILTIN_VPPERM,
30299
30300 IX86_BUILTIN_VPMACSSWW,
30301 IX86_BUILTIN_VPMACSWW,
30302 IX86_BUILTIN_VPMACSSWD,
30303 IX86_BUILTIN_VPMACSWD,
30304 IX86_BUILTIN_VPMACSSDD,
30305 IX86_BUILTIN_VPMACSDD,
30306 IX86_BUILTIN_VPMACSSDQL,
30307 IX86_BUILTIN_VPMACSSDQH,
30308 IX86_BUILTIN_VPMACSDQL,
30309 IX86_BUILTIN_VPMACSDQH,
30310 IX86_BUILTIN_VPMADCSSWD,
30311 IX86_BUILTIN_VPMADCSWD,
30312
30313 IX86_BUILTIN_VPHADDBW,
30314 IX86_BUILTIN_VPHADDBD,
30315 IX86_BUILTIN_VPHADDBQ,
30316 IX86_BUILTIN_VPHADDWD,
30317 IX86_BUILTIN_VPHADDWQ,
30318 IX86_BUILTIN_VPHADDDQ,
30319 IX86_BUILTIN_VPHADDUBW,
30320 IX86_BUILTIN_VPHADDUBD,
30321 IX86_BUILTIN_VPHADDUBQ,
30322 IX86_BUILTIN_VPHADDUWD,
30323 IX86_BUILTIN_VPHADDUWQ,
30324 IX86_BUILTIN_VPHADDUDQ,
30325 IX86_BUILTIN_VPHSUBBW,
30326 IX86_BUILTIN_VPHSUBWD,
30327 IX86_BUILTIN_VPHSUBDQ,
30328
30329 IX86_BUILTIN_VPROTB,
30330 IX86_BUILTIN_VPROTW,
30331 IX86_BUILTIN_VPROTD,
30332 IX86_BUILTIN_VPROTQ,
30333 IX86_BUILTIN_VPROTB_IMM,
30334 IX86_BUILTIN_VPROTW_IMM,
30335 IX86_BUILTIN_VPROTD_IMM,
30336 IX86_BUILTIN_VPROTQ_IMM,
30337
30338 IX86_BUILTIN_VPSHLB,
30339 IX86_BUILTIN_VPSHLW,
30340 IX86_BUILTIN_VPSHLD,
30341 IX86_BUILTIN_VPSHLQ,
30342 IX86_BUILTIN_VPSHAB,
30343 IX86_BUILTIN_VPSHAW,
30344 IX86_BUILTIN_VPSHAD,
30345 IX86_BUILTIN_VPSHAQ,
30346
30347 IX86_BUILTIN_VFRCZSS,
30348 IX86_BUILTIN_VFRCZSD,
30349 IX86_BUILTIN_VFRCZPS,
30350 IX86_BUILTIN_VFRCZPD,
30351 IX86_BUILTIN_VFRCZPS256,
30352 IX86_BUILTIN_VFRCZPD256,
30353
30354 IX86_BUILTIN_VPCOMEQUB,
30355 IX86_BUILTIN_VPCOMNEUB,
30356 IX86_BUILTIN_VPCOMLTUB,
30357 IX86_BUILTIN_VPCOMLEUB,
30358 IX86_BUILTIN_VPCOMGTUB,
30359 IX86_BUILTIN_VPCOMGEUB,
30360 IX86_BUILTIN_VPCOMFALSEUB,
30361 IX86_BUILTIN_VPCOMTRUEUB,
30362
30363 IX86_BUILTIN_VPCOMEQUW,
30364 IX86_BUILTIN_VPCOMNEUW,
30365 IX86_BUILTIN_VPCOMLTUW,
30366 IX86_BUILTIN_VPCOMLEUW,
30367 IX86_BUILTIN_VPCOMGTUW,
30368 IX86_BUILTIN_VPCOMGEUW,
30369 IX86_BUILTIN_VPCOMFALSEUW,
30370 IX86_BUILTIN_VPCOMTRUEUW,
30371
30372 IX86_BUILTIN_VPCOMEQUD,
30373 IX86_BUILTIN_VPCOMNEUD,
30374 IX86_BUILTIN_VPCOMLTUD,
30375 IX86_BUILTIN_VPCOMLEUD,
30376 IX86_BUILTIN_VPCOMGTUD,
30377 IX86_BUILTIN_VPCOMGEUD,
30378 IX86_BUILTIN_VPCOMFALSEUD,
30379 IX86_BUILTIN_VPCOMTRUEUD,
30380
30381 IX86_BUILTIN_VPCOMEQUQ,
30382 IX86_BUILTIN_VPCOMNEUQ,
30383 IX86_BUILTIN_VPCOMLTUQ,
30384 IX86_BUILTIN_VPCOMLEUQ,
30385 IX86_BUILTIN_VPCOMGTUQ,
30386 IX86_BUILTIN_VPCOMGEUQ,
30387 IX86_BUILTIN_VPCOMFALSEUQ,
30388 IX86_BUILTIN_VPCOMTRUEUQ,
30389
30390 IX86_BUILTIN_VPCOMEQB,
30391 IX86_BUILTIN_VPCOMNEB,
30392 IX86_BUILTIN_VPCOMLTB,
30393 IX86_BUILTIN_VPCOMLEB,
30394 IX86_BUILTIN_VPCOMGTB,
30395 IX86_BUILTIN_VPCOMGEB,
30396 IX86_BUILTIN_VPCOMFALSEB,
30397 IX86_BUILTIN_VPCOMTRUEB,
30398
30399 IX86_BUILTIN_VPCOMEQW,
30400 IX86_BUILTIN_VPCOMNEW,
30401 IX86_BUILTIN_VPCOMLTW,
30402 IX86_BUILTIN_VPCOMLEW,
30403 IX86_BUILTIN_VPCOMGTW,
30404 IX86_BUILTIN_VPCOMGEW,
30405 IX86_BUILTIN_VPCOMFALSEW,
30406 IX86_BUILTIN_VPCOMTRUEW,
30407
30408 IX86_BUILTIN_VPCOMEQD,
30409 IX86_BUILTIN_VPCOMNED,
30410 IX86_BUILTIN_VPCOMLTD,
30411 IX86_BUILTIN_VPCOMLED,
30412 IX86_BUILTIN_VPCOMGTD,
30413 IX86_BUILTIN_VPCOMGED,
30414 IX86_BUILTIN_VPCOMFALSED,
30415 IX86_BUILTIN_VPCOMTRUED,
30416
30417 IX86_BUILTIN_VPCOMEQQ,
30418 IX86_BUILTIN_VPCOMNEQ,
30419 IX86_BUILTIN_VPCOMLTQ,
30420 IX86_BUILTIN_VPCOMLEQ,
30421 IX86_BUILTIN_VPCOMGTQ,
30422 IX86_BUILTIN_VPCOMGEQ,
30423 IX86_BUILTIN_VPCOMFALSEQ,
30424 IX86_BUILTIN_VPCOMTRUEQ,
30425
30426 /* LWP instructions. */
30427 IX86_BUILTIN_LLWPCB,
30428 IX86_BUILTIN_SLWPCB,
30429 IX86_BUILTIN_LWPVAL32,
30430 IX86_BUILTIN_LWPVAL64,
30431 IX86_BUILTIN_LWPINS32,
30432 IX86_BUILTIN_LWPINS64,
30433
30434 IX86_BUILTIN_CLZS,
30435
30436 /* RTM */
30437 IX86_BUILTIN_XBEGIN,
30438 IX86_BUILTIN_XEND,
30439 IX86_BUILTIN_XABORT,
30440 IX86_BUILTIN_XTEST,
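  /* These are normally reached through the <immintrin.h> RTM intrinsics;
     the usual pattern, shown purely as an illustration, is:

	 unsigned status = _xbegin ();
	 if (status == _XBEGIN_STARTED)
	   {
	     ... transactional region ...
	     _xend ();
	   }
	 else
	   ... fall back; STATUS holds the abort cause ...
  */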
30441
30442 /* MPX */
30443 IX86_BUILTIN_BNDMK,
30444 IX86_BUILTIN_BNDSTX,
30445 IX86_BUILTIN_BNDLDX,
30446 IX86_BUILTIN_BNDCL,
30447 IX86_BUILTIN_BNDCU,
30448 IX86_BUILTIN_BNDRET,
30449 IX86_BUILTIN_BNDNARROW,
30450 IX86_BUILTIN_BNDINT,
30451 IX86_BUILTIN_SIZEOF,
30452 IX86_BUILTIN_BNDLOWER,
30453 IX86_BUILTIN_BNDUPPER,
30454
30455 /* BMI instructions. */
30456 IX86_BUILTIN_BEXTR32,
30457 IX86_BUILTIN_BEXTR64,
30458 IX86_BUILTIN_CTZS,
30459
30460 /* TBM instructions. */
30461 IX86_BUILTIN_BEXTRI32,
30462 IX86_BUILTIN_BEXTRI64,
30463
30464 /* BMI2 instructions. */
30465 IX86_BUILTIN_BZHI32,
30466 IX86_BUILTIN_BZHI64,
30467 IX86_BUILTIN_PDEP32,
30468 IX86_BUILTIN_PDEP64,
30469 IX86_BUILTIN_PEXT32,
30470 IX86_BUILTIN_PEXT64,
30471
30472 /* ADX instructions. */
30473 IX86_BUILTIN_ADDCARRYX32,
30474 IX86_BUILTIN_ADDCARRYX64,
30475
30476 /* SBB instructions. */
30477 IX86_BUILTIN_SBB32,
30478 IX86_BUILTIN_SBB64,
30479
30480 /* FSGSBASE instructions. */
30481 IX86_BUILTIN_RDFSBASE32,
30482 IX86_BUILTIN_RDFSBASE64,
30483 IX86_BUILTIN_RDGSBASE32,
30484 IX86_BUILTIN_RDGSBASE64,
30485 IX86_BUILTIN_WRFSBASE32,
30486 IX86_BUILTIN_WRFSBASE64,
30487 IX86_BUILTIN_WRGSBASE32,
30488 IX86_BUILTIN_WRGSBASE64,
30489
30490 /* RDRND instructions. */
30491 IX86_BUILTIN_RDRAND16_STEP,
30492 IX86_BUILTIN_RDRAND32_STEP,
30493 IX86_BUILTIN_RDRAND64_STEP,
30494
30495 /* RDSEED instructions. */
30496 IX86_BUILTIN_RDSEED16_STEP,
30497 IX86_BUILTIN_RDSEED32_STEP,
30498 IX86_BUILTIN_RDSEED64_STEP,
30499
30500 /* F16C instructions. */
30501 IX86_BUILTIN_CVTPH2PS,
30502 IX86_BUILTIN_CVTPH2PS256,
30503 IX86_BUILTIN_CVTPS2PH,
30504 IX86_BUILTIN_CVTPS2PH256,
30505
30506 /* CFString built-in for Darwin. */
30507 IX86_BUILTIN_CFSTRING,
30508
30509 /* Builtins to get CPU type and supported features. */
30510 IX86_BUILTIN_CPU_INIT,
30511 IX86_BUILTIN_CPU_IS,
30512 IX86_BUILTIN_CPU_SUPPORTS,
30513
30514 /* Read/write FLAGS register built-ins. */
30515 IX86_BUILTIN_READ_FLAGS,
30516 IX86_BUILTIN_WRITE_FLAGS,
30517
30518 IX86_BUILTIN_MAX
30519 };
30520
30521 /* Table for the ix86 builtin decls. */
30522 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30523
30524 /* Table of all of the builtin functions that are possible with different ISAs
30525 but are waiting to be built until a function is declared to use that
30526 ISA. */
30527 struct builtin_isa {
30528 const char *name; /* function name */
30529 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30530 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
30531 bool const_p; /* true if the declaration is constant */
30532 bool leaf_p; /* true if the declaration has leaf attribute */
30533 bool nothrow_p; /* true if the declaration has nothrow attribute */
30534 bool set_and_not_built_p; /* true if recorded but the decl is not yet built */
30535 };
30536
30537 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30538
30539 /* ISA bits that can still trigger the declaration of a deferred builtin. */
30540 static HOST_WIDE_INT deferred_isa_values = 0;
30541
30542 /* Add an ix86 target builtin function with CODE, NAME and TCODE. Save MASK,
30543 the isa_flags this builtin requires, in the ix86_builtins_isa array. Store
30544 the function decl in the ix86_builtins array. Return the function decl, or
30545 NULL_TREE if the builtin was not added.
30546
30547 If the front end has a special hook for builtin functions, delay adding
30548 builtin functions that aren't in the current ISA until the ISA is changed
30549 with function specific optimization. Doing so can save about 300K for the
30550 default compiler. When the builtin is expanded, check at that time whether
30551 it is valid.
30552
30553 If the front end doesn't have a special hook, record all builtins, even
30554 those that aren't in the current ISA, in case the user uses function
30555 specific options for a different ISA, so that we don't get scope errors
30556 if a builtin is added in the middle of a function scope. */
30557
30558 static inline tree
30559 def_builtin (HOST_WIDE_INT mask, const char *name,
30560 enum ix86_builtin_func_type tcode,
30561 enum ix86_builtins code)
30562 {
30563 tree decl = NULL_TREE;
30564
30565 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30566 {
30567 ix86_builtins_isa[(int) code].isa = mask;
30568
30569 mask &= ~OPTION_MASK_ISA_64BIT;
30570 if (mask == 0
30571 || (mask & ix86_isa_flags) != 0
30572 || (lang_hooks.builtin_function
30573 == lang_hooks.builtin_function_ext_scope))
30575 {
30576 tree type = ix86_get_builtin_func_type (tcode);
30577 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30578 NULL, NULL_TREE);
30579 ix86_builtins[(int) code] = decl;
30580 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30581 }
30582 else
30583 {
30584 /* Only a MASK whose builtin still has set_and_not_built_p set can
30585 later cause the builtin to be declared, so accumulate it. */
30586 deferred_isa_values |= mask;
30587 ix86_builtins[(int) code] = NULL_TREE;
30588 ix86_builtins_isa[(int) code].tcode = tcode;
30589 ix86_builtins_isa[(int) code].name = name;
30590 ix86_builtins_isa[(int) code].leaf_p = false;
30591 ix86_builtins_isa[(int) code].nothrow_p = false;
30592 ix86_builtins_isa[(int) code].const_p = false;
30593 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30594 }
30595 }
30596
30597 return decl;
30598 }
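/* As an illustration only (the names below are placeholders, not entries
   taken from the tables that follow), a builtin gated on a single ISA
   would typically be registered as:

     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
                  V4SF_FTYPE_V4SF, IX86_BUILTIN_EXAMPLE);

   If OPTION_MASK_ISA_SSE2 is not yet in ix86_isa_flags, the entry is only
   recorded in ix86_builtins_isa and the decl is built later by
   ix86_add_new_builtins.  */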
30599
30600 /* Like def_builtin, but also marks the function decl "const". */
30601
30602 static inline tree
30603 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30604 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30605 {
30606 tree decl = def_builtin (mask, name, tcode, code);
30607 if (decl)
30608 TREE_READONLY (decl) = 1;
30609 else
30610 ix86_builtins_isa[(int) code].const_p = true;
30611
30612 return decl;
30613 }
30614
30615 /* Add any new builtin functions for a given ISA that may not have been
30616 declared yet. Deferring them until now saves a bit of space compared to
30617 adding every declaration to the tree whether or not it is used. */
30618
30619 static void
30620 ix86_add_new_builtins (HOST_WIDE_INT isa)
30621 {
30622 if ((isa & deferred_isa_values) == 0)
30623 return;
30624
30625 /* The bits in ISA are handled now, so remove them from the deferred isa values. */
30626 deferred_isa_values &= ~isa;
30627
30628 int i;
30629 tree saved_current_target_pragma = current_target_pragma;
30630 current_target_pragma = NULL_TREE;
30631
30632 for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
30633 {
30634 if ((ix86_builtins_isa[i].isa & isa) != 0
30635 && ix86_builtins_isa[i].set_and_not_built_p)
30636 {
30637 tree decl, type;
30638
30639 /* Don't define the builtin again. */
30640 ix86_builtins_isa[i].set_and_not_built_p = false;
30641
30642 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30643 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30644 type, i, BUILT_IN_MD, NULL,
30645 NULL_TREE);
30646
30647 ix86_builtins[i] = decl;
30648 if (ix86_builtins_isa[i].const_p)
30649 TREE_READONLY (decl) = 1;
30650 if (ix86_builtins_isa[i].leaf_p)
30651 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30652 NULL_TREE);
30653 if (ix86_builtins_isa[i].nothrow_p)
30654 TREE_NOTHROW (decl) = 1;
30655 }
30656 }
30657
30658 current_target_pragma = saved_current_target_pragma;
30659 }
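/* Note (a summary of the loop above, not a statement about particular
   callers): the ISA argument is the set of flags that has just become
   available; only entries whose mask overlaps it and that are still
   marked set_and_not_built_p get their decls built, so repeated calls
   over the same flags are cheap.  */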
30660
30661 /* Bits for builtin_description.flag. */
30662
30663 /* Set when we don't support the comparison natively, and should
30664 swap the comparison operands in order to support it. */
30665 #define BUILTIN_DESC_SWAP_OPERANDS 1
30666
30667 struct builtin_description
30668 {
30669 const HOST_WIDE_INT mask;
30670 const enum insn_code icode;
30671 const char *const name;
30672 const enum ix86_builtins code;
30673 const enum rtx_code comparison;
30674 const int flag;
30675 };
30676
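/* The FLAG field of builtin_description is interpreted per table: in
   bdesc_comi it holds BUILTIN_DESC_* bits, in the pcmpestr/pcmpistr
   tables it holds the machine mode of the flags result (cast to int),
   and in the remaining tables it holds the builtin's
   ix86_builtin_func_type (also cast to int).  For example, the entry

     { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence",
       IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },

   from bdesc_special_args below describes an SSE2 builtin that takes no
   arguments and returns void.  */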
30677 static const struct builtin_description bdesc_comi[] =
30678 {
30679 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30680 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30681 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30682 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30683 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30684 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30685 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30686 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30687 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30688 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30689 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30690 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30691 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30692 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30693 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30694 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30695 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30696 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30697 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30698 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30699 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30700 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30701 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30702 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
30703 };
30704
30705 static const struct builtin_description bdesc_pcmpestr[] =
30706 {
30707 /* SSE4.2 */
30708 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30709 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30710 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30711 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30712 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30713 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30714 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
30715 };
30716
30717 static const struct builtin_description bdesc_pcmpistr[] =
30718 {
30719 /* SSE4.2 */
30720 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30721 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30722 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30723 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30724 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30725 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30726 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30727 };
30728
30729 /* Special builtins with variable number of arguments. */
30730 static const struct builtin_description bdesc_special_args[] =
30731 {
30732 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30733 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30734 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30735
30736 /* 80387 (used internally for atomic compound assignment). */
30737 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30738 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30739 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30740 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30741
30742 /* MMX */
30743 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30744
30745 /* 3DNow! */
30746 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30747
30748 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30749 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30750 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30751 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30752 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30753 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30754 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30755 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30756 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30757
30758 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30759 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30760 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30761 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30762 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30763 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30764 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30765 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30766
30767 /* SSE */
30768 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30769 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30770 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30771
30772 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30773 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30774 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30775 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30776
30777 /* SSE or 3DNow!A */
30778 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30779 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
30780
30781 /* SSE2 */
30782 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30783 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30784 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30785 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
30786 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30787 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
30788 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
30789 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
30790 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
30791 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30792
30793 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30794 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30795
30796 /* SSE3 */
30797 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30798
30799 /* SSE4.1 */
30800 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
30801
30802 /* SSE4A */
30803 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30804 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30805
30806 /* AVX */
30807 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
30808 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
30809
30810 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30811 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30812 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30813 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
30814 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
30815
30816 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30817 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30818 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30819 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30820 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30821 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
30822 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30823
30824 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
30825 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30826 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30827
30828 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
30829 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
30830 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
30831 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
30832 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
30833 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
30834 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
30835 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
30836
30837 /* AVX2 */
30838 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
30839 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
30840 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
30841 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
30842 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
30843 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
30844 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
30845 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
30846 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
30847
30848 /* AVX512F */
30849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30866 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30868 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30869 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
30870 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
30871 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
30872 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
30873 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30874 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30875 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30876 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30877 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30878 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30879 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30880 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30881 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30882 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30888 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30889 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30890 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30891 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30892 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30893 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30894 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30895 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30896
30897 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
30898 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
30899 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
30900 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
30901 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
30902 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
30903
30904 /* FSGSBASE */
30905 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30906 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30907 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30908 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30909 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30910 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30911 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30912 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30913
30914 /* RTM */
30915 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30916 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
30917 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
30918
30919 /* AVX512BW */
30920 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
30921 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
30922 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
30923 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
30924
30925 /* AVX512VL */
30926 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
30927 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
30928 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
30929 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
30930 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30931 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30932 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30933 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30934 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30935 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30936 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30937 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30938 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30939 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30940 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30941 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30942 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30943 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30944 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30945 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30946 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30947 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30948 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30949 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30950 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30951 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30952 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30953 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30954 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30955 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30956 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30957 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30958 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30959 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30960 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30961 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30962 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
30963 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
30964 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
30965 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
30966 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30967 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30968 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30969 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30970 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30971 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30972 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30973 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30974 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30975 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30976 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30977 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30978 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30979 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30980 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30981 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30982 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30983 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30984 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30985 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30986 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30987 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30988 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30989 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30990 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30991 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30992 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30993 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30994 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30995 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30996 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30997 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30998 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30999 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31000 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31001 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31002 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31003 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31004 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31005 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31006 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31007 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31008 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31009 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31010 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31011 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31012 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31013 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31014 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31015 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31016 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31017 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31018 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31019 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31020
31021 /* PCOMMIT. */
31022 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
31023 };
31024
31025 /* Builtins with variable number of arguments. */
31026 static const struct builtin_description bdesc_args[] =
31027 {
31028 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
31029 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
31030 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
31031 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31032 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31033 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31034 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31035
31036 /* MMX */
31037 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31038 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31039 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31040 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31041 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31042 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31043
31044 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31045 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31046 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31047 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31048 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31049 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31050 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31051 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31052
31053 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31054 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31055
31056 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31057 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31058 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31059 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31060
31061 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31062 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31063 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31064 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31065 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31066 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31067
31068 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31069 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31070 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31071 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31072 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31073 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31074
31075 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31076 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
31077 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31078
31079 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
31080
31081 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31082 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31083 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31084 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31085 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31086 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31087
31088 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31089 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31090 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31091 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31092 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31093 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31094
31095 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31096 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31097 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31098 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31099
31100 /* 3DNow! */
31101 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31102 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31103 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31104 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31105
31106 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31107 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31108 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31109 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31110 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31111 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31112 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31113 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31114 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31115 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31116 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31117 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31118 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31119 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31120 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31121
31122 /* 3DNow!A */
31123 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31124 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31125 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31126 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31127 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31128 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31129
31130 /* SSE */
31131 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31132 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31133 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31134 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31135 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31136 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31137 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31138 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31139 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31140 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31141 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31142 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31143
31144 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31145
31146 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31147 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31148 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31149 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31150 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31151 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31152 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31153 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31154
31155 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31156 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31157 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31158 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31159 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31160 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31161 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31162 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31163 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31164 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31165 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31166 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31167 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31168 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31169 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31170 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31171 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31172 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31173 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31174 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31175
31176 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31177 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31178 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31179 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31180
31181 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31182 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31183 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31184 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31185
31186 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31187
31188 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31189 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31190 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31191 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31192 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31193
31194 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31195 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31196 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
31197
31198 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
31199
31200 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31201 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31202 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31203
31204 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31205 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
31206
31207 /* SSE MMX or 3DNow!A */
31208 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31209 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31210 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31211
31212 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31213 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31214 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31215 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31216
31217 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31218 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31219
31220 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
31221
31222 /* SSE2 */
31223 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31224
31225 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31226 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31227 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31228 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31229 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31230
31231 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31232 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31233 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31234 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31235 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31236
31237 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31238
31239 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31240 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31241 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31242 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31243
31244 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31245 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31246 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31247
31248 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31249 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31250 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31251 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31252 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31253 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31254 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31255 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31256
31257 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31258 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31259 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31260 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31261 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31262 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31263 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31264 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31265 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31266 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31267 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31268 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31269 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31270 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31271 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31272 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31273 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31274 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31275 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31276 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31277
31278 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31279 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31280 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31281 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31282
31283 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31284 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31285 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31286 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31287
31288 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31289
31290 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31291 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31292 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31293
31294 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31295
31296 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31297 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31298 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31299 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31300 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31301 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31302 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31303 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31304
31305 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31306 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31307 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31308 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31309 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31310 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31311 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31312 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31313
31314 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31315 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31316
31317 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31319 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31320 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31321
31322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31324
31325 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31326 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31327 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31328 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31329 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31330 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31331
31332 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31333 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31334 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31335 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31336
31337 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31338 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31339 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31340 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31341 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31342 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31343 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31344 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31345
31346 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31347 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31348 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31349
31350 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31351 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31352
31353 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31354 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31355
31356 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31357
31358 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31359 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31360 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31361 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
31362
31363 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31364 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31365 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31366 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31367 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31368 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31369 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31370
31371 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31372 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31373 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31374 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31375 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31376 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31377 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31378
31379 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31380 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31381 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31382 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31383
31384 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31386 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31387
31388 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31389
31390 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31391
31392 /* SSE2 MMX */
31393 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31394 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31395
31396 /* SSE3 */
31397 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31398 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31399
31400 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31401 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31402 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31403 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31404 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31405 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31406
31407 /* SSSE3 */
31408 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31409 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31410 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31411 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31412 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31413 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31414
31415 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31416 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31417 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31418 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31419 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31420 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31421 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31422 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31423 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31424 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31425 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31426 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31427 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31428 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31429 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31430 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31431 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31432 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31433 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31434 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31435 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31436 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31437 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31438 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31439
31440 /* SSSE3. */
31441 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31442 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
31443
31444 /* SSE4.1 */
31445 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31446 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31447 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31448 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31449 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31450 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31451 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31452 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31453 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31454 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31455
31456 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31457 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31458 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31459 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31460 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31461 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31462 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31463 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31464 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31465 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31466 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31467 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31468 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31469
31470 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31471 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31472 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31473 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31474 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31475 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31476 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31477 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31478 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31479 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31480 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31481 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31482
31483 /* SSE4.1 */
31484 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31485 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31486 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31487 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31488
31489 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31490 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31491 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31492 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31493
31494 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31495 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31496
31497 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31498 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31499
31500 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31501 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31502 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31503 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31504
31505 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31506 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31507
31508 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31509 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31510
31511 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31512 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31513 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31514
31515 /* SSE4.2 */
31516 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31517 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31518 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31519 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31520 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
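/* The DImode crc32 variant also carries OPTION_MASK_ISA_64BIT, so
   def_builtin registers it only when compiling for a 64-bit target.  */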
31521
31522 /* SSE4A */
31523 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31524 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31525 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31526 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31527
31528 /* AES */
31529 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31530 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31531
31532 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31533 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31534 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31535 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31536
31537 /* PCLMUL */
31538 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
31539
31540 /* AVX */
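/* These entries back the AVX intrinsics in avxintrin.h.  For illustration,
   _mm256_addsub_pd is roughly:

     extern __inline __m256d
     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     _mm256_addsub_pd (__m256d __A, __m256d __B)
     {
       return (__m256d) __builtin_ia32_addsubpd256 ((__v4df)__A, (__v4df)__B);
     }

   which expands through the CODE_FOR_avx_addsubv4df3 pattern listed below.  */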
31541 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31542 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31543 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31544 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31545 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31546 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31547 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31548 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31549 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31550 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31551 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31552 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31553 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31554 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31555 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31556 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31557 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31558 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31559 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31560 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31561 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31562 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31563 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31564 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31565 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31566 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31567
31568 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31569 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31570 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31571 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31572
31573 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31574 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31575 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31576 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31577 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31578 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31579 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31580 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31581 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31582 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31583 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31584 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31585 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31586 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31587 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31588 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31589 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31590 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31591 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31592 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31593 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31594 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31595 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31596 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31597 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31598 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31599 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31600 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31601 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31602 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31603 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31604 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31605 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31606 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31607
31608 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31609 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31610 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31611
31612 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31613 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31614 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31615 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31616 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31617
31618 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31619
31620 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31621 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31622
31623 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31624 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31625 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31626 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31627
31628 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31629 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31630
31631 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31632 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31633
31634 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31635 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31636 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31637 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31638
31639 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31640 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31641
31642 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31643 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31644
31645 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31646 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31647 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31648 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31649
31650 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31651 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31652 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31653 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31654 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31655 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31656
31657 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31658 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31659 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31660 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31661 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31662 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31663 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31664 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31665 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31666 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31667 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31668 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31669 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31670 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31671 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31672
31673 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31674 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31675
31676 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31677 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31678
31679 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31680
31681 /* AVX2 */
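/* 256-bit integer builtins backing avx2intrin.h; e.g. _mm256_abs_epi8 is a
   thin wrapper around __builtin_ia32_pabsb256.  The paired *wi/*w, *di/*d
   and *qi/*q shift entries below expose the immediate-count and
   vector-count forms of the same instruction.  */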
31682 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31683 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31684 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31685 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31686 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31687 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31688 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31689 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31690 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31691 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31692 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31693 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31694 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31695 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31696 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31697 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31698 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31699 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31700 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31701 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31702 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31703 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31704 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31705 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31706 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31707 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31708 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31709 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31710 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31711 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31712 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31713 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31714 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31715 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31716 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31717 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31718 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31719 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31720 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31721 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31722 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31723 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31724 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31725 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31726 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31727 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31728 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31729 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31730 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31731 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31732 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31733 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31734 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31735 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31736 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31737 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31738 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31739 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31740 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31741 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31742 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31743 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31744 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31745 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31746 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31747 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31748 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31749 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31750 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31751 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31752 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31753 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31754 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31755 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31756 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
31757 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31758 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31759 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31760 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31761 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31762 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31763 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31764 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31765 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31766 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31767 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31768 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31769 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31770 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31771 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31772 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31773 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31774 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31775 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31776 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31777 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31778 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31779 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31780 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31781 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31782 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31783 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31784 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31785 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31786 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31787 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31788 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31789 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31790 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31791 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31792 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31793 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31794 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31795 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31796 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31797 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31798 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31799 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31800 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31801 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31802 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31803 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
31804 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
31805 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31806 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31807 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31808 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31809 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31810 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31811 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31812 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31813 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31814 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
31815 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
31816 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
31817 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
31818 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31819 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31820 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31821 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31822 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31823 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31824 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31825 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31826 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31827 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
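/* In the prototypes above, the _COUNT suffix marks builtins whose last
   operand is a shift count (an immediate for the *i variants, an XMM vector
   for the others), and the _CONVERT suffix marks entries (palignr256,
   pslldqi256, psrldqi256) whose vector operands are re-interpreted in the
   V2TImode used by the underlying whole-register pattern.  */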
31828
31829 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31830
31831 /* BMI */
31832 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31833 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31834 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31835
31836 /* TBM */
31837 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31838 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31839
31840 /* F16C */
31841 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
31842 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
31843 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
31844 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
31845
31846 /* BMI2 */
31847 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31848 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31849 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31850 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31851 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31852 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31853
31854 /* AVX512F */
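/* Most AVX512F entries use the _mask form: relative to the plain operation
   the prototype gains a merge source of the same vector type plus a mask
   operand (HI for 16-element, QI for 8-element vectors).  For example,
   __builtin_ia32_pabsd512_mask (V16SI_FTYPE_V16SI_V16SI_HI) takes the
   input vector, the vector merged into masked-off lanes, and an __mmask16.  */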
31855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
31856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
31857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
31858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
31859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
31860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
31861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31866 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31868 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
31869 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31870 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
31871 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
31872 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31873 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31874 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31875 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31876 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31877 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31878 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
31879 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31880 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
31881 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31882 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
31886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
31887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
31888 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
31889 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
31890 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
31891 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
31892 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
31893 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31894 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31895 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31896 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31897 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31898 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31899 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31900 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31901 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31902 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31903 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31904 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31905 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31906 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31907 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31908 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31909 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
31910 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
31911 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
31912 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
31913 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31914 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31915 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31982 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31988 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31992 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31994 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
32010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
32011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
32012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
32013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
32014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
32015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
32016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
32017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32021 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32022 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32023 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32024 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32051
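/* Non-masked AVX-512 entries (copysign, sqrt, exp2 and the round/pack
   conversions); note that these FTYPEs carry no trailing writemask
   operand.  */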
32052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
32053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
32054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
32055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32056 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
32058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32060
32061 /* Mask arithmetic operations */
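/* These map the 16-bit opmask (k-register) instructions (kand, kandn,
   knot, kor, kortest, kunpck, kxnor, kxor, kmov) onto HImode operations;
   the _mm512_k* intrinsics in avx512fintrin.h expand to these builtins.  */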
32062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
32065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
32069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
32072
32073 /* SHA */
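/* SHA-NI message-schedule and round helpers (SHA1MSG1/2, SHA1NEXTE,
   SHA1RNDS4, SHA256MSG1/2, SHA256RNDS2).  The name field is 0 for these
   entries, so they only supply the expander mapping; the
   __builtin_ia32_sha* names are registered separately.  */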
32074 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32075 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32076 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32077 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32078 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32079 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32080 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
32081
32082 /* AVX512VL. */
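/* 128-bit and 256-bit forms of the AVX-512 operations above.  Entries
   whose instruction belongs to AVX512BW or AVX512DQ also OR that ISA flag
   into the mask.  The trailing QI/HI/SI in the FTYPE names is the mode of
   the embedded writemask operand, one mask bit per vector element.  */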
32083 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
32084 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
32085 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32086 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32087 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32088 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32089 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32090 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32091 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32092 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32093 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32094 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32095 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32096 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32097 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32098 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32099 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32100 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32101 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32102 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32103 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32104 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32105 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32106 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32107 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32108 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32109 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32110 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32111 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32112 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32113 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32114 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32115 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32116 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32117 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32118 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32119 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32120 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32121 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32122 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32123 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32124 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32125 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32126 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32127 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32128 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32129 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32130 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32131 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32132 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32133 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32134 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32135 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32136 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
32137 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
32138 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
32139 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
32140 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32141 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32142 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32143 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32144 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32145 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32146 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32147 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32148 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32149 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32150 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32151 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32152 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32153 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32154 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32155 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32156 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32157 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32158 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32159 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32160 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32161 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32162 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32163 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32164 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32165 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32166 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32167 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32168 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32169 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32170 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32171 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32172 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
32173 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
32174 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
32175 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
32176 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32177 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
32178 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
32179 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
32180 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32181 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
32182 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32183 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
32184 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
32186 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32187 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32188 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32189 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
32190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32191 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32192 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32193 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
32194 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
32195 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
32196 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
32197 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32198 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32199 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32200 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32201 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32202 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32203 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32204 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32205 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32206 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32207 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32208 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32209 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32210 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32211 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32212 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32213 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32214 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32216 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32217 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32218 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32219 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32220 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32221 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32222 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32223 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32224 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32225 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32226 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32227 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32228 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32229 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32230 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32231 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32232 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32233 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32234 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32235 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32236 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32237 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32238 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32239 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32240 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32241 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32242 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32243 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32244 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32245 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32246 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32247 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32248 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32249 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32251 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32252 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32253 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32254 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32255 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32256 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32257 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32258 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32259 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32260 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32261 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32262 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32263 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32264 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32265 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32266 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32267 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32268 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32269 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32270 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32271 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32272 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32275 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32276 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32277 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32278 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32279 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32280 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32281 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32282 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32283 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32284 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32285 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32286 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32287 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32288 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32289 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32290 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32291 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32292 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32293 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32294 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32295 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32296 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32297 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32298 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32299 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32300 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32301 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32302 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32303 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32304 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32305 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32306 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32307 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32308 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32309 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32310 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32311 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32312 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32313 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32314 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32315 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32316 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32317 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32318 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32319 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32320 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32321 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32322 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32323 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32324 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32325 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32326 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32327 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32330 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32331 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32333 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32334 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32335 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32336 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32337 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32338 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32339 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32340 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32341 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32342 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32343 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32344 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32345 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32346 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32347 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32348 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32349 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32350 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32351 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32352 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32353 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32354 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32355 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32356 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32357 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32358 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32359 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32360 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32361 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32362 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32363 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32364 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32365 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32366 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32367 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32369 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32370 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32371 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32372 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32373 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32374 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32377 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32378 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32379 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32380 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32381 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32382 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32383 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32385 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32386 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32387 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32388 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32389 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32390 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32391 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32394 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32395 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32396 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32398 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32399 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32400 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32401 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32402 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32403 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32404 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32405 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32406 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32407 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32408 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32409 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32415 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32416 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32417 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32418 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32419 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32420 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32421 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32422 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32423 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32424 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32425 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32426 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32427 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32428 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32429 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32430 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32431 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32432 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32433 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32434 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32435 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32436 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32437 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32439 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32440 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32441 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32442 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32443 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32447 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32448 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32449 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32450 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32451 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32452 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32453 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32454 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32455 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32456 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32457 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32458 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32459 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32460 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32461 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32464 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32465 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
32466 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
32467 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32468 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32469 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
32470 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
32471 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
32472 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
32473 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32474 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32475 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32476 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32477 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32478 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32479 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32480 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32481 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32482 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32483 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32484 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32485 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32486 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32487 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32488 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32489 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32490 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32491 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32493 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32494 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32495 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32496 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32497 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32498 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32499 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32500 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32501 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32502 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32503 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32504 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32505 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32506 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32507 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32508 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32509 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32510 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32511 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32512 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32513 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32514 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32515 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32516 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32517 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32518 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32519 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32520 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32521 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32522 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32523 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32524 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32525 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32526 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32527 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32528 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32529 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32530 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32532 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32533 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32534 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32536 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32538 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32539 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32540 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32541 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32542 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32543 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32544 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32545 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32546 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32547 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32548 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32549 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32550 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32551 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32552 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32553 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32554 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32555 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32556 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32557 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32558 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32559 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32560 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32561 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32562 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32563 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32564 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32565 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32566 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32567 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32568 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32569 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32570 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32571 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32572 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32573 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32574 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32575 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32576 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32577 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32578 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32579 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32580 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32581 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32582 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32583 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32584 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32585 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32586 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32587 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32588 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32589 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32590 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32591 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32592 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32593 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32594 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
32595 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32596 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32597 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32598 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32599 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32600 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32601 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32602 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32603 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32604 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32605 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32606 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32607 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32608 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32609 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32610 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32611 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32612 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32613 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32614 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32615 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32616 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32617 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32618 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32619 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32620 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32621 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32622 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32623 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32624 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32625 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32626 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32627 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32628 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32629 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32630 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32631 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32632 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32633 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32634 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32635 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32636 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32637 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32638 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32639 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32640 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32641 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32642 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32643 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32644 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32645 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32646 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32647 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32648 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32649 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32650 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32651 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32652 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32653 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32654 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32655 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32656 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32657 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32658 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32659 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32660 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32661 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32662 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32663 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32664 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32665 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32666 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32667 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32668 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32669 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32670 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32671 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32672 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32673 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32674 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32675 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32676 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32677 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32678 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32679 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32680 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32681 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32682 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32683 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32684 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32685 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32686 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32687 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32688 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32689 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32690 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32691 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32692 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32693 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32694 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32695 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32696 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32697 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32698 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32699 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32700 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32701 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32702 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32703 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32704 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32705 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32706 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32707 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32708 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32709 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32710 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32711 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32712 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32713 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32714 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32715 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32716 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32717 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32718 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32719 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32720 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32721 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32722 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32723 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32724 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32725 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32726 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32727 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32728 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32729 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32730 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32731 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32732 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32733 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32734 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32735 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32736 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32737 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32738 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32739 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32740 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32741 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32742 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32743 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32744 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32745 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32746 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32747 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32748 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32749 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32750 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32751 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32752 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32753 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32754 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32755 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32756 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32757 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32758 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32759 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32760 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32761 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32762 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32763 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32764 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32765 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32766 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32767 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32768 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32769 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32770 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
32771 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
32772 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32773 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32774 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32775 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32776 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32777 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32778 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32779 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32780 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32781 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32782 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32783 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
32784 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
32785 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32786 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32787 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32788 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32789 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32790 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32791 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32792 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32793 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
32794 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
32795
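 /* Reading note for the descriptor entries in these tables: each initializer
    lists, in order, the OPTION_MASK_ISA_* bits that must be enabled, the
    CODE_FOR_* insn code that implements the builtin, the user-visible
    __builtin_ia32_* name, its IX86_BUILTIN_* enumerator, an rtx comparison
    code (UNKNOWN for the non-comparison builtins here), and the function
    prototype enumerator cast to int.  This presumably matches the
    builtin_description layout declared earlier in this file.  */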
32796 /* AVX512DQ. */
32797 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32798 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32799 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32800 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32801 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
32802 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
32803 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
32804 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
32805 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
32806 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
32807 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32808 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32809 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32810 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32811 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32812 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32813 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32814 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32815 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32816 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32817 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32818 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
32819 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
32820 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
32821 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
32822 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
32823 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
32824 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
32825 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
32826 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
32827 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
32828
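 /* The __builtin_ia32_* names above are not normally called directly; the
    AVX-512 intrinsic headers wrap them.  As a rough sketch (the real wrapper
    lives in avx512dqintrin.h, so the exact spelling may differ), the masked
    XOR entry above is consumed along these lines:

	__m512d
	_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
	{
	  return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
							 (__v8df) __B,
							 (__v8df) __W,
							 (__mmask8) __U);
	}

    The argument order mirrors the V8DF_FTYPE_V8DF_V8DF_V8DF_QI prototype
    recorded in the table entry.  */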
32829 /* AVX512BW. */
32830 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
32831 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
32832 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32833 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32834 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32835 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32836 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
32837 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
32838 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32839 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32840 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
32841 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
32842 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
32843 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
32844 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
32845 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
32846 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32847 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32848 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32849 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32850 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32851 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32852 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32853 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32854 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32855 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32856 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32857 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32858 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32859 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32860 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32861 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32862 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32863 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32864 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32865 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32866 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32867 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32868 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32869 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32870 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32871 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32872 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32873 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32874 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32875 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32876 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32877 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32878 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32879 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32880 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32881 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32882 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32883 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32884 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32885 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32886 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
32887 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
32888 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32889 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32890 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32891 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32892 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32893 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32894 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32895 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32896 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32897 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32898 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32899 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32900 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
32901 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
32902 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
32903 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
32904 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32905 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32906 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32907 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32908 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32909 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32910 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32911 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32912 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32913 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32914 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32915 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32916 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32917 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32918 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32919 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32920 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32921
32922 /* AVX512IFMA */
32923 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32924 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32925 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32926 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32927 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32928 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32929 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32930 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32931 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32932 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32933 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32934 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32935
32936 /* AVX512VBMI */
32937 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32938 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32939 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32940 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32941 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32942 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32943 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32944 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32945 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32946 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32947 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32948 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32949 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32950 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32951 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32952 };
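
/* Editorial note on reading these tables (a sketch, not used by the build):
   each builtin_description entry gives the ISA option mask that must be
   enabled, the insn pattern (CODE_FOR_*) the builtin expands to, the
   builtin's name and its IX86_BUILTIN_* enumerator, an RTX comparison code
   (UNKNOWN where none applies) and, cast to int, the *_FTYPE_* code
   describing its prototype.  For instance, V32HI_FTYPE_V32HI_V32HI_V32HI_SI
   above means "returns V32HI, takes two V32HI operands, a V32HI merge
   source and an SI mask"; the avx512bwintrin.h wrapper for
   __builtin_ia32_paddw512_mask (_mm512_mask_add_epi16) is roughly

     return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A,
                                                    (__v32hi) __B,
                                                    (__v32hi) __W,
                                                    (__mmask32) __U);

   (argument names here are illustrative).  */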
32953
32954 /* Builtins with rounding support. */
32955 static const struct builtin_description bdesc_round_args[] =
32956 {
32957 /* AVX512F */
32958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
32963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
32964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
32965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
32966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
32967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
32968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
32971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
32973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
32975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
32977 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
32978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
32979 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
32980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
32981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32982 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32986 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
32987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
32988 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
32989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32992 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32994 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33038 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33040 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33042 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33044 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33046 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33048 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33050 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33052 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
33060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
33061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33077
33078 /* AVX512ER */
33079 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33080 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33081 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33082 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33083 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33084 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33085 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33086 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33087 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33088 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33089
33090 /* AVX512DQ. */
33091 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33092 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33093 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33094 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33095 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33096 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33097 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33098 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33099 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33100 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33101 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33102 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33103 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33104 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33105 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
33106 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
33107 };
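
/* A note on the rounding forms above (editorial sketch): the trailing INT
   operand in each *_INT prototype carries the embedded rounding / SAE
   immediate (e.g. _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC).  The
   avx512fintrin.h wrappers pass it through unchanged; _mm512_add_round_pd,
   for example, is roughly

     return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
                                                    (__v8df)
                                                    _mm512_undefined_pd (),
                                                    (__mmask8) -1, __R);

   with __R the rounding immediate (names illustrative).  */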
33108
33109 /* Builtins for MPX. */
33110 static const struct builtin_description bdesc_mpx[] =
33111 {
33112 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33113 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33114 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33115 };
33116
33117 /* Const builtins for MPX. */
33118 static const struct builtin_description bdesc_mpx_const[] =
33119 {
33120 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33121 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33122 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33123 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33124 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33125 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33126 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33127 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
33128 };
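
/* The MPX entries above carry no insn code because they are expanded
   specially in ix86_expand_builtin; the Pointer Bounds Checker emits them
   rather than users calling them directly.  Conceptually, instrumented
   code does something like (sketch only, variable names illustrative):

     bnd = __builtin_ia32_bndmk (ptr, size);    create bounds for the object
     __builtin_ia32_bndcl (access, bnd);        check against the lower bound
     __builtin_ia32_bndcu (access, bnd);        check against the upper bound
 */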
33129
33130 /* FMA4 and XOP. */
33131 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
33132 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
33133 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
33134 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
33135 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
33136 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
33137 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
33138 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
33139 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
33140 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
33141 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
33142 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
33143 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
33144 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
33145 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
33146 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
33147 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
33148 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
33149 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
33150 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
33151 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
33152 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
33153 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
33154 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
33155 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
33156 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
33157 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
33158 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
33159 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
33160 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
33161 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
33162 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
33163 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
33164 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
33165 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
33166 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
33167 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
33168 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
33169 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
33170 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
33171 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
33172 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
33173 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
33174 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
33175 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
33176 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
33177 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
33178 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
33179 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
33180 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
33181 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
33182 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
33183
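/* Naming convention for the MULTI_ARG_* codes above (editorial summary):
   MULTI_ARG_<N>_<MODE> takes N vector operands of the given element mode
   (SF/DF/DI/SI/HI/QI); a trailing "2" selects the 256-bit AVX variant,
   _IMM an immediate count operand, _CMP a comparison form driven by the
   RTX code in the table entry, while _TF appears to be reserved for the
   vpcom always-true/always-false forms; names with two modes (e.g.
   MULTI_ARG_1_SI_DI) mark mixed operand/result modes.  For example,
   MULTI_ARG_2_DI_IMM (V2DI_FTYPE_V2DI_SI) is used below for
   __builtin_ia32_vprotqi, a V2DI rotate by an immediate count.  */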
33184 static const struct builtin_description bdesc_multi_arg[] =
33185 {
33186 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33187 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33188 UNKNOWN, (int)MULTI_ARG_3_SF },
33189 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33190 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33191 UNKNOWN, (int)MULTI_ARG_3_DF },
33192
33193 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33194 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33195 UNKNOWN, (int)MULTI_ARG_3_SF },
33196 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33197 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33198 UNKNOWN, (int)MULTI_ARG_3_DF },
33199
33200 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33201 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33202 UNKNOWN, (int)MULTI_ARG_3_SF },
33203 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33204 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33205 UNKNOWN, (int)MULTI_ARG_3_DF },
33206 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33207 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33208 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33209 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33210 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33211 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33212
33213 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33214 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33215 UNKNOWN, (int)MULTI_ARG_3_SF },
33216 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33217 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33218 UNKNOWN, (int)MULTI_ARG_3_DF },
33219 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33220 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33221 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33222 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33223 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33224 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33225
33226 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
33227 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
33228 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
33229 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
33230 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
33231 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
33232 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
33233
33234 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33235 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33236 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
33237 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
33238 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
33239 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
33240 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
33241
33242 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
33243
33244 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33245 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33246 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33247 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33248 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33249 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33250 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33251 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33252 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33253 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33254 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33255 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33256
33257 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33258 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
33259 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
33260 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
33261 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
33262 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
33263 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
33264 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
33265 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33266 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
33267 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
33268 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
33269 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33270 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33271 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33272 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33273
33274 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33275 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33276 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33277 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33278 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33279 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33280
33281 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33282 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33283 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33284 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33285 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33286 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33287 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33288 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33289 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33290 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33291 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33292 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33293 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33294 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33295 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33296
33297 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33298 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33299 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33300 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33301 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33302 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33303 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33304
33305 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33306 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33307 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33308 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33309 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33310 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33311 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33312
33313 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33314 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33315 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33316 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33317 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33318 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33319 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33320
33321 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33322 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33323 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33324 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33325 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33326 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33327 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33328
33329 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33330 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33331 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33332 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33333 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33334 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33335 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33336
33337 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33338 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33339 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33340 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33341 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33342 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33343 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33344
33345 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33346 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33347 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33348 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33349 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33350 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33351 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33352
33353 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33354 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33355 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33356 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33357 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33358 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33359 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33360
33361 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33362 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33363 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33364 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33365 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33366 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33367 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33368 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33369
33370 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33371 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33372 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33373 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33374 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33375 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33376 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33377 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33378
33379 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33380 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33381 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33382 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
33383
33384 };
33385 \f
33386 /* TM vector builtins. */
33387
33388 /* Reuse the existing x86-specific `struct builtin_description' because
33389    we're lazy.  Add casts to make them fit.  */
33390 static const struct builtin_description bdesc_tm[] =
33391 {
33392 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33393 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33394 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33395 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33396 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33397 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33398 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33399
33400 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33401 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33402 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33403 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33404 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33405 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33406 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33407
33408 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33409 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33410 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33411 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33412 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33413 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33414 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33415
33416 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33417 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33418 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
33419 };
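/* When these entries are registered in ix86_init_tm_builtins below, the
   "__builtin_" prefix is stripped, so each builtin is also callable under
   its bare _ITM_* name; those names are presumed to correspond to the
   libitm entry points of the same name.  */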
33420
33421 /* TM callbacks. */
33422
33423 /* Return the builtin decl needed to load a vector of TYPE. */
33424
33425 static tree
33426 ix86_builtin_tm_load (tree type)
33427 {
33428 if (TREE_CODE (type) == VECTOR_TYPE)
33429 {
33430 switch (tree_to_uhwi (TYPE_SIZE (type)))
33431 {
33432 case 64:
33433 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33434 case 128:
33435 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33436 case 256:
33437 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33438 }
33439 }
33440 return NULL_TREE;
33441 }
33442
33443 /* Return the builtin decl needed to store a vector of TYPE. */
33444
33445 static tree
33446 ix86_builtin_tm_store (tree type)
33447 {
33448 if (TREE_CODE (type) == VECTOR_TYPE)
33449 {
33450 switch (tree_to_uhwi (TYPE_SIZE (type)))
33451 {
33452 case 64:
33453 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33454 case 128:
33455 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33456 case 256:
33457 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33458 }
33459 }
33460 return NULL_TREE;
33461 }
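/* Illustrative behaviour of the two helpers above (a description, not code
   from this file): for a 128-bit vector type such as V4SF,
   ix86_builtin_tm_load yields the BUILT_IN_TM_LOAD_M128 decl and
   ix86_builtin_tm_store yields the BUILT_IN_TM_STORE_M128 decl; a
   non-vector type, or a vector whose size is not 64, 128 or 256 bits,
   yields NULL_TREE.  */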
33462 \f
33463 /* Initialize the transactional memory vector load/store builtins. */
33464
33465 static void
33466 ix86_init_tm_builtins (void)
33467 {
33468 enum ix86_builtin_func_type ftype;
33469 const struct builtin_description *d;
33470 size_t i;
33471 tree decl;
33472 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33473 tree attrs_log, attrs_type_log;
33474
33475 if (!flag_tm)
33476 return;
33477
33478 /* If there are no builtins defined, we must be compiling in a
33479 language without trans-mem support. */
33480 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33481 return;
33482
33483 /* Use whatever attributes a normal TM load has. */
33484 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33485 attrs_load = DECL_ATTRIBUTES (decl);
33486 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33487 /* Use whatever attributes a normal TM store has. */
33488 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33489 attrs_store = DECL_ATTRIBUTES (decl);
33490 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33491 /* Use whatever attributes a normal TM log has. */
33492 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33493 attrs_log = DECL_ATTRIBUTES (decl);
33494 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33495
33496 for (i = 0, d = bdesc_tm;
33497 i < ARRAY_SIZE (bdesc_tm);
33498 i++, d++)
33499 {
33500 if ((d->mask & ix86_isa_flags) != 0
33501 || (lang_hooks.builtin_function
33502 == lang_hooks.builtin_function_ext_scope))
33503 {
33504 tree type, attrs, attrs_type;
33505 enum built_in_function code = (enum built_in_function) d->code;
33506
33507 ftype = (enum ix86_builtin_func_type) d->flag;
33508 type = ix86_get_builtin_func_type (ftype);
33509
33510 if (BUILTIN_TM_LOAD_P (code))
33511 {
33512 attrs = attrs_load;
33513 attrs_type = attrs_type_load;
33514 }
33515 else if (BUILTIN_TM_STORE_P (code))
33516 {
33517 attrs = attrs_store;
33518 attrs_type = attrs_type_store;
33519 }
33520 else
33521 {
33522 attrs = attrs_log;
33523 attrs_type = attrs_type_log;
33524 }
33525 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33526 /* The builtin without the prefix for
33527 calling it directly. */
33528 d->name + strlen ("__builtin_"),
33529 attrs);
33530 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
33531 set the TYPE_ATTRIBUTES. */
33532 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33533
33534 set_builtin_decl (code, decl, false);
33535 }
33536 }
33537 }
33538
33539 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
33540    not in the current target ISA, to allow the user to compile particular
33541    modules with target-specific options that differ from the command-line
33542    options.  */
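/* For instance, under this scheme a translation unit compiled with only
   -msse2 on the command line can still use an AVX2 builtin inside a
   function carrying __attribute__ ((target ("avx2"))): the builtin decl
   exists regardless of the command-line ISA, and the ISA requirement is
   presumably enforced later, when the builtin is actually expanded.  */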
33543 static void
33544 ix86_init_mmx_sse_builtins (void)
33545 {
33546 const struct builtin_description * d;
33547 enum ix86_builtin_func_type ftype;
33548 size_t i;
33549
33550 /* Add all special builtins with variable number of operands. */
33551 for (i = 0, d = bdesc_special_args;
33552 i < ARRAY_SIZE (bdesc_special_args);
33553 i++, d++)
33554 {
33555 if (d->name == 0)
33556 continue;
33557
33558 ftype = (enum ix86_builtin_func_type) d->flag;
33559 def_builtin (d->mask, d->name, ftype, d->code);
33560 }
33561
33562 /* Add all builtins with variable number of operands. */
33563 for (i = 0, d = bdesc_args;
33564 i < ARRAY_SIZE (bdesc_args);
33565 i++, d++)
33566 {
33567 if (d->name == 0)
33568 continue;
33569
33570 ftype = (enum ix86_builtin_func_type) d->flag;
33571 def_builtin_const (d->mask, d->name, ftype, d->code);
33572 }
33573
33574 /* Add all builtins with rounding. */
33575 for (i = 0, d = bdesc_round_args;
33576 i < ARRAY_SIZE (bdesc_round_args);
33577 i++, d++)
33578 {
33579 if (d->name == 0)
33580 continue;
33581
33582 ftype = (enum ix86_builtin_func_type) d->flag;
33583 def_builtin_const (d->mask, d->name, ftype, d->code);
33584 }
33585
33586 /* pcmpestr[im] insns. */
33587 for (i = 0, d = bdesc_pcmpestr;
33588 i < ARRAY_SIZE (bdesc_pcmpestr);
33589 i++, d++)
33590 {
33591 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33592 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33593 else
33594 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33595 def_builtin_const (d->mask, d->name, ftype, d->code);
33596 }
33597
33598 /* pcmpistr[im] insns. */
33599 for (i = 0, d = bdesc_pcmpistr;
33600 i < ARRAY_SIZE (bdesc_pcmpistr);
33601 i++, d++)
33602 {
33603 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33604 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33605 else
33606 ftype = INT_FTYPE_V16QI_V16QI_INT;
33607 def_builtin_const (d->mask, d->name, ftype, d->code);
33608 }
33609
33610 /* comi/ucomi insns. */
33611 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33612 {
33613 if (d->mask == OPTION_MASK_ISA_SSE2)
33614 ftype = INT_FTYPE_V2DF_V2DF;
33615 else
33616 ftype = INT_FTYPE_V4SF_V4SF;
33617 def_builtin_const (d->mask, d->name, ftype, d->code);
33618 }
33619
33620 /* SSE */
33621 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33622 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33623 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33624 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33625
33626 /* SSE or 3DNow!A */
33627 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33628 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33629 IX86_BUILTIN_MASKMOVQ);
33630
33631 /* SSE2 */
33632 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33633 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33634
33635 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33636 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33637 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33638 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33639
33640 /* SSE3. */
33641 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33642 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33643 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33644 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33645
33646 /* AES */
33647 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33648 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33649 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33650 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33651 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33652 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33653 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33654 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33655 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33656 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33657 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33658 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33659
33660 /* PCLMUL */
33661 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33662 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33663
33664 /* RDRND */
33665 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33666 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33667 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33668 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33669 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33670 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33671 IX86_BUILTIN_RDRAND64_STEP);
33672
33673 /* AVX2 */
33674 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33675 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33676 IX86_BUILTIN_GATHERSIV2DF);
33677
33678 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33679 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33680 IX86_BUILTIN_GATHERSIV4DF);
33681
33682 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33683 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33684 IX86_BUILTIN_GATHERDIV2DF);
33685
33686 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33687 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33688 IX86_BUILTIN_GATHERDIV4DF);
33689
33690 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33691 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33692 IX86_BUILTIN_GATHERSIV4SF);
33693
33694 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33695 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33696 IX86_BUILTIN_GATHERSIV8SF);
33697
33698 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33699 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33700 IX86_BUILTIN_GATHERDIV4SF);
33701
33702 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33703 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33704 IX86_BUILTIN_GATHERDIV8SF);
33705
33706 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33707 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33708 IX86_BUILTIN_GATHERSIV2DI);
33709
33710 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33711 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33712 IX86_BUILTIN_GATHERSIV4DI);
33713
33714 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33715 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33716 IX86_BUILTIN_GATHERDIV2DI);
33717
33718 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33719 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33720 IX86_BUILTIN_GATHERDIV4DI);
33721
33722 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33723 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33724 IX86_BUILTIN_GATHERSIV4SI);
33725
33726 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33727 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33728 IX86_BUILTIN_GATHERSIV8SI);
33729
33730 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33731 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33732 IX86_BUILTIN_GATHERDIV4SI);
33733
33734 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33735 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33736 IX86_BUILTIN_GATHERDIV8SI);
33737
33738 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
33739 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33740 IX86_BUILTIN_GATHERALTSIV4DF);
33741
33742 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
33743 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33744 IX86_BUILTIN_GATHERALTDIV8SF);
33745
33746 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
33747 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33748 IX86_BUILTIN_GATHERALTSIV4DI);
33749
33750 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
33751 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33752 IX86_BUILTIN_GATHERALTDIV8SI);
33753
33754 /* AVX512F */
33755 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33756 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
33757 IX86_BUILTIN_GATHER3SIV16SF);
33758
33759 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
33760 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
33761 IX86_BUILTIN_GATHER3SIV8DF);
33762
33763 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
33764 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
33765 IX86_BUILTIN_GATHER3DIV16SF);
33766
33767 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
33768 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
33769 IX86_BUILTIN_GATHER3DIV8DF);
33770
33771 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
33772 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
33773 IX86_BUILTIN_GATHER3SIV16SI);
33774
33775 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
33776 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
33777 IX86_BUILTIN_GATHER3SIV8DI);
33778
33779 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
33780 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
33781 IX86_BUILTIN_GATHER3DIV16SI);
33782
33783 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
33784 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
33785 IX86_BUILTIN_GATHER3DIV8DI);
33786
33787 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
33788 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
33789 IX86_BUILTIN_GATHER3ALTSIV8DF);
33790
33791 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
33792 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
33793 IX86_BUILTIN_GATHER3ALTDIV16SF);
33794
33795 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
33796 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
33797 IX86_BUILTIN_GATHER3ALTSIV8DI);
33798
33799 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
33800 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
33801 IX86_BUILTIN_GATHER3ALTDIV16SI);
33802
33803 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
33804 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
33805 IX86_BUILTIN_SCATTERSIV16SF);
33806
33807 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
33808 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
33809 IX86_BUILTIN_SCATTERSIV8DF);
33810
33811 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
33812 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
33813 IX86_BUILTIN_SCATTERDIV16SF);
33814
33815 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
33816 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
33817 IX86_BUILTIN_SCATTERDIV8DF);
33818
33819 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
33820 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
33821 IX86_BUILTIN_SCATTERSIV16SI);
33822
33823 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
33824 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
33825 IX86_BUILTIN_SCATTERSIV8DI);
33826
33827 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
33828 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
33829 IX86_BUILTIN_SCATTERDIV16SI);
33830
33831 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
33832 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
33833 IX86_BUILTIN_SCATTERDIV8DI);
33834
33835 /* AVX512VL */
33836 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
33837 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
33838 IX86_BUILTIN_GATHER3SIV2DF);
33839
33840 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
33841 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
33842 IX86_BUILTIN_GATHER3SIV4DF);
33843
33844 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
33845 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
33846 IX86_BUILTIN_GATHER3DIV2DF);
33847
33848 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
33849 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
33850 IX86_BUILTIN_GATHER3DIV4DF);
33851
33852 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
33853 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
33854 IX86_BUILTIN_GATHER3SIV4SF);
33855
33856 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
33857 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
33858 IX86_BUILTIN_GATHER3SIV8SF);
33859
33860 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
33861 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
33862 IX86_BUILTIN_GATHER3DIV4SF);
33863
33864 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
33865 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
33866 IX86_BUILTIN_GATHER3DIV8SF);
33867
33868 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
33869 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
33870 IX86_BUILTIN_GATHER3SIV2DI);
33871
33872 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
33873 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
33874 IX86_BUILTIN_GATHER3SIV4DI);
33875
33876 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
33877 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
33878 IX86_BUILTIN_GATHER3DIV2DI);
33879
33880 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
33881 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
33882 IX86_BUILTIN_GATHER3DIV4DI);
33883
33884 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
33885 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
33886 IX86_BUILTIN_GATHER3SIV4SI);
33887
33888 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
33889 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
33890 IX86_BUILTIN_GATHER3SIV8SI);
33891
33892 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
33893 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
33894 IX86_BUILTIN_GATHER3DIV4SI);
33895
33896 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
33897 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
33898 IX86_BUILTIN_GATHER3DIV8SI);
33899
33900 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
33901 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
33902 IX86_BUILTIN_GATHER3ALTSIV4DF);
33903
33904 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
33905 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
33906 IX86_BUILTIN_GATHER3ALTDIV8SF);
33907
33908 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
33909 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
33910 IX86_BUILTIN_GATHER3ALTSIV4DI);
33911
33912 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
33913 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
33914 IX86_BUILTIN_GATHER3ALTDIV8SI);
33915
33916 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
33917 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
33918 IX86_BUILTIN_SCATTERSIV8SF);
33919
33920 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
33921 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
33922 IX86_BUILTIN_SCATTERSIV4SF);
33923
33924 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
33925 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
33926 IX86_BUILTIN_SCATTERSIV4DF);
33927
33928 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
33929 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
33930 IX86_BUILTIN_SCATTERSIV2DF);
33931
33932 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
33933 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
33934 IX86_BUILTIN_SCATTERDIV8SF);
33935
33936 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
33937 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
33938 IX86_BUILTIN_SCATTERDIV4SF);
33939
33940 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
33941 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
33942 IX86_BUILTIN_SCATTERDIV4DF);
33943
33944 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
33945 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
33946 IX86_BUILTIN_SCATTERDIV2DF);
33947
33948 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
33949 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
33950 IX86_BUILTIN_SCATTERSIV8SI);
33951
33952 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
33953 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
33954 IX86_BUILTIN_SCATTERSIV4SI);
33955
33956 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
33957 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
33958 IX86_BUILTIN_SCATTERSIV4DI);
33959
33960 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
33961 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
33962 IX86_BUILTIN_SCATTERSIV2DI);
33963
33964 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
33965 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
33966 IX86_BUILTIN_SCATTERDIV8SI);
33967
33968 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
33969 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
33970 IX86_BUILTIN_SCATTERDIV4SI);
33971
33972 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
33973 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
33974 IX86_BUILTIN_SCATTERDIV4DI);
33975
33976 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
33977 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
33978 IX86_BUILTIN_SCATTERDIV2DI);
33979
33980 /* AVX512PF */
33981 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
33982 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33983 IX86_BUILTIN_GATHERPFDPD);
33984 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
33985 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33986 IX86_BUILTIN_GATHERPFDPS);
33987 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
33988 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33989 IX86_BUILTIN_GATHERPFQPD);
33990 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
33991 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33992 IX86_BUILTIN_GATHERPFQPS);
33993 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
33994 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33995 IX86_BUILTIN_SCATTERPFDPD);
33996 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
33997 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33998 IX86_BUILTIN_SCATTERPFDPS);
33999 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
34000 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34001 IX86_BUILTIN_SCATTERPFQPD);
34002 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
34003 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34004 IX86_BUILTIN_SCATTERPFQPS);
34005
34006 /* SHA */
34007 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
34008 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
34009 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
34010 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
34011 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
34012 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
34013 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
34014 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
34015 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
34016 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
34017 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
34018 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
34019 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
34020 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
34021
34022 /* RTM. */
34023 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
34024 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
34025
34026 /* MMX access to the vec_init patterns. */
34027 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
34028 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
34029
34030 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
34031 V4HI_FTYPE_HI_HI_HI_HI,
34032 IX86_BUILTIN_VEC_INIT_V4HI);
34033
34034 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
34035 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
34036 IX86_BUILTIN_VEC_INIT_V8QI);
34037
34038 /* Access to the vec_extract patterns. */
34039 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
34040 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
34041 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
34042 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
34043 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
34044 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
34045 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
34046 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
34047 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
34048 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
34049
34050 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34051 "__builtin_ia32_vec_ext_v4hi",
34052 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
34053
34054 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
34055 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
34056
34057 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
34058 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
34059
34060 /* Access to the vec_set patterns. */
34061 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
34062 "__builtin_ia32_vec_set_v2di",
34063 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
34064
34065 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
34066 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
34067
34068 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
34069 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
34070
34071 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
34072 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
34073
34074 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34075 "__builtin_ia32_vec_set_v4hi",
34076 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
34077
34078 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
34079 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
34080
34081 /* RDSEED */
34082 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
34083 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
34084 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
34085 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
34086 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
34087 "__builtin_ia32_rdseed_di_step",
34088 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
34089
34090 /* ADCX */
34091 def_builtin (0, "__builtin_ia32_addcarryx_u32",
34092 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
34093 def_builtin (OPTION_MASK_ISA_64BIT,
34094 "__builtin_ia32_addcarryx_u64",
34095 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34096 IX86_BUILTIN_ADDCARRYX64);
34097
34098 /* SBB */
34099 def_builtin (0, "__builtin_ia32_sbb_u32",
34100 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
34101 def_builtin (OPTION_MASK_ISA_64BIT,
34102 "__builtin_ia32_sbb_u64",
34103 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34104 IX86_BUILTIN_SBB64);
34105
34106 /* Read/write FLAGS. */
34107 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
34108 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34109 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
34110 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34111 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
34112 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34113 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34114 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
34115
34116 /* CLFLUSHOPT. */
34117 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34118 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34119
34120 /* CLWB. */
34121 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34122 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
34123
34124 /* Add FMA4 multi-arg argument instructions */
34125 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34126 {
34127 if (d->name == 0)
34128 continue;
34129
34130 ftype = (enum ix86_builtin_func_type) d->flag;
34131 def_builtin_const (d->mask, d->name, ftype, d->code);
34132 }
34133 }
34134
34135 static void
34136 ix86_init_mpx_builtins ()
34137 {
34138 const struct builtin_description * d;
34139 enum ix86_builtin_func_type ftype;
34140 tree decl;
34141 size_t i;
34142
34143 for (i = 0, d = bdesc_mpx;
34144 i < ARRAY_SIZE (bdesc_mpx);
34145 i++, d++)
34146 {
34147 if (d->name == 0)
34148 continue;
34149
34150 ftype = (enum ix86_builtin_func_type) d->flag;
34151 decl = def_builtin (d->mask, d->name, ftype, d->code);
34152
34153       /* Without the leaf and nothrow flags on MPX builtins,
34154 	 abnormal edges may follow their calls when setjmp
34155 	 is present in the function.  Since there may be many
34156 	 MPX builtin calls, this creates lots of useless
34157 	 edges and enormous PHI nodes.  To avoid that, mark
34158 	 MPX builtins as leaf and nothrow.  */
34159 if (decl)
34160 {
34161 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34162 NULL_TREE);
34163 TREE_NOTHROW (decl) = 1;
34164 }
34165 else
34166 {
34167 ix86_builtins_isa[(int)d->code].leaf_p = true;
34168 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34169 }
34170 }
34171
34172 for (i = 0, d = bdesc_mpx_const;
34173 i < ARRAY_SIZE (bdesc_mpx_const);
34174 i++, d++)
34175 {
34176 if (d->name == 0)
34177 continue;
34178
34179 ftype = (enum ix86_builtin_func_type) d->flag;
34180 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34181
34182 if (decl)
34183 {
34184 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34185 NULL_TREE);
34186 TREE_NOTHROW (decl) = 1;
34187 }
34188 else
34189 {
34190 ix86_builtins_isa[(int)d->code].leaf_p = true;
34191 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34192 }
34193 }
34194 }
34195
34196 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34197 to return a pointer to VERSION_DECL if the outcome of the expression
34198 formed by PREDICATE_CHAIN is true. This function will be called during
34199 version dispatch to decide which function version to execute. It returns
34200 the basic block at the end, to which more conditions can be added. */
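/* A rough sketch of the test this emits (illustrative pseudo-GIMPLE, not
   literal output) for one version guarded by two predicates:

     c1 = predicate_1 (arg_1);      // e.g. __builtin_cpu_is ("...")
     c2 = predicate_2 (arg_2);      // e.g. __builtin_cpu_supports ("...")
     c  = MIN_EXPR <c2, c1>;        // nonzero only if both are nonzero
     if (c > 0)
       return (void *) &version_decl;
     // otherwise control falls through to the block returned below  */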
34201
34202 static basic_block
34203 add_condition_to_bb (tree function_decl, tree version_decl,
34204 tree predicate_chain, basic_block new_bb)
34205 {
34206 gimple return_stmt;
34207 tree convert_expr, result_var;
34208 gimple convert_stmt;
34209 gimple call_cond_stmt;
34210 gimple if_else_stmt;
34211
34212 basic_block bb1, bb2, bb3;
34213 edge e12, e23;
34214
34215 tree cond_var, and_expr_var = NULL_TREE;
34216 gimple_seq gseq;
34217
34218 tree predicate_decl, predicate_arg;
34219
34220 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34221
34222 gcc_assert (new_bb != NULL);
34223 gseq = bb_seq (new_bb);
34224
34225
34226 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34227 build_fold_addr_expr (version_decl));
34228 result_var = create_tmp_var (ptr_type_node);
34229 convert_stmt = gimple_build_assign (result_var, convert_expr);
34230 return_stmt = gimple_build_return (result_var);
34231
34232 if (predicate_chain == NULL_TREE)
34233 {
34234 gimple_seq_add_stmt (&gseq, convert_stmt);
34235 gimple_seq_add_stmt (&gseq, return_stmt);
34236 set_bb_seq (new_bb, gseq);
34237 gimple_set_bb (convert_stmt, new_bb);
34238 gimple_set_bb (return_stmt, new_bb);
34239 pop_cfun ();
34240 return new_bb;
34241 }
34242
34243 while (predicate_chain != NULL)
34244 {
34245 cond_var = create_tmp_var (integer_type_node);
34246 predicate_decl = TREE_PURPOSE (predicate_chain);
34247 predicate_arg = TREE_VALUE (predicate_chain);
34248 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34249 gimple_call_set_lhs (call_cond_stmt, cond_var);
34250
34251 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34252 gimple_set_bb (call_cond_stmt, new_bb);
34253 gimple_seq_add_stmt (&gseq, call_cond_stmt);
34254
34255 predicate_chain = TREE_CHAIN (predicate_chain);
34256
34257 if (and_expr_var == NULL)
34258 and_expr_var = cond_var;
34259 else
34260 {
34261 gimple assign_stmt;
34262 	  /* Use MIN_EXPR to check whether any integer is zero:
34263 	     and_expr_var = min_expr <cond_var, and_expr_var>.  */
34264 assign_stmt = gimple_build_assign (and_expr_var,
34265 build2 (MIN_EXPR, integer_type_node,
34266 cond_var, and_expr_var));
34267
34268 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34269 gimple_set_bb (assign_stmt, new_bb);
34270 gimple_seq_add_stmt (&gseq, assign_stmt);
34271 }
34272 }
34273
34274 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34275 integer_zero_node,
34276 NULL_TREE, NULL_TREE);
34277 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34278 gimple_set_bb (if_else_stmt, new_bb);
34279 gimple_seq_add_stmt (&gseq, if_else_stmt);
34280
34281 gimple_seq_add_stmt (&gseq, convert_stmt);
34282 gimple_seq_add_stmt (&gseq, return_stmt);
34283 set_bb_seq (new_bb, gseq);
34284
34285 bb1 = new_bb;
34286 e12 = split_block (bb1, if_else_stmt);
34287 bb2 = e12->dest;
34288 e12->flags &= ~EDGE_FALLTHRU;
34289 e12->flags |= EDGE_TRUE_VALUE;
34290
34291 e23 = split_block (bb2, return_stmt);
34292
34293 gimple_set_bb (convert_stmt, bb2);
34294 gimple_set_bb (return_stmt, bb2);
34295
34296 bb3 = e23->dest;
34297 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34298
34299 remove_edge (e23);
34300 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34301
34302 pop_cfun ();
34303
34304 return bb3;
34305 }
34306
34307 /* This parses the attribute arguments to target in DECL and determines
34308 the right builtin to use to match the platform specification.
34309 It returns the priority value for this version decl. If PREDICATE_LIST
34310 is not NULL, it stores the list of cpu features that need to be checked
34311 before dispatching this function. */
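/* An illustrative example of the mapping below: a version declared with
   __attribute__ ((target ("arch=core2"))) produces the predicate
   __builtin_cpu_is ("core2") and priority P_PROC_SSSE3, whereas
   target ("avx2") produces __builtin_cpu_supports ("avx2") and priority
   P_AVX2.  */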
34312
34313 static unsigned int
34314 get_builtin_code_for_version (tree decl, tree *predicate_list)
34315 {
34316 tree attrs;
34317 struct cl_target_option cur_target;
34318 tree target_node;
34319 struct cl_target_option *new_target;
34320 const char *arg_str = NULL;
34321 const char *attrs_str = NULL;
34322 char *tok_str = NULL;
34323 char *token;
34324
34325   /* Priority of i386 features; greater value means higher priority.  This is
34326 used to decide the order in which function dispatch must happen. For
34327 instance, a version specialized for SSE4.2 should be checked for dispatch
34328 before a version for SSE3, as SSE4.2 implies SSE3. */
34329 enum feature_priority
34330 {
34331 P_ZERO = 0,
34332 P_MMX,
34333 P_SSE,
34334 P_SSE2,
34335 P_SSE3,
34336 P_SSSE3,
34337 P_PROC_SSSE3,
34338 P_SSE4_A,
34339 P_PROC_SSE4_A,
34340 P_SSE4_1,
34341 P_SSE4_2,
34342 P_PROC_SSE4_2,
34343 P_POPCNT,
34344 P_AVX,
34345 P_PROC_AVX,
34346 P_BMI,
34347 P_PROC_BMI,
34348 P_FMA4,
34349 P_XOP,
34350 P_PROC_XOP,
34351 P_FMA,
34352 P_PROC_FMA,
34353 P_BMI2,
34354 P_AVX2,
34355 P_PROC_AVX2,
34356 P_AVX512F,
34357 P_PROC_AVX512F
34358 };
34359
34360 enum feature_priority priority = P_ZERO;
34361
34362 /* These are the target attribute strings for which a dispatcher is
34363 available, from fold_builtin_cpu. */
34364
34365 static struct _feature_list
34366 {
34367 const char *const name;
34368 const enum feature_priority priority;
34369 }
34370 const feature_list[] =
34371 {
34372 {"mmx", P_MMX},
34373 {"sse", P_SSE},
34374 {"sse2", P_SSE2},
34375 {"sse3", P_SSE3},
34376 {"sse4a", P_SSE4_A},
34377 {"ssse3", P_SSSE3},
34378 {"sse4.1", P_SSE4_1},
34379 {"sse4.2", P_SSE4_2},
34380 {"popcnt", P_POPCNT},
34381 {"avx", P_AVX},
34382 {"bmi", P_BMI},
34383 {"fma4", P_FMA4},
34384 {"xop", P_XOP},
34385 {"fma", P_FMA},
34386 {"bmi2", P_BMI2},
34387 {"avx2", P_AVX2},
34388 {"avx512f", P_AVX512F}
34389 };
34390
34391
34392 static unsigned int NUM_FEATURES
34393 = sizeof (feature_list) / sizeof (struct _feature_list);
34394
34395 unsigned int i;
34396
34397 tree predicate_chain = NULL_TREE;
34398 tree predicate_decl, predicate_arg;
34399
34400 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34401 gcc_assert (attrs != NULL);
34402
34403 attrs = TREE_VALUE (TREE_VALUE (attrs));
34404
34405 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34406 attrs_str = TREE_STRING_POINTER (attrs);
34407
34408 /* Return priority zero for default function. */
34409 if (strcmp (attrs_str, "default") == 0)
34410 return 0;
34411
34412 /* Handle arch= if specified. For priority, set it to be 1 more than
34413 the best instruction set the processor can handle. For instance, if
34414 there is a version for atom and a version for ssse3 (the highest ISA
34415 priority for atom), the atom version must be checked for dispatch
34416 before the ssse3 version. */
34417 if (strstr (attrs_str, "arch=") != NULL)
34418 {
34419 cl_target_option_save (&cur_target, &global_options);
34420 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34421 &global_options_set);
34422
34423 gcc_assert (target_node);
34424 new_target = TREE_TARGET_OPTION (target_node);
34425 gcc_assert (new_target);
34426
34427 if (new_target->arch_specified && new_target->arch > 0)
34428 {
34429 switch (new_target->arch)
34430 {
34431 case PROCESSOR_CORE2:
34432 arg_str = "core2";
34433 priority = P_PROC_SSSE3;
34434 break;
34435 case PROCESSOR_NEHALEM:
34436 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34437 arg_str = "westmere";
34438 else
34439 /* We translate "arch=corei7" and "arch=nehalem" to
34440 "corei7" so that it will be mapped to M_INTEL_COREI7
34441 as cpu type to cover all M_INTEL_COREI7_XXXs. */
34442 arg_str = "corei7";
34443 priority = P_PROC_SSE4_2;
34444 break;
34445 case PROCESSOR_SANDYBRIDGE:
34446 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34447 arg_str = "ivybridge";
34448 else
34449 arg_str = "sandybridge";
34450 priority = P_PROC_AVX;
34451 break;
34452 case PROCESSOR_HASWELL:
34453 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34454 arg_str = "broadwell";
34455 else
34456 arg_str = "haswell";
34457 priority = P_PROC_AVX2;
34458 break;
34459 case PROCESSOR_BONNELL:
34460 arg_str = "bonnell";
34461 priority = P_PROC_SSSE3;
34462 break;
34463 case PROCESSOR_KNL:
34464 arg_str = "knl";
34465 priority = P_PROC_AVX512F;
34466 break;
34467 case PROCESSOR_SILVERMONT:
34468 arg_str = "silvermont";
34469 priority = P_PROC_SSE4_2;
34470 break;
34471 case PROCESSOR_AMDFAM10:
34472 arg_str = "amdfam10h";
34473 priority = P_PROC_SSE4_A;
34474 break;
34475 case PROCESSOR_BTVER1:
34476 arg_str = "btver1";
34477 priority = P_PROC_SSE4_A;
34478 break;
34479 case PROCESSOR_BTVER2:
34480 arg_str = "btver2";
34481 priority = P_PROC_BMI;
34482 break;
34483 case PROCESSOR_BDVER1:
34484 arg_str = "bdver1";
34485 priority = P_PROC_XOP;
34486 break;
34487 case PROCESSOR_BDVER2:
34488 arg_str = "bdver2";
34489 priority = P_PROC_FMA;
34490 break;
34491 case PROCESSOR_BDVER3:
34492 arg_str = "bdver3";
34493 priority = P_PROC_FMA;
34494 break;
34495 case PROCESSOR_BDVER4:
34496 arg_str = "bdver4";
34497 priority = P_PROC_AVX2;
34498 break;
34499 }
34500 }
34501
34502 cl_target_option_restore (&global_options, &cur_target);
34503
34504 if (predicate_list && arg_str == NULL)
34505 {
34506 error_at (DECL_SOURCE_LOCATION (decl),
34507 "No dispatcher found for the versioning attributes");
34508 return 0;
34509 }
34510
34511 if (predicate_list)
34512 {
34513 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34514 	  /* For a C string literal the length includes the terminating null.  */
34515 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34516 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34517 predicate_chain);
34518 }
34519 }
34520
34521 /* Process feature name. */
34522 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34523 strcpy (tok_str, attrs_str);
34524 token = strtok (tok_str, ",");
34525 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34526
34527 while (token != NULL)
34528 {
34529 /* Do not process "arch=" */
34530 if (strncmp (token, "arch=", 5) == 0)
34531 {
34532 token = strtok (NULL, ",");
34533 continue;
34534 }
34535 for (i = 0; i < NUM_FEATURES; ++i)
34536 {
34537 if (strcmp (token, feature_list[i].name) == 0)
34538 {
34539 if (predicate_list)
34540 {
34541 predicate_arg = build_string_literal (
34542 strlen (feature_list[i].name) + 1,
34543 feature_list[i].name);
34544 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34545 predicate_chain);
34546 }
34547 /* Find the maximum priority feature. */
34548 if (feature_list[i].priority > priority)
34549 priority = feature_list[i].priority;
34550
34551 break;
34552 }
34553 }
34554 if (predicate_list && i == NUM_FEATURES)
34555 {
34556 error_at (DECL_SOURCE_LOCATION (decl),
34557 "No dispatcher found for %s", token);
34558 return 0;
34559 }
34560 token = strtok (NULL, ",");
34561 }
34562 free (tok_str);
34563
34564 if (predicate_list && predicate_chain == NULL_TREE)
34565 {
34566 error_at (DECL_SOURCE_LOCATION (decl),
34567 "No dispatcher found for the versioning attributes : %s",
34568 attrs_str);
34569 return 0;
34570 }
34571 else if (predicate_list)
34572 {
34573 predicate_chain = nreverse (predicate_chain);
34574 *predicate_list = predicate_chain;
34575 }
34576
34577 return priority;
34578 }
34579
34580 /* This compares the priority of target features in function DECL1
34581 and DECL2. It returns positive value if DECL1 is higher priority,
34582 negative value if DECL2 is higher priority and 0 if they are the
34583 same. */
34584
34585 static int
34586 ix86_compare_version_priority (tree decl1, tree decl2)
34587 {
34588 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34589 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34590
34591 return (int)priority1 - (int)priority2;
34592 }
34593
34594 /* V1 and V2 point to function versions with different priorities
34595 based on the target ISA. This function compares their priorities. */
34596
34597 static int
34598 feature_compare (const void *v1, const void *v2)
34599 {
34600 typedef struct _function_version_info
34601 {
34602 tree version_decl;
34603 tree predicate_chain;
34604 unsigned int dispatch_priority;
34605 } function_version_info;
34606
34607 const function_version_info c1 = *(const function_version_info *)v1;
34608 const function_version_info c2 = *(const function_version_info *)v2;
34609 return (c2.dispatch_priority - c1.dispatch_priority);
34610 }
34611
34612 /* This function generates the dispatch function for
34613 multi-versioned functions. DISPATCH_DECL is the function which will
34614 contain the dispatch logic. FNDECLS are the function choices for
34615 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
34616 in DISPATCH_DECL in which the dispatch code is generated. */
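/* The dispatcher body built here has roughly this shape (an illustrative
   sketch, not literal GIMPLE):

     __builtin_cpu_init ();
     if (<predicates of the highest-priority version hold>)
       return &version_N;
     ...
     if (<predicates of the lowest-priority version hold>)
       return &version_1;
     return &default_decl;

   where each test is emitted by add_condition_to_bb in descending
   dispatch-priority order.  */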
34617
34618 static int
34619 dispatch_function_versions (tree dispatch_decl,
34620 void *fndecls_p,
34621 basic_block *empty_bb)
34622 {
34623 tree default_decl;
34624 gimple ifunc_cpu_init_stmt;
34625 gimple_seq gseq;
34626 int ix;
34627 tree ele;
34628 vec<tree> *fndecls;
34629 unsigned int num_versions = 0;
34630 unsigned int actual_versions = 0;
34631 unsigned int i;
34632
34633 struct _function_version_info
34634 {
34635 tree version_decl;
34636 tree predicate_chain;
34637 unsigned int dispatch_priority;
34638 }*function_version_info;
34639
34640 gcc_assert (dispatch_decl != NULL
34641 && fndecls_p != NULL
34642 && empty_bb != NULL);
34643
34644   /* fndecls_p is actually a vector.  */
34645 fndecls = static_cast<vec<tree> *> (fndecls_p);
34646
34647 /* At least one more version other than the default. */
34648 num_versions = fndecls->length ();
34649 gcc_assert (num_versions >= 2);
34650
34651 function_version_info = (struct _function_version_info *)
34652 XNEWVEC (struct _function_version_info, (num_versions - 1));
34653
34654 /* The first version in the vector is the default decl. */
34655 default_decl = (*fndecls)[0];
34656
34657 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34658
34659 gseq = bb_seq (*empty_bb);
34660 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34661 constructors, so explicitly call __builtin_cpu_init here. */
34662 ifunc_cpu_init_stmt = gimple_build_call_vec (
34663 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34664 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34665 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34666 set_bb_seq (*empty_bb, gseq);
34667
34668 pop_cfun ();
34669
34670
34671 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34672 {
34673 tree version_decl = ele;
34674 tree predicate_chain = NULL_TREE;
34675 unsigned int priority;
34676 /* Get attribute string, parse it and find the right predicate decl.
34677 The predicate function could be a lengthy combination of many
34678 features, like arch-type and various isa-variants. */
34679 priority = get_builtin_code_for_version (version_decl,
34680 &predicate_chain);
34681
34682 if (predicate_chain == NULL_TREE)
34683 continue;
34684
34685 function_version_info [actual_versions].version_decl = version_decl;
34686 function_version_info [actual_versions].predicate_chain
34687 = predicate_chain;
34688 function_version_info [actual_versions].dispatch_priority = priority;
34689 actual_versions++;
34690 }
34691
34692 /* Sort the versions according to descending order of dispatch priority. The
34693 priority is based on the ISA. This is not a perfect solution. There
34694 could still be ambiguity. If more than one function version is suitable
34695 to execute, which one should be dispatched? In future, allow the user
34696 to specify a dispatch priority next to the version. */
34697 qsort (function_version_info, actual_versions,
34698 sizeof (struct _function_version_info), feature_compare);
34699
34700 for (i = 0; i < actual_versions; ++i)
34701 *empty_bb = add_condition_to_bb (dispatch_decl,
34702 function_version_info[i].version_decl,
34703 function_version_info[i].predicate_chain,
34704 *empty_bb);
34705
34706 /* dispatch default version at the end. */
34707 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34708 NULL, *empty_bb);
34709
34710 free (function_version_info);
34711 return 0;
34712 }
34713
34714 /* Comparator function to be used in qsort routine to sort attribute
34715 specification strings to "target". */
34716
34717 static int
34718 attr_strcmp (const void *v1, const void *v2)
34719 {
34720 const char *c1 = *(char *const*)v1;
34721 const char *c2 = *(char *const*)v2;
34722 return strcmp (c1, c2);
34723 }
34724
34725 /* ARGLIST is the argument to target attribute. This function tokenizes
34726 the comma separated arguments, sorts them and returns a string which
34727 is a unique identifier for the comma separated arguments. It also
34728 replaces non-identifier characters "=,-" with "_". */
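/* For example (illustrative), the argument list "sse4.2,arch=core2"
   becomes "arch_core2_sse4.2": the comma-separated tokens are sorted
   with qsort, rejoined with '_', and '=' and '-' are rewritten as
   '_'.  */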
34729
34730 static char *
34731 sorted_attr_string (tree arglist)
34732 {
34733 tree arg;
34734 size_t str_len_sum = 0;
34735 char **args = NULL;
34736 char *attr_str, *ret_str;
34737 char *attr = NULL;
34738 unsigned int argnum = 1;
34739 unsigned int i;
34740
34741 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34742 {
34743 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34744 size_t len = strlen (str);
34745 str_len_sum += len + 1;
34746 if (arg != arglist)
34747 argnum++;
34748 for (i = 0; i < strlen (str); i++)
34749 if (str[i] == ',')
34750 argnum++;
34751 }
34752
34753 attr_str = XNEWVEC (char, str_len_sum);
34754 str_len_sum = 0;
34755 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34756 {
34757 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34758 size_t len = strlen (str);
34759 memcpy (attr_str + str_len_sum, str, len);
34760 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
34761 str_len_sum += len + 1;
34762 }
34763
34764 /* Replace "=,-" with "_". */
34765 for (i = 0; i < strlen (attr_str); i++)
34766 if (attr_str[i] == '=' || attr_str[i]== '-')
34767 attr_str[i] = '_';
34768
34769 if (argnum == 1)
34770 return attr_str;
34771
34772 args = XNEWVEC (char *, argnum);
34773
34774 i = 0;
34775 attr = strtok (attr_str, ",");
34776 while (attr != NULL)
34777 {
34778 args[i] = attr;
34779 i++;
34780 attr = strtok (NULL, ",");
34781 }
34782
34783 qsort (args, argnum, sizeof (char *), attr_strcmp);
34784
34785 ret_str = XNEWVEC (char, str_len_sum);
34786 str_len_sum = 0;
34787 for (i = 0; i < argnum; i++)
34788 {
34789 size_t len = strlen (args[i]);
34790 memcpy (ret_str + str_len_sum, args[i], len);
34791 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
34792 str_len_sum += len + 1;
34793 }
34794
34795 XDELETEVEC (args);
34796 XDELETEVEC (attr_str);
34797 return ret_str;
34798 }
34799
34800 /* This function changes the assembler name for functions that are
34801 versions. If DECL is a function version and has a "target"
34802 attribute, it appends the attribute string to its assembler name. */
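/* For instance (sketch), a version declared as

     __attribute__ ((target ("arch=core2,sse4.2"))) int foo (void);

   gets an assembler name like "foo.arch_core2_sse4.2", built from the
   string produced by sorted_attr_string, while the version tagged
   target("default") keeps its original name.  */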
34803
34804 static tree
34805 ix86_mangle_function_version_assembler_name (tree decl, tree id)
34806 {
34807 tree version_attr;
34808 const char *orig_name, *version_string;
34809 char *attr_str, *assembler_name;
34810
34811 if (DECL_DECLARED_INLINE_P (decl)
34812 && lookup_attribute ("gnu_inline",
34813 DECL_ATTRIBUTES (decl)))
34814 error_at (DECL_SOURCE_LOCATION (decl),
34815 "Function versions cannot be marked as gnu_inline,"
34816 " bodies have to be generated");
34817
34818 if (DECL_VIRTUAL_P (decl)
34819 || DECL_VINDEX (decl))
34820 sorry ("Virtual function multiversioning not supported");
34821
34822 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34823
34824 /* target attribute string cannot be NULL. */
34825 gcc_assert (version_attr != NULL_TREE);
34826
34827 orig_name = IDENTIFIER_POINTER (id);
34828 version_string
34829 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
34830
34831 if (strcmp (version_string, "default") == 0)
34832 return id;
34833
34834 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
34835 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
34836
34837 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
34838
34839 /* Allow assembler name to be modified if already set. */
34840 if (DECL_ASSEMBLER_NAME_SET_P (decl))
34841 SET_DECL_RTL (decl, NULL);
34842
34843 tree ret = get_identifier (assembler_name);
34844 XDELETEVEC (attr_str);
34845 XDELETEVEC (assembler_name);
34846 return ret;
34847 }
34848
34849 /* This function returns true if FN1 and FN2 are versions of the same function,
34850 that is, the target strings of the function decls are different. This assumes
34851 that FN1 and FN2 have the same signature. */
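/* Illustrative example: with

     __attribute__ ((target ("default"))) int foo (void);
     __attribute__ ((target ("avx2")))    int foo (void);

   the sorted target strings differ, so the decls are treated as
   versions of one function; two decls carrying the same target string
   are not.  */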
34852
34853 static bool
34854 ix86_function_versions (tree fn1, tree fn2)
34855 {
34856 tree attr1, attr2;
34857 char *target1, *target2;
34858 bool result;
34859
34860 if (TREE_CODE (fn1) != FUNCTION_DECL
34861 || TREE_CODE (fn2) != FUNCTION_DECL)
34862 return false;
34863
34864 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
34865 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
34866
34867 /* At least one function decl should have the target attribute specified. */
34868 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
34869 return false;
34870
34871 /* Diagnose missing target attribute if one of the decls is already
34872 multi-versioned. */
34873 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
34874 {
34875 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
34876 {
34877 if (attr2 != NULL_TREE)
34878 {
34879 tree tem = fn1;
34880 fn1 = fn2;
34881 fn2 = tem;
34882 attr1 = attr2;
34883 }
34884 error_at (DECL_SOURCE_LOCATION (fn2),
34885 "missing %<target%> attribute for multi-versioned %D",
34886 fn2);
34887 inform (DECL_SOURCE_LOCATION (fn1),
34888 "previous declaration of %D", fn1);
34889 /* Prevent diagnosing of the same error multiple times. */
34890 DECL_ATTRIBUTES (fn2)
34891 = tree_cons (get_identifier ("target"),
34892 copy_node (TREE_VALUE (attr1)),
34893 DECL_ATTRIBUTES (fn2));
34894 }
34895 return false;
34896 }
34897
34898 target1 = sorted_attr_string (TREE_VALUE (attr1));
34899 target2 = sorted_attr_string (TREE_VALUE (attr2));
34900
34901 /* The sorted target strings must be different for fn1 and fn2
34902 to be versions. */
34903 if (strcmp (target1, target2) == 0)
34904 result = false;
34905 else
34906 result = true;
34907
34908 XDELETEVEC (target1);
34909 XDELETEVEC (target2);
34910
34911 return result;
34912 }
34913
34914 static tree
34915 ix86_mangle_decl_assembler_name (tree decl, tree id)
34916 {
34917 /* For function version, add the target suffix to the assembler name. */
34918 if (TREE_CODE (decl) == FUNCTION_DECL
34919 && DECL_FUNCTION_VERSIONED (decl))
34920 id = ix86_mangle_function_version_assembler_name (decl, id);
34921 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
34922 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
34923 #endif
34924
34925 return id;
34926 }
34927
34928 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
34929 is true, append the full path name of the source file. */
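/* Illustration: for a function "foo" and SUFFIX "resolver", this
   returns "foo.resolver" when MAKE_UNIQUE is false.  When it is true,
   a component derived from the source file via get_file_function_name
   is inserted, e.g. roughly "foo.<file-id>.resolver", so local decls
   do not collide at link time.  (The exact middle component is
   front-end and target dependent.)  */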
34930
34931 static char *
34932 make_name (tree decl, const char *suffix, bool make_unique)
34933 {
34934 char *global_var_name;
34935 int name_len;
34936 const char *name;
34937 const char *unique_name = NULL;
34938
34939 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
34940
34941 /* Get a unique name that can be used globally without any chances
34942 of collision at link time. */
34943 if (make_unique)
34944 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
34945
34946 name_len = strlen (name) + strlen (suffix) + 2;
34947
34948 if (make_unique)
34949 name_len += strlen (unique_name) + 1;
34950 global_var_name = XNEWVEC (char, name_len);
34951
34952 /* Use '.' to concatenate names as it is demangler friendly. */
34953 if (make_unique)
34954 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
34955 suffix);
34956 else
34957 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
34958
34959 return global_var_name;
34960 }
34961
34962 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
34963
34964 /* Make a dispatcher declaration for the multi-versioned function DECL.
34965 Calls to the function DECL will be replaced with calls to the dispatcher
34966 by the front-end. Return the decl created. */
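/* Sketch of the resulting arrangement (names illustrative): the decl
   created here is named along the lines of "foo.ifunc" (see make_name)
   and is later given, in make_resolver_func, an attribute equivalent to

     __attribute__ ((ifunc ("foo.resolver")))

   so calls resolve at load time to the version picked by the
   resolver.  */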
34967
34968 static tree
34969 make_dispatcher_decl (const tree decl)
34970 {
34971 tree func_decl;
34972 char *func_name;
34973 tree fn_type, func_type;
34974 bool is_uniq = false;
34975
34976 if (TREE_PUBLIC (decl) == 0)
34977 is_uniq = true;
34978
34979 func_name = make_name (decl, "ifunc", is_uniq);
34980
34981 fn_type = TREE_TYPE (decl);
34982 func_type = build_function_type (TREE_TYPE (fn_type),
34983 TYPE_ARG_TYPES (fn_type));
34984
34985 func_decl = build_fn_decl (func_name, func_type);
34986 XDELETEVEC (func_name);
34987 TREE_USED (func_decl) = 1;
34988 DECL_CONTEXT (func_decl) = NULL_TREE;
34989 DECL_INITIAL (func_decl) = error_mark_node;
34990 DECL_ARTIFICIAL (func_decl) = 1;
34991 /* Mark this func as external, the resolver will flip it again if
34992 it gets generated. */
34993 DECL_EXTERNAL (func_decl) = 1;
34994 /* This decl will be an IFUNC; IFUNCs have to be externally visible. */
34995 TREE_PUBLIC (func_decl) = 1;
34996
34997 return func_decl;
34998 }
34999
35000 #endif
35001
35002 /* Returns true if DECL is multi-versioned and is the default function,
35003 that is, it is not tagged with a target-specific optimization. */
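/* E.g. (illustrative)

     __attribute__ ((target ("default"))) int foo (void) { return 0; }

   is the default version; a decl tagged target("avx2") is not.  */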
35004
35005 static bool
35006 is_function_default_version (const tree decl)
35007 {
35008 if (TREE_CODE (decl) != FUNCTION_DECL
35009 || !DECL_FUNCTION_VERSIONED (decl))
35010 return false;
35011 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
35012 gcc_assert (attr);
35013 attr = TREE_VALUE (TREE_VALUE (attr));
35014 return (TREE_CODE (attr) == STRING_CST
35015 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
35016 }
35017
35018 /* Make a dispatcher declaration for the multi-versioned function DECL.
35019 Calls to the function DECL will be replaced with calls to the dispatcher
35020 by the front-end. Returns the decl of the dispatcher function. */
35021
35022 static tree
35023 ix86_get_function_versions_dispatcher (void *decl)
35024 {
35025 tree fn = (tree) decl;
35026 struct cgraph_node *node = NULL;
35027 struct cgraph_node *default_node = NULL;
35028 struct cgraph_function_version_info *node_v = NULL;
35029 struct cgraph_function_version_info *first_v = NULL;
35030
35031 tree dispatch_decl = NULL;
35032
35033 struct cgraph_function_version_info *default_version_info = NULL;
35034
35035 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
35036
35037 node = cgraph_node::get (fn);
35038 gcc_assert (node != NULL);
35039
35040 node_v = node->function_version ();
35041 gcc_assert (node_v != NULL);
35042
35043 if (node_v->dispatcher_resolver != NULL)
35044 return node_v->dispatcher_resolver;
35045
35046 /* Find the default version and make it the first node. */
35047 first_v = node_v;
35048 /* Go to the beginning of the chain. */
35049 while (first_v->prev != NULL)
35050 first_v = first_v->prev;
35051 default_version_info = first_v;
35052 while (default_version_info != NULL)
35053 {
35054 if (is_function_default_version
35055 (default_version_info->this_node->decl))
35056 break;
35057 default_version_info = default_version_info->next;
35058 }
35059
35060 /* If there is no default node, just return NULL. */
35061 if (default_version_info == NULL)
35062 return NULL;
35063
35064 /* Make default info the first node. */
35065 if (first_v != default_version_info)
35066 {
35067 default_version_info->prev->next = default_version_info->next;
35068 if (default_version_info->next)
35069 default_version_info->next->prev = default_version_info->prev;
35070 first_v->prev = default_version_info;
35071 default_version_info->next = first_v;
35072 default_version_info->prev = NULL;
35073 }
35074
35075 default_node = default_version_info->this_node;
35076
35077 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35078 if (targetm.has_ifunc_p ())
35079 {
35080 struct cgraph_function_version_info *it_v = NULL;
35081 struct cgraph_node *dispatcher_node = NULL;
35082 struct cgraph_function_version_info *dispatcher_version_info = NULL;
35083
35084 /* Right now, the dispatching is done via ifunc. */
35085 dispatch_decl = make_dispatcher_decl (default_node->decl);
35086
35087 dispatcher_node = cgraph_node::get_create (dispatch_decl);
35088 gcc_assert (dispatcher_node != NULL);
35089 dispatcher_node->dispatcher_function = 1;
35090 dispatcher_version_info
35091 = dispatcher_node->insert_new_function_version ();
35092 dispatcher_version_info->next = default_version_info;
35093 dispatcher_node->definition = 1;
35094
35095 /* Set the dispatcher for all the versions. */
35096 it_v = default_version_info;
35097 while (it_v != NULL)
35098 {
35099 it_v->dispatcher_resolver = dispatch_decl;
35100 it_v = it_v->next;
35101 }
35102 }
35103 else
35104 #endif
35105 {
35106 error_at (DECL_SOURCE_LOCATION (default_node->decl),
35107 "multiversioning needs ifunc which is not supported "
35108 "on this target");
35109 }
35110
35111 return dispatch_decl;
35112 }
35113
35114 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
35115 it to CHAIN. */
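/* For instance, make_attribute ("ifunc", "foo.resolver", NULL_TREE)
   builds a TREE_LIST equivalent to the source-level attribute
   __attribute__ ((ifunc ("foo.resolver"))); the "foo.resolver" name
   is only an illustration.  */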
35116
35117 static tree
35118 make_attribute (const char *name, const char *arg_name, tree chain)
35119 {
35120 tree attr_name;
35121 tree attr_arg_name;
35122 tree attr_args;
35123 tree attr;
35124
35125 attr_name = get_identifier (name);
35126 attr_arg_name = build_string (strlen (arg_name), arg_name);
35127 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35128 attr = tree_cons (attr_name, attr_args, chain);
35129 return attr;
35130 }
35131
35132 /* Make the resolver function decl to dispatch the versions of
35133 a multi-versioned function, DEFAULT_DECL. Create an
35134 empty basic block in the resolver and store the pointer in
35135 EMPTY_BB. Return the decl of the resolver function. */
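/* The resolver built here corresponds, roughly, to hand-written IFUNC
   resolver code such as (names hypothetical):

     void *
     foo_resolver (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_supports ("avx2"))
         return foo_avx2;
       return foo_default;
     }

   Its body is filled in afterwards via dispatch_function_versions.  */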
35136
35137 static tree
35138 make_resolver_func (const tree default_decl,
35139 const tree dispatch_decl,
35140 basic_block *empty_bb)
35141 {
35142 char *resolver_name;
35143 tree decl, type, decl_name, t;
35144 bool is_uniq = false;
35145
35146 /* IFUNCs have to be globally visible. So, if the default_decl is
35147 not, then the name of the IFUNC should be made unique. */
35148 if (TREE_PUBLIC (default_decl) == 0)
35149 is_uniq = true;
35150
35151 /* Append the filename to the resolver function if the versions are
35152 not externally visible. This is because the resolver function has
35153 to be externally visible for the loader to find it. So, appending
35154 the filename will prevent conflicts with a resolver function from
35155 another module which is based on the same version name. */
35156 resolver_name = make_name (default_decl, "resolver", is_uniq);
35157
35158 /* The resolver function should return a (void *). */
35159 type = build_function_type_list (ptr_type_node, NULL_TREE);
35160
35161 decl = build_fn_decl (resolver_name, type);
35162 decl_name = get_identifier (resolver_name);
35163 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35164
35165 DECL_NAME (decl) = decl_name;
35166 TREE_USED (decl) = 1;
35167 DECL_ARTIFICIAL (decl) = 1;
35168 DECL_IGNORED_P (decl) = 0;
35169 /* IFUNC resolvers have to be externally visible. */
35170 TREE_PUBLIC (decl) = 1;
35171 DECL_UNINLINABLE (decl) = 1;
35172
35173 /* Resolver is not external, body is generated. */
35174 DECL_EXTERNAL (decl) = 0;
35175 DECL_EXTERNAL (dispatch_decl) = 0;
35176
35177 DECL_CONTEXT (decl) = NULL_TREE;
35178 DECL_INITIAL (decl) = make_node (BLOCK);
35179 DECL_STATIC_CONSTRUCTOR (decl) = 0;
35180
35181 if (DECL_COMDAT_GROUP (default_decl)
35182 || TREE_PUBLIC (default_decl))
35183 {
35184 /* In this case, each translation unit with a call to this
35185 versioned function will put out a resolver. Ensure it
35186 is comdat to keep just one copy. */
35187 DECL_COMDAT (decl) = 1;
35188 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35189 }
35190 /* Build result decl and add to function_decl. */
35191 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35192 DECL_ARTIFICIAL (t) = 1;
35193 DECL_IGNORED_P (t) = 1;
35194 DECL_RESULT (decl) = t;
35195
35196 gimplify_function_tree (decl);
35197 push_cfun (DECL_STRUCT_FUNCTION (decl));
35198 *empty_bb = init_lowered_empty_function (decl, false, 0);
35199
35200 cgraph_node::add_new_function (decl, true);
35201 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35202
35203 pop_cfun ();
35204
35205 gcc_assert (dispatch_decl != NULL);
35206 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
35207 DECL_ATTRIBUTES (dispatch_decl)
35208 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35209
35210 /* Create the alias for dispatch to resolver here. */
35211 /*cgraph_create_function_alias (dispatch_decl, decl);*/
35212 cgraph_node::create_same_body_alias (dispatch_decl, decl);
35213 XDELETEVEC (resolver_name);
35214 return decl;
35215 }
35216
35217 /* Generate the dispatching code body to dispatch multi-versioned function
35218 DECL. The target hook is called to process the "target" attributes and
35219 provide the code to dispatch the right function at run-time. NODE points
35220 to the dispatcher decl whose body will be created. */
35221
35222 static tree
35223 ix86_generate_version_dispatcher_body (void *node_p)
35224 {
35225 tree resolver_decl;
35226 basic_block empty_bb;
35227 tree default_ver_decl;
35228 struct cgraph_node *versn;
35229 struct cgraph_node *node;
35230
35231 struct cgraph_function_version_info *node_version_info = NULL;
35232 struct cgraph_function_version_info *versn_info = NULL;
35233
35234 node = (cgraph_node *)node_p;
35235
35236 node_version_info = node->function_version ();
35237 gcc_assert (node->dispatcher_function
35238 && node_version_info != NULL);
35239
35240 if (node_version_info->dispatcher_resolver)
35241 return node_version_info->dispatcher_resolver;
35242
35243 /* The first version in the chain corresponds to the default version. */
35244 default_ver_decl = node_version_info->next->this_node->decl;
35245
35246 /* node is going to be an alias, so remove the finalized bit. */
35247 node->definition = false;
35248
35249 resolver_decl = make_resolver_func (default_ver_decl,
35250 node->decl, &empty_bb);
35251
35252 node_version_info->dispatcher_resolver = resolver_decl;
35253
35254 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
35255
35256 auto_vec<tree, 2> fn_ver_vec;
35257
35258 for (versn_info = node_version_info->next; versn_info;
35259 versn_info = versn_info->next)
35260 {
35261 versn = versn_info->this_node;
35262 /* Check for virtual functions here again, as by this time it should
35263 have been determined if this function needs a vtable index or
35264 not. This happens for methods in derived classes that override
35265 virtual methods in base classes but are not explicitly marked as
35266 virtual. */
35267 if (DECL_VINDEX (versn->decl))
35268 sorry ("Virtual function multiversioning not supported");
35269
35270 fn_ver_vec.safe_push (versn->decl);
35271 }
35272
35273 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35274 cgraph_edge::rebuild_edges ();
35275 pop_cfun ();
35276 return resolver_decl;
35277 }
35278 /* This builds the processor_model struct type defined in
35279 libgcc/config/i386/cpuinfo.c */
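/* The layout built below mirrors, as a sketch, the libgcc definition:

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */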
35280
35281 static tree
35282 build_processor_model_struct (void)
35283 {
35284 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35285 "__cpu_features"};
35286 tree field = NULL_TREE, field_chain = NULL_TREE;
35287 int i;
35288 tree type = make_node (RECORD_TYPE);
35289
35290 /* The first 3 fields are unsigned int. */
35291 for (i = 0; i < 3; ++i)
35292 {
35293 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35294 get_identifier (field_name[i]), unsigned_type_node);
35295 if (field_chain != NULL_TREE)
35296 DECL_CHAIN (field) = field_chain;
35297 field_chain = field;
35298 }
35299
35300 /* The last field is an array of unsigned integers of size one. */
35301 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35302 get_identifier (field_name[3]),
35303 build_array_type (unsigned_type_node,
35304 build_index_type (size_one_node)));
35305 if (field_chain != NULL_TREE)
35306 DECL_CHAIN (field) = field_chain;
35307 field_chain = field;
35308
35309 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35310 return type;
35311 }
35312
35313 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
35314
35315 static tree
35316 make_var_decl (tree type, const char *name)
35317 {
35318 tree new_decl;
35319
35320 new_decl = build_decl (UNKNOWN_LOCATION,
35321 VAR_DECL,
35322 get_identifier (name),
35323 type);
35324
35325 DECL_EXTERNAL (new_decl) = 1;
35326 TREE_STATIC (new_decl) = 1;
35327 TREE_PUBLIC (new_decl) = 1;
35328 DECL_INITIAL (new_decl) = 0;
35329 DECL_ARTIFICIAL (new_decl) = 0;
35330 DECL_PRESERVE_P (new_decl) = 1;
35331
35332 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35333 assemble_variable (new_decl, 0, 0, 0);
35334
35335 return new_decl;
35336 }
35337
35338 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35339 into an integer defined in libgcc/config/i386/cpuinfo.c */
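/* Folding examples (illustrative; the field and enum names are the
   ones defined below):

     __builtin_cpu_is ("intel")
       -> (int) (__cpu_model.__cpu_vendor == M_INTEL)
     __builtin_cpu_is ("haswell")
       -> (int) (__cpu_model.__cpu_subtype
                 == M_INTEL_COREI7_HASWELL - M_CPU_SUBTYPE_START)
     __builtin_cpu_supports ("avx2")
       -> (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))  */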
35340
35341 static tree
35342 fold_builtin_cpu (tree fndecl, tree *args)
35343 {
35344 unsigned int i;
35345 enum ix86_builtins fn_code = (enum ix86_builtins)
35346 DECL_FUNCTION_CODE (fndecl);
35347 tree param_string_cst = NULL;
35348
35349 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
35350 enum processor_features
35351 {
35352 F_CMOV = 0,
35353 F_MMX,
35354 F_POPCNT,
35355 F_SSE,
35356 F_SSE2,
35357 F_SSE3,
35358 F_SSSE3,
35359 F_SSE4_1,
35360 F_SSE4_2,
35361 F_AVX,
35362 F_AVX2,
35363 F_SSE4_A,
35364 F_FMA4,
35365 F_XOP,
35366 F_FMA,
35367 F_AVX512F,
35368 F_BMI,
35369 F_BMI2,
35370 F_MAX
35371 };
35372
35373 /* These are the values for vendor types and cpu types and subtypes
35374 in cpuinfo.c. The corresponding start value should be subtracted
35375 from CPU types and subtypes. */
35376 enum processor_model
35377 {
35378 M_INTEL = 1,
35379 M_AMD,
35380 M_CPU_TYPE_START,
35381 M_INTEL_BONNELL,
35382 M_INTEL_CORE2,
35383 M_INTEL_COREI7,
35384 M_AMDFAM10H,
35385 M_AMDFAM15H,
35386 M_INTEL_SILVERMONT,
35387 M_INTEL_KNL,
35388 M_AMD_BTVER1,
35389 M_AMD_BTVER2,
35390 M_CPU_SUBTYPE_START,
35391 M_INTEL_COREI7_NEHALEM,
35392 M_INTEL_COREI7_WESTMERE,
35393 M_INTEL_COREI7_SANDYBRIDGE,
35394 M_AMDFAM10H_BARCELONA,
35395 M_AMDFAM10H_SHANGHAI,
35396 M_AMDFAM10H_ISTANBUL,
35397 M_AMDFAM15H_BDVER1,
35398 M_AMDFAM15H_BDVER2,
35399 M_AMDFAM15H_BDVER3,
35400 M_AMDFAM15H_BDVER4,
35401 M_INTEL_COREI7_IVYBRIDGE,
35402 M_INTEL_COREI7_HASWELL,
35403 M_INTEL_COREI7_BROADWELL
35404 };
35405
35406 static struct _arch_names_table
35407 {
35408 const char *const name;
35409 const enum processor_model model;
35410 }
35411 const arch_names_table[] =
35412 {
35413 {"amd", M_AMD},
35414 {"intel", M_INTEL},
35415 {"atom", M_INTEL_BONNELL},
35416 {"slm", M_INTEL_SILVERMONT},
35417 {"core2", M_INTEL_CORE2},
35418 {"corei7", M_INTEL_COREI7},
35419 {"nehalem", M_INTEL_COREI7_NEHALEM},
35420 {"westmere", M_INTEL_COREI7_WESTMERE},
35421 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35422 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35423 {"haswell", M_INTEL_COREI7_HASWELL},
35424 {"broadwell", M_INTEL_COREI7_BROADWELL},
35425 {"bonnell", M_INTEL_BONNELL},
35426 {"silvermont", M_INTEL_SILVERMONT},
35427 {"knl", M_INTEL_KNL},
35428 {"amdfam10h", M_AMDFAM10H},
35429 {"barcelona", M_AMDFAM10H_BARCELONA},
35430 {"shanghai", M_AMDFAM10H_SHANGHAI},
35431 {"istanbul", M_AMDFAM10H_ISTANBUL},
35432 {"btver1", M_AMD_BTVER1},
35433 {"amdfam15h", M_AMDFAM15H},
35434 {"bdver1", M_AMDFAM15H_BDVER1},
35435 {"bdver2", M_AMDFAM15H_BDVER2},
35436 {"bdver3", M_AMDFAM15H_BDVER3},
35437 {"bdver4", M_AMDFAM15H_BDVER4},
35438 {"btver2", M_AMD_BTVER2},
35439 };
35440
35441 static struct _isa_names_table
35442 {
35443 const char *const name;
35444 const enum processor_features feature;
35445 }
35446 const isa_names_table[] =
35447 {
35448 {"cmov", F_CMOV},
35449 {"mmx", F_MMX},
35450 {"popcnt", F_POPCNT},
35451 {"sse", F_SSE},
35452 {"sse2", F_SSE2},
35453 {"sse3", F_SSE3},
35454 {"ssse3", F_SSSE3},
35455 {"sse4a", F_SSE4_A},
35456 {"sse4.1", F_SSE4_1},
35457 {"sse4.2", F_SSE4_2},
35458 {"avx", F_AVX},
35459 {"fma4", F_FMA4},
35460 {"xop", F_XOP},
35461 {"fma", F_FMA},
35462 {"avx2", F_AVX2},
35463 {"avx512f",F_AVX512F},
35464 {"bmi", F_BMI},
35465 {"bmi2", F_BMI2}
35466 };
35467
35468 tree __processor_model_type = build_processor_model_struct ();
35469 tree __cpu_model_var = make_var_decl (__processor_model_type,
35470 "__cpu_model");
35471
35472
35473 varpool_node::add (__cpu_model_var);
35474
35475 gcc_assert ((args != NULL) && (*args != NULL));
35476
35477 param_string_cst = *args;
35478 while (param_string_cst
35479 && TREE_CODE (param_string_cst) != STRING_CST)
35480 {
35481 /* *args must be an expr that can contain other EXPRs leading to a
35482 STRING_CST. */
35483 if (!EXPR_P (param_string_cst))
35484 {
35485 error ("Parameter to builtin must be a string constant or literal");
35486 return integer_zero_node;
35487 }
35488 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35489 }
35490
35491 gcc_assert (param_string_cst);
35492
35493 if (fn_code == IX86_BUILTIN_CPU_IS)
35494 {
35495 tree ref;
35496 tree field;
35497 tree final;
35498
35499 unsigned int field_val = 0;
35500 unsigned int NUM_ARCH_NAMES
35501 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35502
35503 for (i = 0; i < NUM_ARCH_NAMES; i++)
35504 if (strcmp (arch_names_table[i].name,
35505 TREE_STRING_POINTER (param_string_cst)) == 0)
35506 break;
35507
35508 if (i == NUM_ARCH_NAMES)
35509 {
35510 error ("Parameter to builtin not valid: %s",
35511 TREE_STRING_POINTER (param_string_cst));
35512 return integer_zero_node;
35513 }
35514
35515 field = TYPE_FIELDS (__processor_model_type);
35516 field_val = arch_names_table[i].model;
35517
35518 /* CPU types are stored in the next field. */
35519 if (field_val > M_CPU_TYPE_START
35520 && field_val < M_CPU_SUBTYPE_START)
35521 {
35522 field = DECL_CHAIN (field);
35523 field_val -= M_CPU_TYPE_START;
35524 }
35525
35526 /* CPU subtypes are stored in the next field. */
35527 if (field_val > M_CPU_SUBTYPE_START)
35528 {
35529 field = DECL_CHAIN (DECL_CHAIN (field));
35530 field_val -= M_CPU_SUBTYPE_START;
35531 }
35532
35533 /* Get the appropriate field in __cpu_model. */
35534 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35535 field, NULL_TREE);
35536
35537 /* Check the value. */
35538 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35539 build_int_cstu (unsigned_type_node, field_val));
35540 return build1 (CONVERT_EXPR, integer_type_node, final);
35541 }
35542 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35543 {
35544 tree ref;
35545 tree array_elt;
35546 tree field;
35547 tree final;
35548
35549 unsigned int field_val = 0;
35550 unsigned int NUM_ISA_NAMES
35551 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35552
35553 for (i = 0; i < NUM_ISA_NAMES; i++)
35554 if (strcmp (isa_names_table[i].name,
35555 TREE_STRING_POINTER (param_string_cst)) == 0)
35556 break;
35557
35558 if (i == NUM_ISA_NAMES)
35559 {
35560 error ("Parameter to builtin not valid: %s",
35561 TREE_STRING_POINTER (param_string_cst));
35562 return integer_zero_node;
35563 }
35564
35565 field = TYPE_FIELDS (__processor_model_type);
35566 /* Get the last field, which is __cpu_features. */
35567 while (DECL_CHAIN (field))
35568 field = DECL_CHAIN (field);
35569
35570 /* Get the appropriate field: __cpu_model.__cpu_features */
35571 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35572 field, NULL_TREE);
35573
35574 /* Access the 0th element of __cpu_features array. */
35575 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35576 integer_zero_node, NULL_TREE, NULL_TREE);
35577
35578 field_val = (1 << isa_names_table[i].feature);
35579 /* Return __cpu_model.__cpu_features[0] & field_val */
35580 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35581 build_int_cstu (unsigned_type_node, field_val));
35582 return build1 (CONVERT_EXPR, integer_type_node, final);
35583 }
35584 gcc_unreachable ();
35585 }
35586
35587 static tree
35588 ix86_fold_builtin (tree fndecl, int n_args,
35589 tree *args, bool ignore ATTRIBUTE_UNUSED)
35590 {
35591 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35592 {
35593 enum ix86_builtins fn_code = (enum ix86_builtins)
35594 DECL_FUNCTION_CODE (fndecl);
35595 if (fn_code == IX86_BUILTIN_CPU_IS
35596 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35597 {
35598 gcc_assert (n_args == 1);
35599 return fold_builtin_cpu (fndecl, args);
35600 }
35601 }
35602
35603 #ifdef SUBTARGET_FOLD_BUILTIN
35604 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35605 #endif
35606
35607 return NULL_TREE;
35608 }
35609
35610 /* Make builtins to detect cpu type and features supported. NAME is
35611 the builtin name, CODE is the builtin code, and FTYPE is the function
35612 type of the builtin. */
35613
35614 static void
35615 make_cpu_type_builtin (const char* name, int code,
35616 enum ix86_builtin_func_type ftype, bool is_const)
35617 {
35618 tree decl;
35619 tree type;
35620
35621 type = ix86_get_builtin_func_type (ftype);
35622 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35623 NULL, NULL_TREE);
35624 gcc_assert (decl != NULL_TREE);
35625 ix86_builtins[(int) code] = decl;
35626 TREE_READONLY (decl) = is_const;
35627 }
35628
35629 /* Make builtins to get CPU type and features supported. The created
35630 builtins are:
35631
35632 __builtin_cpu_init (), to detect cpu type and features,
35633 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35634 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
35635 */
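/* Typical use from user code (illustrative):

     int
     pick_implementation (void)
     {
       if (__builtin_cpu_is ("amdfam15h"))
         return 1;
       if (__builtin_cpu_supports ("sse4.2"))
         return 2;
       return 0;
     }

   Calling __builtin_cpu_init explicitly is only needed in code that
   may run before the libgcc constructor, e.g. IFUNC resolvers.  */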
35636
35637 static void
35638 ix86_init_platform_type_builtins (void)
35639 {
35640 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35641 INT_FTYPE_VOID, false);
35642 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35643 INT_FTYPE_PCCHAR, true);
35644 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35645 INT_FTYPE_PCCHAR, true);
35646 }
35647
35648 /* Internal method for ix86_init_builtins. */
35649
35650 static void
35651 ix86_init_builtins_va_builtins_abi (void)
35652 {
35653 tree ms_va_ref, sysv_va_ref;
35654 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35655 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35656 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35657 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
35658
35659 if (!TARGET_64BIT)
35660 return;
35661 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35662 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35663 ms_va_ref = build_reference_type (ms_va_list_type_node);
35664 sysv_va_ref =
35665 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35666
35667 fnvoid_va_end_ms =
35668 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35669 fnvoid_va_start_ms =
35670 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35671 fnvoid_va_end_sysv =
35672 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35673 fnvoid_va_start_sysv =
35674 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35675 NULL_TREE);
35676 fnvoid_va_copy_ms =
35677 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35678 NULL_TREE);
35679 fnvoid_va_copy_sysv =
35680 build_function_type_list (void_type_node, sysv_va_ref,
35681 sysv_va_ref, NULL_TREE);
35682
35683 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35684 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35685 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35686 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35687 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35688 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35689 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35690 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35691 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35692 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35693 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35694 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35695 }
35696
35697 static void
35698 ix86_init_builtin_types (void)
35699 {
35700 tree float128_type_node, float80_type_node;
35701
35702 /* The __float80 type. */
35703 float80_type_node = long_double_type_node;
35704 if (TYPE_MODE (float80_type_node) != XFmode)
35705 {
35706 /* The __float80 type. */
35707 float80_type_node = make_node (REAL_TYPE);
35708
35709 TYPE_PRECISION (float80_type_node) = 80;
35710 layout_type (float80_type_node);
35711 }
35712 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35713
35714 /* The __float128 type. */
35715 float128_type_node = make_node (REAL_TYPE);
35716 TYPE_PRECISION (float128_type_node) = 128;
35717 layout_type (float128_type_node);
35718 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35719
35720 /* This macro is built by i386-builtin-types.awk. */
35721 DEFINE_BUILTIN_PRIMITIVE_TYPES;
35722 }
35723
35724 static void
35725 ix86_init_builtins (void)
35726 {
35727 tree t;
35728
35729 ix86_init_builtin_types ();
35730
35731 /* Builtins to get CPU type and features. */
35732 ix86_init_platform_type_builtins ();
35733
35734 /* TFmode support builtins. */
35735 def_builtin_const (0, "__builtin_infq",
35736 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35737 def_builtin_const (0, "__builtin_huge_valq",
35738 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
35739
35740 /* We will expand them to a normal call if SSE isn't available since
35741 they are used by libgcc. */
35742 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
35743 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
35744 BUILT_IN_MD, "__fabstf2", NULL_TREE);
35745 TREE_READONLY (t) = 1;
35746 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
35747
35748 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
35749 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
35750 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
35751 TREE_READONLY (t) = 1;
35752 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
35753
35754 ix86_init_tm_builtins ();
35755 ix86_init_mmx_sse_builtins ();
35756 ix86_init_mpx_builtins ();
35757
35758 if (TARGET_LP64)
35759 ix86_init_builtins_va_builtins_abi ();
35760
35761 #ifdef SUBTARGET_INIT_BUILTINS
35762 SUBTARGET_INIT_BUILTINS;
35763 #endif
35764 }
35765
35766 /* Return the ix86 builtin for CODE. */
35767
35768 static tree
35769 ix86_builtin_decl (unsigned code, bool)
35770 {
35771 if (code >= IX86_BUILTIN_MAX)
35772 return error_mark_node;
35773
35774 return ix86_builtins[code];
35775 }
35776
35777 /* Errors in the source file can cause expand_expr to return const0_rtx
35778 where we expect a vector. To avoid crashing, use one of the vector
35779 clear instructions. */
35780 static rtx
35781 safe_vector_operand (rtx x, machine_mode mode)
35782 {
35783 if (x == const0_rtx)
35784 x = CONST0_RTX (mode);
35785 return x;
35786 }
35787
35788 /* Fixup modeless constants to fit required mode. */
35789 static rtx
35790 fixup_modeless_constant (rtx x, machine_mode mode)
35791 {
35792 if (GET_MODE (x) == VOIDmode)
35793 x = convert_to_mode (mode, x, 1);
35794 return x;
35795 }
35796
35797 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
35798
35799 static rtx
35800 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
35801 {
35802 rtx pat;
35803 tree arg0 = CALL_EXPR_ARG (exp, 0);
35804 tree arg1 = CALL_EXPR_ARG (exp, 1);
35805 rtx op0 = expand_normal (arg0);
35806 rtx op1 = expand_normal (arg1);
35807 machine_mode tmode = insn_data[icode].operand[0].mode;
35808 machine_mode mode0 = insn_data[icode].operand[1].mode;
35809 machine_mode mode1 = insn_data[icode].operand[2].mode;
35810
35811 if (VECTOR_MODE_P (mode0))
35812 op0 = safe_vector_operand (op0, mode0);
35813 if (VECTOR_MODE_P (mode1))
35814 op1 = safe_vector_operand (op1, mode1);
35815
35816 if (optimize || !target
35817 || GET_MODE (target) != tmode
35818 || !insn_data[icode].operand[0].predicate (target, tmode))
35819 target = gen_reg_rtx (tmode);
35820
35821 if (GET_MODE (op1) == SImode && mode1 == TImode)
35822 {
35823 rtx x = gen_reg_rtx (V4SImode);
35824 emit_insn (gen_sse2_loadd (x, op1));
35825 op1 = gen_lowpart (TImode, x);
35826 }
35827
35828 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35829 op0 = copy_to_mode_reg (mode0, op0);
35830 if (!insn_data[icode].operand[2].predicate (op1, mode1))
35831 op1 = copy_to_mode_reg (mode1, op1);
35832
35833 pat = GEN_FCN (icode) (target, op0, op1);
35834 if (! pat)
35835 return 0;
35836
35837 emit_insn (pat);
35838
35839 return target;
35840 }
35841
35842 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
35843
35844 static rtx
35845 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
35846 enum ix86_builtin_func_type m_type,
35847 enum rtx_code sub_code)
35848 {
35849 rtx pat;
35850 int i;
35851 int nargs;
35852 bool comparison_p = false;
35853 bool tf_p = false;
35854 bool last_arg_constant = false;
35855 int num_memory = 0;
35856 struct {
35857 rtx op;
35858 machine_mode mode;
35859 } args[4];
35860
35861 machine_mode tmode = insn_data[icode].operand[0].mode;
35862
35863 switch (m_type)
35864 {
35865 case MULTI_ARG_4_DF2_DI_I:
35866 case MULTI_ARG_4_DF2_DI_I1:
35867 case MULTI_ARG_4_SF2_SI_I:
35868 case MULTI_ARG_4_SF2_SI_I1:
35869 nargs = 4;
35870 last_arg_constant = true;
35871 break;
35872
35873 case MULTI_ARG_3_SF:
35874 case MULTI_ARG_3_DF:
35875 case MULTI_ARG_3_SF2:
35876 case MULTI_ARG_3_DF2:
35877 case MULTI_ARG_3_DI:
35878 case MULTI_ARG_3_SI:
35879 case MULTI_ARG_3_SI_DI:
35880 case MULTI_ARG_3_HI:
35881 case MULTI_ARG_3_HI_SI:
35882 case MULTI_ARG_3_QI:
35883 case MULTI_ARG_3_DI2:
35884 case MULTI_ARG_3_SI2:
35885 case MULTI_ARG_3_HI2:
35886 case MULTI_ARG_3_QI2:
35887 nargs = 3;
35888 break;
35889
35890 case MULTI_ARG_2_SF:
35891 case MULTI_ARG_2_DF:
35892 case MULTI_ARG_2_DI:
35893 case MULTI_ARG_2_SI:
35894 case MULTI_ARG_2_HI:
35895 case MULTI_ARG_2_QI:
35896 nargs = 2;
35897 break;
35898
35899 case MULTI_ARG_2_DI_IMM:
35900 case MULTI_ARG_2_SI_IMM:
35901 case MULTI_ARG_2_HI_IMM:
35902 case MULTI_ARG_2_QI_IMM:
35903 nargs = 2;
35904 last_arg_constant = true;
35905 break;
35906
35907 case MULTI_ARG_1_SF:
35908 case MULTI_ARG_1_DF:
35909 case MULTI_ARG_1_SF2:
35910 case MULTI_ARG_1_DF2:
35911 case MULTI_ARG_1_DI:
35912 case MULTI_ARG_1_SI:
35913 case MULTI_ARG_1_HI:
35914 case MULTI_ARG_1_QI:
35915 case MULTI_ARG_1_SI_DI:
35916 case MULTI_ARG_1_HI_DI:
35917 case MULTI_ARG_1_HI_SI:
35918 case MULTI_ARG_1_QI_DI:
35919 case MULTI_ARG_1_QI_SI:
35920 case MULTI_ARG_1_QI_HI:
35921 nargs = 1;
35922 break;
35923
35924 case MULTI_ARG_2_DI_CMP:
35925 case MULTI_ARG_2_SI_CMP:
35926 case MULTI_ARG_2_HI_CMP:
35927 case MULTI_ARG_2_QI_CMP:
35928 nargs = 2;
35929 comparison_p = true;
35930 break;
35931
35932 case MULTI_ARG_2_SF_TF:
35933 case MULTI_ARG_2_DF_TF:
35934 case MULTI_ARG_2_DI_TF:
35935 case MULTI_ARG_2_SI_TF:
35936 case MULTI_ARG_2_HI_TF:
35937 case MULTI_ARG_2_QI_TF:
35938 nargs = 2;
35939 tf_p = true;
35940 break;
35941
35942 default:
35943 gcc_unreachable ();
35944 }
35945
35946 if (optimize || !target
35947 || GET_MODE (target) != tmode
35948 || !insn_data[icode].operand[0].predicate (target, tmode))
35949 target = gen_reg_rtx (tmode);
35950
35951 gcc_assert (nargs <= 4);
35952
35953 for (i = 0; i < nargs; i++)
35954 {
35955 tree arg = CALL_EXPR_ARG (exp, i);
35956 rtx op = expand_normal (arg);
35957 int adjust = (comparison_p) ? 1 : 0;
35958 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
35959
35960 if (last_arg_constant && i == nargs - 1)
35961 {
35962 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
35963 {
35964 enum insn_code new_icode = icode;
35965 switch (icode)
35966 {
35967 case CODE_FOR_xop_vpermil2v2df3:
35968 case CODE_FOR_xop_vpermil2v4sf3:
35969 case CODE_FOR_xop_vpermil2v4df3:
35970 case CODE_FOR_xop_vpermil2v8sf3:
35971 error ("the last argument must be a 2-bit immediate");
35972 return gen_reg_rtx (tmode);
35973 case CODE_FOR_xop_rotlv2di3:
35974 new_icode = CODE_FOR_rotlv2di3;
35975 goto xop_rotl;
35976 case CODE_FOR_xop_rotlv4si3:
35977 new_icode = CODE_FOR_rotlv4si3;
35978 goto xop_rotl;
35979 case CODE_FOR_xop_rotlv8hi3:
35980 new_icode = CODE_FOR_rotlv8hi3;
35981 goto xop_rotl;
35982 case CODE_FOR_xop_rotlv16qi3:
35983 new_icode = CODE_FOR_rotlv16qi3;
35984 xop_rotl:
35985 if (CONST_INT_P (op))
35986 {
35987 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
35988 op = GEN_INT (INTVAL (op) & mask);
35989 gcc_checking_assert
35990 (insn_data[icode].operand[i + 1].predicate (op, mode));
35991 }
35992 else
35993 {
35994 gcc_checking_assert
35995 (nargs == 2
35996 && insn_data[new_icode].operand[0].mode == tmode
35997 && insn_data[new_icode].operand[1].mode == tmode
35998 && insn_data[new_icode].operand[2].mode == mode
35999 && insn_data[new_icode].operand[0].predicate
36000 == insn_data[icode].operand[0].predicate
36001 && insn_data[new_icode].operand[1].predicate
36002 == insn_data[icode].operand[1].predicate);
36003 icode = new_icode;
36004 goto non_constant;
36005 }
36006 break;
36007 default:
36008 gcc_unreachable ();
36009 }
36010 }
36011 }
36012 else
36013 {
36014 non_constant:
36015 if (VECTOR_MODE_P (mode))
36016 op = safe_vector_operand (op, mode);
36017
36018 /* If we aren't optimizing, only allow one memory operand to be
36019 generated. */
36020 if (memory_operand (op, mode))
36021 num_memory++;
36022
36023 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
36024
36025 if (optimize
36026 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
36027 || num_memory > 1)
36028 op = force_reg (mode, op);
36029 }
36030
36031 args[i].op = op;
36032 args[i].mode = mode;
36033 }
36034
36035 switch (nargs)
36036 {
36037 case 1:
36038 pat = GEN_FCN (icode) (target, args[0].op);
36039 break;
36040
36041 case 2:
36042 if (tf_p)
36043 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
36044 GEN_INT ((int)sub_code));
36045 else if (! comparison_p)
36046 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
36047 else
36048 {
36049 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
36050 args[0].op,
36051 args[1].op);
36052
36053 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
36054 }
36055 break;
36056
36057 case 3:
36058 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
36059 break;
36060
36061 case 4:
36062 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
36063 break;
36064
36065 default:
36066 gcc_unreachable ();
36067 }
36068
36069 if (! pat)
36070 return 0;
36071
36072 emit_insn (pat);
36073 return target;
36074 }
36075
36076 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
36077 insns with vec_merge. */
36078
36079 static rtx
36080 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
36081 rtx target)
36082 {
36083 rtx pat;
36084 tree arg0 = CALL_EXPR_ARG (exp, 0);
36085 rtx op1, op0 = expand_normal (arg0);
36086 machine_mode tmode = insn_data[icode].operand[0].mode;
36087 machine_mode mode0 = insn_data[icode].operand[1].mode;
36088
36089 if (optimize || !target
36090 || GET_MODE (target) != tmode
36091 || !insn_data[icode].operand[0].predicate (target, tmode))
36092 target = gen_reg_rtx (tmode);
36093
36094 if (VECTOR_MODE_P (mode0))
36095 op0 = safe_vector_operand (op0, mode0);
36096
36097 if ((optimize && !register_operand (op0, mode0))
36098 || !insn_data[icode].operand[1].predicate (op0, mode0))
36099 op0 = copy_to_mode_reg (mode0, op0);
36100
36101 op1 = op0;
36102 if (!insn_data[icode].operand[2].predicate (op1, mode0))
36103 op1 = copy_to_mode_reg (mode0, op1);
36104
36105 pat = GEN_FCN (icode) (target, op0, op1);
36106 if (! pat)
36107 return 0;
36108 emit_insn (pat);
36109 return target;
36110 }
36111
36112 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
36113
36114 static rtx
36115 ix86_expand_sse_compare (const struct builtin_description *d,
36116 tree exp, rtx target, bool swap)
36117 {
36118 rtx pat;
36119 tree arg0 = CALL_EXPR_ARG (exp, 0);
36120 tree arg1 = CALL_EXPR_ARG (exp, 1);
36121 rtx op0 = expand_normal (arg0);
36122 rtx op1 = expand_normal (arg1);
36123 rtx op2;
36124 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36125 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36126 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36127 enum rtx_code comparison = d->comparison;
36128
36129 if (VECTOR_MODE_P (mode0))
36130 op0 = safe_vector_operand (op0, mode0);
36131 if (VECTOR_MODE_P (mode1))
36132 op1 = safe_vector_operand (op1, mode1);
36133
36134 /* Swap operands if we have a comparison that isn't available in
36135 hardware. */
36136 if (swap)
36137 std::swap (op0, op1);
36138
36139 if (optimize || !target
36140 || GET_MODE (target) != tmode
36141 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36142 target = gen_reg_rtx (tmode);
36143
36144 if ((optimize && !register_operand (op0, mode0))
36145 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36146 op0 = copy_to_mode_reg (mode0, op0);
36147 if ((optimize && !register_operand (op1, mode1))
36148 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36149 op1 = copy_to_mode_reg (mode1, op1);
36150
36151 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36152 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36153 if (! pat)
36154 return 0;
36155 emit_insn (pat);
36156 return target;
36157 }
36158
36159 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
36160
36161 static rtx
36162 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36163 rtx target)
36164 {
36165 rtx pat;
36166 tree arg0 = CALL_EXPR_ARG (exp, 0);
36167 tree arg1 = CALL_EXPR_ARG (exp, 1);
36168 rtx op0 = expand_normal (arg0);
36169 rtx op1 = expand_normal (arg1);
36170 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36171 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36172 enum rtx_code comparison = d->comparison;
36173
36174 if (VECTOR_MODE_P (mode0))
36175 op0 = safe_vector_operand (op0, mode0);
36176 if (VECTOR_MODE_P (mode1))
36177 op1 = safe_vector_operand (op1, mode1);
36178
36179 /* Swap operands if we have a comparison that isn't available in
36180 hardware. */
36181 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36182 std::swap (op0, op1);
36183
36184 target = gen_reg_rtx (SImode);
36185 emit_move_insn (target, const0_rtx);
36186 target = gen_rtx_SUBREG (QImode, target, 0);
36187
36188 if ((optimize && !register_operand (op0, mode0))
36189 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36190 op0 = copy_to_mode_reg (mode0, op0);
36191 if ((optimize && !register_operand (op1, mode1))
36192 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36193 op1 = copy_to_mode_reg (mode1, op1);
36194
36195 pat = GEN_FCN (d->icode) (op0, op1);
36196 if (! pat)
36197 return 0;
36198 emit_insn (pat);
36199 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36200 gen_rtx_fmt_ee (comparison, QImode,
36201 SET_DEST (pat),
36202 const0_rtx)));
36203
36204 return SUBREG_REG (target);
36205 }
36206
36207 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
36208
36209 static rtx
36210 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36211 rtx target)
36212 {
36213 rtx pat;
36214 tree arg0 = CALL_EXPR_ARG (exp, 0);
36215 rtx op1, op0 = expand_normal (arg0);
36216 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36217 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36218
36219 if (optimize || target == 0
36220 || GET_MODE (target) != tmode
36221 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36222 target = gen_reg_rtx (tmode);
36223
36224 if (VECTOR_MODE_P (mode0))
36225 op0 = safe_vector_operand (op0, mode0);
36226
36227 if ((optimize && !register_operand (op0, mode0))
36228 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36229 op0 = copy_to_mode_reg (mode0, op0);
36230
36231 op1 = GEN_INT (d->comparison);
36232
36233 pat = GEN_FCN (d->icode) (target, op0, op1);
36234 if (! pat)
36235 return 0;
36236 emit_insn (pat);
36237 return target;
36238 }
36239
36240 static rtx
36241 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36242 tree exp, rtx target)
36243 {
36244 rtx pat;
36245 tree arg0 = CALL_EXPR_ARG (exp, 0);
36246 tree arg1 = CALL_EXPR_ARG (exp, 1);
36247 rtx op0 = expand_normal (arg0);
36248 rtx op1 = expand_normal (arg1);
36249 rtx op2;
36250 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36251 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36252 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36253
36254 if (optimize || target == 0
36255 || GET_MODE (target) != tmode
36256 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36257 target = gen_reg_rtx (tmode);
36258
36259 op0 = safe_vector_operand (op0, mode0);
36260 op1 = safe_vector_operand (op1, mode1);
36261
36262 if ((optimize && !register_operand (op0, mode0))
36263 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36264 op0 = copy_to_mode_reg (mode0, op0);
36265 if ((optimize && !register_operand (op1, mode1))
36266 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36267 op1 = copy_to_mode_reg (mode1, op1);
36268
36269 op2 = GEN_INT (d->comparison);
36270
36271 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36272 if (! pat)
36273 return 0;
36274 emit_insn (pat);
36275 return target;
36276 }
36277
36278 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
36279
36280 static rtx
36281 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36282 rtx target)
36283 {
36284 rtx pat;
36285 tree arg0 = CALL_EXPR_ARG (exp, 0);
36286 tree arg1 = CALL_EXPR_ARG (exp, 1);
36287 rtx op0 = expand_normal (arg0);
36288 rtx op1 = expand_normal (arg1);
36289 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36290 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36291 enum rtx_code comparison = d->comparison;
36292
36293 if (VECTOR_MODE_P (mode0))
36294 op0 = safe_vector_operand (op0, mode0);
36295 if (VECTOR_MODE_P (mode1))
36296 op1 = safe_vector_operand (op1, mode1);
36297
36298 target = gen_reg_rtx (SImode);
36299 emit_move_insn (target, const0_rtx);
36300 target = gen_rtx_SUBREG (QImode, target, 0);
36301
36302 if ((optimize && !register_operand (op0, mode0))
36303 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36304 op0 = copy_to_mode_reg (mode0, op0);
36305 if ((optimize && !register_operand (op1, mode1))
36306 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36307 op1 = copy_to_mode_reg (mode1, op1);
36308
36309 pat = GEN_FCN (d->icode) (op0, op1);
36310 if (! pat)
36311 return 0;
36312 emit_insn (pat);
36313 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36314 gen_rtx_fmt_ee (comparison, QImode,
36315 SET_DEST (pat),
36316 const0_rtx)));
36317
36318 return SUBREG_REG (target);
36319 }
36320
36321 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
36322
36323 static rtx
36324 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36325 tree exp, rtx target)
36326 {
36327 rtx pat;
36328 tree arg0 = CALL_EXPR_ARG (exp, 0);
36329 tree arg1 = CALL_EXPR_ARG (exp, 1);
36330 tree arg2 = CALL_EXPR_ARG (exp, 2);
36331 tree arg3 = CALL_EXPR_ARG (exp, 3);
36332 tree arg4 = CALL_EXPR_ARG (exp, 4);
36333 rtx scratch0, scratch1;
36334 rtx op0 = expand_normal (arg0);
36335 rtx op1 = expand_normal (arg1);
36336 rtx op2 = expand_normal (arg2);
36337 rtx op3 = expand_normal (arg3);
36338 rtx op4 = expand_normal (arg4);
36339 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36340
36341 tmode0 = insn_data[d->icode].operand[0].mode;
36342 tmode1 = insn_data[d->icode].operand[1].mode;
36343 modev2 = insn_data[d->icode].operand[2].mode;
36344 modei3 = insn_data[d->icode].operand[3].mode;
36345 modev4 = insn_data[d->icode].operand[4].mode;
36346 modei5 = insn_data[d->icode].operand[5].mode;
36347 modeimm = insn_data[d->icode].operand[6].mode;
36348
36349 if (VECTOR_MODE_P (modev2))
36350 op0 = safe_vector_operand (op0, modev2);
36351 if (VECTOR_MODE_P (modev4))
36352 op2 = safe_vector_operand (op2, modev4);
36353
36354 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36355 op0 = copy_to_mode_reg (modev2, op0);
36356 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36357 op1 = copy_to_mode_reg (modei3, op1);
36358 if ((optimize && !register_operand (op2, modev4))
36359 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36360 op2 = copy_to_mode_reg (modev4, op2);
36361 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36362 op3 = copy_to_mode_reg (modei5, op3);
36363
36364 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36365 {
36366 error ("the fifth argument must be an 8-bit immediate");
36367 return const0_rtx;
36368 }
36369
36370 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36371 {
36372 if (optimize || !target
36373 || GET_MODE (target) != tmode0
36374 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36375 target = gen_reg_rtx (tmode0);
36376
36377 scratch1 = gen_reg_rtx (tmode1);
36378
36379 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36380 }
36381 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36382 {
36383 if (optimize || !target
36384 || GET_MODE (target) != tmode1
36385 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36386 target = gen_reg_rtx (tmode1);
36387
36388 scratch0 = gen_reg_rtx (tmode0);
36389
36390 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
36391 }
36392 else
36393 {
36394 gcc_assert (d->flag);
36395
36396 scratch0 = gen_reg_rtx (tmode0);
36397 scratch1 = gen_reg_rtx (tmode1);
36398
36399 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
36400 }
36401
36402 if (! pat)
36403 return 0;
36404
36405 emit_insn (pat);
36406
36407 if (d->flag)
36408 {
36409 target = gen_reg_rtx (SImode);
36410 emit_move_insn (target, const0_rtx);
36411 target = gen_rtx_SUBREG (QImode, target, 0);
36412
36413 emit_insn
36414 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36415 gen_rtx_fmt_ee (EQ, QImode,
36416 gen_rtx_REG ((machine_mode) d->flag,
36417 FLAGS_REG),
36418 const0_rtx)));
36419 return SUBREG_REG (target);
36420 }
36421 else
36422 return target;
36423 }
36424
36425
36426 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
36427
36428 static rtx
36429 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36430 tree exp, rtx target)
36431 {
36432 rtx pat;
36433 tree arg0 = CALL_EXPR_ARG (exp, 0);
36434 tree arg1 = CALL_EXPR_ARG (exp, 1);
36435 tree arg2 = CALL_EXPR_ARG (exp, 2);
36436 rtx scratch0, scratch1;
36437 rtx op0 = expand_normal (arg0);
36438 rtx op1 = expand_normal (arg1);
36439 rtx op2 = expand_normal (arg2);
36440 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
36441
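  /* Implicit-length variant: operands 2 and 3 are the two source vectors
     and operand 4 is the control immediate; operands 0 and 1 are the
     index and mask results.  */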
36442 tmode0 = insn_data[d->icode].operand[0].mode;
36443 tmode1 = insn_data[d->icode].operand[1].mode;
36444 modev2 = insn_data[d->icode].operand[2].mode;
36445 modev3 = insn_data[d->icode].operand[3].mode;
36446 modeimm = insn_data[d->icode].operand[4].mode;
36447
36448 if (VECTOR_MODE_P (modev2))
36449 op0 = safe_vector_operand (op0, modev2);
36450 if (VECTOR_MODE_P (modev3))
36451 op1 = safe_vector_operand (op1, modev3);
36452
36453 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36454 op0 = copy_to_mode_reg (modev2, op0);
36455 if ((optimize && !register_operand (op1, modev3))
36456 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36457 op1 = copy_to_mode_reg (modev3, op1);
36458
36459 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36460 {
36461 error ("the third argument must be an 8-bit immediate");
36462 return const0_rtx;
36463 }
36464
36465 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36466 {
36467 if (optimize || !target
36468 || GET_MODE (target) != tmode0
36469 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36470 target = gen_reg_rtx (tmode0);
36471
36472 scratch1 = gen_reg_rtx (tmode1);
36473
36474 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
36475 }
36476 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36477 {
36478 if (optimize || !target
36479 || GET_MODE (target) != tmode1
36480 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36481 target = gen_reg_rtx (tmode1);
36482
36483 scratch0 = gen_reg_rtx (tmode0);
36484
36485 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
36486 }
36487 else
36488 {
36489 gcc_assert (d->flag);
36490
36491 scratch0 = gen_reg_rtx (tmode0);
36492 scratch1 = gen_reg_rtx (tmode1);
36493
36494 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
36495 }
36496
36497 if (! pat)
36498 return 0;
36499
36500 emit_insn (pat);
36501
36502 if (d->flag)
36503 {
36504 target = gen_reg_rtx (SImode);
36505 emit_move_insn (target, const0_rtx);
36506 target = gen_rtx_SUBREG (QImode, target, 0);
36507
36508 emit_insn
36509 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36510 gen_rtx_fmt_ee (EQ, QImode,
36511 gen_rtx_REG ((machine_mode) d->flag,
36512 FLAGS_REG),
36513 const0_rtx)));
36514 return SUBREG_REG (target);
36515 }
36516 else
36517 return target;
36518 }
36519
36520 /* Subroutine of ix86_expand_builtin to take care of insns with
36521 variable number of operands. */
36522
36523 static rtx
36524 ix86_expand_args_builtin (const struct builtin_description *d,
36525 tree exp, rtx target)
36526 {
36527 rtx pat, real_target;
36528 unsigned int i, nargs;
36529 unsigned int nargs_constant = 0;
36530 unsigned int mask_pos = 0;
36531 int num_memory = 0;
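  /* NARGS_CONSTANT is the number of trailing arguments that must be
     immediates; when MASK_POS is nonzero, that many mask/merge arguments
     follow the immediate instead of it being last.  */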
36532 struct
36533 {
36534 rtx op;
36535 machine_mode mode;
36536 } args[6];
36537 bool last_arg_count = false;
36538 enum insn_code icode = d->icode;
36539 const struct insn_data_d *insn_p = &insn_data[icode];
36540 machine_mode tmode = insn_p->operand[0].mode;
36541 machine_mode rmode = VOIDmode;
36542 bool swap = false;
36543 enum rtx_code comparison = d->comparison;
36544
36545 switch ((enum ix86_builtin_func_type) d->flag)
36546 {
36547 case V2DF_FTYPE_V2DF_ROUND:
36548 case V4DF_FTYPE_V4DF_ROUND:
36549 case V4SF_FTYPE_V4SF_ROUND:
36550 case V8SF_FTYPE_V8SF_ROUND:
36551 case V4SI_FTYPE_V4SF_ROUND:
36552 case V8SI_FTYPE_V8SF_ROUND:
36553 return ix86_expand_sse_round (d, exp, target);
36554 case V4SI_FTYPE_V2DF_V2DF_ROUND:
36555 case V8SI_FTYPE_V4DF_V4DF_ROUND:
36556 case V16SI_FTYPE_V8DF_V8DF_ROUND:
36557 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36558 case INT_FTYPE_V8SF_V8SF_PTEST:
36559 case INT_FTYPE_V4DI_V4DI_PTEST:
36560 case INT_FTYPE_V4DF_V4DF_PTEST:
36561 case INT_FTYPE_V4SF_V4SF_PTEST:
36562 case INT_FTYPE_V2DI_V2DI_PTEST:
36563 case INT_FTYPE_V2DF_V2DF_PTEST:
36564 return ix86_expand_sse_ptest (d, exp, target);
36565 case FLOAT128_FTYPE_FLOAT128:
36566 case FLOAT_FTYPE_FLOAT:
36567 case INT_FTYPE_INT:
36568 case UINT64_FTYPE_INT:
36569 case UINT16_FTYPE_UINT16:
36570 case INT64_FTYPE_INT64:
36571 case INT64_FTYPE_V4SF:
36572 case INT64_FTYPE_V2DF:
36573 case INT_FTYPE_V16QI:
36574 case INT_FTYPE_V8QI:
36575 case INT_FTYPE_V8SF:
36576 case INT_FTYPE_V4DF:
36577 case INT_FTYPE_V4SF:
36578 case INT_FTYPE_V2DF:
36579 case INT_FTYPE_V32QI:
36580 case V16QI_FTYPE_V16QI:
36581 case V8SI_FTYPE_V8SF:
36582 case V8SI_FTYPE_V4SI:
36583 case V8HI_FTYPE_V8HI:
36584 case V8HI_FTYPE_V16QI:
36585 case V8QI_FTYPE_V8QI:
36586 case V8SF_FTYPE_V8SF:
36587 case V8SF_FTYPE_V8SI:
36588 case V8SF_FTYPE_V4SF:
36589 case V8SF_FTYPE_V8HI:
36590 case V4SI_FTYPE_V4SI:
36591 case V4SI_FTYPE_V16QI:
36592 case V4SI_FTYPE_V4SF:
36593 case V4SI_FTYPE_V8SI:
36594 case V4SI_FTYPE_V8HI:
36595 case V4SI_FTYPE_V4DF:
36596 case V4SI_FTYPE_V2DF:
36597 case V4HI_FTYPE_V4HI:
36598 case V4DF_FTYPE_V4DF:
36599 case V4DF_FTYPE_V4SI:
36600 case V4DF_FTYPE_V4SF:
36601 case V4DF_FTYPE_V2DF:
36602 case V4SF_FTYPE_V4SF:
36603 case V4SF_FTYPE_V4SI:
36604 case V4SF_FTYPE_V8SF:
36605 case V4SF_FTYPE_V4DF:
36606 case V4SF_FTYPE_V8HI:
36607 case V4SF_FTYPE_V2DF:
36608 case V2DI_FTYPE_V2DI:
36609 case V2DI_FTYPE_V16QI:
36610 case V2DI_FTYPE_V8HI:
36611 case V2DI_FTYPE_V4SI:
36612 case V2DF_FTYPE_V2DF:
36613 case V2DF_FTYPE_V4SI:
36614 case V2DF_FTYPE_V4DF:
36615 case V2DF_FTYPE_V4SF:
36616 case V2DF_FTYPE_V2SI:
36617 case V2SI_FTYPE_V2SI:
36618 case V2SI_FTYPE_V4SF:
36619 case V2SI_FTYPE_V2SF:
36620 case V2SI_FTYPE_V2DF:
36621 case V2SF_FTYPE_V2SF:
36622 case V2SF_FTYPE_V2SI:
36623 case V32QI_FTYPE_V32QI:
36624 case V32QI_FTYPE_V16QI:
36625 case V16HI_FTYPE_V16HI:
36626 case V16HI_FTYPE_V8HI:
36627 case V8SI_FTYPE_V8SI:
36628 case V16HI_FTYPE_V16QI:
36629 case V8SI_FTYPE_V16QI:
36630 case V4DI_FTYPE_V16QI:
36631 case V8SI_FTYPE_V8HI:
36632 case V4DI_FTYPE_V8HI:
36633 case V4DI_FTYPE_V4SI:
36634 case V4DI_FTYPE_V2DI:
36635 case HI_FTYPE_HI:
36636 case HI_FTYPE_V16QI:
36637 case SI_FTYPE_V32QI:
36638 case DI_FTYPE_V64QI:
36639 case V16QI_FTYPE_HI:
36640 case V32QI_FTYPE_SI:
36641 case V64QI_FTYPE_DI:
36642 case V8HI_FTYPE_QI:
36643 case V16HI_FTYPE_HI:
36644 case V32HI_FTYPE_SI:
36645 case V4SI_FTYPE_QI:
36646 case V8SI_FTYPE_QI:
36647 case V4SI_FTYPE_HI:
36648 case V8SI_FTYPE_HI:
36649 case QI_FTYPE_V8HI:
36650 case HI_FTYPE_V16HI:
36651 case SI_FTYPE_V32HI:
36652 case QI_FTYPE_V4SI:
36653 case QI_FTYPE_V8SI:
36654 case HI_FTYPE_V16SI:
36655 case QI_FTYPE_V2DI:
36656 case QI_FTYPE_V4DI:
36657 case QI_FTYPE_V8DI:
36658 case UINT_FTYPE_V2DF:
36659 case UINT_FTYPE_V4SF:
36660 case UINT64_FTYPE_V2DF:
36661 case UINT64_FTYPE_V4SF:
36662 case V16QI_FTYPE_V8DI:
36663 case V16HI_FTYPE_V16SI:
36664 case V16SI_FTYPE_HI:
36665 case V2DI_FTYPE_QI:
36666 case V4DI_FTYPE_QI:
36667 case V16SI_FTYPE_V16SI:
36668 case V16SI_FTYPE_INT:
36669 case V16SF_FTYPE_FLOAT:
36670 case V16SF_FTYPE_V8SF:
36671 case V16SI_FTYPE_V8SI:
36672 case V16SF_FTYPE_V4SF:
36673 case V16SI_FTYPE_V4SI:
36674 case V16SF_FTYPE_V16SF:
36675 case V8HI_FTYPE_V8DI:
36676 case V8UHI_FTYPE_V8UHI:
36677 case V8SI_FTYPE_V8DI:
36678 case V8SF_FTYPE_V8DF:
36679 case V8DI_FTYPE_QI:
36680 case V8DI_FTYPE_INT64:
36681 case V8DI_FTYPE_V4DI:
36682 case V8DI_FTYPE_V8DI:
36683 case V8DF_FTYPE_DOUBLE:
36684 case V8DF_FTYPE_V4DF:
36685 case V8DF_FTYPE_V2DF:
36686 case V8DF_FTYPE_V8DF:
36687 case V8DF_FTYPE_V8SI:
36688 nargs = 1;
36689 break;
36690 case V4SF_FTYPE_V4SF_VEC_MERGE:
36691 case V2DF_FTYPE_V2DF_VEC_MERGE:
36692 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
36693 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36694 case V16QI_FTYPE_V16QI_V16QI:
36695 case V16QI_FTYPE_V8HI_V8HI:
36696 case V16SI_FTYPE_V16SI_V16SI:
36697 case V16SF_FTYPE_V16SF_V16SF:
36698 case V16SF_FTYPE_V16SF_V16SI:
36699 case V8QI_FTYPE_V8QI_V8QI:
36700 case V8QI_FTYPE_V4HI_V4HI:
36701 case V8HI_FTYPE_V8HI_V8HI:
36702 case V8HI_FTYPE_V16QI_V16QI:
36703 case V8HI_FTYPE_V4SI_V4SI:
36704 case V8SF_FTYPE_V8SF_V8SF:
36705 case V8SF_FTYPE_V8SF_V8SI:
36706 case V8DI_FTYPE_V8DI_V8DI:
36707 case V8DF_FTYPE_V8DF_V8DF:
36708 case V8DF_FTYPE_V8DF_V8DI:
36709 case V4SI_FTYPE_V4SI_V4SI:
36710 case V4SI_FTYPE_V8HI_V8HI:
36711 case V4SI_FTYPE_V4SF_V4SF:
36712 case V4SI_FTYPE_V2DF_V2DF:
36713 case V4HI_FTYPE_V4HI_V4HI:
36714 case V4HI_FTYPE_V8QI_V8QI:
36715 case V4HI_FTYPE_V2SI_V2SI:
36716 case V4DF_FTYPE_V4DF_V4DF:
36717 case V4DF_FTYPE_V4DF_V4DI:
36718 case V4SF_FTYPE_V4SF_V4SF:
36719 case V4SF_FTYPE_V4SF_V4SI:
36720 case V4SF_FTYPE_V4SF_V2SI:
36721 case V4SF_FTYPE_V4SF_V2DF:
36722 case V4SF_FTYPE_V4SF_UINT:
36723 case V4SF_FTYPE_V4SF_UINT64:
36724 case V4SF_FTYPE_V4SF_DI:
36725 case V4SF_FTYPE_V4SF_SI:
36726 case V2DI_FTYPE_V2DI_V2DI:
36727 case V2DI_FTYPE_V16QI_V16QI:
36728 case V2DI_FTYPE_V4SI_V4SI:
36729 case V2UDI_FTYPE_V4USI_V4USI:
36730 case V2DI_FTYPE_V2DI_V16QI:
36731 case V2DI_FTYPE_V2DF_V2DF:
36732 case V2SI_FTYPE_V2SI_V2SI:
36733 case V2SI_FTYPE_V4HI_V4HI:
36734 case V2SI_FTYPE_V2SF_V2SF:
36735 case V2DF_FTYPE_V2DF_V2DF:
36736 case V2DF_FTYPE_V2DF_V4SF:
36737 case V2DF_FTYPE_V2DF_V2DI:
36738 case V2DF_FTYPE_V2DF_DI:
36739 case V2DF_FTYPE_V2DF_SI:
36740 case V2DF_FTYPE_V2DF_UINT:
36741 case V2DF_FTYPE_V2DF_UINT64:
36742 case V2SF_FTYPE_V2SF_V2SF:
36743 case V1DI_FTYPE_V1DI_V1DI:
36744 case V1DI_FTYPE_V8QI_V8QI:
36745 case V1DI_FTYPE_V2SI_V2SI:
36746 case V32QI_FTYPE_V16HI_V16HI:
36747 case V16HI_FTYPE_V8SI_V8SI:
36748 case V32QI_FTYPE_V32QI_V32QI:
36749 case V16HI_FTYPE_V32QI_V32QI:
36750 case V16HI_FTYPE_V16HI_V16HI:
36751 case V8SI_FTYPE_V4DF_V4DF:
36752 case V8SI_FTYPE_V8SI_V8SI:
36753 case V8SI_FTYPE_V16HI_V16HI:
36754 case V4DI_FTYPE_V4DI_V4DI:
36755 case V4DI_FTYPE_V8SI_V8SI:
36756 case V4UDI_FTYPE_V8USI_V8USI:
36757 case QI_FTYPE_V8DI_V8DI:
36758 case V8DI_FTYPE_V64QI_V64QI:
36759 case HI_FTYPE_V16SI_V16SI:
36760 if (comparison == UNKNOWN)
36761 return ix86_expand_binop_builtin (icode, exp, target);
36762 nargs = 2;
36763 break;
36764 case V4SF_FTYPE_V4SF_V4SF_SWAP:
36765 case V2DF_FTYPE_V2DF_V2DF_SWAP:
36766 gcc_assert (comparison != UNKNOWN);
36767 nargs = 2;
36768 swap = true;
36769 break;
36770 case V16HI_FTYPE_V16HI_V8HI_COUNT:
36771 case V16HI_FTYPE_V16HI_SI_COUNT:
36772 case V8SI_FTYPE_V8SI_V4SI_COUNT:
36773 case V8SI_FTYPE_V8SI_SI_COUNT:
36774 case V4DI_FTYPE_V4DI_V2DI_COUNT:
36775 case V4DI_FTYPE_V4DI_INT_COUNT:
36776 case V8HI_FTYPE_V8HI_V8HI_COUNT:
36777 case V8HI_FTYPE_V8HI_SI_COUNT:
36778 case V4SI_FTYPE_V4SI_V4SI_COUNT:
36779 case V4SI_FTYPE_V4SI_SI_COUNT:
36780 case V4HI_FTYPE_V4HI_V4HI_COUNT:
36781 case V4HI_FTYPE_V4HI_SI_COUNT:
36782 case V2DI_FTYPE_V2DI_V2DI_COUNT:
36783 case V2DI_FTYPE_V2DI_SI_COUNT:
36784 case V2SI_FTYPE_V2SI_V2SI_COUNT:
36785 case V2SI_FTYPE_V2SI_SI_COUNT:
36786 case V1DI_FTYPE_V1DI_V1DI_COUNT:
36787 case V1DI_FTYPE_V1DI_SI_COUNT:
36788 nargs = 2;
36789 last_arg_count = true;
36790 break;
36791 case UINT64_FTYPE_UINT64_UINT64:
36792 case UINT_FTYPE_UINT_UINT:
36793 case UINT_FTYPE_UINT_USHORT:
36794 case UINT_FTYPE_UINT_UCHAR:
36795 case UINT16_FTYPE_UINT16_INT:
36796 case UINT8_FTYPE_UINT8_INT:
36797 case HI_FTYPE_HI_HI:
36798 case SI_FTYPE_SI_SI:
36799 case DI_FTYPE_DI_DI:
36800 case V16SI_FTYPE_V8DF_V8DF:
36801 nargs = 2;
36802 break;
36803 case V2DI_FTYPE_V2DI_INT_CONVERT:
36804 nargs = 2;
36805 rmode = V1TImode;
36806 nargs_constant = 1;
36807 break;
36808 case V4DI_FTYPE_V4DI_INT_CONVERT:
36809 nargs = 2;
36810 rmode = V2TImode;
36811 nargs_constant = 1;
36812 break;
36813 case V8DI_FTYPE_V8DI_INT_CONVERT:
36814 nargs = 2;
36815 rmode = V4TImode;
36816 nargs_constant = 1;
36817 break;
36818 case V8HI_FTYPE_V8HI_INT:
36819 case V8HI_FTYPE_V8SF_INT:
36820 case V16HI_FTYPE_V16SF_INT:
36821 case V8HI_FTYPE_V4SF_INT:
36822 case V8SF_FTYPE_V8SF_INT:
36823 case V4SF_FTYPE_V16SF_INT:
36824 case V16SF_FTYPE_V16SF_INT:
36825 case V4SI_FTYPE_V4SI_INT:
36826 case V4SI_FTYPE_V8SI_INT:
36827 case V4HI_FTYPE_V4HI_INT:
36828 case V4DF_FTYPE_V4DF_INT:
36829 case V4DF_FTYPE_V8DF_INT:
36830 case V4SF_FTYPE_V4SF_INT:
36831 case V4SF_FTYPE_V8SF_INT:
36832 case V2DI_FTYPE_V2DI_INT:
36833 case V2DF_FTYPE_V2DF_INT:
36834 case V2DF_FTYPE_V4DF_INT:
36835 case V16HI_FTYPE_V16HI_INT:
36836 case V8SI_FTYPE_V8SI_INT:
36837 case V16SI_FTYPE_V16SI_INT:
36838 case V4SI_FTYPE_V16SI_INT:
36839 case V4DI_FTYPE_V4DI_INT:
36840 case V2DI_FTYPE_V4DI_INT:
36841 case V4DI_FTYPE_V8DI_INT:
36842 case HI_FTYPE_HI_INT:
36843 case QI_FTYPE_V4SF_INT:
36844 case QI_FTYPE_V2DF_INT:
36845 nargs = 2;
36846 nargs_constant = 1;
36847 break;
36848 case V16QI_FTYPE_V16QI_V16QI_V16QI:
36849 case V8SF_FTYPE_V8SF_V8SF_V8SF:
36850 case V4DF_FTYPE_V4DF_V4DF_V4DF:
36851 case V4SF_FTYPE_V4SF_V4SF_V4SF:
36852 case V2DF_FTYPE_V2DF_V2DF_V2DF:
36853 case V32QI_FTYPE_V32QI_V32QI_V32QI:
36854 case HI_FTYPE_V16SI_V16SI_HI:
36855 case QI_FTYPE_V8DI_V8DI_QI:
36856 case V16HI_FTYPE_V16SI_V16HI_HI:
36857 case V16QI_FTYPE_V16SI_V16QI_HI:
36858 case V16QI_FTYPE_V8DI_V16QI_QI:
36859 case V16SF_FTYPE_V16SF_V16SF_HI:
36860 case V16SF_FTYPE_V16SF_V16SF_V16SF:
36861 case V16SF_FTYPE_V16SF_V16SI_V16SF:
36862 case V16SF_FTYPE_V16SI_V16SF_HI:
36863 case V16SF_FTYPE_V16SI_V16SF_V16SF:
36864 case V16SF_FTYPE_V4SF_V16SF_HI:
36865 case V16SI_FTYPE_SI_V16SI_HI:
36866 case V16SI_FTYPE_V16HI_V16SI_HI:
36867 case V16SI_FTYPE_V16QI_V16SI_HI:
36868 case V16SI_FTYPE_V16SF_V16SI_HI:
36869 case V8SF_FTYPE_V4SF_V8SF_QI:
36870 case V4DF_FTYPE_V2DF_V4DF_QI:
36871 case V8SI_FTYPE_V4SI_V8SI_QI:
36872 case V8SI_FTYPE_SI_V8SI_QI:
36873 case V4SI_FTYPE_V4SI_V4SI_QI:
36874 case V4SI_FTYPE_SI_V4SI_QI:
36875 case V4DI_FTYPE_V2DI_V4DI_QI:
36876 case V4DI_FTYPE_DI_V4DI_QI:
36877 case V2DI_FTYPE_V2DI_V2DI_QI:
36878 case V2DI_FTYPE_DI_V2DI_QI:
36879 case V64QI_FTYPE_V64QI_V64QI_DI:
36880 case V64QI_FTYPE_V16QI_V64QI_DI:
36881 case V64QI_FTYPE_QI_V64QI_DI:
36882 case V32QI_FTYPE_V32QI_V32QI_SI:
36883 case V32QI_FTYPE_V16QI_V32QI_SI:
36884 case V32QI_FTYPE_QI_V32QI_SI:
36885 case V16QI_FTYPE_V16QI_V16QI_HI:
36886 case V16QI_FTYPE_QI_V16QI_HI:
36887 case V32HI_FTYPE_V8HI_V32HI_SI:
36888 case V32HI_FTYPE_HI_V32HI_SI:
36889 case V16HI_FTYPE_V8HI_V16HI_HI:
36890 case V16HI_FTYPE_HI_V16HI_HI:
36891 case V8HI_FTYPE_V8HI_V8HI_QI:
36892 case V8HI_FTYPE_HI_V8HI_QI:
36893 case V8SF_FTYPE_V8HI_V8SF_QI:
36894 case V4SF_FTYPE_V8HI_V4SF_QI:
36895 case V8SI_FTYPE_V8SF_V8SI_QI:
36896 case V4SI_FTYPE_V4SF_V4SI_QI:
36897 case V8DI_FTYPE_V8SF_V8DI_QI:
36898 case V4DI_FTYPE_V4SF_V4DI_QI:
36899 case V2DI_FTYPE_V4SF_V2DI_QI:
36900 case V8SF_FTYPE_V8DI_V8SF_QI:
36901 case V4SF_FTYPE_V4DI_V4SF_QI:
36902 case V4SF_FTYPE_V2DI_V4SF_QI:
36903 case V8DF_FTYPE_V8DI_V8DF_QI:
36904 case V4DF_FTYPE_V4DI_V4DF_QI:
36905 case V2DF_FTYPE_V2DI_V2DF_QI:
36906 case V16QI_FTYPE_V8HI_V16QI_QI:
36907 case V16QI_FTYPE_V16HI_V16QI_HI:
36908 case V16QI_FTYPE_V4SI_V16QI_QI:
36909 case V16QI_FTYPE_V8SI_V16QI_QI:
36910 case V8HI_FTYPE_V4SI_V8HI_QI:
36911 case V8HI_FTYPE_V8SI_V8HI_QI:
36912 case V16QI_FTYPE_V2DI_V16QI_QI:
36913 case V16QI_FTYPE_V4DI_V16QI_QI:
36914 case V8HI_FTYPE_V2DI_V8HI_QI:
36915 case V8HI_FTYPE_V4DI_V8HI_QI:
36916 case V4SI_FTYPE_V2DI_V4SI_QI:
36917 case V4SI_FTYPE_V4DI_V4SI_QI:
36918 case V32QI_FTYPE_V32HI_V32QI_SI:
36919 case HI_FTYPE_V16QI_V16QI_HI:
36920 case SI_FTYPE_V32QI_V32QI_SI:
36921 case DI_FTYPE_V64QI_V64QI_DI:
36922 case QI_FTYPE_V8HI_V8HI_QI:
36923 case HI_FTYPE_V16HI_V16HI_HI:
36924 case SI_FTYPE_V32HI_V32HI_SI:
36925 case QI_FTYPE_V4SI_V4SI_QI:
36926 case QI_FTYPE_V8SI_V8SI_QI:
36927 case QI_FTYPE_V2DI_V2DI_QI:
36928 case QI_FTYPE_V4DI_V4DI_QI:
36929 case V4SF_FTYPE_V2DF_V4SF_QI:
36930 case V4SF_FTYPE_V4DF_V4SF_QI:
36931 case V16SI_FTYPE_V16SI_V16SI_HI:
36932 case V16SI_FTYPE_V16SI_V16SI_V16SI:
36933 case V16SI_FTYPE_V4SI_V16SI_HI:
36934 case V2DI_FTYPE_V2DI_V2DI_V2DI:
36935 case V2DI_FTYPE_V4SI_V2DI_QI:
36936 case V2DI_FTYPE_V8HI_V2DI_QI:
36937 case V2DI_FTYPE_V16QI_V2DI_QI:
36938 case V4DI_FTYPE_V4DI_V4DI_QI:
36939 case V4DI_FTYPE_V4SI_V4DI_QI:
36940 case V4DI_FTYPE_V8HI_V4DI_QI:
36941 case V4DI_FTYPE_V16QI_V4DI_QI:
36942 case V8DI_FTYPE_V8DF_V8DI_QI:
36943 case V4DI_FTYPE_V4DF_V4DI_QI:
36944 case V2DI_FTYPE_V2DF_V2DI_QI:
36945 case V4SI_FTYPE_V4DF_V4SI_QI:
36946 case V4SI_FTYPE_V2DF_V4SI_QI:
36947 case V4SI_FTYPE_V8HI_V4SI_QI:
36948 case V4SI_FTYPE_V16QI_V4SI_QI:
36949 case V8SI_FTYPE_V8SI_V8SI_V8SI:
36950 case V4DI_FTYPE_V4DI_V4DI_V4DI:
36951 case V8DF_FTYPE_V2DF_V8DF_QI:
36952 case V8DF_FTYPE_V4DF_V8DF_QI:
36953 case V8DF_FTYPE_V8DF_V8DF_QI:
36954 case V8DF_FTYPE_V8DF_V8DF_V8DF:
36955 case V8SF_FTYPE_V8SF_V8SF_QI:
36956 case V8SF_FTYPE_V8SI_V8SF_QI:
36957 case V4DF_FTYPE_V4DF_V4DF_QI:
36958 case V4SF_FTYPE_V4SF_V4SF_QI:
36959 case V2DF_FTYPE_V2DF_V2DF_QI:
36960 case V2DF_FTYPE_V4SF_V2DF_QI:
36961 case V2DF_FTYPE_V4SI_V2DF_QI:
36962 case V4SF_FTYPE_V4SI_V4SF_QI:
36963 case V4DF_FTYPE_V4SF_V4DF_QI:
36964 case V4DF_FTYPE_V4SI_V4DF_QI:
36965 case V8SI_FTYPE_V8SI_V8SI_QI:
36966 case V8SI_FTYPE_V8HI_V8SI_QI:
36967 case V8SI_FTYPE_V16QI_V8SI_QI:
36968 case V8DF_FTYPE_V8DF_V8DI_V8DF:
36969 case V8DF_FTYPE_V8DI_V8DF_V8DF:
36970 case V8DF_FTYPE_V8SF_V8DF_QI:
36971 case V8DF_FTYPE_V8SI_V8DF_QI:
36972 case V8DI_FTYPE_DI_V8DI_QI:
36973 case V16SF_FTYPE_V8SF_V16SF_HI:
36974 case V16SI_FTYPE_V8SI_V16SI_HI:
36975 case V16HI_FTYPE_V16HI_V16HI_HI:
36976 case V8HI_FTYPE_V16QI_V8HI_QI:
36977 case V16HI_FTYPE_V16QI_V16HI_HI:
36978 case V32HI_FTYPE_V32HI_V32HI_SI:
36979 case V32HI_FTYPE_V32QI_V32HI_SI:
36980 case V8DI_FTYPE_V16QI_V8DI_QI:
36981 case V8DI_FTYPE_V2DI_V8DI_QI:
36982 case V8DI_FTYPE_V4DI_V8DI_QI:
36983 case V8DI_FTYPE_V8DI_V8DI_QI:
36984 case V8DI_FTYPE_V8DI_V8DI_V8DI:
36985 case V8DI_FTYPE_V8HI_V8DI_QI:
36986 case V8DI_FTYPE_V8SI_V8DI_QI:
36987 case V8HI_FTYPE_V8DI_V8HI_QI:
36988 case V8SF_FTYPE_V8DF_V8SF_QI:
36989 case V8SI_FTYPE_V8DF_V8SI_QI:
36990 case V8SI_FTYPE_V8DI_V8SI_QI:
36991 case V4SI_FTYPE_V4SI_V4SI_V4SI:
36992 nargs = 3;
36993 break;
36994 case V32QI_FTYPE_V32QI_V32QI_INT:
36995 case V16HI_FTYPE_V16HI_V16HI_INT:
36996 case V16QI_FTYPE_V16QI_V16QI_INT:
36997 case V4DI_FTYPE_V4DI_V4DI_INT:
36998 case V8HI_FTYPE_V8HI_V8HI_INT:
36999 case V8SI_FTYPE_V8SI_V8SI_INT:
37000 case V8SI_FTYPE_V8SI_V4SI_INT:
37001 case V8SF_FTYPE_V8SF_V8SF_INT:
37002 case V8SF_FTYPE_V8SF_V4SF_INT:
37003 case V4SI_FTYPE_V4SI_V4SI_INT:
37004 case V4DF_FTYPE_V4DF_V4DF_INT:
37005 case V16SF_FTYPE_V16SF_V16SF_INT:
37006 case V16SF_FTYPE_V16SF_V4SF_INT:
37007 case V16SI_FTYPE_V16SI_V4SI_INT:
37008 case V4DF_FTYPE_V4DF_V2DF_INT:
37009 case V4SF_FTYPE_V4SF_V4SF_INT:
37010 case V2DI_FTYPE_V2DI_V2DI_INT:
37011 case V4DI_FTYPE_V4DI_V2DI_INT:
37012 case V2DF_FTYPE_V2DF_V2DF_INT:
37013 case QI_FTYPE_V8DI_V8DI_INT:
37014 case QI_FTYPE_V8DF_V8DF_INT:
37015 case QI_FTYPE_V2DF_V2DF_INT:
37016 case QI_FTYPE_V4SF_V4SF_INT:
37017 case HI_FTYPE_V16SI_V16SI_INT:
37018 case HI_FTYPE_V16SF_V16SF_INT:
37019 nargs = 3;
37020 nargs_constant = 1;
37021 break;
37022 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
37023 nargs = 3;
37024 rmode = V4DImode;
37025 nargs_constant = 1;
37026 break;
37027 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
37028 nargs = 3;
37029 rmode = V2DImode;
37030 nargs_constant = 1;
37031 break;
37032 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
37033 nargs = 3;
37034 rmode = DImode;
37035 nargs_constant = 1;
37036 break;
37037 case V2DI_FTYPE_V2DI_UINT_UINT:
37038 nargs = 3;
37039 nargs_constant = 2;
37040 break;
37041 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
37042 nargs = 3;
37043 rmode = V8DImode;
37044 nargs_constant = 1;
37045 break;
37046 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
37047 nargs = 5;
37048 rmode = V8DImode;
37049 mask_pos = 2;
37050 nargs_constant = 1;
37051 break;
37052 case QI_FTYPE_V8DF_INT_QI:
37053 case QI_FTYPE_V4DF_INT_QI:
37054 case QI_FTYPE_V2DF_INT_QI:
37055 case HI_FTYPE_V16SF_INT_HI:
37056 case QI_FTYPE_V8SF_INT_QI:
37057 case QI_FTYPE_V4SF_INT_QI:
37058 nargs = 3;
37059 mask_pos = 1;
37060 nargs_constant = 1;
37061 break;
37062 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
37063 nargs = 5;
37064 rmode = V4DImode;
37065 mask_pos = 2;
37066 nargs_constant = 1;
37067 break;
37068 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
37069 nargs = 5;
37070 rmode = V2DImode;
37071 mask_pos = 2;
37072 nargs_constant = 1;
37073 break;
37074 case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
37075 case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
37076 case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
37077 case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
37078 case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
37079 case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
37080 case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
37081 case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
37082 case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
37083 case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
37084 case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
37085 case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
37086 case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
37087 case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
37088 case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
37089 case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
37090 case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
37091 case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
37092 case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
37093 case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
37094 case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
37095 case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
37096 case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
37097 case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
37098 case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
37099 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
37100 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
37101 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
37102 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
37103 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
37104 case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
37105 case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
37106 case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
37107 case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
37108 case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
37109 case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
37110 case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
37111 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
37112 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
37113 case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
37114 case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
37115 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
37116 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
37117 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
37118 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
37119 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
37120 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
37121 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
37122 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
37123 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
37124 case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
37125 case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
37126 case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
37127 case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
37128 nargs = 4;
37129 break;
37130 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
37131 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
37132 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
37133 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
37134 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
37135 nargs = 4;
37136 nargs_constant = 1;
37137 break;
37138 case QI_FTYPE_V4DI_V4DI_INT_QI:
37139 case QI_FTYPE_V8SI_V8SI_INT_QI:
37140 case QI_FTYPE_V4DF_V4DF_INT_QI:
37141 case QI_FTYPE_V8SF_V8SF_INT_QI:
37142 case QI_FTYPE_V2DI_V2DI_INT_QI:
37143 case QI_FTYPE_V4SI_V4SI_INT_QI:
37144 case QI_FTYPE_V2DF_V2DF_INT_QI:
37145 case QI_FTYPE_V4SF_V4SF_INT_QI:
37146 case DI_FTYPE_V64QI_V64QI_INT_DI:
37147 case SI_FTYPE_V32QI_V32QI_INT_SI:
37148 case HI_FTYPE_V16QI_V16QI_INT_HI:
37149 case SI_FTYPE_V32HI_V32HI_INT_SI:
37150 case HI_FTYPE_V16HI_V16HI_INT_HI:
37151 case QI_FTYPE_V8HI_V8HI_INT_QI:
37152 nargs = 4;
37153 mask_pos = 1;
37154 nargs_constant = 1;
37155 break;
37156 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
37157 nargs = 4;
37158 nargs_constant = 2;
37159 break;
37160 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
37161 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
37162 nargs = 4;
37163 break;
37164 case QI_FTYPE_V8DI_V8DI_INT_QI:
37165 case HI_FTYPE_V16SI_V16SI_INT_HI:
37166 case QI_FTYPE_V8DF_V8DF_INT_QI:
37167 case HI_FTYPE_V16SF_V16SF_INT_HI:
37168 mask_pos = 1;
37169 nargs = 4;
37170 nargs_constant = 1;
37171 break;
37172 case V8SF_FTYPE_V8SF_INT_V8SF_QI:
37173 case V4SF_FTYPE_V4SF_INT_V4SF_QI:
37174 case V2DF_FTYPE_V4DF_INT_V2DF_QI:
37175 case V2DI_FTYPE_V4DI_INT_V2DI_QI:
37176 case V8SF_FTYPE_V16SF_INT_V8SF_QI:
37177 case V8SI_FTYPE_V16SI_INT_V8SI_QI:
37178 case V2DF_FTYPE_V8DF_INT_V2DF_QI:
37179 case V2DI_FTYPE_V8DI_INT_V2DI_QI:
37180 case V4SF_FTYPE_V8SF_INT_V4SF_QI:
37181 case V4SI_FTYPE_V8SI_INT_V4SI_QI:
37182 case V8HI_FTYPE_V8SF_INT_V8HI_QI:
37183 case V8HI_FTYPE_V4SF_INT_V8HI_QI:
37184 case V32HI_FTYPE_V32HI_INT_V32HI_SI:
37185 case V16HI_FTYPE_V16HI_INT_V16HI_HI:
37186 case V8HI_FTYPE_V8HI_INT_V8HI_QI:
37187 case V4DI_FTYPE_V4DI_INT_V4DI_QI:
37188 case V2DI_FTYPE_V2DI_INT_V2DI_QI:
37189 case V8SI_FTYPE_V8SI_INT_V8SI_QI:
37190 case V4SI_FTYPE_V4SI_INT_V4SI_QI:
37191 case V4DF_FTYPE_V4DF_INT_V4DF_QI:
37192 case V2DF_FTYPE_V2DF_INT_V2DF_QI:
37193 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
37194 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
37195 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
37196 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
37197 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
37198 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
37199 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
37200 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
37201 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
37202 nargs = 4;
37203 mask_pos = 2;
37204 nargs_constant = 1;
37205 break;
37206 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
37207 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
37208 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
37209 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
37210 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
37211 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
37212 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
37213 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
37214 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
37215 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
37216 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
37217 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
37218 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
37219 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
37220 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
37221 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
37222 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
37223 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
37224 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
37225 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
37226 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
37227 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
37228 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
37229 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
37230 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
37231 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
37232 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
37233 nargs = 5;
37234 mask_pos = 2;
37235 nargs_constant = 1;
37236 break;
37237 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
37238 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
37239 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
37240 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
37241 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
37242 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
37243 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
37244 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
37245 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
37246 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
37247 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
37248 nargs = 5;
37250 mask_pos = 1;
37251 nargs_constant = 1;
37252 break;
37253
37254 default:
37255 gcc_unreachable ();
37256 }
37257
37258 gcc_assert (nargs <= ARRAY_SIZE (args));
37259
37260 if (comparison != UNKNOWN)
37261 {
37262 gcc_assert (nargs == 2);
37263 return ix86_expand_sse_compare (d, exp, target, swap);
37264 }
37265
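  /* When RMODE differs from the insn's destination mode TMODE, generate
     the insn into a TMODE register and return it to the caller as an
     RMODE subreg.  */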
37266 if (rmode == VOIDmode || rmode == tmode)
37267 {
37268 if (optimize
37269 || target == 0
37270 || GET_MODE (target) != tmode
37271 || !insn_p->operand[0].predicate (target, tmode))
37272 target = gen_reg_rtx (tmode);
37273 real_target = target;
37274 }
37275 else
37276 {
37277 real_target = gen_reg_rtx (tmode);
37278 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
37279 }
37280
37281 for (i = 0; i < nargs; i++)
37282 {
37283 tree arg = CALL_EXPR_ARG (exp, i);
37284 rtx op = expand_normal (arg);
37285 machine_mode mode = insn_p->operand[i + 1].mode;
37286 bool match = insn_p->operand[i + 1].predicate (op, mode);
37287
37288 if (last_arg_count && (i + 1) == nargs)
37289 {
37290 /* SIMD shift insns take either an 8-bit immediate or
37291 register as count. But builtin functions take int as
37292 count. If count doesn't match, we put it in register. */
37293 if (!match)
37294 {
37295 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
37296 if (!insn_p->operand[i + 1].predicate (op, mode))
37297 op = copy_to_reg (op);
37298 }
37299 }
37300 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37301 (!mask_pos && (nargs - i) <= nargs_constant))
37302 {
37303 if (!match)
37304 switch (icode)
37305 {
37306 case CODE_FOR_avx_vinsertf128v4di:
37307 case CODE_FOR_avx_vextractf128v4di:
37308 error ("the last argument must be a 1-bit immediate");
37309 return const0_rtx;
37310
37311 case CODE_FOR_avx512f_cmpv8di3_mask:
37312 case CODE_FOR_avx512f_cmpv16si3_mask:
37313 case CODE_FOR_avx512f_ucmpv8di3_mask:
37314 case CODE_FOR_avx512f_ucmpv16si3_mask:
37315 case CODE_FOR_avx512vl_cmpv4di3_mask:
37316 case CODE_FOR_avx512vl_cmpv8si3_mask:
37317 case CODE_FOR_avx512vl_ucmpv4di3_mask:
37318 case CODE_FOR_avx512vl_ucmpv8si3_mask:
37319 case CODE_FOR_avx512vl_cmpv2di3_mask:
37320 case CODE_FOR_avx512vl_cmpv4si3_mask:
37321 case CODE_FOR_avx512vl_ucmpv2di3_mask:
37322 case CODE_FOR_avx512vl_ucmpv4si3_mask:
37323 error ("the last argument must be a 3-bit immediate");
37324 return const0_rtx;
37325
37326 case CODE_FOR_sse4_1_roundsd:
37327 case CODE_FOR_sse4_1_roundss:
37328
37329 case CODE_FOR_sse4_1_roundpd:
37330 case CODE_FOR_sse4_1_roundps:
37331 case CODE_FOR_avx_roundpd256:
37332 case CODE_FOR_avx_roundps256:
37333
37334 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37335 case CODE_FOR_sse4_1_roundps_sfix:
37336 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37337 case CODE_FOR_avx_roundps_sfix256:
37338
37339 case CODE_FOR_sse4_1_blendps:
37340 case CODE_FOR_avx_blendpd256:
37341 case CODE_FOR_avx_vpermilv4df:
37342 case CODE_FOR_avx_vpermilv4df_mask:
37343 case CODE_FOR_avx512f_getmantv8df_mask:
37344 case CODE_FOR_avx512f_getmantv16sf_mask:
37345 case CODE_FOR_avx512vl_getmantv8sf_mask:
37346 case CODE_FOR_avx512vl_getmantv4df_mask:
37347 case CODE_FOR_avx512vl_getmantv4sf_mask:
37348 case CODE_FOR_avx512vl_getmantv2df_mask:
37349 case CODE_FOR_avx512dq_rangepv8df_mask_round:
37350 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37351 case CODE_FOR_avx512dq_rangepv4df_mask:
37352 case CODE_FOR_avx512dq_rangepv8sf_mask:
37353 case CODE_FOR_avx512dq_rangepv2df_mask:
37354 case CODE_FOR_avx512dq_rangepv4sf_mask:
37355 case CODE_FOR_avx_shufpd256_mask:
37356 error ("the last argument must be a 4-bit immediate");
37357 return const0_rtx;
37358
37359 case CODE_FOR_sha1rnds4:
37360 case CODE_FOR_sse4_1_blendpd:
37361 case CODE_FOR_avx_vpermilv2df:
37362 case CODE_FOR_avx_vpermilv2df_mask:
37363 case CODE_FOR_xop_vpermil2v2df3:
37364 case CODE_FOR_xop_vpermil2v4sf3:
37365 case CODE_FOR_xop_vpermil2v4df3:
37366 case CODE_FOR_xop_vpermil2v8sf3:
37367 case CODE_FOR_avx512f_vinsertf32x4_mask:
37368 case CODE_FOR_avx512f_vinserti32x4_mask:
37369 case CODE_FOR_avx512f_vextractf32x4_mask:
37370 case CODE_FOR_avx512f_vextracti32x4_mask:
37371 case CODE_FOR_sse2_shufpd:
37372 case CODE_FOR_sse2_shufpd_mask:
37373 case CODE_FOR_avx512dq_shuf_f64x2_mask:
37374 case CODE_FOR_avx512dq_shuf_i64x2_mask:
37375 case CODE_FOR_avx512vl_shuf_i32x4_mask:
37376 case CODE_FOR_avx512vl_shuf_f32x4_mask:
37377 error ("the last argument must be a 2-bit immediate");
37378 return const0_rtx;
37379
37380 case CODE_FOR_avx_vextractf128v4df:
37381 case CODE_FOR_avx_vextractf128v8sf:
37382 case CODE_FOR_avx_vextractf128v8si:
37383 case CODE_FOR_avx_vinsertf128v4df:
37384 case CODE_FOR_avx_vinsertf128v8sf:
37385 case CODE_FOR_avx_vinsertf128v8si:
37386 case CODE_FOR_avx512f_vinsertf64x4_mask:
37387 case CODE_FOR_avx512f_vinserti64x4_mask:
37388 case CODE_FOR_avx512f_vextractf64x4_mask:
37389 case CODE_FOR_avx512f_vextracti64x4_mask:
37390 case CODE_FOR_avx512dq_vinsertf32x8_mask:
37391 case CODE_FOR_avx512dq_vinserti32x8_mask:
37392 case CODE_FOR_avx512vl_vinsertv4df:
37393 case CODE_FOR_avx512vl_vinsertv4di:
37394 case CODE_FOR_avx512vl_vinsertv8sf:
37395 case CODE_FOR_avx512vl_vinsertv8si:
37396 error ("the last argument must be a 1-bit immediate");
37397 return const0_rtx;
37398
37399 case CODE_FOR_avx_vmcmpv2df3:
37400 case CODE_FOR_avx_vmcmpv4sf3:
37401 case CODE_FOR_avx_cmpv2df3:
37402 case CODE_FOR_avx_cmpv4sf3:
37403 case CODE_FOR_avx_cmpv4df3:
37404 case CODE_FOR_avx_cmpv8sf3:
37405 case CODE_FOR_avx512f_cmpv8df3_mask:
37406 case CODE_FOR_avx512f_cmpv16sf3_mask:
37407 case CODE_FOR_avx512f_vmcmpv2df3_mask:
37408 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37409 error ("the last argument must be a 5-bit immediate");
37410 return const0_rtx;
37411
37412 default:
37413 switch (nargs_constant)
37414 {
37415 case 2:
37416 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37417 (!mask_pos && (nargs - i) == nargs_constant))
37418 {
37419 error ("the next to last argument must be an 8-bit immediate");
37420 break;
37421 }
37422 case 1:
37423 error ("the last argument must be an 8-bit immediate");
37424 break;
37425 default:
37426 gcc_unreachable ();
37427 }
37428 return const0_rtx;
37429 }
37430 }
37431 else
37432 {
37433 if (VECTOR_MODE_P (mode))
37434 op = safe_vector_operand (op, mode);
37435
37436 /* If we aren't optimizing, only allow one memory operand to
37437 be generated. */
37438 if (memory_operand (op, mode))
37439 num_memory++;
37440
37441 op = fixup_modeless_constant (op, mode);
37442
37443 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37444 {
37445 if (optimize || !match || num_memory > 1)
37446 op = copy_to_mode_reg (mode, op);
37447 }
37448 else
37449 {
37450 op = copy_to_reg (op);
37451 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37452 }
37453 }
37454
37455 args[i].op = op;
37456 args[i].mode = mode;
37457 }
37458
37459 switch (nargs)
37460 {
37461 case 1:
37462 pat = GEN_FCN (icode) (real_target, args[0].op);
37463 break;
37464 case 2:
37465 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37466 break;
37467 case 3:
37468 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37469 args[2].op);
37470 break;
37471 case 4:
37472 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37473 args[2].op, args[3].op);
37474 break;
37475 case 5:
37476 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37477 args[2].op, args[3].op, args[4].op);
      break;
37478 case 6:
37479 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37480 args[2].op, args[3].op, args[4].op,
37481 args[5].op);
37482 break;
37483 default:
37484 gcc_unreachable ();
37485 }
37486
37487 if (! pat)
37488 return 0;
37489
37490 emit_insn (pat);
37491 return target;
37492 }
37493
37494 /* Transform a pattern of the following layout:
37495 (parallel [
37496 (set (A B))
37497 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
37498 ])
37499 into:
37500 (set (A B))
37501
37502 Or:
37503 (parallel [ A B
37504 ...
37505 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
37506 ...
37507 ])
37508 into:
37509 (parallel [ A B ... ]) */
37510
37511 static rtx
37512 ix86_erase_embedded_rounding (rtx pat)
37513 {
37514 if (GET_CODE (pat) == INSN)
37515 pat = PATTERN (pat);
37516
37517 gcc_assert (GET_CODE (pat) == PARALLEL);
37518
37519 if (XVECLEN (pat, 0) == 2)
37520 {
37521 rtx p0 = XVECEXP (pat, 0, 0);
37522 rtx p1 = XVECEXP (pat, 0, 1);
37523
37524 gcc_assert (GET_CODE (p0) == SET
37525 && GET_CODE (p1) == UNSPEC
37526 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
37527
37528 return p0;
37529 }
37530 else
37531 {
37532 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37533 int i = 0;
37534 int j = 0;
37535
37536 for (; i < XVECLEN (pat, 0); ++i)
37537 {
37538 rtx elem = XVECEXP (pat, 0, i);
37539 if (GET_CODE (elem) != UNSPEC
37540 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37541 res [j++] = elem;
37542 }
37543
37544 /* No more than 1 occurrence was removed. */
37545 gcc_assert (j >= XVECLEN (pat, 0) - 1);
37546
37547 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
37548 }
37549 }
37550
37551 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
37552 with rounding. */
37553 static rtx
37554 ix86_expand_sse_comi_round (const struct builtin_description *d,
37555 tree exp, rtx target)
37556 {
37557 rtx pat, set_dst;
37558 tree arg0 = CALL_EXPR_ARG (exp, 0);
37559 tree arg1 = CALL_EXPR_ARG (exp, 1);
37560 tree arg2 = CALL_EXPR_ARG (exp, 2);
37561 tree arg3 = CALL_EXPR_ARG (exp, 3);
37562 rtx op0 = expand_normal (arg0);
37563 rtx op1 = expand_normal (arg1);
37564 rtx op2 = expand_normal (arg2);
37565 rtx op3 = expand_normal (arg3);
37566 enum insn_code icode = d->icode;
37567 const struct insn_data_d *insn_p = &insn_data[icode];
37568 machine_mode mode0 = insn_p->operand[0].mode;
37569 machine_mode mode1 = insn_p->operand[1].mode;
37570 enum rtx_code comparison = UNEQ;
37571 bool need_ucomi = false;
37572
37573 /* See avxintrin.h for values. */
37574 enum rtx_code comi_comparisons[32] =
37575 {
37576 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37577 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37578 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
37579 };
37580 bool need_ucomi_values[32] =
37581 {
37582 true, false, false, true, true, false, false, true,
37583 true, false, false, true, true, false, false, true,
37584 false, true, true, false, false, true, true, false,
37585 false, true, true, false, false, true, true, false
37586 };
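  /* The comparison predicate in OP2 indexes both tables above.  */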
37587
37588 if (!CONST_INT_P (op2))
37589 {
37590 error ("the third argument must be a comparison constant");
37591 return const0_rtx;
37592 }
37593 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37594 {
37595 error ("incorrect comparison mode");
37596 return const0_rtx;
37597 }
37598
37599 if (!insn_p->operand[2].predicate (op3, SImode))
37600 {
37601 error ("incorrect rounding operand");
37602 return const0_rtx;
37603 }
37604
37605 comparison = comi_comparisons[INTVAL (op2)];
37606 need_ucomi = need_ucomi_values[INTVAL (op2)];
37607
37608 if (VECTOR_MODE_P (mode0))
37609 op0 = safe_vector_operand (op0, mode0);
37610 if (VECTOR_MODE_P (mode1))
37611 op1 = safe_vector_operand (op1, mode1);
37612
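  /* The comparison result is materialized in the low byte of a zeroed
     SImode register; the SImode register itself is returned.  */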
37613 target = gen_reg_rtx (SImode);
37614 emit_move_insn (target, const0_rtx);
37615 target = gen_rtx_SUBREG (QImode, target, 0);
37616
37617 if ((optimize && !register_operand (op0, mode0))
37618 || !insn_p->operand[0].predicate (op0, mode0))
37619 op0 = copy_to_mode_reg (mode0, op0);
37620 if ((optimize && !register_operand (op1, mode1))
37621 || !insn_p->operand[1].predicate (op1, mode1))
37622 op1 = copy_to_mode_reg (mode1, op1);
37623
37624 if (need_ucomi)
37625 icode = icode == CODE_FOR_sse_comi_round
37626 ? CODE_FOR_sse_ucomi_round
37627 : CODE_FOR_sse2_ucomi_round;
37628
37629 pat = GEN_FCN (icode) (op0, op1, op3);
37630 if (! pat)
37631 return 0;
37632
37633 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
37634 if (INTVAL (op3) == NO_ROUND)
37635 {
37636 pat = ix86_erase_embedded_rounding (pat);
37637 if (! pat)
37638 return 0;
37639
37640 set_dst = SET_DEST (pat);
37641 }
37642 else
37643 {
37644 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37645 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
37646 }
37647
37648 emit_insn (pat);
37649 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37650 gen_rtx_fmt_ee (comparison, QImode,
37651 set_dst,
37652 const0_rtx)));
37653
37654 return SUBREG_REG (target);
37655 }
37656
37657 static rtx
37658 ix86_expand_round_builtin (const struct builtin_description *d,
37659 tree exp, rtx target)
37660 {
37661 rtx pat;
37662 unsigned int i, nargs;
37663 struct
37664 {
37665 rtx op;
37666 machine_mode mode;
37667 } args[6];
37668 enum insn_code icode = d->icode;
37669 const struct insn_data_d *insn_p = &insn_data[icode];
37670 machine_mode tmode = insn_p->operand[0].mode;
37671 unsigned int nargs_constant = 0;
37672 unsigned int redundant_embed_rnd = 0;
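  /* Every rounding builtin passes the rounding/SAE immediate as its last
     argument; when NARGS_CONSTANT is nonzero it gives the distance from
     the end of the argument list to an additional immediate operand.  */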
37673
37674 switch ((enum ix86_builtin_func_type) d->flag)
37675 {
37676 case UINT64_FTYPE_V2DF_INT:
37677 case UINT64_FTYPE_V4SF_INT:
37678 case UINT_FTYPE_V2DF_INT:
37679 case UINT_FTYPE_V4SF_INT:
37680 case INT64_FTYPE_V2DF_INT:
37681 case INT64_FTYPE_V4SF_INT:
37682 case INT_FTYPE_V2DF_INT:
37683 case INT_FTYPE_V4SF_INT:
37684 nargs = 2;
37685 break;
37686 case V4SF_FTYPE_V4SF_UINT_INT:
37687 case V4SF_FTYPE_V4SF_UINT64_INT:
37688 case V2DF_FTYPE_V2DF_UINT64_INT:
37689 case V4SF_FTYPE_V4SF_INT_INT:
37690 case V4SF_FTYPE_V4SF_INT64_INT:
37691 case V2DF_FTYPE_V2DF_INT64_INT:
37692 case V4SF_FTYPE_V4SF_V4SF_INT:
37693 case V2DF_FTYPE_V2DF_V2DF_INT:
37694 case V4SF_FTYPE_V4SF_V2DF_INT:
37695 case V2DF_FTYPE_V2DF_V4SF_INT:
37696 nargs = 3;
37697 break;
37698 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37699 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37700 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37701 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37702 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37703 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37704 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37705 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37706 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37707 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37708 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37709 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37710 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37711 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37712 nargs = 4;
37713 break;
37714 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37715 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37716 nargs_constant = 2;
37717 nargs = 4;
37718 break;
37719 case INT_FTYPE_V4SF_V4SF_INT_INT:
37720 case INT_FTYPE_V2DF_V2DF_INT_INT:
37721 return ix86_expand_sse_comi_round (d, exp, target);
37722 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37723 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37724 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37725 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37726 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37727 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37728 nargs = 5;
37729 break;
37730 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37731 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37732 nargs_constant = 4;
37733 nargs = 5;
37734 break;
37735 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
37736 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
37737 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
37738 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
37739 nargs_constant = 3;
37740 nargs = 5;
37741 break;
37742 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37743 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37744 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37745 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37746 nargs = 6;
37747 nargs_constant = 4;
37748 break;
37749 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37750 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37751 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37752 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37753 nargs = 6;
37754 nargs_constant = 3;
37755 break;
37756 default:
37757 gcc_unreachable ();
37758 }
37759 gcc_assert (nargs <= ARRAY_SIZE (args));
37760
37761 if (optimize
37762 || target == 0
37763 || GET_MODE (target) != tmode
37764 || !insn_p->operand[0].predicate (target, tmode))
37765 target = gen_reg_rtx (tmode);
37766
37767 for (i = 0; i < nargs; i++)
37768 {
37769 tree arg = CALL_EXPR_ARG (exp, i);
37770 rtx op = expand_normal (arg);
37771 machine_mode mode = insn_p->operand[i + 1].mode;
37772 bool match = insn_p->operand[i + 1].predicate (op, mode);
37773
37774 if (i == nargs - nargs_constant)
37775 {
37776 if (!match)
37777 {
37778 switch (icode)
37779 {
37780 case CODE_FOR_avx512f_getmantv8df_mask_round:
37781 case CODE_FOR_avx512f_getmantv16sf_mask_round:
37782 case CODE_FOR_avx512f_vgetmantv2df_round:
37783 case CODE_FOR_avx512f_vgetmantv4sf_round:
37784 error ("the immediate argument must be a 4-bit immediate");
37785 return const0_rtx;
37786 case CODE_FOR_avx512f_cmpv8df3_mask_round:
37787 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
37788 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
37789 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
37790 error ("the immediate argument must be a 5-bit immediate");
37791 return const0_rtx;
37792 default:
37793 error ("the immediate argument must be an 8-bit immediate");
37794 return const0_rtx;
37795 }
37796 }
37797 }
37798 else if (i == nargs - 1)
37799 {
37800 if (!insn_p->operand[nargs].predicate (op, SImode))
37801 {
37802 error ("incorrect rounding operand");
37803 return const0_rtx;
37804 }
37805
37806 /* If there is no rounding, use the normal version of the pattern. */
37807 if (INTVAL (op) == NO_ROUND)
37808 redundant_embed_rnd = 1;
37809 }
37810 else
37811 {
37812 if (VECTOR_MODE_P (mode))
37813 op = safe_vector_operand (op, mode);
37814
37815 op = fixup_modeless_constant (op, mode);
37816
37817 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37818 {
37819 if (optimize || !match)
37820 op = copy_to_mode_reg (mode, op);
37821 }
37822 else
37823 {
37824 op = copy_to_reg (op);
37825 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37826 }
37827 }
37828
37829 args[i].op = op;
37830 args[i].mode = mode;
37831 }
37832
37833 switch (nargs)
37834 {
37835 case 1:
37836 pat = GEN_FCN (icode) (target, args[0].op);
37837 break;
37838 case 2:
37839 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37840 break;
37841 case 3:
37842 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37843 args[2].op);
37844 break;
37845 case 4:
37846 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37847 args[2].op, args[3].op);
37848 break;
37849 case 5:
37850 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37851 args[2].op, args[3].op, args[4].op);
      break;
37852 case 6:
37853 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37854 args[2].op, args[3].op, args[4].op,
37855 args[5].op);
37856 break;
37857 default:
37858 gcc_unreachable ();
37859 }
37860
37861 if (!pat)
37862 return 0;
37863
37864 if (redundant_embed_rnd)
37865 pat = ix86_erase_embedded_rounding (pat);
37866
37867 emit_insn (pat);
37868 return target;
37869 }
37870
37871 /* Subroutine of ix86_expand_builtin to take care of special insns
37872 with variable number of operands. */
37873
37874 static rtx
37875 ix86_expand_special_args_builtin (const struct builtin_description *d,
37876 tree exp, rtx target)
37877 {
37878 tree arg;
37879 rtx pat, op;
37880 unsigned int i, nargs, arg_adjust, memory;
37881 bool aligned_mem = false;
37882 struct
37883 {
37884 rtx op;
37885 machine_mode mode;
37886 } args[3];
37887 enum insn_code icode = d->icode;
37888 bool last_arg_constant = false;
37889 const struct insn_data_d *insn_p = &insn_data[icode];
37890 machine_mode tmode = insn_p->operand[0].mode;
37891 enum { load, store } klass;
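  /* KLASS tells whether the builtin loads from or stores to memory.
     MEMORY is the index of the argument that is a memory reference, or
     ARRAY_SIZE (args) when none is (for stores the destination itself is
     the memory operand).  */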
37892
37893 switch ((enum ix86_builtin_func_type) d->flag)
37894 {
37895 case VOID_FTYPE_VOID:
37896 emit_insn (GEN_FCN (icode) (target));
37897 return 0;
37898 case VOID_FTYPE_UINT64:
37899 case VOID_FTYPE_UNSIGNED:
37900 nargs = 0;
37901 klass = store;
37902 memory = 0;
37903 break;
37904
37905 case INT_FTYPE_VOID:
37906 case USHORT_FTYPE_VOID:
37907 case UINT64_FTYPE_VOID:
37908 case UNSIGNED_FTYPE_VOID:
37909 nargs = 0;
37910 klass = load;
37911 memory = 0;
37912 break;
37913 case UINT64_FTYPE_PUNSIGNED:
37914 case V2DI_FTYPE_PV2DI:
37915 case V4DI_FTYPE_PV4DI:
37916 case V32QI_FTYPE_PCCHAR:
37917 case V16QI_FTYPE_PCCHAR:
37918 case V8SF_FTYPE_PCV4SF:
37919 case V8SF_FTYPE_PCFLOAT:
37920 case V4SF_FTYPE_PCFLOAT:
37921 case V4DF_FTYPE_PCV2DF:
37922 case V4DF_FTYPE_PCDOUBLE:
37923 case V2DF_FTYPE_PCDOUBLE:
37924 case VOID_FTYPE_PVOID:
37925 case V16SI_FTYPE_PV4SI:
37926 case V16SF_FTYPE_PV4SF:
37927 case V8DI_FTYPE_PV4DI:
37928 case V8DI_FTYPE_PV8DI:
37929 case V8DF_FTYPE_PV4DF:
37930 nargs = 1;
37931 klass = load;
37932 memory = 0;
37933 switch (icode)
37934 {
37935 case CODE_FOR_sse4_1_movntdqa:
37936 case CODE_FOR_avx2_movntdqa:
37937 case CODE_FOR_avx512f_movntdqa:
37938 aligned_mem = true;
37939 break;
37940 default:
37941 break;
37942 }
37943 break;
37944 case VOID_FTYPE_PV2SF_V4SF:
37945 case VOID_FTYPE_PV8DI_V8DI:
37946 case VOID_FTYPE_PV4DI_V4DI:
37947 case VOID_FTYPE_PV2DI_V2DI:
37948 case VOID_FTYPE_PCHAR_V32QI:
37949 case VOID_FTYPE_PCHAR_V16QI:
37950 case VOID_FTYPE_PFLOAT_V16SF:
37951 case VOID_FTYPE_PFLOAT_V8SF:
37952 case VOID_FTYPE_PFLOAT_V4SF:
37953 case VOID_FTYPE_PDOUBLE_V8DF:
37954 case VOID_FTYPE_PDOUBLE_V4DF:
37955 case VOID_FTYPE_PDOUBLE_V2DF:
37956 case VOID_FTYPE_PLONGLONG_LONGLONG:
37957 case VOID_FTYPE_PULONGLONG_ULONGLONG:
37958 case VOID_FTYPE_PINT_INT:
37959 nargs = 1;
37960 klass = store;
37961 /* Reserve memory operand for target. */
37962 memory = ARRAY_SIZE (args);
37963 switch (icode)
37964 {
37965 /* These builtins and instructions require the memory
37966 to be properly aligned. */
37967 case CODE_FOR_avx_movntv4di:
37968 case CODE_FOR_sse2_movntv2di:
37969 case CODE_FOR_avx_movntv8sf:
37970 case CODE_FOR_sse_movntv4sf:
37971 case CODE_FOR_sse4a_vmmovntv4sf:
37972 case CODE_FOR_avx_movntv4df:
37973 case CODE_FOR_sse2_movntv2df:
37974 case CODE_FOR_sse4a_vmmovntv2df:
37975 case CODE_FOR_sse2_movntidi:
37976 case CODE_FOR_sse_movntq:
37977 case CODE_FOR_sse2_movntisi:
37978 case CODE_FOR_avx512f_movntv16sf:
37979 case CODE_FOR_avx512f_movntv8df:
37980 case CODE_FOR_avx512f_movntv8di:
37981 aligned_mem = true;
37982 break;
37983 default:
37984 break;
37985 }
37986 break;
37987 case V4SF_FTYPE_V4SF_PCV2SF:
37988 case V2DF_FTYPE_V2DF_PCDOUBLE:
37989 nargs = 2;
37990 klass = load;
37991 memory = 1;
37992 break;
37993 case V8SF_FTYPE_PCV8SF_V8SI:
37994 case V4DF_FTYPE_PCV4DF_V4DI:
37995 case V4SF_FTYPE_PCV4SF_V4SI:
37996 case V2DF_FTYPE_PCV2DF_V2DI:
37997 case V8SI_FTYPE_PCV8SI_V8SI:
37998 case V4DI_FTYPE_PCV4DI_V4DI:
37999 case V4SI_FTYPE_PCV4SI_V4SI:
38000 case V2DI_FTYPE_PCV2DI_V2DI:
38001 nargs = 2;
38002 klass = load;
38003 memory = 0;
38004 break;
38005 case VOID_FTYPE_PV8DF_V8DF_QI:
38006 case VOID_FTYPE_PV16SF_V16SF_HI:
38007 case VOID_FTYPE_PV8DI_V8DI_QI:
38008 case VOID_FTYPE_PV4DI_V4DI_QI:
38009 case VOID_FTYPE_PV2DI_V2DI_QI:
38010 case VOID_FTYPE_PV16SI_V16SI_HI:
38011 case VOID_FTYPE_PV8SI_V8SI_QI:
38012 case VOID_FTYPE_PV4SI_V4SI_QI:
38013 switch (icode)
38014 {
38015 /* These builtins and instructions require the memory
38016 to be properly aligned. */
38017 case CODE_FOR_avx512f_storev16sf_mask:
38018 case CODE_FOR_avx512f_storev16si_mask:
38019 case CODE_FOR_avx512f_storev8df_mask:
38020 case CODE_FOR_avx512f_storev8di_mask:
38021 case CODE_FOR_avx512vl_storev8sf_mask:
38022 case CODE_FOR_avx512vl_storev8si_mask:
38023 case CODE_FOR_avx512vl_storev4df_mask:
38024 case CODE_FOR_avx512vl_storev4di_mask:
38025 case CODE_FOR_avx512vl_storev4sf_mask:
38026 case CODE_FOR_avx512vl_storev4si_mask:
38027 case CODE_FOR_avx512vl_storev2df_mask:
38028 case CODE_FOR_avx512vl_storev2di_mask:
38029 aligned_mem = true;
38030 break;
38031 default:
38032 break;
38033 }
38034 /* FALLTHRU */
38035 case VOID_FTYPE_PV8SF_V8SI_V8SF:
38036 case VOID_FTYPE_PV4DF_V4DI_V4DF:
38037 case VOID_FTYPE_PV4SF_V4SI_V4SF:
38038 case VOID_FTYPE_PV2DF_V2DI_V2DF:
38039 case VOID_FTYPE_PV8SI_V8SI_V8SI:
38040 case VOID_FTYPE_PV4DI_V4DI_V4DI:
38041 case VOID_FTYPE_PV4SI_V4SI_V4SI:
38042 case VOID_FTYPE_PV2DI_V2DI_V2DI:
38043 case VOID_FTYPE_PDOUBLE_V2DF_QI:
38044 case VOID_FTYPE_PFLOAT_V4SF_QI:
38045 case VOID_FTYPE_PV8SI_V8DI_QI:
38046 case VOID_FTYPE_PV8HI_V8DI_QI:
38047 case VOID_FTYPE_PV16HI_V16SI_HI:
38048 case VOID_FTYPE_PV16QI_V8DI_QI:
38049 case VOID_FTYPE_PV16QI_V16SI_HI:
38050 case VOID_FTYPE_PV4SI_V4DI_QI:
38051 case VOID_FTYPE_PV4SI_V2DI_QI:
38052 case VOID_FTYPE_PV8HI_V4DI_QI:
38053 case VOID_FTYPE_PV8HI_V2DI_QI:
38054 case VOID_FTYPE_PV8HI_V8SI_QI:
38055 case VOID_FTYPE_PV8HI_V4SI_QI:
38056 case VOID_FTYPE_PV16QI_V4DI_QI:
38057 case VOID_FTYPE_PV16QI_V2DI_QI:
38058 case VOID_FTYPE_PV16QI_V8SI_QI:
38059 case VOID_FTYPE_PV16QI_V4SI_QI:
38060 case VOID_FTYPE_PV8HI_V8HI_QI:
38061 case VOID_FTYPE_PV16HI_V16HI_HI:
38062 case VOID_FTYPE_PV32HI_V32HI_SI:
38063 case VOID_FTYPE_PV16QI_V16QI_HI:
38064 case VOID_FTYPE_PV32QI_V32QI_SI:
38065 case VOID_FTYPE_PV64QI_V64QI_DI:
38066 case VOID_FTYPE_PV4DF_V4DF_QI:
38067 case VOID_FTYPE_PV2DF_V2DF_QI:
38068 case VOID_FTYPE_PV8SF_V8SF_QI:
38069 case VOID_FTYPE_PV4SF_V4SF_QI:
38070 nargs = 2;
38071 klass = store;
38072 /* Reserve memory operand for target. */
38073 memory = ARRAY_SIZE (args);
38074 break;
38075 case V4SF_FTYPE_PCV4SF_V4SF_QI:
38076 case V8SF_FTYPE_PCV8SF_V8SF_QI:
38077 case V16SF_FTYPE_PCV16SF_V16SF_HI:
38078 case V4SI_FTYPE_PCV4SI_V4SI_QI:
38079 case V8SI_FTYPE_PCV8SI_V8SI_QI:
38080 case V16SI_FTYPE_PCV16SI_V16SI_HI:
38081 case V2DF_FTYPE_PCV2DF_V2DF_QI:
38082 case V4DF_FTYPE_PCV4DF_V4DF_QI:
38083 case V8DF_FTYPE_PCV8DF_V8DF_QI:
38084 case V2DI_FTYPE_PCV2DI_V2DI_QI:
38085 case V4DI_FTYPE_PCV4DI_V4DI_QI:
38086 case V8DI_FTYPE_PCV8DI_V8DI_QI:
38087 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
38088 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
38089 case V8HI_FTYPE_PCV8HI_V8HI_QI:
38090 case V16HI_FTYPE_PCV16HI_V16HI_HI:
38091 case V32HI_FTYPE_PCV32HI_V32HI_SI:
38092 case V16QI_FTYPE_PCV16QI_V16QI_HI:
38093 case V32QI_FTYPE_PCV32QI_V32QI_SI:
38094 case V64QI_FTYPE_PCV64QI_V64QI_DI:
38095 nargs = 3;
38096 klass = load;
38097 memory = 0;
38098 switch (icode)
38099 {
38100 /* These builtins and instructions require the memory
38101 to be properly aligned. */
38102 case CODE_FOR_avx512f_loadv16sf_mask:
38103 case CODE_FOR_avx512f_loadv16si_mask:
38104 case CODE_FOR_avx512f_loadv8df_mask:
38105 case CODE_FOR_avx512f_loadv8di_mask:
38106 case CODE_FOR_avx512vl_loadv8sf_mask:
38107 case CODE_FOR_avx512vl_loadv8si_mask:
38108 case CODE_FOR_avx512vl_loadv4df_mask:
38109 case CODE_FOR_avx512vl_loadv4di_mask:
38110 case CODE_FOR_avx512vl_loadv4sf_mask:
38111 case CODE_FOR_avx512vl_loadv4si_mask:
38112 case CODE_FOR_avx512vl_loadv2df_mask:
38113 case CODE_FOR_avx512vl_loadv2di_mask:
38114 case CODE_FOR_avx512bw_loadv64qi_mask:
38115 case CODE_FOR_avx512vl_loadv32qi_mask:
38116 case CODE_FOR_avx512vl_loadv16qi_mask:
38117 case CODE_FOR_avx512bw_loadv32hi_mask:
38118 case CODE_FOR_avx512vl_loadv16hi_mask:
38119 case CODE_FOR_avx512vl_loadv8hi_mask:
38120 aligned_mem = true;
38121 break;
38122 default:
38123 break;
38124 }
38125 break;
38126 case VOID_FTYPE_UINT_UINT_UINT:
38127 case VOID_FTYPE_UINT64_UINT_UINT:
38128 case UCHAR_FTYPE_UINT_UINT_UINT:
38129 case UCHAR_FTYPE_UINT64_UINT_UINT:
38130 nargs = 3;
38131 klass = load;
38132 memory = ARRAY_SIZE (args);
38133 last_arg_constant = true;
38134 break;
38135 default:
38136 gcc_unreachable ();
38137 }
38138
38139 gcc_assert (nargs <= ARRAY_SIZE (args));
38140
38141 if (klass == store)
38142 {
38143 arg = CALL_EXPR_ARG (exp, 0);
38144 op = expand_normal (arg);
38145 gcc_assert (target == 0);
38146 if (memory)
38147 {
38148 op = ix86_zero_extend_to_Pmode (op);
38149 target = gen_rtx_MEM (tmode, op);
38150 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38151 on it. Try to improve it using get_pointer_alignment,
38152 and if the special builtin is one that requires strict
38153 mode alignment, also from its GET_MODE_ALIGNMENT.
38154 Failure to do so could lead to ix86_legitimate_combined_insn
38155 rejecting all changes to such insns. */
38156 unsigned int align = get_pointer_alignment (arg);
38157 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38158 align = GET_MODE_ALIGNMENT (tmode);
38159 if (MEM_ALIGN (target) < align)
38160 set_mem_align (target, align);
38161 }
38162 else
38163 target = force_reg (tmode, op);
38164 arg_adjust = 1;
38165 }
38166 else
38167 {
38168 arg_adjust = 0;
38169 if (optimize
38170 || target == 0
38171 || !register_operand (target, tmode)
38172 || GET_MODE (target) != tmode)
38173 target = gen_reg_rtx (tmode);
38174 }
38175
38176 for (i = 0; i < nargs; i++)
38177 {
38178 machine_mode mode = insn_p->operand[i + 1].mode;
38179 bool match;
38180
38181 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38182 op = expand_normal (arg);
38183 match = insn_p->operand[i + 1].predicate (op, mode);
38184
38185 if (last_arg_constant && (i + 1) == nargs)
38186 {
38187 if (!match)
38188 {
38189 if (icode == CODE_FOR_lwp_lwpvalsi3
38190 || icode == CODE_FOR_lwp_lwpinssi3
38191 || icode == CODE_FOR_lwp_lwpvaldi3
38192 || icode == CODE_FOR_lwp_lwpinsdi3)
38193 error ("the last argument must be a 32-bit immediate");
38194 else
38195 error ("the last argument must be an 8-bit immediate");
38196 return const0_rtx;
38197 }
38198 }
38199 else
38200 {
38201 if (i == memory)
38202 {
38203 /* This must be the memory operand. */
38204 op = ix86_zero_extend_to_Pmode (op);
38205 op = gen_rtx_MEM (mode, op);
38206 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38207 on it. Try to improve it using get_pointer_alignment,
38208 and if the special builtin is one that requires strict
38209 mode alignment, also from its GET_MODE_ALIGNMENT.
38210 Failure to do so could lead to ix86_legitimate_combined_insn
38211 rejecting all changes to such insns. */
38212 unsigned int align = get_pointer_alignment (arg);
38213 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38214 align = GET_MODE_ALIGNMENT (mode);
38215 if (MEM_ALIGN (op) < align)
38216 set_mem_align (op, align);
38217 }
38218 else
38219 {
38220 /* This must be a register. */
38221 if (VECTOR_MODE_P (mode))
38222 op = safe_vector_operand (op, mode);
38223
38224 op = fixup_modeless_constant (op, mode);
38225
38226 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38227 op = copy_to_mode_reg (mode, op);
38228 else
38229 {
38230 op = copy_to_reg (op);
38231 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38232 }
38233 }
38234 }
38235
38236 args[i].op = op;
38237 args[i].mode = mode;
38238 }
38239
38240 switch (nargs)
38241 {
38242 case 0:
38243 pat = GEN_FCN (icode) (target);
38244 break;
38245 case 1:
38246 pat = GEN_FCN (icode) (target, args[0].op);
38247 break;
38248 case 2:
38249 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38250 break;
38251 case 3:
38252 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38253 break;
38254 default:
38255 gcc_unreachable ();
38256 }
38257
38258 if (! pat)
38259 return 0;
38260 emit_insn (pat);
38261 return klass == store ? 0 : target;
38262 }
38263
38264 /* Return the integer constant in ARG. Constrain it to be in the range
38265 of the subparts of VEC_TYPE; issue an error if not. */
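/* For example, for a V4SF vector argument the valid selectors are 0..3.  */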
38266
38267 static int
38268 get_element_number (tree vec_type, tree arg)
38269 {
38270 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
38271
38272 if (!tree_fits_uhwi_p (arg)
38273 || (elt = tree_to_uhwi (arg), elt > max))
38274 {
38275 error ("selector must be an integer constant in the range 0..%wi", max);
38276 return 0;
38277 }
38278
38279 return elt;
38280 }
38281
38282 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38283 ix86_expand_vector_init. We DO have language-level syntax for this, in
38284 the form of (type){ init-list }. Except that since we can't place emms
38285 instructions from inside the compiler, we can't allow the use of MMX
38286 registers unless the user explicitly asks for it. So we do *not* define
38287 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
38288 we have builtins invoked by mmintrin.h that give us license to emit
38289 these sorts of instructions. */
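/* For illustration only (wrapper spelling as found in mmintrin.h, not
   defined in this file): _mm_set_pi32 (i1, i0) expands to roughly
     __builtin_ia32_vec_init_v2si (i0, i1)
   and each call argument becomes one element of the initializer vector
   built below.  */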
38290
38291 static rtx
38292 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38293 {
38294 machine_mode tmode = TYPE_MODE (type);
38295 machine_mode inner_mode = GET_MODE_INNER (tmode);
38296 int i, n_elt = GET_MODE_NUNITS (tmode);
38297 rtvec v = rtvec_alloc (n_elt);
38298
38299 gcc_assert (VECTOR_MODE_P (tmode));
38300 gcc_assert (call_expr_nargs (exp) == n_elt);
38301
38302 for (i = 0; i < n_elt; ++i)
38303 {
38304 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38305 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38306 }
38307
38308 if (!target || !register_operand (target, tmode))
38309 target = gen_reg_rtx (tmode);
38310
38311 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38312 return target;
38313 }
38314
38315 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38316 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
38317 had a language-level syntax for referencing vector elements. */
38318
38319 static rtx
38320 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38321 {
38322 machine_mode tmode, mode0;
38323 tree arg0, arg1;
38324 int elt;
38325 rtx op0;
38326
38327 arg0 = CALL_EXPR_ARG (exp, 0);
38328 arg1 = CALL_EXPR_ARG (exp, 1);
38329
38330 op0 = expand_normal (arg0);
38331 elt = get_element_number (TREE_TYPE (arg0), arg1);
38332
38333 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38334 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38335 gcc_assert (VECTOR_MODE_P (mode0));
38336
38337 op0 = force_reg (mode0, op0);
38338
38339 if (optimize || !target || !register_operand (target, tmode))
38340 target = gen_reg_rtx (tmode);
38341
38342 ix86_expand_vector_extract (true, target, op0, elt);
38343
38344 return target;
38345 }
38346
38347 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38348 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
38349 a language-level syntax for referencing vector elements. */
38350
38351 static rtx
38352 ix86_expand_vec_set_builtin (tree exp)
38353 {
38354 machine_mode tmode, mode1;
38355 tree arg0, arg1, arg2;
38356 int elt;
38357 rtx op0, op1, target;
38358
38359 arg0 = CALL_EXPR_ARG (exp, 0);
38360 arg1 = CALL_EXPR_ARG (exp, 1);
38361 arg2 = CALL_EXPR_ARG (exp, 2);
38362
38363 tmode = TYPE_MODE (TREE_TYPE (arg0));
38364 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38365 gcc_assert (VECTOR_MODE_P (tmode));
38366
38367 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38368 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
38369 elt = get_element_number (TREE_TYPE (arg0), arg2);
38370
38371 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38372 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38373
38374 op0 = force_reg (tmode, op0);
38375 op1 = force_reg (mode1, op1);
38376
38377 /* OP0 is the source of these builtin functions and shouldn't be
38378 modified. Create a copy, use it and return it as target. */
38379 target = gen_reg_rtx (tmode);
38380 emit_move_insn (target, op0);
38381 ix86_expand_vector_set (true, target, op1, elt);
38382
38383 return target;
38384 }
38385
38386 /* Emit conditional move of SRC to DST with condition
38387 OP1 CODE OP2. */
38388 static void
38389 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
38390 {
38391 rtx t;
38392
38393 if (TARGET_CMOVE)
38394 {
38395 t = ix86_expand_compare (code, op1, op2);
38396 emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
38397 src, dst)));
38398 }
38399 else
38400 {
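      /* Sketch of the non-CMOV expansion emitted here:
	   if (!(op1 CODE op2)) goto nomove;
	   dst = src;
	 nomove:;  */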
38401 rtx nomove = gen_label_rtx ();
38402 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38403 const0_rtx, GET_MODE (op1), 1, nomove);
38404 emit_move_insn (dst, src);
38405 emit_label (nomove);
38406 }
38407 }
38408
38409 /* Choose the max of DST and SRC and put it into DST. */
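/* Equivalently (unsigned comparison): if (dst < src) dst = src;
   i.e. DST becomes the unsigned maximum of DST and SRC.  */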
38410 static void
38411 ix86_emit_move_max (rtx dst, rtx src)
38412 {
38413 ix86_emit_cmove (dst, src, LTU, dst, src);
38414 }
38415
38416 /* Expand an expression EXP that calls a built-in function,
38417 with result going to TARGET if that's convenient
38418 (and in mode MODE if that's convenient).
38419 SUBTARGET may be used as the target for computing one of EXP's operands.
38420 IGNORE is nonzero if the value is to be ignored. */
38421
38422 static rtx
38423 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38424 machine_mode mode, int ignore)
38425 {
38426 const struct builtin_description *d;
38427 size_t i;
38428 enum insn_code icode;
38429 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38430 tree arg0, arg1, arg2, arg3, arg4;
38431 rtx op0, op1, op2, op3, op4, pat, insn;
38432 machine_mode mode0, mode1, mode2, mode3, mode4;
38433 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38434
38435 /* For CPU builtins that can be folded, fold first and expand the fold. */
38436 switch (fcode)
38437 {
38438 case IX86_BUILTIN_CPU_INIT:
38439 {
38440 /* Make it call __cpu_indicator_init in libgcc. */
38441 tree call_expr, fndecl, type;
38442 type = build_function_type_list (integer_type_node, NULL_TREE);
38443 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38444 call_expr = build_call_expr (fndecl, 0);
38445 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38446 }
38447 case IX86_BUILTIN_CPU_IS:
38448 case IX86_BUILTIN_CPU_SUPPORTS:
38449 {
38450 tree arg0 = CALL_EXPR_ARG (exp, 0);
38451 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38452 gcc_assert (fold_expr != NULL_TREE);
38453 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38454 }
38455 }
38456
38457 /* Determine whether the builtin function is available under the current ISA.
38458 Originally the builtin was not created if it wasn't applicable to the
38459 current ISA based on the command line switches. With function specific
38460 options, we need to check in the context of the function making the call
38461 whether it is supported. */
38462 if (ix86_builtins_isa[fcode].isa
38463 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38464 {
38465 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38466 NULL, (enum fpmath_unit) 0, false);
38467
38468 if (!opts)
38469 error ("%qE needs unknown isa option", fndecl);
38470 else
38471 {
38472 gcc_assert (opts != NULL);
38473 error ("%qE needs isa option %s", fndecl, opts);
38474 free (opts);
38475 }
38476 return const0_rtx;
38477 }
38478
38479 switch (fcode)
38480 {
38481 case IX86_BUILTIN_BNDMK:
38482 if (!target
38483 || GET_MODE (target) != BNDmode
38484 || !register_operand (target, BNDmode))
38485 target = gen_reg_rtx (BNDmode);
38486
38487 arg0 = CALL_EXPR_ARG (exp, 0);
38488 arg1 = CALL_EXPR_ARG (exp, 1);
38489
38490 op0 = expand_normal (arg0);
38491 op1 = expand_normal (arg1);
38492
38493 if (!register_operand (op0, Pmode))
38494 op0 = ix86_zero_extend_to_Pmode (op0);
38495 if (!register_operand (op1, Pmode))
38496 op1 = ix86_zero_extend_to_Pmode (op1);
38497
38498 /* Builtin arg1 is the size of the block, but instruction op1 should
38499 be (size - 1). */
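      /* E.g. (hypothetical call) a request for bounds covering a 16-byte
	 block ends up passing 15 to the bndmk pattern below.  */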
38500 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38501 NULL_RTX, 1, OPTAB_DIRECT);
38502
38503 emit_insn (BNDmode == BND64mode
38504 ? gen_bnd64_mk (target, op0, op1)
38505 : gen_bnd32_mk (target, op0, op1));
38506 return target;
38507
38508 case IX86_BUILTIN_BNDSTX:
38509 arg0 = CALL_EXPR_ARG (exp, 0);
38510 arg1 = CALL_EXPR_ARG (exp, 1);
38511 arg2 = CALL_EXPR_ARG (exp, 2);
38512
38513 op0 = expand_normal (arg0);
38514 op1 = expand_normal (arg1);
38515 op2 = expand_normal (arg2);
38516
38517 if (!register_operand (op0, Pmode))
38518 op0 = ix86_zero_extend_to_Pmode (op0);
38519 if (!register_operand (op1, BNDmode))
38520 op1 = copy_to_mode_reg (BNDmode, op1);
38521 if (!register_operand (op2, Pmode))
38522 op2 = ix86_zero_extend_to_Pmode (op2);
38523
38524 emit_insn (BNDmode == BND64mode
38525 ? gen_bnd64_stx (op2, op0, op1)
38526 : gen_bnd32_stx (op2, op0, op1));
38527 return 0;
38528
38529 case IX86_BUILTIN_BNDLDX:
38530 if (!target
38531 || GET_MODE (target) != BNDmode
38532 || !register_operand (target, BNDmode))
38533 target = gen_reg_rtx (BNDmode);
38534
38535 arg0 = CALL_EXPR_ARG (exp, 0);
38536 arg1 = CALL_EXPR_ARG (exp, 1);
38537
38538 op0 = expand_normal (arg0);
38539 op1 = expand_normal (arg1);
38540
38541 if (!register_operand (op0, Pmode))
38542 op0 = ix86_zero_extend_to_Pmode (op0);
38543 if (!register_operand (op1, Pmode))
38544 op1 = ix86_zero_extend_to_Pmode (op1);
38545
38546 emit_insn (BNDmode == BND64mode
38547 ? gen_bnd64_ldx (target, op0, op1)
38548 : gen_bnd32_ldx (target, op0, op1));
38549 return target;
38550
38551 case IX86_BUILTIN_BNDCL:
38552 arg0 = CALL_EXPR_ARG (exp, 0);
38553 arg1 = CALL_EXPR_ARG (exp, 1);
38554
38555 op0 = expand_normal (arg0);
38556 op1 = expand_normal (arg1);
38557
38558 if (!register_operand (op0, Pmode))
38559 op0 = ix86_zero_extend_to_Pmode (op0);
38560 if (!register_operand (op1, BNDmode))
38561 op1 = copy_to_mode_reg (BNDmode, op1);
38562
38563 emit_insn (BNDmode == BND64mode
38564 ? gen_bnd64_cl (op1, op0)
38565 : gen_bnd32_cl (op1, op0));
38566 return 0;
38567
38568 case IX86_BUILTIN_BNDCU:
38569 arg0 = CALL_EXPR_ARG (exp, 0);
38570 arg1 = CALL_EXPR_ARG (exp, 1);
38571
38572 op0 = expand_normal (arg0);
38573 op1 = expand_normal (arg1);
38574
38575 if (!register_operand (op0, Pmode))
38576 op0 = ix86_zero_extend_to_Pmode (op0);
38577 if (!register_operand (op1, BNDmode))
38578 op1 = copy_to_mode_reg (BNDmode, op1);
38579
38580 emit_insn (BNDmode == BND64mode
38581 ? gen_bnd64_cu (op1, op0)
38582 : gen_bnd32_cu (op1, op0));
38583 return 0;
38584
38585 case IX86_BUILTIN_BNDRET:
38586 arg0 = CALL_EXPR_ARG (exp, 0);
38587 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38588 target = chkp_get_rtl_bounds (arg0);
38589
38590 /* If no bounds were specified for the returned value,
38591 then use INIT bounds. This usually happens when
38592 some built-in function is expanded. */
38593 if (!target)
38594 {
38595 rtx t1 = gen_reg_rtx (Pmode);
38596 rtx t2 = gen_reg_rtx (Pmode);
38597 target = gen_reg_rtx (BNDmode);
38598 emit_move_insn (t1, const0_rtx);
38599 emit_move_insn (t2, constm1_rtx);
38600 emit_insn (BNDmode == BND64mode
38601 ? gen_bnd64_mk (target, t1, t2)
38602 : gen_bnd32_mk (target, t1, t2));
38603 }
38604
38605 gcc_assert (target && REG_P (target));
38606 return target;
38607
38608 case IX86_BUILTIN_BNDNARROW:
38609 {
38610 rtx m1, m1h1, m1h2, lb, ub, t1;
38611
38612 /* Return value and lb. */
38613 arg0 = CALL_EXPR_ARG (exp, 0);
38614 /* Bounds. */
38615 arg1 = CALL_EXPR_ARG (exp, 1);
38616 /* Size. */
38617 arg2 = CALL_EXPR_ARG (exp, 2);
38618
38619 lb = expand_normal (arg0);
38620 op1 = expand_normal (arg1);
38621 op2 = expand_normal (arg2);
38622
38623 /* Size was passed but we need to use (size - 1) as for bndmk. */
38624 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38625 NULL_RTX, 1, OPTAB_DIRECT);
38626
38627 /* Add LB to the size and invert the result to get UB. */
38628 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38629 op2, 1, OPTAB_DIRECT);
38630 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
38631
38632 if (!register_operand (lb, Pmode))
38633 lb = ix86_zero_extend_to_Pmode (lb);
38634 if (!register_operand (ub, Pmode))
38635 ub = ix86_zero_extend_to_Pmode (ub);
38636
38637 /* We need to move bounds to memory before any computations. */
38638 if (MEM_P (op1))
38639 m1 = op1;
38640 else
38641 {
38642 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38643 emit_move_insn (m1, op1);
38644 }
38645
38646 /* Generate mem expression to be used for access to LB and UB. */
38647 m1h1 = adjust_address (m1, Pmode, 0);
38648 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38649
38650 t1 = gen_reg_rtx (Pmode);
38651
38652 /* Compute LB. */
38653 emit_move_insn (t1, m1h1);
38654 ix86_emit_move_max (t1, lb);
38655 emit_move_insn (m1h1, t1);
38656
38657 /* Compute UB. UB is stored in 1's complement form. Therefore
38658 we also use max here. */
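      /* (Taking the max of the complemented values picks the smaller real
	 upper bound, i.e. the narrowed range.)  */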
38659 emit_move_insn (t1, m1h2);
38660 ix86_emit_move_max (t1, ub);
38661 emit_move_insn (m1h2, t1);
38662
38663 op2 = gen_reg_rtx (BNDmode);
38664 emit_move_insn (op2, m1);
38665
38666 return chkp_join_splitted_slot (lb, op2);
38667 }
38668
38669 case IX86_BUILTIN_BNDINT:
38670 {
38671 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38672
38673 if (!target
38674 || GET_MODE (target) != BNDmode
38675 || !register_operand (target, BNDmode))
38676 target = gen_reg_rtx (BNDmode);
38677
38678 arg0 = CALL_EXPR_ARG (exp, 0);
38679 arg1 = CALL_EXPR_ARG (exp, 1);
38680
38681 op0 = expand_normal (arg0);
38682 op1 = expand_normal (arg1);
38683
38684 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38685 rh1 = adjust_address (res, Pmode, 0);
38686 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38687
38688 /* Put the first bounds into temporaries. */
38689 lb1 = gen_reg_rtx (Pmode);
38690 ub1 = gen_reg_rtx (Pmode);
38691 if (MEM_P (op0))
38692 {
38693 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38694 emit_move_insn (ub1, adjust_address (op0, Pmode,
38695 GET_MODE_SIZE (Pmode)));
38696 }
38697 else
38698 {
38699 emit_move_insn (res, op0);
38700 emit_move_insn (lb1, rh1);
38701 emit_move_insn (ub1, rh2);
38702 }
38703
38704 /* Put the second bounds into temporaries. */
38705 lb2 = gen_reg_rtx (Pmode);
38706 ub2 = gen_reg_rtx (Pmode);
38707 if (MEM_P (op1))
38708 {
38709 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38710 emit_move_insn (ub2, adjust_address (op1, Pmode,
38711 GET_MODE_SIZE (Pmode)));
38712 }
38713 else
38714 {
38715 emit_move_insn (res, op1);
38716 emit_move_insn (lb2, rh1);
38717 emit_move_insn (ub2, rh2);
38718 }
38719
38720 /* Compute LB. */
38721 ix86_emit_move_max (lb1, lb2);
38722 emit_move_insn (rh1, lb1);
38723
38724 /* Compute UB. UB is stored in 1's complement form. Therefore
38725 we also use max here. */
38726 ix86_emit_move_max (ub1, ub2);
38727 emit_move_insn (rh2, ub1);
38728
38729 emit_move_insn (target, res);
38730
38731 return target;
38732 }
38733
38734 case IX86_BUILTIN_SIZEOF:
38735 {
38736 tree name;
38737 rtx symbol;
38738
38739 if (!target
38740 || GET_MODE (target) != Pmode
38741 || !register_operand (target, Pmode))
38742 target = gen_reg_rtx (Pmode);
38743
38744 arg0 = CALL_EXPR_ARG (exp, 0);
38745 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
38746
38747 name = DECL_ASSEMBLER_NAME (arg0);
38748 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
38749
38750 emit_insn (Pmode == SImode
38751 ? gen_move_size_reloc_si (target, symbol)
38752 : gen_move_size_reloc_di (target, symbol));
38753
38754 return target;
38755 }
38756
38757 case IX86_BUILTIN_BNDLOWER:
38758 {
38759 rtx mem, hmem;
38760
38761 if (!target
38762 || GET_MODE (target) != Pmode
38763 || !register_operand (target, Pmode))
38764 target = gen_reg_rtx (Pmode);
38765
38766 arg0 = CALL_EXPR_ARG (exp, 0);
38767 op0 = expand_normal (arg0);
38768
38769 /* We need to move bounds to memory first. */
38770 if (MEM_P (op0))
38771 mem = op0;
38772 else
38773 {
38774 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38775 emit_move_insn (mem, op0);
38776 }
38777
38778 /* Generate mem expression to access LB and load it. */
38779 hmem = adjust_address (mem, Pmode, 0);
38780 emit_move_insn (target, hmem);
38781
38782 return target;
38783 }
38784
38785 case IX86_BUILTIN_BNDUPPER:
38786 {
38787 rtx mem, hmem, res;
38788
38789 if (!target
38790 || GET_MODE (target) != Pmode
38791 || !register_operand (target, Pmode))
38792 target = gen_reg_rtx (Pmode);
38793
38794 arg0 = CALL_EXPR_ARG (exp, 0);
38795 op0 = expand_normal (arg0);
38796
38797 /* We need to move bounds to memory first. */
38798 if (MEM_P (op0))
38799 mem = op0;
38800 else
38801 {
38802 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38803 emit_move_insn (mem, op0);
38804 }
38805
38806 /* Generate mem expression to access UB. */
38807 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
38808
38809 /* We need to invert all bits of UB. */
38810 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
38811
38812 if (res != target)
38813 emit_move_insn (target, res);
38814
38815 return target;
38816 }
38817
38818 case IX86_BUILTIN_MASKMOVQ:
38819 case IX86_BUILTIN_MASKMOVDQU:
38820 icode = (fcode == IX86_BUILTIN_MASKMOVQ
38821 ? CODE_FOR_mmx_maskmovq
38822 : CODE_FOR_sse2_maskmovdqu);
38823 /* Note the arg order is different from the operand order. */
38824 arg1 = CALL_EXPR_ARG (exp, 0);
38825 arg2 = CALL_EXPR_ARG (exp, 1);
38826 arg0 = CALL_EXPR_ARG (exp, 2);
38827 op0 = expand_normal (arg0);
38828 op1 = expand_normal (arg1);
38829 op2 = expand_normal (arg2);
38830 mode0 = insn_data[icode].operand[0].mode;
38831 mode1 = insn_data[icode].operand[1].mode;
38832 mode2 = insn_data[icode].operand[2].mode;
38833
38834 op0 = ix86_zero_extend_to_Pmode (op0);
38835 op0 = gen_rtx_MEM (mode1, op0);
38836
38837 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38838 op0 = copy_to_mode_reg (mode0, op0);
38839 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38840 op1 = copy_to_mode_reg (mode1, op1);
38841 if (!insn_data[icode].operand[2].predicate (op2, mode2))
38842 op2 = copy_to_mode_reg (mode2, op2);
38843 pat = GEN_FCN (icode) (op0, op1, op2);
38844 if (! pat)
38845 return 0;
38846 emit_insn (pat);
38847 return 0;
38848
38849 case IX86_BUILTIN_LDMXCSR:
38850 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
38851 target = assign_386_stack_local (SImode, SLOT_TEMP);
38852 emit_move_insn (target, op0);
38853 emit_insn (gen_sse_ldmxcsr (target));
38854 return 0;
38855
38856 case IX86_BUILTIN_STMXCSR:
38857 target = assign_386_stack_local (SImode, SLOT_TEMP);
38858 emit_insn (gen_sse_stmxcsr (target));
38859 return copy_to_mode_reg (SImode, target);
38860
38861 case IX86_BUILTIN_CLFLUSH:
38862 arg0 = CALL_EXPR_ARG (exp, 0);
38863 op0 = expand_normal (arg0);
38864 icode = CODE_FOR_sse2_clflush;
38865 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38866 op0 = ix86_zero_extend_to_Pmode (op0);
38867
38868 emit_insn (gen_sse2_clflush (op0));
38869 return 0;
38870
38871 case IX86_BUILTIN_CLWB:
38872 arg0 = CALL_EXPR_ARG (exp, 0);
38873 op0 = expand_normal (arg0);
38874 icode = CODE_FOR_clwb;
38875 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38876 op0 = ix86_zero_extend_to_Pmode (op0);
38877
38878 emit_insn (gen_clwb (op0));
38879 return 0;
38880
38881 case IX86_BUILTIN_CLFLUSHOPT:
38882 arg0 = CALL_EXPR_ARG (exp, 0);
38883 op0 = expand_normal (arg0);
38884 icode = CODE_FOR_clflushopt;
38885 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38886 op0 = ix86_zero_extend_to_Pmode (op0);
38887
38888 emit_insn (gen_clflushopt (op0));
38889 return 0;
38890
38891 case IX86_BUILTIN_MONITOR:
38892 arg0 = CALL_EXPR_ARG (exp, 0);
38893 arg1 = CALL_EXPR_ARG (exp, 1);
38894 arg2 = CALL_EXPR_ARG (exp, 2);
38895 op0 = expand_normal (arg0);
38896 op1 = expand_normal (arg1);
38897 op2 = expand_normal (arg2);
38898 if (!REG_P (op0))
38899 op0 = ix86_zero_extend_to_Pmode (op0);
38900 if (!REG_P (op1))
38901 op1 = copy_to_mode_reg (SImode, op1);
38902 if (!REG_P (op2))
38903 op2 = copy_to_mode_reg (SImode, op2);
38904 emit_insn (ix86_gen_monitor (op0, op1, op2));
38905 return 0;
38906
38907 case IX86_BUILTIN_MWAIT:
38908 arg0 = CALL_EXPR_ARG (exp, 0);
38909 arg1 = CALL_EXPR_ARG (exp, 1);
38910 op0 = expand_normal (arg0);
38911 op1 = expand_normal (arg1);
38912 if (!REG_P (op0))
38913 op0 = copy_to_mode_reg (SImode, op0);
38914 if (!REG_P (op1))
38915 op1 = copy_to_mode_reg (SImode, op1);
38916 emit_insn (gen_sse3_mwait (op0, op1));
38917 return 0;
38918
38919 case IX86_BUILTIN_VEC_INIT_V2SI:
38920 case IX86_BUILTIN_VEC_INIT_V4HI:
38921 case IX86_BUILTIN_VEC_INIT_V8QI:
38922 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
38923
38924 case IX86_BUILTIN_VEC_EXT_V2DF:
38925 case IX86_BUILTIN_VEC_EXT_V2DI:
38926 case IX86_BUILTIN_VEC_EXT_V4SF:
38927 case IX86_BUILTIN_VEC_EXT_V4SI:
38928 case IX86_BUILTIN_VEC_EXT_V8HI:
38929 case IX86_BUILTIN_VEC_EXT_V2SI:
38930 case IX86_BUILTIN_VEC_EXT_V4HI:
38931 case IX86_BUILTIN_VEC_EXT_V16QI:
38932 return ix86_expand_vec_ext_builtin (exp, target);
38933
38934 case IX86_BUILTIN_VEC_SET_V2DI:
38935 case IX86_BUILTIN_VEC_SET_V4SF:
38936 case IX86_BUILTIN_VEC_SET_V4SI:
38937 case IX86_BUILTIN_VEC_SET_V8HI:
38938 case IX86_BUILTIN_VEC_SET_V4HI:
38939 case IX86_BUILTIN_VEC_SET_V16QI:
38940 return ix86_expand_vec_set_builtin (exp);
38941
38942 case IX86_BUILTIN_INFQ:
38943 case IX86_BUILTIN_HUGE_VALQ:
38944 {
38945 REAL_VALUE_TYPE inf;
38946 rtx tmp;
38947
38948 real_inf (&inf);
38949 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
38950
38951 tmp = validize_mem (force_const_mem (mode, tmp));
38952
38953 if (target == 0)
38954 target = gen_reg_rtx (mode);
38955
38956 emit_move_insn (target, tmp);
38957 return target;
38958 }
38959
38960 case IX86_BUILTIN_RDPMC:
38961 case IX86_BUILTIN_RDTSC:
38962 case IX86_BUILTIN_RDTSCP:
38963
38964 op0 = gen_reg_rtx (DImode);
38965 op1 = gen_reg_rtx (DImode);
38966
38967 if (fcode == IX86_BUILTIN_RDPMC)
38968 {
38969 arg0 = CALL_EXPR_ARG (exp, 0);
38970 op2 = expand_normal (arg0);
38971 if (!register_operand (op2, SImode))
38972 op2 = copy_to_mode_reg (SImode, op2);
38973
38974 insn = (TARGET_64BIT
38975 ? gen_rdpmc_rex64 (op0, op1, op2)
38976 : gen_rdpmc (op0, op2));
38977 emit_insn (insn);
38978 }
38979 else if (fcode == IX86_BUILTIN_RDTSC)
38980 {
38981 insn = (TARGET_64BIT
38982 ? gen_rdtsc_rex64 (op0, op1)
38983 : gen_rdtsc (op0));
38984 emit_insn (insn);
38985 }
38986 else
38987 {
38988 op2 = gen_reg_rtx (SImode);
38989
38990 insn = (TARGET_64BIT
38991 ? gen_rdtscp_rex64 (op0, op1, op2)
38992 : gen_rdtscp (op0, op2));
38993 emit_insn (insn);
38994
38995 arg0 = CALL_EXPR_ARG (exp, 0);
38996 op4 = expand_normal (arg0);
38997 if (!address_operand (op4, VOIDmode))
38998 {
38999 op4 = convert_memory_address (Pmode, op4);
39000 op4 = copy_addr_to_reg (op4);
39001 }
39002 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
39003 }
39004
39005 if (target == 0)
39006 {
39007 /* mode is VOIDmode if __builtin_rd* has been called
39008 without lhs. */
39009 if (mode == VOIDmode)
39010 return target;
39011 target = gen_reg_rtx (mode);
39012 }
39013
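      /* On 64-bit targets the rd* patterns above return the counter as two
	 DImode halves (low in op0, high in op1); combine them into
	 op0 = (op1 << 32) | op0.  */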
39014 if (TARGET_64BIT)
39015 {
39016 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
39017 op1, 1, OPTAB_DIRECT);
39018 op0 = expand_simple_binop (DImode, IOR, op0, op1,
39019 op0, 1, OPTAB_DIRECT);
39020 }
39021
39022 emit_move_insn (target, op0);
39023 return target;
39024
39025 case IX86_BUILTIN_FXSAVE:
39026 case IX86_BUILTIN_FXRSTOR:
39027 case IX86_BUILTIN_FXSAVE64:
39028 case IX86_BUILTIN_FXRSTOR64:
39029 case IX86_BUILTIN_FNSTENV:
39030 case IX86_BUILTIN_FLDENV:
39031 mode0 = BLKmode;
39032 switch (fcode)
39033 {
39034 case IX86_BUILTIN_FXSAVE:
39035 icode = CODE_FOR_fxsave;
39036 break;
39037 case IX86_BUILTIN_FXRSTOR:
39038 icode = CODE_FOR_fxrstor;
39039 break;
39040 case IX86_BUILTIN_FXSAVE64:
39041 icode = CODE_FOR_fxsave64;
39042 break;
39043 case IX86_BUILTIN_FXRSTOR64:
39044 icode = CODE_FOR_fxrstor64;
39045 break;
39046 case IX86_BUILTIN_FNSTENV:
39047 icode = CODE_FOR_fnstenv;
39048 break;
39049 case IX86_BUILTIN_FLDENV:
39050 icode = CODE_FOR_fldenv;
39051 break;
39052 default:
39053 gcc_unreachable ();
39054 }
39055
39056 arg0 = CALL_EXPR_ARG (exp, 0);
39057 op0 = expand_normal (arg0);
39058
39059 if (!address_operand (op0, VOIDmode))
39060 {
39061 op0 = convert_memory_address (Pmode, op0);
39062 op0 = copy_addr_to_reg (op0);
39063 }
39064 op0 = gen_rtx_MEM (mode0, op0);
39065
39066 pat = GEN_FCN (icode) (op0);
39067 if (pat)
39068 emit_insn (pat);
39069 return 0;
39070
39071 case IX86_BUILTIN_XSAVE:
39072 case IX86_BUILTIN_XRSTOR:
39073 case IX86_BUILTIN_XSAVE64:
39074 case IX86_BUILTIN_XRSTOR64:
39075 case IX86_BUILTIN_XSAVEOPT:
39076 case IX86_BUILTIN_XSAVEOPT64:
39077 case IX86_BUILTIN_XSAVES:
39078 case IX86_BUILTIN_XRSTORS:
39079 case IX86_BUILTIN_XSAVES64:
39080 case IX86_BUILTIN_XRSTORS64:
39081 case IX86_BUILTIN_XSAVEC:
39082 case IX86_BUILTIN_XSAVEC64:
39083 arg0 = CALL_EXPR_ARG (exp, 0);
39084 arg1 = CALL_EXPR_ARG (exp, 1);
39085 op0 = expand_normal (arg0);
39086 op1 = expand_normal (arg1);
39087
39088 if (!address_operand (op0, VOIDmode))
39089 {
39090 op0 = convert_memory_address (Pmode, op0);
39091 op0 = copy_addr_to_reg (op0);
39092 }
39093 op0 = gen_rtx_MEM (BLKmode, op0);
39094
39095 op1 = force_reg (DImode, op1);
39096
39097 if (TARGET_64BIT)
39098 {
39099 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
39100 NULL, 1, OPTAB_DIRECT);
39101 switch (fcode)
39102 {
39103 case IX86_BUILTIN_XSAVE:
39104 icode = CODE_FOR_xsave_rex64;
39105 break;
39106 case IX86_BUILTIN_XRSTOR:
39107 icode = CODE_FOR_xrstor_rex64;
39108 break;
39109 case IX86_BUILTIN_XSAVE64:
39110 icode = CODE_FOR_xsave64;
39111 break;
39112 case IX86_BUILTIN_XRSTOR64:
39113 icode = CODE_FOR_xrstor64;
39114 break;
39115 case IX86_BUILTIN_XSAVEOPT:
39116 icode = CODE_FOR_xsaveopt_rex64;
39117 break;
39118 case IX86_BUILTIN_XSAVEOPT64:
39119 icode = CODE_FOR_xsaveopt64;
39120 break;
39121 case IX86_BUILTIN_XSAVES:
39122 icode = CODE_FOR_xsaves_rex64;
39123 break;
39124 case IX86_BUILTIN_XRSTORS:
39125 icode = CODE_FOR_xrstors_rex64;
39126 break;
39127 case IX86_BUILTIN_XSAVES64:
39128 icode = CODE_FOR_xsaves64;
39129 break;
39130 case IX86_BUILTIN_XRSTORS64:
39131 icode = CODE_FOR_xrstors64;
39132 break;
39133 case IX86_BUILTIN_XSAVEC:
39134 icode = CODE_FOR_xsavec_rex64;
39135 break;
39136 case IX86_BUILTIN_XSAVEC64:
39137 icode = CODE_FOR_xsavec64;
39138 break;
39139 default:
39140 gcc_unreachable ();
39141 }
39142
39143 op2 = gen_lowpart (SImode, op2);
39144 op1 = gen_lowpart (SImode, op1);
39145 pat = GEN_FCN (icode) (op0, op1, op2);
39146 }
39147 else
39148 {
39149 switch (fcode)
39150 {
39151 case IX86_BUILTIN_XSAVE:
39152 icode = CODE_FOR_xsave;
39153 break;
39154 case IX86_BUILTIN_XRSTOR:
39155 icode = CODE_FOR_xrstor;
39156 break;
39157 case IX86_BUILTIN_XSAVEOPT:
39158 icode = CODE_FOR_xsaveopt;
39159 break;
39160 case IX86_BUILTIN_XSAVES:
39161 icode = CODE_FOR_xsaves;
39162 break;
39163 case IX86_BUILTIN_XRSTORS:
39164 icode = CODE_FOR_xrstors;
39165 break;
39166 case IX86_BUILTIN_XSAVEC:
39167 icode = CODE_FOR_xsavec;
39168 break;
39169 default:
39170 gcc_unreachable ();
39171 }
39172 pat = GEN_FCN (icode) (op0, op1);
39173 }
39174
39175 if (pat)
39176 emit_insn (pat);
39177 return 0;
39178
39179 case IX86_BUILTIN_LLWPCB:
39180 arg0 = CALL_EXPR_ARG (exp, 0);
39181 op0 = expand_normal (arg0);
39182 icode = CODE_FOR_lwp_llwpcb;
39183 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39184 op0 = ix86_zero_extend_to_Pmode (op0);
39185 emit_insn (gen_lwp_llwpcb (op0));
39186 return 0;
39187
39188 case IX86_BUILTIN_SLWPCB:
39189 icode = CODE_FOR_lwp_slwpcb;
39190 if (!target
39191 || !insn_data[icode].operand[0].predicate (target, Pmode))
39192 target = gen_reg_rtx (Pmode);
39193 emit_insn (gen_lwp_slwpcb (target));
39194 return target;
39195
39196 case IX86_BUILTIN_BEXTRI32:
39197 case IX86_BUILTIN_BEXTRI64:
39198 arg0 = CALL_EXPR_ARG (exp, 0);
39199 arg1 = CALL_EXPR_ARG (exp, 1);
39200 op0 = expand_normal (arg0);
39201 op1 = expand_normal (arg1);
39202 icode = (fcode == IX86_BUILTIN_BEXTRI32
39203 ? CODE_FOR_tbm_bextri_si
39204 : CODE_FOR_tbm_bextri_di);
39205 if (!CONST_INT_P (op1))
39206 {
39207 error ("last argument must be an immediate");
39208 return const0_rtx;
39209 }
39210 else
39211 {
39212 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39213 unsigned char lsb_index = INTVAL (op1) & 0xFF;
39214 op1 = GEN_INT (length);
39215 op2 = GEN_INT (lsb_index);
39216 pat = GEN_FCN (icode) (target, op0, op1, op2);
39217 if (pat)
39218 emit_insn (pat);
39219 return target;
39220 }
39221
39222 case IX86_BUILTIN_RDRAND16_STEP:
39223 icode = CODE_FOR_rdrandhi_1;
39224 mode0 = HImode;
39225 goto rdrand_step;
39226
39227 case IX86_BUILTIN_RDRAND32_STEP:
39228 icode = CODE_FOR_rdrandsi_1;
39229 mode0 = SImode;
39230 goto rdrand_step;
39231
39232 case IX86_BUILTIN_RDRAND64_STEP:
39233 icode = CODE_FOR_rdranddi_1;
39234 mode0 = DImode;
39235
39236 rdrand_step:
39237 op0 = gen_reg_rtx (mode0);
39238 emit_insn (GEN_FCN (icode) (op0));
39239
39240 arg0 = CALL_EXPR_ARG (exp, 0);
39241 op1 = expand_normal (arg0);
39242 if (!address_operand (op1, VOIDmode))
39243 {
39244 op1 = convert_memory_address (Pmode, op1);
39245 op1 = copy_addr_to_reg (op1);
39246 }
39247 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39248
39249 op1 = gen_reg_rtx (SImode);
39250 emit_move_insn (op1, CONST1_RTX (SImode));
39251
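      /* Sketch of the value built below: rdrand sets CF on success and is
	 specified to clear the destination on failure, so
	   result = CF ? 1 : value    (i.e. 0 on failure)
	 is formed with a conditional move on the carry flag.  */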
39252 /* Emit SImode conditional move. */
39253 if (mode0 == HImode)
39254 {
39255 op2 = gen_reg_rtx (SImode);
39256 emit_insn (gen_zero_extendhisi2 (op2, op0));
39257 }
39258 else if (mode0 == SImode)
39259 op2 = op0;
39260 else
39261 op2 = gen_rtx_SUBREG (SImode, op0, 0);
39262
39263 if (target == 0
39264 || !register_operand (target, SImode))
39265 target = gen_reg_rtx (SImode);
39266
39267 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39268 const0_rtx);
39269 emit_insn (gen_rtx_SET (target,
39270 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39271 return target;
39272
39273 case IX86_BUILTIN_RDSEED16_STEP:
39274 icode = CODE_FOR_rdseedhi_1;
39275 mode0 = HImode;
39276 goto rdseed_step;
39277
39278 case IX86_BUILTIN_RDSEED32_STEP:
39279 icode = CODE_FOR_rdseedsi_1;
39280 mode0 = SImode;
39281 goto rdseed_step;
39282
39283 case IX86_BUILTIN_RDSEED64_STEP:
39284 icode = CODE_FOR_rdseeddi_1;
39285 mode0 = DImode;
39286
39287 rdseed_step:
39288 op0 = gen_reg_rtx (mode0);
39289 emit_insn (GEN_FCN (icode) (op0));
39290
39291 arg0 = CALL_EXPR_ARG (exp, 0);
39292 op1 = expand_normal (arg0);
39293 if (!address_operand (op1, VOIDmode))
39294 {
39295 op1 = convert_memory_address (Pmode, op1);
39296 op1 = copy_addr_to_reg (op1);
39297 }
39298 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39299
39300 op2 = gen_reg_rtx (QImode);
39301
39302 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39303 const0_rtx);
39304 emit_insn (gen_rtx_SET (op2, pat));
39305
39306 if (target == 0
39307 || !register_operand (target, SImode))
39308 target = gen_reg_rtx (SImode);
39309
39310 emit_insn (gen_zero_extendqisi2 (target, op2));
39311 return target;
39312
39313 case IX86_BUILTIN_SBB32:
39314 icode = CODE_FOR_subsi3_carry;
39315 mode0 = SImode;
39316 goto addcarryx;
39317
39318 case IX86_BUILTIN_SBB64:
39319 icode = CODE_FOR_subdi3_carry;
39320 mode0 = DImode;
39321 goto addcarryx;
39322
39323 case IX86_BUILTIN_ADDCARRYX32:
39324 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
39325 mode0 = SImode;
39326 goto addcarryx;
39327
39328 case IX86_BUILTIN_ADDCARRYX64:
39329 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
39330 mode0 = DImode;
39331
39332 addcarryx:
39333 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39334 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39335 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39336 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39337
39338 op0 = gen_reg_rtx (QImode);
39339
39340 /* Generate CF from input operand. */
39341 op1 = expand_normal (arg0);
39342 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39343 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
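      /* Adding 0xff (constm1_rtx in QImode) to c_in sets CF exactly when
	 c_in is nonzero, which reconstructs the incoming carry flag.  */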
39344
39345 /* Generate the add- or subtract-with-carry instruction to combine X, Y and CF. */
39346 op2 = expand_normal (arg1);
39347 op3 = expand_normal (arg2);
39348
39349 if (!REG_P (op2))
39350 op2 = copy_to_mode_reg (mode0, op2);
39351 if (!REG_P (op3))
39352 op3 = copy_to_mode_reg (mode0, op3);
39353
39354 op0 = gen_reg_rtx (mode0);
39355
39356 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
39357 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
39358 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
39359
39360 /* Store the result. */
39361 op4 = expand_normal (arg3);
39362 if (!address_operand (op4, VOIDmode))
39363 {
39364 op4 = convert_memory_address (Pmode, op4);
39365 op4 = copy_addr_to_reg (op4);
39366 }
39367 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39368
39369 /* Return current CF value. */
39370 if (target == 0)
39371 target = gen_reg_rtx (QImode);
39372
39373 PUT_MODE (pat, QImode);
39374 emit_insn (gen_rtx_SET (target, pat));
39375 return target;
39376
39377 case IX86_BUILTIN_READ_FLAGS:
39378 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39379
39380 if (optimize
39381 || target == NULL_RTX
39382 || !nonimmediate_operand (target, word_mode)
39383 || GET_MODE (target) != word_mode)
39384 target = gen_reg_rtx (word_mode);
39385
39386 emit_insn (gen_pop (target));
39387 return target;
39388
39389 case IX86_BUILTIN_WRITE_FLAGS:
39390
39391 arg0 = CALL_EXPR_ARG (exp, 0);
39392 op0 = expand_normal (arg0);
39393 if (!general_no_elim_operand (op0, word_mode))
39394 op0 = copy_to_mode_reg (word_mode, op0);
39395
39396 emit_insn (gen_push (op0));
39397 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39398 return 0;
39399
39400 case IX86_BUILTIN_KORTESTC16:
39401 icode = CODE_FOR_kortestchi;
39402 mode0 = HImode;
39403 mode1 = CCCmode;
39404 goto kortest;
39405
39406 case IX86_BUILTIN_KORTESTZ16:
39407 icode = CODE_FOR_kortestzhi;
39408 mode0 = HImode;
39409 mode1 = CCZmode;
39410
39411 kortest:
39412 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39413 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39414 op0 = expand_normal (arg0);
39415 op1 = expand_normal (arg1);
39416
39417 op0 = copy_to_reg (op0);
39418 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39419 op1 = copy_to_reg (op1);
39420 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39421
39422 target = gen_reg_rtx (QImode);
39423 emit_insn (gen_rtx_SET (target, const0_rtx));
39424
39425 /* Emit kortest. */
39426 emit_insn (GEN_FCN (icode) (op0, op1));
39427 /* And use setcc to return result from flags. */
39428 ix86_expand_setcc (target, EQ,
39429 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39430 return target;
39431
39432 case IX86_BUILTIN_GATHERSIV2DF:
39433 icode = CODE_FOR_avx2_gathersiv2df;
39434 goto gather_gen;
39435 case IX86_BUILTIN_GATHERSIV4DF:
39436 icode = CODE_FOR_avx2_gathersiv4df;
39437 goto gather_gen;
39438 case IX86_BUILTIN_GATHERDIV2DF:
39439 icode = CODE_FOR_avx2_gatherdiv2df;
39440 goto gather_gen;
39441 case IX86_BUILTIN_GATHERDIV4DF:
39442 icode = CODE_FOR_avx2_gatherdiv4df;
39443 goto gather_gen;
39444 case IX86_BUILTIN_GATHERSIV4SF:
39445 icode = CODE_FOR_avx2_gathersiv4sf;
39446 goto gather_gen;
39447 case IX86_BUILTIN_GATHERSIV8SF:
39448 icode = CODE_FOR_avx2_gathersiv8sf;
39449 goto gather_gen;
39450 case IX86_BUILTIN_GATHERDIV4SF:
39451 icode = CODE_FOR_avx2_gatherdiv4sf;
39452 goto gather_gen;
39453 case IX86_BUILTIN_GATHERDIV8SF:
39454 icode = CODE_FOR_avx2_gatherdiv8sf;
39455 goto gather_gen;
39456 case IX86_BUILTIN_GATHERSIV2DI:
39457 icode = CODE_FOR_avx2_gathersiv2di;
39458 goto gather_gen;
39459 case IX86_BUILTIN_GATHERSIV4DI:
39460 icode = CODE_FOR_avx2_gathersiv4di;
39461 goto gather_gen;
39462 case IX86_BUILTIN_GATHERDIV2DI:
39463 icode = CODE_FOR_avx2_gatherdiv2di;
39464 goto gather_gen;
39465 case IX86_BUILTIN_GATHERDIV4DI:
39466 icode = CODE_FOR_avx2_gatherdiv4di;
39467 goto gather_gen;
39468 case IX86_BUILTIN_GATHERSIV4SI:
39469 icode = CODE_FOR_avx2_gathersiv4si;
39470 goto gather_gen;
39471 case IX86_BUILTIN_GATHERSIV8SI:
39472 icode = CODE_FOR_avx2_gathersiv8si;
39473 goto gather_gen;
39474 case IX86_BUILTIN_GATHERDIV4SI:
39475 icode = CODE_FOR_avx2_gatherdiv4si;
39476 goto gather_gen;
39477 case IX86_BUILTIN_GATHERDIV8SI:
39478 icode = CODE_FOR_avx2_gatherdiv8si;
39479 goto gather_gen;
39480 case IX86_BUILTIN_GATHERALTSIV4DF:
39481 icode = CODE_FOR_avx2_gathersiv4df;
39482 goto gather_gen;
39483 case IX86_BUILTIN_GATHERALTDIV8SF:
39484 icode = CODE_FOR_avx2_gatherdiv8sf;
39485 goto gather_gen;
39486 case IX86_BUILTIN_GATHERALTSIV4DI:
39487 icode = CODE_FOR_avx2_gathersiv4di;
39488 goto gather_gen;
39489 case IX86_BUILTIN_GATHERALTDIV8SI:
39490 icode = CODE_FOR_avx2_gatherdiv8si;
39491 goto gather_gen;
39492 case IX86_BUILTIN_GATHER3SIV16SF:
39493 icode = CODE_FOR_avx512f_gathersiv16sf;
39494 goto gather_gen;
39495 case IX86_BUILTIN_GATHER3SIV8DF:
39496 icode = CODE_FOR_avx512f_gathersiv8df;
39497 goto gather_gen;
39498 case IX86_BUILTIN_GATHER3DIV16SF:
39499 icode = CODE_FOR_avx512f_gatherdiv16sf;
39500 goto gather_gen;
39501 case IX86_BUILTIN_GATHER3DIV8DF:
39502 icode = CODE_FOR_avx512f_gatherdiv8df;
39503 goto gather_gen;
39504 case IX86_BUILTIN_GATHER3SIV16SI:
39505 icode = CODE_FOR_avx512f_gathersiv16si;
39506 goto gather_gen;
39507 case IX86_BUILTIN_GATHER3SIV8DI:
39508 icode = CODE_FOR_avx512f_gathersiv8di;
39509 goto gather_gen;
39510 case IX86_BUILTIN_GATHER3DIV16SI:
39511 icode = CODE_FOR_avx512f_gatherdiv16si;
39512 goto gather_gen;
39513 case IX86_BUILTIN_GATHER3DIV8DI:
39514 icode = CODE_FOR_avx512f_gatherdiv8di;
39515 goto gather_gen;
39516 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39517 icode = CODE_FOR_avx512f_gathersiv8df;
39518 goto gather_gen;
39519 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39520 icode = CODE_FOR_avx512f_gatherdiv16sf;
39521 goto gather_gen;
39522 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39523 icode = CODE_FOR_avx512f_gathersiv8di;
39524 goto gather_gen;
39525 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39526 icode = CODE_FOR_avx512f_gatherdiv16si;
39527 goto gather_gen;
39528 case IX86_BUILTIN_GATHER3SIV2DF:
39529 icode = CODE_FOR_avx512vl_gathersiv2df;
39530 goto gather_gen;
39531 case IX86_BUILTIN_GATHER3SIV4DF:
39532 icode = CODE_FOR_avx512vl_gathersiv4df;
39533 goto gather_gen;
39534 case IX86_BUILTIN_GATHER3DIV2DF:
39535 icode = CODE_FOR_avx512vl_gatherdiv2df;
39536 goto gather_gen;
39537 case IX86_BUILTIN_GATHER3DIV4DF:
39538 icode = CODE_FOR_avx512vl_gatherdiv4df;
39539 goto gather_gen;
39540 case IX86_BUILTIN_GATHER3SIV4SF:
39541 icode = CODE_FOR_avx512vl_gathersiv4sf;
39542 goto gather_gen;
39543 case IX86_BUILTIN_GATHER3SIV8SF:
39544 icode = CODE_FOR_avx512vl_gathersiv8sf;
39545 goto gather_gen;
39546 case IX86_BUILTIN_GATHER3DIV4SF:
39547 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39548 goto gather_gen;
39549 case IX86_BUILTIN_GATHER3DIV8SF:
39550 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39551 goto gather_gen;
39552 case IX86_BUILTIN_GATHER3SIV2DI:
39553 icode = CODE_FOR_avx512vl_gathersiv2di;
39554 goto gather_gen;
39555 case IX86_BUILTIN_GATHER3SIV4DI:
39556 icode = CODE_FOR_avx512vl_gathersiv4di;
39557 goto gather_gen;
39558 case IX86_BUILTIN_GATHER3DIV2DI:
39559 icode = CODE_FOR_avx512vl_gatherdiv2di;
39560 goto gather_gen;
39561 case IX86_BUILTIN_GATHER3DIV4DI:
39562 icode = CODE_FOR_avx512vl_gatherdiv4di;
39563 goto gather_gen;
39564 case IX86_BUILTIN_GATHER3SIV4SI:
39565 icode = CODE_FOR_avx512vl_gathersiv4si;
39566 goto gather_gen;
39567 case IX86_BUILTIN_GATHER3SIV8SI:
39568 icode = CODE_FOR_avx512vl_gathersiv8si;
39569 goto gather_gen;
39570 case IX86_BUILTIN_GATHER3DIV4SI:
39571 icode = CODE_FOR_avx512vl_gatherdiv4si;
39572 goto gather_gen;
39573 case IX86_BUILTIN_GATHER3DIV8SI:
39574 icode = CODE_FOR_avx512vl_gatherdiv8si;
39575 goto gather_gen;
39576 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39577 icode = CODE_FOR_avx512vl_gathersiv4df;
39578 goto gather_gen;
39579 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39580 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39581 goto gather_gen;
39582 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39583 icode = CODE_FOR_avx512vl_gathersiv4di;
39584 goto gather_gen;
39585 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39586 icode = CODE_FOR_avx512vl_gatherdiv8si;
39587 goto gather_gen;
39588 case IX86_BUILTIN_SCATTERSIV16SF:
39589 icode = CODE_FOR_avx512f_scattersiv16sf;
39590 goto scatter_gen;
39591 case IX86_BUILTIN_SCATTERSIV8DF:
39592 icode = CODE_FOR_avx512f_scattersiv8df;
39593 goto scatter_gen;
39594 case IX86_BUILTIN_SCATTERDIV16SF:
39595 icode = CODE_FOR_avx512f_scatterdiv16sf;
39596 goto scatter_gen;
39597 case IX86_BUILTIN_SCATTERDIV8DF:
39598 icode = CODE_FOR_avx512f_scatterdiv8df;
39599 goto scatter_gen;
39600 case IX86_BUILTIN_SCATTERSIV16SI:
39601 icode = CODE_FOR_avx512f_scattersiv16si;
39602 goto scatter_gen;
39603 case IX86_BUILTIN_SCATTERSIV8DI:
39604 icode = CODE_FOR_avx512f_scattersiv8di;
39605 goto scatter_gen;
39606 case IX86_BUILTIN_SCATTERDIV16SI:
39607 icode = CODE_FOR_avx512f_scatterdiv16si;
39608 goto scatter_gen;
39609 case IX86_BUILTIN_SCATTERDIV8DI:
39610 icode = CODE_FOR_avx512f_scatterdiv8di;
39611 goto scatter_gen;
39612 case IX86_BUILTIN_SCATTERSIV8SF:
39613 icode = CODE_FOR_avx512vl_scattersiv8sf;
39614 goto scatter_gen;
39615 case IX86_BUILTIN_SCATTERSIV4SF:
39616 icode = CODE_FOR_avx512vl_scattersiv4sf;
39617 goto scatter_gen;
39618 case IX86_BUILTIN_SCATTERSIV4DF:
39619 icode = CODE_FOR_avx512vl_scattersiv4df;
39620 goto scatter_gen;
39621 case IX86_BUILTIN_SCATTERSIV2DF:
39622 icode = CODE_FOR_avx512vl_scattersiv2df;
39623 goto scatter_gen;
39624 case IX86_BUILTIN_SCATTERDIV8SF:
39625 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39626 goto scatter_gen;
39627 case IX86_BUILTIN_SCATTERDIV4SF:
39628 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39629 goto scatter_gen;
39630 case IX86_BUILTIN_SCATTERDIV4DF:
39631 icode = CODE_FOR_avx512vl_scatterdiv4df;
39632 goto scatter_gen;
39633 case IX86_BUILTIN_SCATTERDIV2DF:
39634 icode = CODE_FOR_avx512vl_scatterdiv2df;
39635 goto scatter_gen;
39636 case IX86_BUILTIN_SCATTERSIV8SI:
39637 icode = CODE_FOR_avx512vl_scattersiv8si;
39638 goto scatter_gen;
39639 case IX86_BUILTIN_SCATTERSIV4SI:
39640 icode = CODE_FOR_avx512vl_scattersiv4si;
39641 goto scatter_gen;
39642 case IX86_BUILTIN_SCATTERSIV4DI:
39643 icode = CODE_FOR_avx512vl_scattersiv4di;
39644 goto scatter_gen;
39645 case IX86_BUILTIN_SCATTERSIV2DI:
39646 icode = CODE_FOR_avx512vl_scattersiv2di;
39647 goto scatter_gen;
39648 case IX86_BUILTIN_SCATTERDIV8SI:
39649 icode = CODE_FOR_avx512vl_scatterdiv8si;
39650 goto scatter_gen;
39651 case IX86_BUILTIN_SCATTERDIV4SI:
39652 icode = CODE_FOR_avx512vl_scatterdiv4si;
39653 goto scatter_gen;
39654 case IX86_BUILTIN_SCATTERDIV4DI:
39655 icode = CODE_FOR_avx512vl_scatterdiv4di;
39656 goto scatter_gen;
39657 case IX86_BUILTIN_SCATTERDIV2DI:
39658 icode = CODE_FOR_avx512vl_scatterdiv2di;
39659 goto scatter_gen;
39660 case IX86_BUILTIN_GATHERPFDPD:
39661 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39662 goto vec_prefetch_gen;
39663 case IX86_BUILTIN_GATHERPFDPS:
39664 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39665 goto vec_prefetch_gen;
39666 case IX86_BUILTIN_GATHERPFQPD:
39667 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39668 goto vec_prefetch_gen;
39669 case IX86_BUILTIN_GATHERPFQPS:
39670 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39671 goto vec_prefetch_gen;
39672 case IX86_BUILTIN_SCATTERPFDPD:
39673 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39674 goto vec_prefetch_gen;
39675 case IX86_BUILTIN_SCATTERPFDPS:
39676 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39677 goto vec_prefetch_gen;
39678 case IX86_BUILTIN_SCATTERPFQPD:
39679 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39680 goto vec_prefetch_gen;
39681 case IX86_BUILTIN_SCATTERPFQPS:
39682 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39683 goto vec_prefetch_gen;
39684
39685 gather_gen:
39686 rtx half;
39687 rtx (*gen) (rtx, rtx);
39688
39689 arg0 = CALL_EXPR_ARG (exp, 0);
39690 arg1 = CALL_EXPR_ARG (exp, 1);
39691 arg2 = CALL_EXPR_ARG (exp, 2);
39692 arg3 = CALL_EXPR_ARG (exp, 3);
39693 arg4 = CALL_EXPR_ARG (exp, 4);
39694 op0 = expand_normal (arg0);
39695 op1 = expand_normal (arg1);
39696 op2 = expand_normal (arg2);
39697 op3 = expand_normal (arg3);
39698 op4 = expand_normal (arg4);
39699 /* Note the arg order is different from the operand order. */
39700 mode0 = insn_data[icode].operand[1].mode;
39701 mode2 = insn_data[icode].operand[3].mode;
39702 mode3 = insn_data[icode].operand[4].mode;
39703 mode4 = insn_data[icode].operand[5].mode;
39704
39705 if (target == NULL_RTX
39706 || GET_MODE (target) != insn_data[icode].operand[0].mode
39707 || !insn_data[icode].operand[0].predicate (target,
39708 GET_MODE (target)))
39709 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
39710 else
39711 subtarget = target;
39712
39713 switch (fcode)
39714 {
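	/* For the *ALT* gather variants the vector of indices (or the
	   source/mask operand) has twice as many elements as the
	   instruction consumes, so only its low half is extracted here.  */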
39715 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39716 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39717 half = gen_reg_rtx (V8SImode);
39718 if (!nonimmediate_operand (op2, V16SImode))
39719 op2 = copy_to_mode_reg (V16SImode, op2);
39720 emit_insn (gen_vec_extract_lo_v16si (half, op2));
39721 op2 = half;
39722 break;
39723 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39724 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39725 case IX86_BUILTIN_GATHERALTSIV4DF:
39726 case IX86_BUILTIN_GATHERALTSIV4DI:
39727 half = gen_reg_rtx (V4SImode);
39728 if (!nonimmediate_operand (op2, V8SImode))
39729 op2 = copy_to_mode_reg (V8SImode, op2);
39730 emit_insn (gen_vec_extract_lo_v8si (half, op2));
39731 op2 = half;
39732 break;
39733 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39734 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39735 half = gen_reg_rtx (mode0);
39736 if (mode0 == V8SFmode)
39737 gen = gen_vec_extract_lo_v16sf;
39738 else
39739 gen = gen_vec_extract_lo_v16si;
39740 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39741 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39742 emit_insn (gen (half, op0));
39743 op0 = half;
39744 if (GET_MODE (op3) != VOIDmode)
39745 {
39746 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39747 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39748 emit_insn (gen (half, op3));
39749 op3 = half;
39750 }
39751 break;
39752 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39753 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39754 case IX86_BUILTIN_GATHERALTDIV8SF:
39755 case IX86_BUILTIN_GATHERALTDIV8SI:
39756 half = gen_reg_rtx (mode0);
39757 if (mode0 == V4SFmode)
39758 gen = gen_vec_extract_lo_v8sf;
39759 else
39760 gen = gen_vec_extract_lo_v8si;
39761 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39762 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39763 emit_insn (gen (half, op0));
39764 op0 = half;
39765 if (GET_MODE (op3) != VOIDmode)
39766 {
39767 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39768 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39769 emit_insn (gen (half, op3));
39770 op3 = half;
39771 }
39772 break;
39773 default:
39774 break;
39775 }
39776
39777 /* Force the memory operand to be addressed through just a base
39778 register here. We don't want to do this for memory operands of
39779 other builtin functions. */
39780 op1 = ix86_zero_extend_to_Pmode (op1);
39781
39782 if (!insn_data[icode].operand[1].predicate (op0, mode0))
39783 op0 = copy_to_mode_reg (mode0, op0);
39784 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
39785 op1 = copy_to_mode_reg (Pmode, op1);
39786 if (!insn_data[icode].operand[3].predicate (op2, mode2))
39787 op2 = copy_to_mode_reg (mode2, op2);
39788
39789 op3 = fixup_modeless_constant (op3, mode3);
39790
39791 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
39792 {
39793 if (!insn_data[icode].operand[4].predicate (op3, mode3))
39794 op3 = copy_to_mode_reg (mode3, op3);
39795 }
39796 else
39797 {
39798 op3 = copy_to_reg (op3);
39799 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
39800 }
39801 if (!insn_data[icode].operand[5].predicate (op4, mode4))
39802 {
39803 error ("the last argument must be scale 1, 2, 4, 8");
39804 return const0_rtx;
39805 }
39806
39807 /* Optimize. If mask is known to have all high bits set,
39808 replace op0 with pc_rtx to signal that the instruction
39809 overwrites the whole destination and doesn't use its
39810 previous contents. */
39811 if (optimize)
39812 {
39813 if (TREE_CODE (arg3) == INTEGER_CST)
39814 {
39815 if (integer_all_onesp (arg3))
39816 op0 = pc_rtx;
39817 }
39818 else if (TREE_CODE (arg3) == VECTOR_CST)
39819 {
39820 unsigned int negative = 0;
39821 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
39822 {
39823 tree cst = VECTOR_CST_ELT (arg3, i);
39824 if (TREE_CODE (cst) == INTEGER_CST
39825 && tree_int_cst_sign_bit (cst))
39826 negative++;
39827 else if (TREE_CODE (cst) == REAL_CST
39828 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
39829 negative++;
39830 }
39831 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
39832 op0 = pc_rtx;
39833 }
39834 else if (TREE_CODE (arg3) == SSA_NAME
39835 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
39836 {
39837 /* Also recognize when the mask is like:
39838 __v2df src = _mm_setzero_pd ();
39839 __v2df mask = _mm_cmpeq_pd (src, src);
39840 or
39841 __v8sf src = _mm256_setzero_ps ();
39842 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
39843 as that is a cheaper way to load all ones into
39844 a register than having to load a constant from
39845 memory. */
39846 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
39847 if (is_gimple_call (def_stmt))
39848 {
39849 tree fndecl = gimple_call_fndecl (def_stmt);
39850 if (fndecl
39851 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
39852 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
39853 {
39854 case IX86_BUILTIN_CMPPD:
39855 case IX86_BUILTIN_CMPPS:
39856 case IX86_BUILTIN_CMPPD256:
39857 case IX86_BUILTIN_CMPPS256:
39858 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
39859 break;
39860 /* FALLTHRU */
39861 case IX86_BUILTIN_CMPEQPD:
39862 case IX86_BUILTIN_CMPEQPS:
39863 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
39864 && initializer_zerop (gimple_call_arg (def_stmt,
39865 1)))
39866 op0 = pc_rtx;
39867 break;
39868 default:
39869 break;
39870 }
39871 }
39872 }
39873 }
39874
39875 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
39876 if (! pat)
39877 return const0_rtx;
39878 emit_insn (pat);
39879
39880 switch (fcode)
39881 {
39882 case IX86_BUILTIN_GATHER3DIV16SF:
39883 if (target == NULL_RTX)
39884 target = gen_reg_rtx (V8SFmode);
39885 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
39886 break;
39887 case IX86_BUILTIN_GATHER3DIV16SI:
39888 if (target == NULL_RTX)
39889 target = gen_reg_rtx (V8SImode);
39890 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
39891 break;
39892 case IX86_BUILTIN_GATHER3DIV8SF:
39893 case IX86_BUILTIN_GATHERDIV8SF:
39894 if (target == NULL_RTX)
39895 target = gen_reg_rtx (V4SFmode);
39896 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
39897 break;
39898 case IX86_BUILTIN_GATHER3DIV8SI:
39899 case IX86_BUILTIN_GATHERDIV8SI:
39900 if (target == NULL_RTX)
39901 target = gen_reg_rtx (V4SImode);
39902 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
39903 break;
39904 default:
39905 target = subtarget;
39906 break;
39907 }
39908 return target;
39909
39910 scatter_gen:
39911 arg0 = CALL_EXPR_ARG (exp, 0);
39912 arg1 = CALL_EXPR_ARG (exp, 1);
39913 arg2 = CALL_EXPR_ARG (exp, 2);
39914 arg3 = CALL_EXPR_ARG (exp, 3);
39915 arg4 = CALL_EXPR_ARG (exp, 4);
39916 op0 = expand_normal (arg0);
39917 op1 = expand_normal (arg1);
39918 op2 = expand_normal (arg2);
39919 op3 = expand_normal (arg3);
39920 op4 = expand_normal (arg4);
39921 mode1 = insn_data[icode].operand[1].mode;
39922 mode2 = insn_data[icode].operand[2].mode;
39923 mode3 = insn_data[icode].operand[3].mode;
39924 mode4 = insn_data[icode].operand[4].mode;
39925
39926 /* Force the memory operand to be addressed through just a base
39927 register here. We don't want to do this for memory operands of
39928 other builtin functions. */
39929 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
39930
39931 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39932 op0 = copy_to_mode_reg (Pmode, op0);
39933
39934 op1 = fixup_modeless_constant (op1, mode1);
39935
39936 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
39937 {
39938 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39939 op1 = copy_to_mode_reg (mode1, op1);
39940 }
39941 else
39942 {
39943 op1 = copy_to_reg (op1);
39944 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
39945 }
39946
39947 if (!insn_data[icode].operand[2].predicate (op2, mode2))
39948 op2 = copy_to_mode_reg (mode2, op2);
39949
39950 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39951 op3 = copy_to_mode_reg (mode3, op3);
39952
39953 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39954 {
39955 error ("the last argument must be scale 1, 2, 4, 8");
39956 return const0_rtx;
39957 }
39958
39959 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39960 if (! pat)
39961 return const0_rtx;
39962
39963 emit_insn (pat);
39964 return 0;
39965
39966 vec_prefetch_gen:
39967 arg0 = CALL_EXPR_ARG (exp, 0);
39968 arg1 = CALL_EXPR_ARG (exp, 1);
39969 arg2 = CALL_EXPR_ARG (exp, 2);
39970 arg3 = CALL_EXPR_ARG (exp, 3);
39971 arg4 = CALL_EXPR_ARG (exp, 4);
39972 op0 = expand_normal (arg0);
39973 op1 = expand_normal (arg1);
39974 op2 = expand_normal (arg2);
39975 op3 = expand_normal (arg3);
39976 op4 = expand_normal (arg4);
39977 mode0 = insn_data[icode].operand[0].mode;
39978 mode1 = insn_data[icode].operand[1].mode;
39979 mode3 = insn_data[icode].operand[3].mode;
39980 mode4 = insn_data[icode].operand[4].mode;
39981
39982 op0 = fixup_modeless_constant (op0, mode0);
39983
39984 if (GET_MODE (op0) == mode0
39985 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
39986 {
39987 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39988 op0 = copy_to_mode_reg (mode0, op0);
39989 }
39990 else if (op0 != constm1_rtx)
39991 {
39992 op0 = copy_to_reg (op0);
39993 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39994 }
39995
39996 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39997 op1 = copy_to_mode_reg (mode1, op1);
39998
39999 /* Force the memory operand to be addressed through just a base
40000 register here. We don't want to do this for memory operands of
40001 other builtin functions. */
40002 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
40003
40004 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
40005 op2 = copy_to_mode_reg (Pmode, op2);
40006
40007 if (!insn_data[icode].operand[3].predicate (op3, mode3))
40008 {
40009 error ("the fourth argument must be scale 1, 2, 4, 8");
40010 return const0_rtx;
40011 }
40012
40013 if (!insn_data[icode].operand[4].predicate (op4, mode4))
40014 {
40015 error ("incorrect hint operand");
40016 return const0_rtx;
40017 }
40018
40019 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40020 if (! pat)
40021 return const0_rtx;
40022
40023 emit_insn (pat);
40024
40025 return 0;
40026
40027 case IX86_BUILTIN_XABORT:
40028 icode = CODE_FOR_xabort;
40029 arg0 = CALL_EXPR_ARG (exp, 0);
40030 op0 = expand_normal (arg0);
40031 mode0 = insn_data[icode].operand[0].mode;
40032 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40033 {
40034 error ("the xabort's argument must be an 8-bit immediate");
40035 return const0_rtx;
40036 }
40037 emit_insn (gen_xabort (op0));
40038 return 0;
40039
40040 default:
40041 break;
40042 }
40043
40044 for (i = 0, d = bdesc_special_args;
40045 i < ARRAY_SIZE (bdesc_special_args);
40046 i++, d++)
40047 if (d->code == fcode)
40048 return ix86_expand_special_args_builtin (d, exp, target);
40049
40050 for (i = 0, d = bdesc_args;
40051 i < ARRAY_SIZE (bdesc_args);
40052 i++, d++)
40053 if (d->code == fcode)
40054 switch (fcode)
40055 {
40056 case IX86_BUILTIN_FABSQ:
40057 case IX86_BUILTIN_COPYSIGNQ:
40058 if (!TARGET_SSE)
40059 /* Emit a normal call if SSE isn't available. */
40060 return expand_call (exp, target, ignore);
40061 default:
40062 return ix86_expand_args_builtin (d, exp, target);
40063 }
40064
40065 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
40066 if (d->code == fcode)
40067 return ix86_expand_sse_comi (d, exp, target);
40068
40069 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
40070 if (d->code == fcode)
40071 return ix86_expand_round_builtin (d, exp, target);
40072
40073 for (i = 0, d = bdesc_pcmpestr;
40074 i < ARRAY_SIZE (bdesc_pcmpestr);
40075 i++, d++)
40076 if (d->code == fcode)
40077 return ix86_expand_sse_pcmpestr (d, exp, target);
40078
40079 for (i = 0, d = bdesc_pcmpistr;
40080 i < ARRAY_SIZE (bdesc_pcmpistr);
40081 i++, d++)
40082 if (d->code == fcode)
40083 return ix86_expand_sse_pcmpistr (d, exp, target);
40084
40085 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
40086 if (d->code == fcode)
40087 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
40088 (enum ix86_builtin_func_type)
40089 d->flag, d->comparison);
40090
40091 gcc_unreachable ();
40092 }
40093
40094 /* This returns the target-specific builtin with code CODE if
40095 current_function_decl has visibility on this builtin, which is checked
40096 using isa flags. Returns NULL_TREE otherwise. */
40097
40098 static tree ix86_get_builtin (enum ix86_builtins code)
40099 {
40100 struct cl_target_option *opts;
40101 tree target_tree = NULL_TREE;
40102
40103 /* Determine the isa flags of current_function_decl. */
40104
40105 if (current_function_decl)
40106 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
40107
40108 if (target_tree == NULL)
40109 target_tree = target_option_default_node;
40110
40111 opts = TREE_TARGET_OPTION (target_tree);
40112
40113 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
40114 return ix86_builtin_decl (code, true);
40115 else
40116 return NULL_TREE;
40117 }
40118
40119 /* Return the function decl for the target-specific builtin
40120 corresponding to the MPX builtin passed in FCODE. */
40121 static tree
40122 ix86_builtin_mpx_function (unsigned fcode)
40123 {
40124 switch (fcode)
40125 {
40126 case BUILT_IN_CHKP_BNDMK:
40127 return ix86_builtins[IX86_BUILTIN_BNDMK];
40128
40129 case BUILT_IN_CHKP_BNDSTX:
40130 return ix86_builtins[IX86_BUILTIN_BNDSTX];
40131
40132 case BUILT_IN_CHKP_BNDLDX:
40133 return ix86_builtins[IX86_BUILTIN_BNDLDX];
40134
40135 case BUILT_IN_CHKP_BNDCL:
40136 return ix86_builtins[IX86_BUILTIN_BNDCL];
40137
40138 case BUILT_IN_CHKP_BNDCU:
40139 return ix86_builtins[IX86_BUILTIN_BNDCU];
40140
40141 case BUILT_IN_CHKP_BNDRET:
40142 return ix86_builtins[IX86_BUILTIN_BNDRET];
40143
40144 case BUILT_IN_CHKP_INTERSECT:
40145 return ix86_builtins[IX86_BUILTIN_BNDINT];
40146
40147 case BUILT_IN_CHKP_NARROW:
40148 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40149
40150 case BUILT_IN_CHKP_SIZEOF:
40151 return ix86_builtins[IX86_BUILTIN_SIZEOF];
40152
40153 case BUILT_IN_CHKP_EXTRACT_LOWER:
40154 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40155
40156 case BUILT_IN_CHKP_EXTRACT_UPPER:
40157 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
40158
40159 default:
40160 return NULL_TREE;
40161 }
40162
40163 gcc_unreachable ();
40164 }
40165
40166 /* Helper function for ix86_load_bounds and ix86_store_bounds.
40167
40168 Return an address to be used to load/store bounds for pointer
40169 passed in SLOT.
40170
40171 SLOT_NO is an integer constant holding the number of a target-
40172 dependent special slot to be used in case SLOT is not a memory.
40173
40174 SPECIAL_BASE is a pointer to be used as a base of fake address
40175 to access special slots in Bounds Table. SPECIAL_BASE[-1],
40176 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
40177
40178 static rtx
40179 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
40180 {
40181 rtx addr = NULL;
40182
40183 /* A NULL SLOT means we pass bounds for a pointer not passed to the
40184 function at all. A register SLOT means we pass the pointer in a
40185 register. In both these cases bounds are passed via the Bounds
40186 Table. Since we do not have an actual pointer stored in memory,
40187 we have to use fake addresses to access the Bounds Table. We
40188 start with (special_base - sizeof (void *)) and decrease this
40189 address by the pointer size to get addresses for other slots. */
40190 if (!slot || REG_P (slot))
40191 {
40192 gcc_assert (CONST_INT_P (slot_no));
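/* Special slot N lives at SPECIAL_BASE - (N + 1) * sizeof (void *). */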
40193 addr = plus_constant (Pmode, special_base,
40194 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
40195 }
40196 /* If the pointer is passed in memory, then its address is used to
40197 access the Bounds Table. */
40198 else if (MEM_P (slot))
40199 {
40200 addr = XEXP (slot, 0);
40201 if (!register_operand (addr, Pmode))
40202 addr = copy_addr_to_reg (addr);
40203 }
40204 else
40205 gcc_unreachable ();
40206
40207 return addr;
40208 }
40209
40210 /* Expand pass uses this hook to load bounds for function parameter
40211 PTR passed in SLOT in case its bounds are not passed in a register.
40212
40213 If SLOT is a memory, then bounds are loaded as for a regular pointer
40214 loaded from memory. PTR may be NULL in case SLOT is a memory.
40215 In that case the value of PTR (if required) may be loaded from SLOT.
40216
40217 If SLOT is NULL or a register then SLOT_NO is an integer constant
40218 holding number of the target dependent special slot which should be
40219 used to obtain bounds.
40220
40221 Return loaded bounds. */
40222
40223 static rtx
40224 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
40225 {
40226 rtx reg = gen_reg_rtx (BNDmode);
40227 rtx addr;
40228
40229 /* Get address to be used to access Bounds Table. Special slots start
40230 at the location of return address of the current function. */
40231 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40232
40233 /* Load pointer value from a memory if we don't have it. */
40234 if (!ptr)
40235 {
40236 gcc_assert (MEM_P (slot));
40237 ptr = copy_addr_to_reg (slot);
40238 }
40239
40240 emit_insn (BNDmode == BND64mode
40241 ? gen_bnd64_ldx (reg, addr, ptr)
40242 : gen_bnd32_ldx (reg, addr, ptr));
40243
40244 return reg;
40245 }
40246
40247 /* Expand pass uses this hook to store BOUNDS for call argument PTR
40248 passed in SLOT in case BOUNDS are not passed in a register.
40249
40250 If SLOT is a memory, then BOUNDS are stored as for a regular pointer
40251 stored in memory. PTR may be NULL in case SLOT is a memory.
40252 In that case the value of PTR (if required) may be loaded from SLOT.
40253
40254 If SLOT is NULL or a register then SLOT_NO is an integer constant
40255 holding number of the target dependent special slot which should be
40256 used to store BOUNDS. */
40257
40258 static void
40259 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40260 {
40261 rtx addr;
40262
40263 /* Get address to be used to access Bounds Table. Special slots start
40264 at the location of return address of a called function. */
40265 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40266
40267 /* Load pointer value from a memory if we don't have it. */
40268 if (!ptr)
40269 {
40270 gcc_assert (MEM_P (slot));
40271 ptr = copy_addr_to_reg (slot);
40272 }
40273
40274 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40275 if (!register_operand (bounds, BNDmode))
40276 bounds = copy_to_mode_reg (BNDmode, bounds);
40277
40278 emit_insn (BNDmode == BND64mode
40279 ? gen_bnd64_stx (addr, ptr, bounds)
40280 : gen_bnd32_stx (addr, ptr, bounds));
40281 }
40282
40283 /* Load and return bounds returned by function in SLOT. */
40284
40285 static rtx
40286 ix86_load_returned_bounds (rtx slot)
40287 {
40288 rtx res;
40289
40290 gcc_assert (REG_P (slot));
40291 res = gen_reg_rtx (BNDmode);
40292 emit_move_insn (res, slot);
40293
40294 return res;
40295 }
40296
40297 /* Store BOUNDS returned by function into SLOT. */
40298
40299 static void
40300 ix86_store_returned_bounds (rtx slot, rtx bounds)
40301 {
40302 gcc_assert (REG_P (slot));
40303 emit_move_insn (slot, bounds);
40304 }
40305
40306 /* Returns a function decl for a vectorized version of the builtin function
40307 FNDECL with output vector type TYPE_OUT and input vector type TYPE_IN, or
40308 NULL_TREE if it is not available. */
40309
40310 static tree
40311 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40312 tree type_in)
40313 {
40314 machine_mode in_mode, out_mode;
40315 int in_n, out_n;
40316 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
40317
40318 if (TREE_CODE (type_out) != VECTOR_TYPE
40319 || TREE_CODE (type_in) != VECTOR_TYPE
40320 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40321 return NULL_TREE;
40322
40323 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40324 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40325 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40326 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40327
40328 switch (fn)
40329 {
40330 case BUILT_IN_SQRT:
40331 if (out_mode == DFmode && in_mode == DFmode)
40332 {
40333 if (out_n == 2 && in_n == 2)
40334 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40335 else if (out_n == 4 && in_n == 4)
40336 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40337 else if (out_n == 8 && in_n == 8)
40338 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40339 }
40340 break;
40341
40342 case BUILT_IN_EXP2F:
40343 if (out_mode == SFmode && in_mode == SFmode)
40344 {
40345 if (out_n == 16 && in_n == 16)
40346 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40347 }
40348 break;
40349
40350 case BUILT_IN_SQRTF:
40351 if (out_mode == SFmode && in_mode == SFmode)
40352 {
40353 if (out_n == 4 && in_n == 4)
40354 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40355 else if (out_n == 8 && in_n == 8)
40356 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40357 else if (out_n == 16 && in_n == 16)
40358 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
40359 }
40360 break;
40361
40362 case BUILT_IN_IFLOOR:
40363 case BUILT_IN_LFLOOR:
40364 case BUILT_IN_LLFLOOR:
40365 /* The round insn does not trap on denormals. */
40366 if (flag_trapping_math || !TARGET_ROUND)
40367 break;
40368
40369 if (out_mode == SImode && in_mode == DFmode)
40370 {
40371 if (out_n == 4 && in_n == 2)
40372 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40373 else if (out_n == 8 && in_n == 4)
40374 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40375 else if (out_n == 16 && in_n == 8)
40376 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40377 }
40378 break;
40379
40380 case BUILT_IN_IFLOORF:
40381 case BUILT_IN_LFLOORF:
40382 case BUILT_IN_LLFLOORF:
40383 /* The round insn does not trap on denormals. */
40384 if (flag_trapping_math || !TARGET_ROUND)
40385 break;
40386
40387 if (out_mode == SImode && in_mode == SFmode)
40388 {
40389 if (out_n == 4 && in_n == 4)
40390 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40391 else if (out_n == 8 && in_n == 8)
40392 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40393 }
40394 break;
40395
40396 case BUILT_IN_ICEIL:
40397 case BUILT_IN_LCEIL:
40398 case BUILT_IN_LLCEIL:
40399 /* The round insn does not trap on denormals. */
40400 if (flag_trapping_math || !TARGET_ROUND)
40401 break;
40402
40403 if (out_mode == SImode && in_mode == DFmode)
40404 {
40405 if (out_n == 4 && in_n == 2)
40406 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40407 else if (out_n == 8 && in_n == 4)
40408 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40409 else if (out_n == 16 && in_n == 8)
40410 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40411 }
40412 break;
40413
40414 case BUILT_IN_ICEILF:
40415 case BUILT_IN_LCEILF:
40416 case BUILT_IN_LLCEILF:
40417 /* The round insn does not trap on denormals. */
40418 if (flag_trapping_math || !TARGET_ROUND)
40419 break;
40420
40421 if (out_mode == SImode && in_mode == SFmode)
40422 {
40423 if (out_n == 4 && in_n == 4)
40424 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40425 else if (out_n == 8 && in_n == 8)
40426 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40427 }
40428 break;
40429
40430 case BUILT_IN_IRINT:
40431 case BUILT_IN_LRINT:
40432 case BUILT_IN_LLRINT:
40433 if (out_mode == SImode && in_mode == DFmode)
40434 {
40435 if (out_n == 4 && in_n == 2)
40436 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40437 else if (out_n == 8 && in_n == 4)
40438 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40439 }
40440 break;
40441
40442 case BUILT_IN_IRINTF:
40443 case BUILT_IN_LRINTF:
40444 case BUILT_IN_LLRINTF:
40445 if (out_mode == SImode && in_mode == SFmode)
40446 {
40447 if (out_n == 4 && in_n == 4)
40448 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40449 else if (out_n == 8 && in_n == 8)
40450 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40451 }
40452 break;
40453
40454 case BUILT_IN_IROUND:
40455 case BUILT_IN_LROUND:
40456 case BUILT_IN_LLROUND:
40457 /* The round insn does not trap on denormals. */
40458 if (flag_trapping_math || !TARGET_ROUND)
40459 break;
40460
40461 if (out_mode == SImode && in_mode == DFmode)
40462 {
40463 if (out_n == 4 && in_n == 2)
40464 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40465 else if (out_n == 8 && in_n == 4)
40466 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40467 else if (out_n == 16 && in_n == 8)
40468 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40469 }
40470 break;
40471
40472 case BUILT_IN_IROUNDF:
40473 case BUILT_IN_LROUNDF:
40474 case BUILT_IN_LLROUNDF:
40475 /* The round insn does not trap on denormals. */
40476 if (flag_trapping_math || !TARGET_ROUND)
40477 break;
40478
40479 if (out_mode == SImode && in_mode == SFmode)
40480 {
40481 if (out_n == 4 && in_n == 4)
40482 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40483 else if (out_n == 8 && in_n == 8)
40484 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40485 }
40486 break;
40487
40488 case BUILT_IN_COPYSIGN:
40489 if (out_mode == DFmode && in_mode == DFmode)
40490 {
40491 if (out_n == 2 && in_n == 2)
40492 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40493 else if (out_n == 4 && in_n == 4)
40494 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40495 else if (out_n == 8 && in_n == 8)
40496 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40497 }
40498 break;
40499
40500 case BUILT_IN_COPYSIGNF:
40501 if (out_mode == SFmode && in_mode == SFmode)
40502 {
40503 if (out_n == 4 && in_n == 4)
40504 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40505 else if (out_n == 8 && in_n == 8)
40506 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40507 else if (out_n == 16 && in_n == 16)
40508 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40509 }
40510 break;
40511
40512 case BUILT_IN_FLOOR:
40513 /* The round insn does not trap on denormals. */
40514 if (flag_trapping_math || !TARGET_ROUND)
40515 break;
40516
40517 if (out_mode == DFmode && in_mode == DFmode)
40518 {
40519 if (out_n == 2 && in_n == 2)
40520 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40521 else if (out_n == 4 && in_n == 4)
40522 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40523 }
40524 break;
40525
40526 case BUILT_IN_FLOORF:
40527 /* The round insn does not trap on denormals. */
40528 if (flag_trapping_math || !TARGET_ROUND)
40529 break;
40530
40531 if (out_mode == SFmode && in_mode == SFmode)
40532 {
40533 if (out_n == 4 && in_n == 4)
40534 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40535 else if (out_n == 8 && in_n == 8)
40536 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40537 }
40538 break;
40539
40540 case BUILT_IN_CEIL:
40541 /* The round insn does not trap on denormals. */
40542 if (flag_trapping_math || !TARGET_ROUND)
40543 break;
40544
40545 if (out_mode == DFmode && in_mode == DFmode)
40546 {
40547 if (out_n == 2 && in_n == 2)
40548 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40549 else if (out_n == 4 && in_n == 4)
40550 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40551 }
40552 break;
40553
40554 case BUILT_IN_CEILF:
40555 /* The round insn does not trap on denormals. */
40556 if (flag_trapping_math || !TARGET_ROUND)
40557 break;
40558
40559 if (out_mode == SFmode && in_mode == SFmode)
40560 {
40561 if (out_n == 4 && in_n == 4)
40562 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40563 else if (out_n == 8 && in_n == 8)
40564 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40565 }
40566 break;
40567
40568 case BUILT_IN_TRUNC:
40569 /* The round insn does not trap on denormals. */
40570 if (flag_trapping_math || !TARGET_ROUND)
40571 break;
40572
40573 if (out_mode == DFmode && in_mode == DFmode)
40574 {
40575 if (out_n == 2 && in_n == 2)
40576 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40577 else if (out_n == 4 && in_n == 4)
40578 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40579 }
40580 break;
40581
40582 case BUILT_IN_TRUNCF:
40583 /* The round insn does not trap on denormals. */
40584 if (flag_trapping_math || !TARGET_ROUND)
40585 break;
40586
40587 if (out_mode == SFmode && in_mode == SFmode)
40588 {
40589 if (out_n == 4 && in_n == 4)
40590 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40591 else if (out_n == 8 && in_n == 8)
40592 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40593 }
40594 break;
40595
40596 case BUILT_IN_RINT:
40597 /* The round insn does not trap on denormals. */
40598 if (flag_trapping_math || !TARGET_ROUND)
40599 break;
40600
40601 if (out_mode == DFmode && in_mode == DFmode)
40602 {
40603 if (out_n == 2 && in_n == 2)
40604 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40605 else if (out_n == 4 && in_n == 4)
40606 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40607 }
40608 break;
40609
40610 case BUILT_IN_RINTF:
40611 /* The round insn does not trap on denormals. */
40612 if (flag_trapping_math || !TARGET_ROUND)
40613 break;
40614
40615 if (out_mode == SFmode && in_mode == SFmode)
40616 {
40617 if (out_n == 4 && in_n == 4)
40618 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40619 else if (out_n == 8 && in_n == 8)
40620 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40621 }
40622 break;
40623
40624 case BUILT_IN_ROUND:
40625 /* The round insn does not trap on denormals. */
40626 if (flag_trapping_math || !TARGET_ROUND)
40627 break;
40628
40629 if (out_mode == DFmode && in_mode == DFmode)
40630 {
40631 if (out_n == 2 && in_n == 2)
40632 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40633 else if (out_n == 4 && in_n == 4)
40634 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40635 }
40636 break;
40637
40638 case BUILT_IN_ROUNDF:
40639 /* The round insn does not trap on denormals. */
40640 if (flag_trapping_math || !TARGET_ROUND)
40641 break;
40642
40643 if (out_mode == SFmode && in_mode == SFmode)
40644 {
40645 if (out_n == 4 && in_n == 4)
40646 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40647 else if (out_n == 8 && in_n == 8)
40648 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
40649 }
40650 break;
40651
40652 case BUILT_IN_FMA:
40653 if (out_mode == DFmode && in_mode == DFmode)
40654 {
40655 if (out_n == 2 && in_n == 2)
40656 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40657 if (out_n == 4 && in_n == 4)
40658 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40659 }
40660 break;
40661
40662 case BUILT_IN_FMAF:
40663 if (out_mode == SFmode && in_mode == SFmode)
40664 {
40665 if (out_n == 4 && in_n == 4)
40666 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40667 if (out_n == 8 && in_n == 8)
40668 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40669 }
40670 break;
40671
40672 default:
40673 break;
40674 }
40675
40676 /* Dispatch to a handler for a vectorization library. */
40677 if (ix86_veclib_handler)
40678 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40679 type_in);
40680
40681 return NULL_TREE;
40682 }
40683
40684 /* Handler for an SVML-style interface to
40685 a library with vectorized intrinsics. */
40686
40687 static tree
40688 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40689 {
40690 char name[20];
40691 tree fntype, new_fndecl, args;
40692 unsigned arity;
40693 const char *bname;
40694 machine_mode el_mode, in_mode;
40695 int n, in_n;
40696
40697 /* The SVML is suitable for unsafe math only. */
40698 if (!flag_unsafe_math_optimizations)
40699 return NULL_TREE;
40700
40701 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40702 n = TYPE_VECTOR_SUBPARTS (type_out);
40703 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40704 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40705 if (el_mode != in_mode
40706 || n != in_n)
40707 return NULL_TREE;
40708
40709 switch (fn)
40710 {
40711 case BUILT_IN_EXP:
40712 case BUILT_IN_LOG:
40713 case BUILT_IN_LOG10:
40714 case BUILT_IN_POW:
40715 case BUILT_IN_TANH:
40716 case BUILT_IN_TAN:
40717 case BUILT_IN_ATAN:
40718 case BUILT_IN_ATAN2:
40719 case BUILT_IN_ATANH:
40720 case BUILT_IN_CBRT:
40721 case BUILT_IN_SINH:
40722 case BUILT_IN_SIN:
40723 case BUILT_IN_ASINH:
40724 case BUILT_IN_ASIN:
40725 case BUILT_IN_COSH:
40726 case BUILT_IN_COS:
40727 case BUILT_IN_ACOSH:
40728 case BUILT_IN_ACOS:
40729 if (el_mode != DFmode || n != 2)
40730 return NULL_TREE;
40731 break;
40732
40733 case BUILT_IN_EXPF:
40734 case BUILT_IN_LOGF:
40735 case BUILT_IN_LOG10F:
40736 case BUILT_IN_POWF:
40737 case BUILT_IN_TANHF:
40738 case BUILT_IN_TANF:
40739 case BUILT_IN_ATANF:
40740 case BUILT_IN_ATAN2F:
40741 case BUILT_IN_ATANHF:
40742 case BUILT_IN_CBRTF:
40743 case BUILT_IN_SINHF:
40744 case BUILT_IN_SINF:
40745 case BUILT_IN_ASINHF:
40746 case BUILT_IN_ASINF:
40747 case BUILT_IN_COSHF:
40748 case BUILT_IN_COSF:
40749 case BUILT_IN_ACOSHF:
40750 case BUILT_IN_ACOSF:
40751 if (el_mode != SFmode || n != 4)
40752 return NULL_TREE;
40753 break;
40754
40755 default:
40756 return NULL_TREE;
40757 }
40758
40759 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40760
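/* Build the SVML entry point name. Skipping the "__builtin_" prefix of
BNAME, e.g. "sinf" becomes "vmlsSin4" and "sin" becomes "vmldSin2";
log and logf use the "Ln" spelling instead. */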
40761 if (fn == BUILT_IN_LOGF)
40762 strcpy (name, "vmlsLn4");
40763 else if (fn == BUILT_IN_LOG)
40764 strcpy (name, "vmldLn2");
40765 else if (n == 4)
40766 {
40767 sprintf (name, "vmls%s", bname+10);
40768 name[strlen (name)-1] = '4';
40769 }
40770 else
40771 sprintf (name, "vmld%s2", bname+10);
40772
40773 /* Convert to uppercase. */
40774 name[4] &= ~0x20;
40775
40776 arity = 0;
40777 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40778 args;
40779 args = TREE_CHAIN (args))
40780 arity++;
40781
40782 if (arity == 1)
40783 fntype = build_function_type_list (type_out, type_in, NULL);
40784 else
40785 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40786
40787 /* Build a function declaration for the vectorized function. */
40788 new_fndecl = build_decl (BUILTINS_LOCATION,
40789 FUNCTION_DECL, get_identifier (name), fntype);
40790 TREE_PUBLIC (new_fndecl) = 1;
40791 DECL_EXTERNAL (new_fndecl) = 1;
40792 DECL_IS_NOVOPS (new_fndecl) = 1;
40793 TREE_READONLY (new_fndecl) = 1;
40794
40795 return new_fndecl;
40796 }
40797
40798 /* Handler for an ACML-style interface to
40799 a library with vectorized intrinsics. */
40800
40801 static tree
40802 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
40803 {
40804 char name[20] = "__vr.._";
40805 tree fntype, new_fndecl, args;
40806 unsigned arity;
40807 const char *bname;
40808 machine_mode el_mode, in_mode;
40809 int n, in_n;
40810
40811 /* ACML is 64-bit only and suitable for unsafe math only, as
40812 it does not correctly support parts of IEEE with the required
40813 precision such as denormals. */
40814 if (!TARGET_64BIT
40815 || !flag_unsafe_math_optimizations)
40816 return NULL_TREE;
40817
40818 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40819 n = TYPE_VECTOR_SUBPARTS (type_out);
40820 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40821 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40822 if (el_mode != in_mode
40823 || n != in_n)
40824 return NULL_TREE;
40825
40826 switch (fn)
40827 {
40828 case BUILT_IN_SIN:
40829 case BUILT_IN_COS:
40830 case BUILT_IN_EXP:
40831 case BUILT_IN_LOG:
40832 case BUILT_IN_LOG2:
40833 case BUILT_IN_LOG10:
40834 name[4] = 'd';
40835 name[5] = '2';
40836 if (el_mode != DFmode
40837 || n != 2)
40838 return NULL_TREE;
40839 break;
40840
40841 case BUILT_IN_SINF:
40842 case BUILT_IN_COSF:
40843 case BUILT_IN_EXPF:
40844 case BUILT_IN_POWF:
40845 case BUILT_IN_LOGF:
40846 case BUILT_IN_LOG2F:
40847 case BUILT_IN_LOG10F:
40848 name[4] = 's';
40849 name[5] = '4';
40850 if (el_mode != SFmode
40851 || n != 4)
40852 return NULL_TREE;
40853 break;
40854
40855 default:
40856 return NULL_TREE;
40857 }
40858
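/* Complete the "__vr.._" template, e.g. BUILT_IN_SIN yields "__vrd2_sin"
and BUILT_IN_SINF yields "__vrs4_sinf". */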
40859 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40860 sprintf (name + 7, "%s", bname+10);
40861
40862 arity = 0;
40863 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40864 args;
40865 args = TREE_CHAIN (args))
40866 arity++;
40867
40868 if (arity == 1)
40869 fntype = build_function_type_list (type_out, type_in, NULL);
40870 else
40871 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40872
40873 /* Build a function declaration for the vectorized function. */
40874 new_fndecl = build_decl (BUILTINS_LOCATION,
40875 FUNCTION_DECL, get_identifier (name), fntype);
40876 TREE_PUBLIC (new_fndecl) = 1;
40877 DECL_EXTERNAL (new_fndecl) = 1;
40878 DECL_IS_NOVOPS (new_fndecl) = 1;
40879 TREE_READONLY (new_fndecl) = 1;
40880
40881 return new_fndecl;
40882 }
40883
40884 /* Returns a decl of a function that implements gather load with
40885 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
40886 Return NULL_TREE if it is not available. */
40887
40888 static tree
40889 ix86_vectorize_builtin_gather (const_tree mem_vectype,
40890 const_tree index_type, int scale)
40891 {
40892 bool si;
40893 enum ix86_builtins code;
40894
40895 if (! TARGET_AVX2)
40896 return NULL_TREE;
40897
40898 if ((TREE_CODE (index_type) != INTEGER_TYPE
40899 && !POINTER_TYPE_P (index_type))
40900 || (TYPE_MODE (index_type) != SImode
40901 && TYPE_MODE (index_type) != DImode))
40902 return NULL_TREE;
40903
40904 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
40905 return NULL_TREE;
40906
40907 /* v*gather* insns sign-extend the index to pointer mode. */
40908 if (TYPE_PRECISION (index_type) < POINTER_SIZE
40909 && TYPE_UNSIGNED (index_type))
40910 return NULL_TREE;
40911
40912 if (scale <= 0
40913 || scale > 8
40914 || (scale & (scale - 1)) != 0)
40915 return NULL_TREE;
40916
40917 si = TYPE_MODE (index_type) == SImode;
40918 switch (TYPE_MODE (mem_vectype))
40919 {
40920 case V2DFmode:
40921 if (TARGET_AVX512VL)
40922 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
40923 else
40924 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
40925 break;
40926 case V4DFmode:
40927 if (TARGET_AVX512VL)
40928 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
40929 else
40930 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
40931 break;
40932 case V2DImode:
40933 if (TARGET_AVX512VL)
40934 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
40935 else
40936 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
40937 break;
40938 case V4DImode:
40939 if (TARGET_AVX512VL)
40940 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
40941 else
40942 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
40943 break;
40944 case V4SFmode:
40945 if (TARGET_AVX512VL)
40946 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
40947 else
40948 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
40949 break;
40950 case V8SFmode:
40951 if (TARGET_AVX512VL)
40952 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
40953 else
40954 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
40955 break;
40956 case V4SImode:
40957 if (TARGET_AVX512VL)
40958 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
40959 else
40960 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
40961 break;
40962 case V8SImode:
40963 if (TARGET_AVX512VL)
40964 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
40965 else
40966 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
40967 break;
40968 case V8DFmode:
40969 if (TARGET_AVX512F)
40970 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
40971 else
40972 return NULL_TREE;
40973 break;
40974 case V8DImode:
40975 if (TARGET_AVX512F)
40976 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
40977 else
40978 return NULL_TREE;
40979 break;
40980 case V16SFmode:
40981 if (TARGET_AVX512F)
40982 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
40983 else
40984 return NULL_TREE;
40985 break;
40986 case V16SImode:
40987 if (TARGET_AVX512F)
40988 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
40989 else
40990 return NULL_TREE;
40991 break;
40992 default:
40993 return NULL_TREE;
40994 }
40995
40996 return ix86_get_builtin (code);
40997 }
40998
40999 /* Returns the decl of a target-specific builtin that implements the
41000 reciprocal of the function FN, or NULL_TREE if not available. */
41001
41002 static tree
41003 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
41004 {
41005 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
41006 && flag_finite_math_only && !flag_trapping_math
41007 && flag_unsafe_math_optimizations))
41008 return NULL_TREE;
41009
41010 if (md_fn)
41011 /* Machine dependent builtins. */
41012 switch (fn)
41013 {
41014 /* Vectorized version of sqrt to rsqrt conversion. */
41015 case IX86_BUILTIN_SQRTPS_NR:
41016 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
41017
41018 case IX86_BUILTIN_SQRTPS_NR256:
41019 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
41020
41021 default:
41022 return NULL_TREE;
41023 }
41024 else
41025 /* Normal builtins. */
41026 switch (fn)
41027 {
41028 /* Sqrt to rsqrt conversion. */
41029 case BUILT_IN_SQRTF:
41030 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
41031
41032 default:
41033 return NULL_TREE;
41034 }
41035 }
41036 \f
41037 /* Helper for avx_vpermilps256_operand et al. This is also used by
41038 the expansion functions to turn the parallel back into a mask.
41039 The return value is 0 for no match and the imm8+1 for a match. */
41040
41041 int
41042 avx_vpermilp_parallel (rtx par, machine_mode mode)
41043 {
41044 unsigned i, nelt = GET_MODE_NUNITS (mode);
41045 unsigned mask = 0;
41046 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
41047
41048 if (XVECLEN (par, 0) != (int) nelt)
41049 return 0;
41050
41051 /* Validate that all of the elements are constants, and not totally
41052 out of range. Copy the data into an integral array to make the
41053 subsequent checks easier. */
41054 for (i = 0; i < nelt; ++i)
41055 {
41056 rtx er = XVECEXP (par, 0, i);
41057 unsigned HOST_WIDE_INT ei;
41058
41059 if (!CONST_INT_P (er))
41060 return 0;
41061 ei = INTVAL (er);
41062 if (ei >= nelt)
41063 return 0;
41064 ipar[i] = ei;
41065 }
41066
41067 switch (mode)
41068 {
41069 case V8DFmode:
41070 /* In the 512-bit DFmode case, we can only move elements within
41071 a 128-bit lane. First fill the second part of the mask,
41072 then fallthru. */
41073 for (i = 4; i < 6; ++i)
41074 {
41075 if (ipar[i] < 4 || ipar[i] >= 6)
41076 return 0;
41077 mask |= (ipar[i] - 4) << i;
41078 }
41079 for (i = 6; i < 8; ++i)
41080 {
41081 if (ipar[i] < 6)
41082 return 0;
41083 mask |= (ipar[i] - 6) << i;
41084 }
41085 /* FALLTHRU */
41086
41087 case V4DFmode:
41088 /* In the 256-bit DFmode case, we can only move elements within
41089 a 128-bit lane. */
41090 for (i = 0; i < 2; ++i)
41091 {
41092 if (ipar[i] >= 2)
41093 return 0;
41094 mask |= ipar[i] << i;
41095 }
41096 for (i = 2; i < 4; ++i)
41097 {
41098 if (ipar[i] < 2)
41099 return 0;
41100 mask |= (ipar[i] - 2) << i;
41101 }
41102 break;
41103
41104 case V16SFmode:
41105 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
41106 must mirror the permutation in the lower 256 bits. */
41107 for (i = 0; i < 8; ++i)
41108 if (ipar[i] + 8 != ipar[i + 8])
41109 return 0;
41110 /* FALLTHRU */
41111
41112 case V8SFmode:
41113 /* In the 256-bit SFmode case, we have full freedom of
41114 movement within the low 128-bit lane, but the high 128-bit
41115 lane must mirror the exact same pattern. */
41116 for (i = 0; i < 4; ++i)
41117 if (ipar[i] + 4 != ipar[i + 4])
41118 return 0;
41119 nelt = 4;
41120 /* FALLTHRU */
41121
41122 case V2DFmode:
41123 case V4SFmode:
41124 /* In the 128-bit case, we have full freedom in the placement of
41125 the elements from the source operand. */
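/* Each selector occupies nelt / 2 bits of the immediate: one bit per
element for V2DF, two bits per element for V4SF (the V8SF and V16SF
cases were reduced to nelt == 4 above). */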
41126 for (i = 0; i < nelt; ++i)
41127 mask |= ipar[i] << (i * (nelt / 2));
41128 break;
41129
41130 default:
41131 gcc_unreachable ();
41132 }
41133
41134 /* Make sure success has a non-zero value by adding one. */
41135 return mask + 1;
41136 }
41137
41138 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
41139 the expansion functions to turn the parallel back into a mask.
41140 The return value is 0 for no match and the imm8+1 for a match. */
41141
41142 int
41143 avx_vperm2f128_parallel (rtx par, machine_mode mode)
41144 {
41145 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41146 unsigned mask = 0;
41147 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
41148
41149 if (XVECLEN (par, 0) != (int) nelt)
41150 return 0;
41151
41152 /* Validate that all of the elements are constants, and not totally
41153 out of range. Copy the data into an integral array to make the
41154 subsequent checks easier. */
41155 for (i = 0; i < nelt; ++i)
41156 {
41157 rtx er = XVECEXP (par, 0, i);
41158 unsigned HOST_WIDE_INT ei;
41159
41160 if (!CONST_INT_P (er))
41161 return 0;
41162 ei = INTVAL (er);
41163 if (ei >= 2 * nelt)
41164 return 0;
41165 ipar[i] = ei;
41166 }
41167
41168 /* Validate that each half of the permute is a run of consecutive elements. */
41169 for (i = 0; i < nelt2 - 1; ++i)
41170 if (ipar[i] + 1 != ipar[i + 1])
41171 return 0;
41172 for (i = nelt2; i < nelt - 1; ++i)
41173 if (ipar[i] + 1 != ipar[i + 1])
41174 return 0;
41175
41176 /* Reconstruct the mask. */
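/* Each destination half selects one of the four nelt2-sized source
halves; E / nelt2 is its 2-bit selector, placed at bit 0 for the low
half and at bit 4 for the high half of the immediate. */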
41177 for (i = 0; i < 2; ++i)
41178 {
41179 unsigned e = ipar[i * nelt2];
41180 if (e % nelt2)
41181 return 0;
41182 e /= nelt2;
41183 mask |= e << (i * 4);
41184 }
41185
41186 /* Make sure success has a non-zero value by adding one. */
41187 return mask + 1;
41188 }
41189 \f
41190 /* Return a register priority for hard reg REGNO. */
41191 static int
41192 ix86_register_priority (int hard_regno)
41193 {
41194 /* ebp and r13 as the base always want a displacement, and r12 as the
41195 base always wants an index, so discourage their use in an
41196 address. */
41197 if (hard_regno == R12_REG || hard_regno == R13_REG)
41198 return 0;
41199 if (hard_regno == BP_REG)
41200 return 1;
41201 /* New x86-64 int registers result in bigger code size. Discourage
41202 them. */
41203 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41204 return 2;
41205 /* New x86-64 SSE registers result in bigger code size. Discourage
41206 them. */
41207 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41208 return 2;
41209 /* Usage of AX register results in smaller code. Prefer it. */
41210 if (hard_regno == AX_REG)
41211 return 4;
41212 return 3;
41213 }
41214
41215 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
41216
41217 Put float CONST_DOUBLE in the constant pool instead of fp regs.
41218 QImode must go into class Q_REGS.
41219 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
41220 movdf to do mem-to-mem moves through integer regs. */
41221
41222 static reg_class_t
41223 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
41224 {
41225 machine_mode mode = GET_MODE (x);
41226
41227 /* We're only allowed to return a subclass of CLASS. Many of the
41228 following checks fail for NO_REGS, so eliminate that early. */
41229 if (regclass == NO_REGS)
41230 return NO_REGS;
41231
41232 /* All classes can load zeros. */
41233 if (x == CONST0_RTX (mode))
41234 return regclass;
41235
41236 /* Force constants into memory if we are loading a (nonzero) constant into
41237 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
41238 instructions to load from a constant. */
41239 if (CONSTANT_P (x)
41240 && (MAYBE_MMX_CLASS_P (regclass)
41241 || MAYBE_SSE_CLASS_P (regclass)
41242 || MAYBE_MASK_CLASS_P (regclass)))
41243 return NO_REGS;
41244
41245 /* Prefer SSE regs only, if we can use them for math. */
41246 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41247 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41248
41249 /* Floating-point constants need more complex checks. */
41250 if (CONST_DOUBLE_P (x))
41251 {
41252 /* General regs can load everything. */
41253 if (reg_class_subset_p (regclass, GENERAL_REGS))
41254 return regclass;
41255
41256 /* Floats can load 0 and 1 plus some others. Note that we eliminated
41257 zero above. We only want to wind up preferring 80387 registers if
41258 we plan on doing computation with them. */
41259 if (TARGET_80387
41260 && standard_80387_constant_p (x) > 0)
41261 {
41262 /* Limit class to non-sse. */
41263 if (regclass == FLOAT_SSE_REGS)
41264 return FLOAT_REGS;
41265 if (regclass == FP_TOP_SSE_REGS)
41266 return FP_TOP_REG;
41267 if (regclass == FP_SECOND_SSE_REGS)
41268 return FP_SECOND_REG;
41269 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41270 return regclass;
41271 }
41272
41273 return NO_REGS;
41274 }
41275
41276 /* Generally when we see PLUS here, it's the function invariant
41277 (plus soft-fp const_int), which can only be computed into general
41278 regs. */
41279 if (GET_CODE (x) == PLUS)
41280 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41281
41282 /* QImode constants are easy to load, but non-constant QImode data
41283 must go into Q_REGS. */
41284 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41285 {
41286 if (reg_class_subset_p (regclass, Q_REGS))
41287 return regclass;
41288 if (reg_class_subset_p (Q_REGS, regclass))
41289 return Q_REGS;
41290 return NO_REGS;
41291 }
41292
41293 return regclass;
41294 }
41295
41296 /* Discourage putting floating-point values in SSE registers unless
41297 SSE math is being used, and likewise for the 387 registers. */
41298 static reg_class_t
41299 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41300 {
41301 machine_mode mode = GET_MODE (x);
41302
41303 /* Restrict the output reload class to the register bank that we are doing
41304 math on. If we would like not to return a subset of CLASS, reject this
41305 alternative: if reload cannot do this, it will still use its choice. */
41306 mode = GET_MODE (x);
41307 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41308 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
41309
41310 if (X87_FLOAT_MODE_P (mode))
41311 {
41312 if (regclass == FP_TOP_SSE_REGS)
41313 return FP_TOP_REG;
41314 else if (regclass == FP_SECOND_SSE_REGS)
41315 return FP_SECOND_REG;
41316 else
41317 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
41318 }
41319
41320 return regclass;
41321 }
41322
41323 static reg_class_t
41324 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41325 machine_mode mode, secondary_reload_info *sri)
41326 {
41327 /* Double-word spills from general registers to non-offsettable memory
41328 references (zero-extended addresses) require special handling. */
41329 if (TARGET_64BIT
41330 && MEM_P (x)
41331 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41332 && INTEGER_CLASS_P (rclass)
41333 && !offsettable_memref_p (x))
41334 {
41335 sri->icode = (in_p
41336 ? CODE_FOR_reload_noff_load
41337 : CODE_FOR_reload_noff_store);
41338 /* Add the cost of moving address to a temporary. */
41339 sri->extra_cost = 1;
41340
41341 return NO_REGS;
41342 }
41343
41344 /* QImode spills from non-QI registers require an
41345 intermediate register on 32-bit targets. */
41346 if (mode == QImode
41347 && (MAYBE_MASK_CLASS_P (rclass)
41348 || (!TARGET_64BIT && !in_p
41349 && INTEGER_CLASS_P (rclass)
41350 && MAYBE_NON_Q_CLASS_P (rclass))))
41351 {
41352 int regno;
41353
41354 if (REG_P (x))
41355 regno = REGNO (x);
41356 else
41357 regno = -1;
41358
41359 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41360 regno = true_regnum (x);
41361
41362 /* Return Q_REGS if the operand is in memory. */
41363 if (regno == -1)
41364 return Q_REGS;
41365 }
41366
41367 /* This condition handles the corner case where an expression involving
41368 pointers gets vectorized. We're trying to use the address of a
41369 stack slot as a vector initializer.
41370
41371 (set (reg:V2DI 74 [ vect_cst_.2 ])
41372 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41373
41374 Eventually frame gets turned into sp+offset like this:
41375
41376 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41377 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41378 (const_int 392 [0x188]))))
41379
41380 That later gets turned into:
41381
41382 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41383 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41384 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41385
41386 We'll have the following reload recorded:
41387
41388 Reload 0: reload_in (DI) =
41389 (plus:DI (reg/f:DI 7 sp)
41390 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41391 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41392 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41393 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41394 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41395 reload_reg_rtx: (reg:V2DI 22 xmm1)
41396
41397 This isn't going to work since SSE instructions can't handle scalar
41398 additions. Returning GENERAL_REGS forces the addition into an integer
41399 register, and reload can handle subsequent reloads without problems. */
41400
41401 if (in_p && GET_CODE (x) == PLUS
41402 && SSE_CLASS_P (rclass)
41403 && SCALAR_INT_MODE_P (mode))
41404 return GENERAL_REGS;
41405
41406 return NO_REGS;
41407 }
41408
41409 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
41410
41411 static bool
41412 ix86_class_likely_spilled_p (reg_class_t rclass)
41413 {
41414 switch (rclass)
41415 {
41416 case AREG:
41417 case DREG:
41418 case CREG:
41419 case BREG:
41420 case AD_REGS:
41421 case SIREG:
41422 case DIREG:
41423 case SSE_FIRST_REG:
41424 case FP_TOP_REG:
41425 case FP_SECOND_REG:
41426 case BND_REGS:
41427 return true;
41428
41429 default:
41430 break;
41431 }
41432
41433 return false;
41434 }
41435
41436 /* If we are copying between general and FP registers, we need a memory
41437 location. The same is true for SSE and MMX registers.
41438
41439 To optimize register_move_cost performance, allow inline variant.
41440
41441 The macro can't work reliably when one of the CLASSES is a class containing
41442 registers from multiple units (SSE, MMX, integer). We avoid this by never
41443 combining those units in a single alternative in the machine description.
41444 Ensure that this constraint holds to avoid unexpected surprises.
41445
41446 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41447 enforce these sanity checks. */
41448
41449 static inline bool
41450 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41451 machine_mode mode, int strict)
41452 {
41453 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
41454 return false;
41455 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41456 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41457 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41458 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41459 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41460 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41461 {
41462 gcc_assert (!strict || lra_in_progress);
41463 return true;
41464 }
41465
41466 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41467 return true;
41468
41469 /* Between mask and general, we have moves no larger than word size. */
41470 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41471 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41472 return true;
41473
41474 /* ??? This is a lie. We do have moves between mmx/general and between
41475 mmx/sse2. But by saying we need secondary memory we discourage the
41476 register allocator from using the mmx registers unless needed. */
41477 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41478 return true;
41479
41480 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41481 {
41482 /* SSE1 doesn't have any direct moves from other classes. */
41483 if (!TARGET_SSE2)
41484 return true;
41485
41486 /* If the target says that inter-unit moves are more expensive
41487 than moving through memory, then don't generate them. */
41488 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41489 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41490 return true;
41491
41492 /* Between SSE and general, we have moves no larger than word size. */
41493 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41494 return true;
41495 }
41496
41497 return false;
41498 }
41499
41500 bool
41501 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41502 machine_mode mode, int strict)
41503 {
41504 return inline_secondary_memory_needed (class1, class2, mode, strict);
41505 }
41506
41507 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41508
41509 On the 80386, this is the size of MODE in words,
41510 except in the FP regs, where a single reg is always enough. */
41511
41512 static unsigned char
41513 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41514 {
41515 if (MAYBE_INTEGER_CLASS_P (rclass))
41516 {
41517 if (mode == XFmode)
41518 return (TARGET_64BIT ? 2 : 3);
41519 else if (mode == XCmode)
41520 return (TARGET_64BIT ? 4 : 6);
41521 else
41522 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41523 }
41524 else
41525 {
41526 if (COMPLEX_MODE_P (mode))
41527 return 2;
41528 else
41529 return 1;
41530 }
41531 }
41532
41533 /* Return true if the registers in CLASS cannot represent the change from
41534 modes FROM to TO. */
41535
41536 bool
41537 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41538 enum reg_class regclass)
41539 {
41540 if (from == to)
41541 return false;
41542
41543 /* x87 registers can't do subreg at all, as all values are reformatted
41544 to extended precision. */
41545 if (MAYBE_FLOAT_CLASS_P (regclass))
41546 return true;
41547
41548 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41549 {
41550 /* Vector registers do not support QI or HImode loads. If we don't
41551 disallow a change to these modes, reload will assume it's ok to
41552 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41553 the vec_dupv4hi pattern. */
41554 if (GET_MODE_SIZE (from) < 4)
41555 return true;
41556 }
41557
41558 return false;
41559 }
41560
41561 /* Return the cost of moving data of mode M between a
41562 register and memory. A value of 2 is the default; this cost is
41563 relative to those in `REGISTER_MOVE_COST'.
41564
41565 This function is used extensively by register_move_cost, which is used to
41566 build tables at startup. Make it inline in this case.
41567 When IN is 2, return the maximum of the in and out move costs.
41568
41569 If moving between registers and memory is more expensive than
41570 between two registers, you should define this macro to express the
41571 relative cost.
41572
41573 Also model the increased cost of moving QImode registers in
41574 non-Q_REGS classes.
41575 */
41576 static inline int
41577 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
41578 int in)
41579 {
41580 int cost;
41581 if (FLOAT_CLASS_P (regclass))
41582 {
41583 int index;
41584 switch (mode)
41585 {
41586 case SFmode:
41587 index = 0;
41588 break;
41589 case DFmode:
41590 index = 1;
41591 break;
41592 case XFmode:
41593 index = 2;
41594 break;
41595 default:
41596 return 100;
41597 }
41598 if (in == 2)
41599 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41600 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
41601 }
41602 if (SSE_CLASS_P (regclass))
41603 {
41604 int index;
41605 switch (GET_MODE_SIZE (mode))
41606 {
41607 case 4:
41608 index = 0;
41609 break;
41610 case 8:
41611 index = 1;
41612 break;
41613 case 16:
41614 index = 2;
41615 break;
41616 default:
41617 return 100;
41618 }
41619 if (in == 2)
41620 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41621 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41622 }
41623 if (MMX_CLASS_P (regclass))
41624 {
41625 int index;
41626 switch (GET_MODE_SIZE (mode))
41627 {
41628 case 4:
41629 index = 0;
41630 break;
41631 case 8:
41632 index = 1;
41633 break;
41634 default:
41635 return 100;
41636 }
41637 if (in == 2)
41638 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41639 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
41640 }
41641 switch (GET_MODE_SIZE (mode))
41642 {
41643 case 1:
41644 if (Q_CLASS_P (regclass) || TARGET_64BIT)
41645 {
41646 if (!in)
41647 return ix86_cost->int_store[0];
41648 if (TARGET_PARTIAL_REG_DEPENDENCY
41649 && optimize_function_for_speed_p (cfun))
41650 cost = ix86_cost->movzbl_load;
41651 else
41652 cost = ix86_cost->int_load[0];
41653 if (in == 2)
41654 return MAX (cost, ix86_cost->int_store[0]);
41655 return cost;
41656 }
41657 else
41658 {
41659 if (in == 2)
41660 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41661 if (in)
41662 return ix86_cost->movzbl_load;
41663 else
41664 return ix86_cost->int_store[0] + 4;
41665 }
41666 break;
41667 case 2:
41668 if (in == 2)
41669 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41670 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41671 default:
41672 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
41673 if (mode == TFmode)
41674 mode = XFmode;
41675 if (in == 2)
41676 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41677 else if (in)
41678 cost = ix86_cost->int_load[2];
41679 else
41680 cost = ix86_cost->int_store[2];
41681 return (cost * (((int) GET_MODE_SIZE (mode)
41682 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
41683 }
41684 }
41685
41686 static int
41687 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
41688 bool in)
41689 {
41690 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
41691 }
41692
41693
41694 /* Return the cost of moving data from a register in class CLASS1 to
41695 one in class CLASS2.
41696
41697 It is not required that the cost always equal 2 when FROM is the same as TO;
41698 on some machines it is expensive to move between registers if they are not
41699 general registers. */
41700
41701 static int
41702 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
41703 reg_class_t class2_i)
41704 {
41705 enum reg_class class1 = (enum reg_class) class1_i;
41706 enum reg_class class2 = (enum reg_class) class2_i;
41707
41708 /* In case we require secondary memory, compute cost of the store followed
41709 by load. In order to avoid bad register allocation choices, we need
41710 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
41711
41712 if (inline_secondary_memory_needed (class1, class2, mode, 0))
41713 {
41714 int cost = 1;
41715
41716 cost += inline_memory_move_cost (mode, class1, 2);
41717 cost += inline_memory_move_cost (mode, class2, 2);
41718
41719 /* In case of copying from a general purpose register we may emit multiple
41720 stores followed by a single load, causing a memory size mismatch stall.
41721 Count this as an arbitrarily high cost of 20. */
41722 if (targetm.class_max_nregs (class1, mode)
41723 > targetm.class_max_nregs (class2, mode))
41724 cost += 20;
41725
41726 /* In the case of FP/MMX moves, the registers actually overlap, and we
41727 have to switch modes in order to treat them differently. */
41728 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
41729 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
41730 cost += 20;
41731
41732 return cost;
41733 }
41734
41735 /* Moves between SSE/MMX and integer unit are expensive. */
41736 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
41737 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41738
41739 /* ??? By keeping the returned value relatively high, we limit the number
41740 of moves between integer and MMX/SSE registers for all targets.
41741 Additionally, the high value prevents a problem with x86_modes_tieable_p (),
41742 where integer modes in MMX/SSE registers are not tieable
41743 because of missing QImode and HImode moves to, from or between
41744 MMX/SSE registers. */
41745 return MAX (8, ix86_cost->mmxsse_to_integer);
41746
41747 if (MAYBE_FLOAT_CLASS_P (class1))
41748 return ix86_cost->fp_move;
41749 if (MAYBE_SSE_CLASS_P (class1))
41750 return ix86_cost->sse_move;
41751 if (MAYBE_MMX_CLASS_P (class1))
41752 return ix86_cost->mmx_move;
41753 return 2;
41754 }
41755
41756 /* Return TRUE if hard register REGNO can hold a value of machine-mode
41757 MODE. */
41758
41759 bool
41760 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
41761 {
41762 /* Flags and only flags can only hold CCmode values. */
41763 if (CC_REGNO_P (regno))
41764 return GET_MODE_CLASS (mode) == MODE_CC;
41765 if (GET_MODE_CLASS (mode) == MODE_CC
41766 || GET_MODE_CLASS (mode) == MODE_RANDOM
41767 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
41768 return false;
41769 if (STACK_REGNO_P (regno))
41770 return VALID_FP_MODE_P (mode);
41771 if (MASK_REGNO_P (regno))
41772 return (VALID_MASK_REG_MODE (mode)
41773 || (TARGET_AVX512BW
41774 && VALID_MASK_AVX512BW_MODE (mode)));
41775 if (BND_REGNO_P (regno))
41776 return VALID_BND_REG_MODE (mode);
41777 if (SSE_REGNO_P (regno))
41778 {
41779 /* We implement the move patterns for all vector modes into and
41780 out of SSE registers, even when no operation instructions
41781 are available. */
41782
41783 /* For AVX-512 we allow, regardless of regno:
41784 - XI mode
41785 - any of 512-bit wide vector mode
41786 - any scalar mode. */
41787 if (TARGET_AVX512F
41788 && (mode == XImode
41789 || VALID_AVX512F_REG_MODE (mode)
41790 || VALID_AVX512F_SCALAR_MODE (mode)))
41791 return true;
41792
41793 /* TODO check for QI/HI scalars. */
41794 /* AVX512VL allows SSE regs 16+ for 128/256-bit modes. */
41795 if (TARGET_AVX512VL
41796 && (mode == OImode
41797 || mode == TImode
41798 || VALID_AVX256_REG_MODE (mode)
41799 || VALID_AVX512VL_128_REG_MODE (mode)))
41800 return true;
41801
41802 /* xmm16-xmm31 are only available for AVX-512. */
41803 if (EXT_REX_SSE_REGNO_P (regno))
41804 return false;
41805
41806 /* OImode and AVX modes are available only when AVX is enabled. */
41807 return ((TARGET_AVX
41808 && VALID_AVX256_REG_OR_OI_MODE (mode))
41809 || VALID_SSE_REG_MODE (mode)
41810 || VALID_SSE2_REG_MODE (mode)
41811 || VALID_MMX_REG_MODE (mode)
41812 || VALID_MMX_REG_MODE_3DNOW (mode));
41813 }
41814 if (MMX_REGNO_P (regno))
41815 {
41816 /* We implement the move patterns for 3DNOW modes even in MMX mode,
41817 so if the register is available at all, then we can move data of
41818 the given mode into or out of it. */
41819 return (VALID_MMX_REG_MODE (mode)
41820 || VALID_MMX_REG_MODE_3DNOW (mode));
41821 }
41822
41823 if (mode == QImode)
41824 {
41825 /* Take care for QImode values - they can be in non-QI regs,
41826 but then they do cause partial register stalls. */
41827 if (ANY_QI_REGNO_P (regno))
41828 return true;
41829 if (!TARGET_PARTIAL_REG_STALL)
41830 return true;
41831 /* LRA checks if the hard register is OK for the given mode.
41832 QImode values can live in non-QI regs, so we allow all
41833 registers here. */
41834 if (lra_in_progress)
41835 return true;
41836 return !can_create_pseudo_p ();
41837 }
41838 /* We handle both integers and floats in the general purpose registers. */
41839 else if (VALID_INT_MODE_P (mode))
41840 return true;
41841 else if (VALID_FP_MODE_P (mode))
41842 return true;
41843 else if (VALID_DFP_MODE_P (mode))
41844 return true;
41845 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
41846 on to use that value in smaller contexts, this can easily force a
41847 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
41848 supporting DImode, allow it. */
41849 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
41850 return true;
41851
41852 return false;
41853 }
41854
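/* Worked example for the checks above (illustrative, assuming
   VALID_AVX512VL_128_REG_MODE covers the common 128-bit vector modes):
   asking whether xmm16 can hold a V4SFmode value succeeds only when
   AVX512VL is enabled, because V4SFmode is neither XImode, a 512-bit
   vector mode, nor a scalar mode, and without the AVX512VL case the
   EXT_REX_SSE_REGNO_P test rejects xmm16-xmm31 outright:

     ix86_hard_regno_mode_ok (FIRST_EXT_REX_SSE_REG, V4SFmode)
       -> true  with -mavx512vl
       -> false with plain -mavx2

   xmm0, by contrast, accepts V4SFmode in both cases via
   VALID_SSE_REG_MODE.  */
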
41855 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
41856 tieable integer mode. */
41857
41858 static bool
41859 ix86_tieable_integer_mode_p (machine_mode mode)
41860 {
41861 switch (mode)
41862 {
41863 case HImode:
41864 case SImode:
41865 return true;
41866
41867 case QImode:
41868 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
41869
41870 case DImode:
41871 return TARGET_64BIT;
41872
41873 default:
41874 return false;
41875 }
41876 }
41877
41878 /* Return true if MODE1 is accessible in a register that can hold MODE2
41879 without copying. That is, all register classes that can hold MODE2
41880 can also hold MODE1. */
41881
41882 bool
41883 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
41884 {
41885 if (mode1 == mode2)
41886 return true;
41887
41888 if (ix86_tieable_integer_mode_p (mode1)
41889 && ix86_tieable_integer_mode_p (mode2))
41890 return true;
41891
41892 /* MODE2 being XFmode implies fp stack or general regs, which means we
41893 can tie any smaller floating point modes to it. Note that we do not
41894 tie this with TFmode. */
41895 if (mode2 == XFmode)
41896 return mode1 == SFmode || mode1 == DFmode;
41897
41898 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
41899 that we can tie it with SFmode. */
41900 if (mode2 == DFmode)
41901 return mode1 == SFmode;
41902
41903 /* If MODE2 is only appropriate for an SSE register, then tie with
41904 any other mode acceptable to SSE registers. */
41905 if (GET_MODE_SIZE (mode2) == 32
41906 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41907 return (GET_MODE_SIZE (mode1) == 32
41908 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41909 if (GET_MODE_SIZE (mode2) == 16
41910 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41911 return (GET_MODE_SIZE (mode1) == 16
41912 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41913
41914 /* If MODE2 is appropriate for an MMX register, then tie
41915 with any other mode acceptable to MMX registers. */
41916 if (GET_MODE_SIZE (mode2) == 8
41917 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
41918 return (GET_MODE_SIZE (mode1) == 8
41919 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
41920
41921 return false;
41922 }
41923
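/* A few illustrative cases of the tieability rules above (sketch, not
   exhaustive): DFmode ties with SFmode, XFmode does not tie with TFmode,
   and two 128-bit vector modes tie whenever both are valid in the SSE
   registers:

     ix86_modes_tieable_p (SFmode, DFmode)      -> true
     ix86_modes_tieable_p (TFmode, XFmode)      -> false
     ix86_modes_tieable_p (V4SFmode, V2DImode)  -> true  (with SSE2)  */
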
41924 /* Return the cost of moving between two registers of mode MODE. */
41925
41926 static int
41927 ix86_set_reg_reg_cost (machine_mode mode)
41928 {
41929 unsigned int units = UNITS_PER_WORD;
41930
41931 switch (GET_MODE_CLASS (mode))
41932 {
41933 default:
41934 break;
41935
41936 case MODE_CC:
41937 units = GET_MODE_SIZE (CCmode);
41938 break;
41939
41940 case MODE_FLOAT:
41941 if ((TARGET_SSE && mode == TFmode)
41942 || (TARGET_80387 && mode == XFmode)
41943 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
41944 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
41945 units = GET_MODE_SIZE (mode);
41946 break;
41947
41948 case MODE_COMPLEX_FLOAT:
41949 if ((TARGET_SSE && mode == TCmode)
41950 || (TARGET_80387 && mode == XCmode)
41951 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
41952 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
41953 units = GET_MODE_SIZE (mode);
41954 break;
41955
41956 case MODE_VECTOR_INT:
41957 case MODE_VECTOR_FLOAT:
41958 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41959 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41960 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41961 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41962 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
41963 units = GET_MODE_SIZE (mode);
41964 }
41965
41966 /* Return the cost of moving between two registers of mode MODE,
41967 assuming that the move will be in pieces of at most UNITS bytes. */
41968 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
41969 }
41970
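/* Worked example for the formula above (illustrative): on a 32-bit target
   (UNITS_PER_WORD == 4) a DImode register-to-register set is performed in
   word-sized pieces, so the cost is COSTS_N_INSNS ((8 + 4 - 1) / 4)
   == COSTS_N_INSNS (2).  With SSE2 enabled, a V4SImode set uses
   units == GET_MODE_SIZE (V4SImode) == 16 and therefore costs
   COSTS_N_INSNS (1), reflecting a single vector move.  */
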
41971 /* Compute a (partial) cost for rtx X. Return true if the complete
41972 cost has been computed, and false if subexpressions should be
41973 scanned. In either case, *TOTAL contains the cost result. */
41974
41975 static bool
41976 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
41977 bool speed)
41978 {
41979 rtx mask;
41980 enum rtx_code code = (enum rtx_code) code_i;
41981 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
41982 machine_mode mode = GET_MODE (x);
41983 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
41984
41985 switch (code)
41986 {
41987 case SET:
41988 if (register_operand (SET_DEST (x), VOIDmode)
41989 && reg_or_0_operand (SET_SRC (x), VOIDmode))
41990 {
41991 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
41992 return true;
41993 }
41994 return false;
41995
41996 case CONST_INT:
41997 case CONST:
41998 case LABEL_REF:
41999 case SYMBOL_REF:
42000 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
42001 *total = 3;
42002 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
42003 *total = 2;
42004 else if (flag_pic && SYMBOLIC_CONST (x)
42005 && !(TARGET_64BIT
42006 && (GET_CODE (x) == LABEL_REF
42007 || (GET_CODE (x) == SYMBOL_REF
42008 && SYMBOL_REF_LOCAL_P (x)))))
42009 *total = 1;
42010 else
42011 *total = 0;
42012 return true;
42013
42014 case CONST_WIDE_INT:
42015 *total = 0;
42016 return true;
42017
42018 case CONST_DOUBLE:
42019 switch (standard_80387_constant_p (x))
42020 {
42021 case 1: /* 0.0 */
42022 *total = 1;
42023 return true;
42024 default: /* Other constants */
42025 *total = 2;
42026 return true;
42027 case 0:
42028 case -1:
42029 break;
42030 }
42031 if (SSE_FLOAT_MODE_P (mode))
42032 {
42033 case CONST_VECTOR:
42034 switch (standard_sse_constant_p (x))
42035 {
42036 case 0:
42037 break;
42038 case 1: /* 0: xor eliminates false dependency */
42039 *total = 0;
42040 return true;
42041 default: /* -1: cmp contains false dependency */
42042 *total = 1;
42043 return true;
42044 }
42045 }
42046 /* Fall back to (MEM (SYMBOL_REF)), since that's where
42047 it'll probably end up. Add a penalty for size. */
42048 *total = (COSTS_N_INSNS (1)
42049 + (flag_pic != 0 && !TARGET_64BIT)
42050 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
42051 return true;
42052
42053 case ZERO_EXTEND:
42054 /* The zero extension is often completely free on x86_64, so make
42055 it as cheap as possible. */
42056 if (TARGET_64BIT && mode == DImode
42057 && GET_MODE (XEXP (x, 0)) == SImode)
42058 *total = 1;
42059 else if (TARGET_ZERO_EXTEND_WITH_AND)
42060 *total = cost->add;
42061 else
42062 *total = cost->movzx;
42063 return false;
42064
42065 case SIGN_EXTEND:
42066 *total = cost->movsx;
42067 return false;
42068
42069 case ASHIFT:
42070 if (SCALAR_INT_MODE_P (mode)
42071 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
42072 && CONST_INT_P (XEXP (x, 1)))
42073 {
42074 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42075 if (value == 1)
42076 {
42077 *total = cost->add;
42078 return false;
42079 }
42080 if ((value == 2 || value == 3)
42081 && cost->lea <= cost->shift_const)
42082 {
42083 *total = cost->lea;
42084 return false;
42085 }
42086 }
42087 /* FALLTHRU */
42088
42089 case ROTATE:
42090 case ASHIFTRT:
42091 case LSHIFTRT:
42092 case ROTATERT:
42093 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42094 {
42095 /* ??? Should be SSE vector operation cost. */
42096 /* At least for published AMD latencies, this really is the same
42097 as the latency for a simple fpu operation like fabs. */
42098 /* V*QImode is emulated with 1-11 insns. */
42099 if (mode == V16QImode || mode == V32QImode)
42100 {
42101 int count = 11;
42102 if (TARGET_XOP && mode == V16QImode)
42103 {
42104 /* For XOP we use vpshab, which requires a broadcast of the
42105 value to the variable shift insn. For constants this
42106 means a V16QImode constant in memory; even when we can perform the
42107 shift with one insn, set the cost to prefer paddb. */
42108 if (CONSTANT_P (XEXP (x, 1)))
42109 {
42110 *total = (cost->fabs
42111 + rtx_cost (XEXP (x, 0), code, 0, speed)
42112 + (speed ? 2 : COSTS_N_BYTES (16)));
42113 return true;
42114 }
42115 count = 3;
42116 }
42117 else if (TARGET_SSSE3)
42118 count = 7;
42119 *total = cost->fabs * count;
42120 }
42121 else
42122 *total = cost->fabs;
42123 }
42124 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42125 {
42126 if (CONST_INT_P (XEXP (x, 1)))
42127 {
42128 if (INTVAL (XEXP (x, 1)) > 32)
42129 *total = cost->shift_const + COSTS_N_INSNS (2);
42130 else
42131 *total = cost->shift_const * 2;
42132 }
42133 else
42134 {
42135 if (GET_CODE (XEXP (x, 1)) == AND)
42136 *total = cost->shift_var * 2;
42137 else
42138 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
42139 }
42140 }
42141 else
42142 {
42143 if (CONST_INT_P (XEXP (x, 1)))
42144 *total = cost->shift_const;
42145 else if (GET_CODE (XEXP (x, 1)) == SUBREG
42146 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42147 {
42148 /* Return the cost after shift-and truncation. */
42149 *total = cost->shift_var;
42150 return true;
42151 }
42152 else
42153 *total = cost->shift_var;
42154 }
42155 return false;
42156
42157 case FMA:
42158 {
42159 rtx sub;
42160
42161 gcc_assert (FLOAT_MODE_P (mode));
42162 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42163
42164 /* ??? SSE scalar/vector cost should be used here. */
42165 /* ??? Bald assumption that fma has the same cost as fmul. */
42166 *total = cost->fmul;
42167 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
42168
42169 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
42170 sub = XEXP (x, 0);
42171 if (GET_CODE (sub) == NEG)
42172 sub = XEXP (sub, 0);
42173 *total += rtx_cost (sub, FMA, 0, speed);
42174
42175 sub = XEXP (x, 2);
42176 if (GET_CODE (sub) == NEG)
42177 sub = XEXP (sub, 0);
42178 *total += rtx_cost (sub, FMA, 2, speed);
42179 return true;
42180 }
42181
42182 case MULT:
42183 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42184 {
42185 /* ??? SSE scalar cost should be used here. */
42186 *total = cost->fmul;
42187 return false;
42188 }
42189 else if (X87_FLOAT_MODE_P (mode))
42190 {
42191 *total = cost->fmul;
42192 return false;
42193 }
42194 else if (FLOAT_MODE_P (mode))
42195 {
42196 /* ??? SSE vector cost should be used here. */
42197 *total = cost->fmul;
42198 return false;
42199 }
42200 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42201 {
42202 /* V*QImode is emulated with 7-13 insns. */
42203 if (mode == V16QImode || mode == V32QImode)
42204 {
42205 int extra = 11;
42206 if (TARGET_XOP && mode == V16QImode)
42207 extra = 5;
42208 else if (TARGET_SSSE3)
42209 extra = 6;
42210 *total = cost->fmul * 2 + cost->fabs * extra;
42211 }
42212 /* V*DImode is emulated with 5-8 insns. */
42213 else if (mode == V2DImode || mode == V4DImode)
42214 {
42215 if (TARGET_XOP && mode == V2DImode)
42216 *total = cost->fmul * 2 + cost->fabs * 3;
42217 else
42218 *total = cost->fmul * 3 + cost->fabs * 5;
42219 }
42220 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
42221 insns, including two PMULUDQ. */
42222 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42223 *total = cost->fmul * 2 + cost->fabs * 5;
42224 else
42225 *total = cost->fmul;
42226 return false;
42227 }
42228 else
42229 {
42230 rtx op0 = XEXP (x, 0);
42231 rtx op1 = XEXP (x, 1);
42232 int nbits;
42233 if (CONST_INT_P (XEXP (x, 1)))
42234 {
42235 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42236 for (nbits = 0; value != 0; value &= value - 1)
42237 nbits++;
42238 }
42239 else
42240 /* This is arbitrary. */
42241 nbits = 7;
42242
42243 /* Compute costs correctly for widening multiplication. */
42244 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42245 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42246 == GET_MODE_SIZE (mode))
42247 {
42248 int is_mulwiden = 0;
42249 machine_mode inner_mode = GET_MODE (op0);
42250
42251 if (GET_CODE (op0) == GET_CODE (op1))
42252 is_mulwiden = 1, op1 = XEXP (op1, 0);
42253 else if (CONST_INT_P (op1))
42254 {
42255 if (GET_CODE (op0) == SIGN_EXTEND)
42256 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42257 == INTVAL (op1);
42258 else
42259 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42260 }
42261
42262 if (is_mulwiden)
42263 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42264 }
42265
42266 *total = (cost->mult_init[MODE_INDEX (mode)]
42267 + nbits * cost->mult_bit
42268 + rtx_cost (op0, outer_code, opno, speed)
42269 + rtx_cost (op1, outer_code, opno, speed));
42270
42271 return true;
42272 }
42273
42274 case DIV:
42275 case UDIV:
42276 case MOD:
42277 case UMOD:
42278 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42279 /* ??? SSE cost should be used here. */
42280 *total = cost->fdiv;
42281 else if (X87_FLOAT_MODE_P (mode))
42282 *total = cost->fdiv;
42283 else if (FLOAT_MODE_P (mode))
42284 /* ??? SSE vector cost should be used here. */
42285 *total = cost->fdiv;
42286 else
42287 *total = cost->divide[MODE_INDEX (mode)];
42288 return false;
42289
42290 case PLUS:
42291 if (GET_MODE_CLASS (mode) == MODE_INT
42292 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
42293 {
42294 if (GET_CODE (XEXP (x, 0)) == PLUS
42295 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42296 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42297 && CONSTANT_P (XEXP (x, 1)))
42298 {
42299 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42300 if (val == 2 || val == 4 || val == 8)
42301 {
42302 *total = cost->lea;
42303 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42304 outer_code, opno, speed);
42305 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
42306 outer_code, opno, speed);
42307 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42308 return true;
42309 }
42310 }
42311 else if (GET_CODE (XEXP (x, 0)) == MULT
42312 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42313 {
42314 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42315 if (val == 2 || val == 4 || val == 8)
42316 {
42317 *total = cost->lea;
42318 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42319 outer_code, opno, speed);
42320 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42321 return true;
42322 }
42323 }
42324 else if (GET_CODE (XEXP (x, 0)) == PLUS)
42325 {
42326 *total = cost->lea;
42327 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42328 outer_code, opno, speed);
42329 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42330 outer_code, opno, speed);
42331 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42332 return true;
42333 }
42334 }
42335 /* FALLTHRU */
42336
42337 case MINUS:
42338 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42339 {
42340 /* ??? SSE cost should be used here. */
42341 *total = cost->fadd;
42342 return false;
42343 }
42344 else if (X87_FLOAT_MODE_P (mode))
42345 {
42346 *total = cost->fadd;
42347 return false;
42348 }
42349 else if (FLOAT_MODE_P (mode))
42350 {
42351 /* ??? SSE vector cost should be used here. */
42352 *total = cost->fadd;
42353 return false;
42354 }
42355 /* FALLTHRU */
42356
42357 case AND:
42358 case IOR:
42359 case XOR:
42360 if (GET_MODE_CLASS (mode) == MODE_INT
42361 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42362 {
42363 *total = (cost->add * 2
42364 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
42365 << (GET_MODE (XEXP (x, 0)) != DImode))
42366 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
42367 << (GET_MODE (XEXP (x, 1)) != DImode)));
42368 return true;
42369 }
42370 /* FALLTHRU */
42371
42372 case NEG:
42373 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42374 {
42375 /* ??? SSE cost should be used here. */
42376 *total = cost->fchs;
42377 return false;
42378 }
42379 else if (X87_FLOAT_MODE_P (mode))
42380 {
42381 *total = cost->fchs;
42382 return false;
42383 }
42384 else if (FLOAT_MODE_P (mode))
42385 {
42386 /* ??? SSE vector cost should be used here. */
42387 *total = cost->fchs;
42388 return false;
42389 }
42390 /* FALLTHRU */
42391
42392 case NOT:
42393 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42394 {
42395 /* ??? Should be SSE vector operation cost. */
42396 /* At least for published AMD latencies, this really is the same
42397 as the latency for a simple fpu operation like fabs. */
42398 *total = cost->fabs;
42399 }
42400 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42401 *total = cost->add * 2;
42402 else
42403 *total = cost->add;
42404 return false;
42405
42406 case COMPARE:
42407 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42408 && XEXP (XEXP (x, 0), 1) == const1_rtx
42409 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42410 && XEXP (x, 1) == const0_rtx)
42411 {
42412 /* This kind of construct is implemented using test[bwl].
42413 Treat it as if we had an AND. */
42414 *total = (cost->add
42415 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
42416 + rtx_cost (const1_rtx, outer_code, opno, speed));
42417 return true;
42418 }
42419 return false;
42420
42421 case FLOAT_EXTEND:
42422 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
42423 *total = 0;
42424 return false;
42425
42426 case ABS:
42427 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42428 /* ??? SSE cost should be used here. */
42429 *total = cost->fabs;
42430 else if (X87_FLOAT_MODE_P (mode))
42431 *total = cost->fabs;
42432 else if (FLOAT_MODE_P (mode))
42433 /* ??? SSE vector cost should be used here. */
42434 *total = cost->fabs;
42435 return false;
42436
42437 case SQRT:
42438 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42439 /* ??? SSE cost should be used here. */
42440 *total = cost->fsqrt;
42441 else if (X87_FLOAT_MODE_P (mode))
42442 *total = cost->fsqrt;
42443 else if (FLOAT_MODE_P (mode))
42444 /* ??? SSE vector cost should be used here. */
42445 *total = cost->fsqrt;
42446 return false;
42447
42448 case UNSPEC:
42449 if (XINT (x, 1) == UNSPEC_TP)
42450 *total = 0;
42451 return false;
42452
42453 case VEC_SELECT:
42454 case VEC_CONCAT:
42455 case VEC_DUPLICATE:
42456 /* ??? Assume all of these vector manipulation patterns are
42457 recognizable, in which case they all have pretty much the
42458 same cost. */
42459 *total = cost->fabs;
42460 return true;
42461 case VEC_MERGE:
42462 mask = XEXP (x, 2);
42463 /* This is a masked instruction; assume the same cost
42464 as the non-masked variant. */
42465 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42466 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
42467 else
42468 *total = cost->fabs;
42469 return true;
42470
42471 default:
42472 return false;
42473 }
42474 }
42475
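/* Worked example of the MULT costing above (illustrative): for a scalar
   multiplication by the constant 10 (binary 1010), the popcount loop sets
   nbits == 2, so the reported cost is roughly

     cost->mult_init[MODE_INDEX (mode)] + 2 * cost->mult_bit

   plus the costs of the two operands, mirroring a shift-and-add expansion
   of the multiply.  Multiplication by a non-constant operand falls back to
   the arbitrary estimate nbits == 7.  */
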
42476 #if TARGET_MACHO
42477
42478 static int current_machopic_label_num;
42479
42480 /* Given a symbol name and its associated stub, write out the
42481 definition of the stub. */
42482
42483 void
42484 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42485 {
42486 unsigned int length;
42487 char *binder_name, *symbol_name, lazy_ptr_name[32];
42488 int label = ++current_machopic_label_num;
42489
42490 /* For 64-bit we shouldn't get here. */
42491 gcc_assert (!TARGET_64BIT);
42492
42493 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42494 symb = targetm.strip_name_encoding (symb);
42495
42496 length = strlen (stub);
42497 binder_name = XALLOCAVEC (char, length + 32);
42498 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42499
42500 length = strlen (symb);
42501 symbol_name = XALLOCAVEC (char, length + 32);
42502 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42503
42504 sprintf (lazy_ptr_name, "L%d$lz", label);
42505
42506 if (MACHOPIC_ATT_STUB)
42507 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42508 else if (MACHOPIC_PURE)
42509 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42510 else
42511 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42512
42513 fprintf (file, "%s:\n", stub);
42514 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42515
42516 if (MACHOPIC_ATT_STUB)
42517 {
42518 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42519 }
42520 else if (MACHOPIC_PURE)
42521 {
42522 /* PIC stub. */
42523 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42524 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42525 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42526 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42527 label, lazy_ptr_name, label);
42528 fprintf (file, "\tjmp\t*%%ecx\n");
42529 }
42530 else
42531 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42532
42533 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42534 it needs no stub-binding-helper. */
42535 if (MACHOPIC_ATT_STUB)
42536 return;
42537
42538 fprintf (file, "%s:\n", binder_name);
42539
42540 if (MACHOPIC_PURE)
42541 {
42542 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42543 fprintf (file, "\tpushl\t%%ecx\n");
42544 }
42545 else
42546 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42547
42548 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42549
42550 /* N.B. Keep the correspondence of these
42551 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42552 old-pic/new-pic/non-pic stubs; altering this will break
42553 compatibility with existing dylibs. */
42554 if (MACHOPIC_PURE)
42555 {
42556 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42557 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42558 }
42559 else
42560 /* 16-byte -mdynamic-no-pic stub. */
42561 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
42562
42563 fprintf (file, "%s:\n", lazy_ptr_name);
42564 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42565 fprintf (file, ASM_LONG "%s\n", binder_name);
42566 }
42567 #endif /* TARGET_MACHO */
42568
42569 /* Order the registers for register allocator. */
42570
42571 void
42572 x86_order_regs_for_local_alloc (void)
42573 {
42574 int pos = 0;
42575 int i;
42576
42577 /* First allocate the local general purpose registers. */
42578 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42579 if (GENERAL_REGNO_P (i) && call_used_regs[i])
42580 reg_alloc_order [pos++] = i;
42581
42582 /* Global general purpose registers. */
42583 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42584 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42585 reg_alloc_order [pos++] = i;
42586
42587 /* x87 registers come first in case we are doing FP math
42588 using them. */
42589 if (!TARGET_SSE_MATH)
42590 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42591 reg_alloc_order [pos++] = i;
42592
42593 /* SSE registers. */
42594 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42595 reg_alloc_order [pos++] = i;
42596 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42597 reg_alloc_order [pos++] = i;
42598
42599 /* Extended REX SSE registers. */
42600 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42601 reg_alloc_order [pos++] = i;
42602
42603 /* Mask registers. */
42604 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42605 reg_alloc_order [pos++] = i;
42606
42607 /* MPX bound registers. */
42608 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42609 reg_alloc_order [pos++] = i;
42610
42611 /* x87 registers. */
42612 if (TARGET_SSE_MATH)
42613 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42614 reg_alloc_order [pos++] = i;
42615
42616 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42617 reg_alloc_order [pos++] = i;
42618
42619 /* Initialize the rest of array as we do not allocate some registers
42620 at all. */
42621 while (pos < FIRST_PSEUDO_REGISTER)
42622 reg_alloc_order [pos++] = 0;
42623 }
42624
42625 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42626 in struct attribute_spec.handler. */
42627 static tree
42628 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42629 tree args,
42630 int,
42631 bool *no_add_attrs)
42632 {
42633 if (TREE_CODE (*node) != FUNCTION_TYPE
42634 && TREE_CODE (*node) != METHOD_TYPE
42635 && TREE_CODE (*node) != FIELD_DECL
42636 && TREE_CODE (*node) != TYPE_DECL)
42637 {
42638 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42639 name);
42640 *no_add_attrs = true;
42641 return NULL_TREE;
42642 }
42643 if (TARGET_64BIT)
42644 {
42645 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42646 name);
42647 *no_add_attrs = true;
42648 return NULL_TREE;
42649 }
42650 if (is_attribute_p ("callee_pop_aggregate_return", name))
42651 {
42652 tree cst;
42653
42654 cst = TREE_VALUE (args);
42655 if (TREE_CODE (cst) != INTEGER_CST)
42656 {
42657 warning (OPT_Wattributes,
42658 "%qE attribute requires an integer constant argument",
42659 name);
42660 *no_add_attrs = true;
42661 }
42662 else if (compare_tree_int (cst, 0) != 0
42663 && compare_tree_int (cst, 1) != 0)
42664 {
42665 warning (OPT_Wattributes,
42666 "argument to %qE attribute is neither zero, nor one",
42667 name);
42668 *no_add_attrs = true;
42669 }
42670
42671 return NULL_TREE;
42672 }
42673
42674 return NULL_TREE;
42675 }
42676
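/* Hypothetical usage of the attribute validated above (sketch; 32-bit
   only, and the argument must be the integer constant 0 or 1):

     struct big { char buf[64]; };
     struct big ret_in_mem (int)
       __attribute__ ((callee_pop_aggregate_return (1)));

   A value of 1 makes the callee pop the hidden aggregate-return pointer;
   0 leaves that to the caller.  */
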
42677 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
42678 struct attribute_spec.handler. */
42679 static tree
42680 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
42681 bool *no_add_attrs)
42682 {
42683 if (TREE_CODE (*node) != FUNCTION_TYPE
42684 && TREE_CODE (*node) != METHOD_TYPE
42685 && TREE_CODE (*node) != FIELD_DECL
42686 && TREE_CODE (*node) != TYPE_DECL)
42687 {
42688 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42689 name);
42690 *no_add_attrs = true;
42691 return NULL_TREE;
42692 }
42693
42694 /* Can combine regparm with all attributes but fastcall. */
42695 if (is_attribute_p ("ms_abi", name))
42696 {
42697 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
42698 {
42699 error ("ms_abi and sysv_abi attributes are not compatible");
42700 }
42701
42702 return NULL_TREE;
42703 }
42704 else if (is_attribute_p ("sysv_abi", name))
42705 {
42706 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
42707 {
42708 error ("ms_abi and sysv_abi attributes are not compatible");
42709 }
42710
42711 return NULL_TREE;
42712 }
42713
42714 return NULL_TREE;
42715 }
42716
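/* Hypothetical usage of the ABI attributes checked above (sketch): a
   declaration may select either calling convention, but combining both
   on the same type is diagnosed:

     int f (int) __attribute__ ((ms_abi));               OK
     int g (int) __attribute__ ((sysv_abi));             OK
     int h (int) __attribute__ ((ms_abi, sysv_abi));     error  */
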
42717 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
42718 struct attribute_spec.handler. */
42719 static tree
42720 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
42721 bool *no_add_attrs)
42722 {
42723 tree *type = NULL;
42724 if (DECL_P (*node))
42725 {
42726 if (TREE_CODE (*node) == TYPE_DECL)
42727 type = &TREE_TYPE (*node);
42728 }
42729 else
42730 type = node;
42731
42732 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
42733 {
42734 warning (OPT_Wattributes, "%qE attribute ignored",
42735 name);
42736 *no_add_attrs = true;
42737 }
42738
42739 else if ((is_attribute_p ("ms_struct", name)
42740 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
42741 || ((is_attribute_p ("gcc_struct", name)
42742 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
42743 {
42744 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
42745 name);
42746 *no_add_attrs = true;
42747 }
42748
42749 return NULL_TREE;
42750 }
42751
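/* Hypothetical usage of the layout attributes handled above (sketch):
   ms_struct and gcc_struct apply to struct and union types and are
   mutually exclusive:

     struct __attribute__ ((ms_struct)) S { char c; long long x : 40; };

   Requesting gcc_struct on a type already marked ms_struct (or vice
   versa) triggers the "incompatible attribute ignored" warning above.  */
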
42752 static tree
42753 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
42754 bool *no_add_attrs)
42755 {
42756 if (TREE_CODE (*node) != FUNCTION_DECL)
42757 {
42758 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42759 name);
42760 *no_add_attrs = true;
42761 }
42762 return NULL_TREE;
42763 }
42764
42765 static bool
42766 ix86_ms_bitfield_layout_p (const_tree record_type)
42767 {
42768 return ((TARGET_MS_BITFIELD_LAYOUT
42769 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
42770 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
42771 }
42772
42773 /* Returns an expression indicating where the this parameter is
42774 located on entry to the FUNCTION. */
42775
42776 static rtx
42777 x86_this_parameter (tree function)
42778 {
42779 tree type = TREE_TYPE (function);
42780 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
42781 int nregs;
42782
42783 if (TARGET_64BIT)
42784 {
42785 const int *parm_regs;
42786
42787 if (ix86_function_type_abi (type) == MS_ABI)
42788 parm_regs = x86_64_ms_abi_int_parameter_registers;
42789 else
42790 parm_regs = x86_64_int_parameter_registers;
42791 return gen_rtx_REG (Pmode, parm_regs[aggr]);
42792 }
42793
42794 nregs = ix86_function_regparm (type, function);
42795
42796 if (nregs > 0 && !stdarg_p (type))
42797 {
42798 int regno;
42799 unsigned int ccvt = ix86_get_callcvt (type);
42800
42801 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42802 regno = aggr ? DX_REG : CX_REG;
42803 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42804 {
42805 regno = CX_REG;
42806 if (aggr)
42807 return gen_rtx_MEM (SImode,
42808 plus_constant (Pmode, stack_pointer_rtx, 4));
42809 }
42810 else
42811 {
42812 regno = AX_REG;
42813 if (aggr)
42814 {
42815 regno = DX_REG;
42816 if (nregs == 1)
42817 return gen_rtx_MEM (SImode,
42818 plus_constant (Pmode,
42819 stack_pointer_rtx, 4));
42820 }
42821 }
42822 return gen_rtx_REG (SImode, regno);
42823 }
42824
42825 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
42826 aggr ? 8 : 4));
42827 }
42828
42829 /* Determine whether x86_output_mi_thunk can succeed. */
42830
42831 static bool
42832 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
42833 const_tree function)
42834 {
42835 /* 64-bit can handle anything. */
42836 if (TARGET_64BIT)
42837 return true;
42838
42839 /* For 32-bit, everything's fine if we have one free register. */
42840 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
42841 return true;
42842
42843 /* Need a free register for vcall_offset. */
42844 if (vcall_offset)
42845 return false;
42846
42847 /* Need a free register for GOT references. */
42848 if (flag_pic && !targetm.binds_local_p (function))
42849 return false;
42850
42851 /* Otherwise ok. */
42852 return true;
42853 }
42854
42855 /* Output the assembler code for a thunk function. THUNK_DECL is the
42856 declaration for the thunk function itself, FUNCTION is the decl for
42857 the target function. DELTA is an immediate constant offset to be
42858 added to THIS. If VCALL_OFFSET is nonzero, the word at
42859 *(*this + vcall_offset) should be added to THIS. */
42860
42861 static void
42862 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
42863 HOST_WIDE_INT vcall_offset, tree function)
42864 {
42865 rtx this_param = x86_this_parameter (function);
42866 rtx this_reg, tmp, fnaddr;
42867 unsigned int tmp_regno;
42868 rtx_insn *insn;
42869
42870 if (TARGET_64BIT)
42871 tmp_regno = R10_REG;
42872 else
42873 {
42874 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
42875 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42876 tmp_regno = AX_REG;
42877 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42878 tmp_regno = DX_REG;
42879 else
42880 tmp_regno = CX_REG;
42881 }
42882
42883 emit_note (NOTE_INSN_PROLOGUE_END);
42884
42885 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
42886 pull it in now and let DELTA benefit. */
42887 if (REG_P (this_param))
42888 this_reg = this_param;
42889 else if (vcall_offset)
42890 {
42891 /* Put the this parameter into %eax. */
42892 this_reg = gen_rtx_REG (Pmode, AX_REG);
42893 emit_move_insn (this_reg, this_param);
42894 }
42895 else
42896 this_reg = NULL_RTX;
42897
42898 /* Adjust the this parameter by a fixed constant. */
42899 if (delta)
42900 {
42901 rtx delta_rtx = GEN_INT (delta);
42902 rtx delta_dst = this_reg ? this_reg : this_param;
42903
42904 if (TARGET_64BIT)
42905 {
42906 if (!x86_64_general_operand (delta_rtx, Pmode))
42907 {
42908 tmp = gen_rtx_REG (Pmode, tmp_regno);
42909 emit_move_insn (tmp, delta_rtx);
42910 delta_rtx = tmp;
42911 }
42912 }
42913
42914 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
42915 }
42916
42917 /* Adjust the this parameter by a value stored in the vtable. */
42918 if (vcall_offset)
42919 {
42920 rtx vcall_addr, vcall_mem, this_mem;
42921
42922 tmp = gen_rtx_REG (Pmode, tmp_regno);
42923
42924 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
42925 if (Pmode != ptr_mode)
42926 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
42927 emit_move_insn (tmp, this_mem);
42928
42929 /* Adjust the this parameter. */
42930 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
42931 if (TARGET_64BIT
42932 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
42933 {
42934 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
42935 emit_move_insn (tmp2, GEN_INT (vcall_offset));
42936 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
42937 }
42938
42939 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
42940 if (Pmode != ptr_mode)
42941 emit_insn (gen_addsi_1_zext (this_reg,
42942 gen_rtx_REG (ptr_mode,
42943 REGNO (this_reg)),
42944 vcall_mem));
42945 else
42946 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
42947 }
42948
42949 /* If necessary, drop THIS back to its stack slot. */
42950 if (this_reg && this_reg != this_param)
42951 emit_move_insn (this_param, this_reg);
42952
42953 fnaddr = XEXP (DECL_RTL (function), 0);
42954 if (TARGET_64BIT)
42955 {
42956 if (!flag_pic || targetm.binds_local_p (function)
42957 || TARGET_PECOFF)
42958 ;
42959 else
42960 {
42961 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
42962 tmp = gen_rtx_CONST (Pmode, tmp);
42963 fnaddr = gen_const_mem (Pmode, tmp);
42964 }
42965 }
42966 else
42967 {
42968 if (!flag_pic || targetm.binds_local_p (function))
42969 ;
42970 #if TARGET_MACHO
42971 else if (TARGET_MACHO)
42972 {
42973 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
42974 fnaddr = XEXP (fnaddr, 0);
42975 }
42976 #endif /* TARGET_MACHO */
42977 else
42978 {
42979 tmp = gen_rtx_REG (Pmode, CX_REG);
42980 output_set_got (tmp, NULL_RTX);
42981
42982 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
42983 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
42984 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
42985 fnaddr = gen_const_mem (Pmode, fnaddr);
42986 }
42987 }
42988
42989 /* Our sibling call patterns do not allow memories, because we have no
42990 predicate that can distinguish between frame and non-frame memory.
42991 For our purposes here, we can get away with (ab)using a jump pattern,
42992 because we're going to do no optimization. */
42993 if (MEM_P (fnaddr))
42994 {
42995 if (sibcall_insn_operand (fnaddr, word_mode))
42996 {
42997 fnaddr = XEXP (DECL_RTL (function), 0);
42998 tmp = gen_rtx_MEM (QImode, fnaddr);
42999 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43000 tmp = emit_call_insn (tmp);
43001 SIBLING_CALL_P (tmp) = 1;
43002 }
43003 else
43004 emit_jump_insn (gen_indirect_jump (fnaddr));
43005 }
43006 else
43007 {
43008 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
43009 {
43010 // CM_LARGE_PIC always uses a pseudo PIC register, which is
43011 // uninitialized. Since FUNCTION is local and calling it
43012 // doesn't go through the PLT, we use the scratch register %r11
43013 // as the PIC register and initialize it here.
43014 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
43015 ix86_init_large_pic_reg (tmp_regno);
43016 fnaddr = legitimize_pic_address (fnaddr,
43017 gen_rtx_REG (Pmode, tmp_regno));
43018 }
43019
43020 if (!sibcall_insn_operand (fnaddr, word_mode))
43021 {
43022 tmp = gen_rtx_REG (word_mode, tmp_regno);
43023 if (GET_MODE (fnaddr) != word_mode)
43024 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
43025 emit_move_insn (tmp, fnaddr);
43026 fnaddr = tmp;
43027 }
43028
43029 tmp = gen_rtx_MEM (QImode, fnaddr);
43030 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43031 tmp = emit_call_insn (tmp);
43032 SIBLING_CALL_P (tmp) = 1;
43033 }
43034 emit_barrier ();
43035
43036 /* Emit just enough of rest_of_compilation to get the insns emitted.
43037 Note that use_thunk calls assemble_start_function et al. */
43038 insn = get_insns ();
43039 shorten_branches (insn);
43040 final_start_function (insn, file, 1);
43041 final (insn, file, 1);
43042 final_end_function ();
43043 }
43044
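/* In source-level terms the thunk emitted above behaves roughly like the
   following sketch (illustrative pseudo-C; the real thing is emitted as
   RTL and ends in a sibling call or jump rather than a normal call):

     void thunk (void *this_, ...)
     {
       this_ = (char *) this_ + DELTA;
       if (VCALL_OFFSET)
         this_ = (char *) this_
                 + *(ptrdiff_t *) (*(char **) this_ + VCALL_OFFSET);
       FUNCTION (this_, ...);   // tail call
     }
*/
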
43045 static void
43046 x86_file_start (void)
43047 {
43048 default_file_start ();
43049 if (TARGET_16BIT)
43050 fputs ("\t.code16gcc\n", asm_out_file);
43051 #if TARGET_MACHO
43052 darwin_file_start ();
43053 #endif
43054 if (X86_FILE_START_VERSION_DIRECTIVE)
43055 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
43056 if (X86_FILE_START_FLTUSED)
43057 fputs ("\t.global\t__fltused\n", asm_out_file);
43058 if (ix86_asm_dialect == ASM_INTEL)
43059 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
43060 }
43061
43062 int
43063 x86_field_alignment (tree field, int computed)
43064 {
43065 machine_mode mode;
43066 tree type = TREE_TYPE (field);
43067
43068 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
43069 return computed;
43070 mode = TYPE_MODE (strip_array_types (type));
43071 if (mode == DFmode || mode == DCmode
43072 || GET_MODE_CLASS (mode) == MODE_INT
43073 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
43074 return MIN (32, computed);
43075 return computed;
43076 }
43077
43078 /* Print call to TARGET to FILE. */
43079
43080 static void
43081 x86_print_call_or_nop (FILE *file, const char *target)
43082 {
43083 if (flag_nop_mcount)
43084 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
43085 else
43086 fprintf (file, "1:\tcall\t%s\n", target);
43087 }
43088
43089 /* Output assembler code to FILE to increment profiler label # LABELNO
43090 for profiling a function entry. */
43091 void
43092 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
43093 {
43094 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
43095 : MCOUNT_NAME);
43096 if (TARGET_64BIT)
43097 {
43098 #ifndef NO_PROFILE_COUNTERS
43099 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
43100 #endif
43101
43102 if (!TARGET_PECOFF && flag_pic)
43103 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
43104 else
43105 x86_print_call_or_nop (file, mcount_name);
43106 }
43107 else if (flag_pic)
43108 {
43109 #ifndef NO_PROFILE_COUNTERS
43110 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
43111 LPREFIX, labelno);
43112 #endif
43113 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
43114 }
43115 else
43116 {
43117 #ifndef NO_PROFILE_COUNTERS
43118 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
43119 LPREFIX, labelno);
43120 #endif
43121 x86_print_call_or_nop (file, mcount_name);
43122 }
43123
43124 if (flag_record_mcount)
43125 {
43126 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
43127 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
43128 fprintf (file, "\t.previous\n");
43129 }
43130 }
43131
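/* For illustration, on a typical ELF/Linux target (where MCOUNT_NAME is
   "mcount" and profile counters are disabled) the 32-bit PIC branch above
   boils down to a single

       1:	call	*mcount@GOT(%ebx)

   at each function entry; with -mrecord-mcount the address of label 1 is
   then recorded in the __mcount_loc section so the call sites can be
   patched later.  With -mfentry the MCOUNT_NAME_BEFORE_PROLOGUE symbol is
   used instead.  */
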
43132 /* We don't have exact information about the insn sizes, but we may assume
43133 quite safely that we are informed about all 1 byte insns and memory
43134 address sizes. This is enough to eliminate unnecessary padding in
43135 99% of cases. */
43136
43137 static int
43138 min_insn_size (rtx_insn *insn)
43139 {
43140 int l = 0, len;
43141
43142 if (!INSN_P (insn) || !active_insn_p (insn))
43143 return 0;
43144
43145 /* Discard alignments we've emitted and jump instructions. */
43146 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43147 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43148 return 0;
43149
43150 /* Important case - calls are always 5 bytes.
43151 It is common to have many calls in a row. */
43152 if (CALL_P (insn)
43153 && symbolic_reference_mentioned_p (PATTERN (insn))
43154 && !SIBLING_CALL_P (insn))
43155 return 5;
43156 len = get_attr_length (insn);
43157 if (len <= 1)
43158 return 1;
43159
43160 /* For normal instructions we rely on get_attr_length being exact,
43161 with a few exceptions. */
43162 if (!JUMP_P (insn))
43163 {
43164 enum attr_type type = get_attr_type (insn);
43165
43166 switch (type)
43167 {
43168 case TYPE_MULTI:
43169 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43170 || asm_noperands (PATTERN (insn)) >= 0)
43171 return 0;
43172 break;
43173 case TYPE_OTHER:
43174 case TYPE_FCMP:
43175 break;
43176 default:
43177 /* Otherwise trust get_attr_length. */
43178 return len;
43179 }
43180
43181 l = get_attr_length_address (insn);
43182 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43183 l = 4;
43184 }
43185 if (l)
43186 return 1+l;
43187 else
43188 return 2;
43189 }
43190
43191 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43192
43193 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
43194 window. */
43195
43196 static void
43197 ix86_avoid_jump_mispredicts (void)
43198 {
43199 rtx_insn *insn, *start = get_insns ();
43200 int nbytes = 0, njumps = 0;
43201 bool isjump = false;
43202
43203 /* Look for all minimal intervals of instructions containing 4 jumps.
43204 The intervals are bounded by START and INSN. NBYTES is the total
43205 size of instructions in the interval including INSN and not including
43206 START. When the NBYTES is smaller than 16 bytes, it is possible
43207 that the end of START and INSN ends up in the same 16byte page.
43208
43209 The smallest offset in the page INSN can start is the case where START
43210 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
43211 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
43212
43213 Don't consider an asm goto as a jump: while it can contain a jump, it
43214 doesn't have to, control transfer to its label(s) can be performed through
43215 other means, and we also estimate the minimum length of all asm stmts as 0. */
43216 for (insn = start; insn; insn = NEXT_INSN (insn))
43217 {
43218 int min_size;
43219
43220 if (LABEL_P (insn))
43221 {
43222 int align = label_to_alignment (insn);
43223 int max_skip = label_to_max_skip (insn);
43224
43225 if (max_skip > 15)
43226 max_skip = 15;
43227 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43228 already in the current 16 byte page, because otherwise
43229 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43230 bytes to reach 16 byte boundary. */
43231 if (align <= 0
43232 || (align <= 3 && max_skip != (1 << align) - 1))
43233 max_skip = 0;
43234 if (dump_file)
43235 fprintf (dump_file, "Label %i with max_skip %i\n",
43236 INSN_UID (insn), max_skip);
43237 if (max_skip)
43238 {
43239 while (nbytes + max_skip >= 16)
43240 {
43241 start = NEXT_INSN (start);
43242 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43243 || CALL_P (start))
43244 njumps--, isjump = true;
43245 else
43246 isjump = false;
43247 nbytes -= min_insn_size (start);
43248 }
43249 }
43250 continue;
43251 }
43252
43253 min_size = min_insn_size (insn);
43254 nbytes += min_size;
43255 if (dump_file)
43256 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43257 INSN_UID (insn), min_size);
43258 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
43259 || CALL_P (insn))
43260 njumps++;
43261 else
43262 continue;
43263
43264 while (njumps > 3)
43265 {
43266 start = NEXT_INSN (start);
43267 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43268 || CALL_P (start))
43269 njumps--, isjump = true;
43270 else
43271 isjump = false;
43272 nbytes -= min_insn_size (start);
43273 }
43274 gcc_assert (njumps >= 0);
43275 if (dump_file)
43276 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43277 INSN_UID (start), INSN_UID (insn), nbytes);
43278
43279 if (njumps == 3 && isjump && nbytes < 16)
43280 {
43281 int padsize = 15 - nbytes + min_insn_size (insn);
43282
43283 if (dump_file)
43284 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43285 INSN_UID (insn), padsize);
43286 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43287 }
43288 }
43289 }
43290 #endif
43291
43292 /* AMD Athlon works faster
43293 when RET is not the destination of a conditional jump or directly preceded
43294 by another jump instruction. We avoid the penalty by inserting a NOP just
43295 before the RET instructions in such cases. */
43296 static void
43297 ix86_pad_returns (void)
43298 {
43299 edge e;
43300 edge_iterator ei;
43301
43302 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43303 {
43304 basic_block bb = e->src;
43305 rtx_insn *ret = BB_END (bb);
43306 rtx_insn *prev;
43307 bool replace = false;
43308
43309 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43310 || optimize_bb_for_size_p (bb))
43311 continue;
43312 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43313 if (active_insn_p (prev) || LABEL_P (prev))
43314 break;
43315 if (prev && LABEL_P (prev))
43316 {
43317 edge e;
43318 edge_iterator ei;
43319
43320 FOR_EACH_EDGE (e, ei, bb->preds)
43321 if (EDGE_FREQUENCY (e) && e->src->index >= 0
43322 && !(e->flags & EDGE_FALLTHRU))
43323 {
43324 replace = true;
43325 break;
43326 }
43327 }
43328 if (!replace)
43329 {
43330 prev = prev_active_insn (ret);
43331 if (prev
43332 && ((JUMP_P (prev) && any_condjump_p (prev))
43333 || CALL_P (prev)))
43334 replace = true;
43335 /* Empty functions get a branch mispredict even when
43336 the jump destination is not visible to us. */
43337 if (!prev && !optimize_function_for_size_p (cfun))
43338 replace = true;
43339 }
43340 if (replace)
43341 {
43342 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43343 delete_insn (ret);
43344 }
43345 }
43346 }
43347
43348 /* Count the minimum number of instructions in BB. Return 4 if the
43349 number of instructions >= 4. */
43350
43351 static int
43352 ix86_count_insn_bb (basic_block bb)
43353 {
43354 rtx_insn *insn;
43355 int insn_count = 0;
43356
43357 /* Count number of instructions in this block. Return 4 if the number
43358 of instructions >= 4. */
43359 FOR_BB_INSNS (bb, insn)
43360 {
43361 /* This only happens in exit blocks. */
43362 if (JUMP_P (insn)
43363 && ANY_RETURN_P (PATTERN (insn)))
43364 break;
43365
43366 if (NONDEBUG_INSN_P (insn)
43367 && GET_CODE (PATTERN (insn)) != USE
43368 && GET_CODE (PATTERN (insn)) != CLOBBER)
43369 {
43370 insn_count++;
43371 if (insn_count >= 4)
43372 return insn_count;
43373 }
43374 }
43375
43376 return insn_count;
43377 }
43378
43379
43380 /* Count the minimum number of instructions in code path in BB.
43381 Return 4 if the number of instructions >= 4. */
43382
43383 static int
43384 ix86_count_insn (basic_block bb)
43385 {
43386 edge e;
43387 edge_iterator ei;
43388 int min_prev_count;
43389
43390 /* Only bother counting instructions along paths with no
43391 more than 2 basic blocks between entry and exit. Given
43392 that BB has an edge to exit, determine if a predecessor
43393 of BB has an edge from entry. If so, compute the number
43394 of instructions in the predecessor block. If there
43395 happen to be multiple such blocks, compute the minimum. */
43396 min_prev_count = 4;
43397 FOR_EACH_EDGE (e, ei, bb->preds)
43398 {
43399 edge prev_e;
43400 edge_iterator prev_ei;
43401
43402 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43403 {
43404 min_prev_count = 0;
43405 break;
43406 }
43407 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43408 {
43409 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43410 {
43411 int count = ix86_count_insn_bb (e->src);
43412 if (count < min_prev_count)
43413 min_prev_count = count;
43414 break;
43415 }
43416 }
43417 }
43418
43419 if (min_prev_count < 4)
43420 min_prev_count += ix86_count_insn_bb (bb);
43421
43422 return min_prev_count;
43423 }
43424
43425 /* Pad short function to 4 instructions. */
43426
43427 static void
43428 ix86_pad_short_function (void)
43429 {
43430 edge e;
43431 edge_iterator ei;
43432
43433 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43434 {
43435 rtx_insn *ret = BB_END (e->src);
43436 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43437 {
43438 int insn_count = ix86_count_insn (e->src);
43439
43440 /* Pad short function. */
43441 if (insn_count < 4)
43442 {
43443 rtx_insn *insn = ret;
43444
43445 /* Find epilogue. */
43446 while (insn
43447 && (!NOTE_P (insn)
43448 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43449 insn = PREV_INSN (insn);
43450
43451 if (!insn)
43452 insn = ret;
43453
43454 /* Two NOPs count as one instruction. */
43455 insn_count = 2 * (4 - insn_count);
43456 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
43457 }
43458 }
43459 }
43460 }
43461
43462 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43463 the epilogue, the Windows system unwinder will apply epilogue logic and
43464 produce incorrect offsets. This can be avoided by adding a nop between
43465 the last insn that can throw and the first insn of the epilogue. */
43466
43467 static void
43468 ix86_seh_fixup_eh_fallthru (void)
43469 {
43470 edge e;
43471 edge_iterator ei;
43472
43473 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43474 {
43475 rtx_insn *insn, *next;
43476
43477 /* Find the beginning of the epilogue. */
43478 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43479 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43480 break;
43481 if (insn == NULL)
43482 continue;
43483
43484 /* We only care about preceding insns that can throw. */
43485 insn = prev_active_insn (insn);
43486 if (insn == NULL || !can_throw_internal (insn))
43487 continue;
43488
43489 /* Do not separate calls from their debug information. */
43490 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43491 if (NOTE_P (next)
43492 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43493 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
43494 insn = next;
43495 else
43496 break;
43497
43498 emit_insn_after (gen_nops (const1_rtx), insn);
43499 }
43500 }
43501
43502 /* Implement machine specific optimizations. We implement padding of returns
43503 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
43504 static void
43505 ix86_reorg (void)
43506 {
43507 /* We are freeing block_for_insn in the toplev to keep compatibility
43508 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43509 compute_bb_for_insn ();
43510
43511 if (TARGET_SEH && current_function_has_exception_handlers ())
43512 ix86_seh_fixup_eh_fallthru ();
43513
43514 if (optimize && optimize_function_for_speed_p (cfun))
43515 {
43516 if (TARGET_PAD_SHORT_FUNCTION)
43517 ix86_pad_short_function ();
43518 else if (TARGET_PAD_RETURNS)
43519 ix86_pad_returns ();
43520 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43521 if (TARGET_FOUR_JUMP_LIMIT)
43522 ix86_avoid_jump_mispredicts ();
43523 #endif
43524 }
43525 }
43526
43527 /* Return nonzero when a QImode register that must be represented via a REX
43528 prefix is used. */
43529 bool
43530 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43531 {
43532 int i;
43533 extract_insn_cached (insn);
43534 for (i = 0; i < recog_data.n_operands; i++)
43535 if (GENERAL_REG_P (recog_data.operand[i])
43536 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43537 return true;
43538 return false;
43539 }
43540
43541 /* Return true when INSN mentions register that must be encoded using REX
43542 prefix. */
43543 bool
43544 x86_extended_reg_mentioned_p (rtx insn)
43545 {
43546 subrtx_iterator::array_type array;
43547 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43548 {
43549 const_rtx x = *iter;
43550 if (REG_P (x)
43551 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43552 return true;
43553 }
43554 return false;
43555 }
43556
43557 /* If profitable, negate (without causing overflow) integer constant
43558 of mode MODE at location LOC. Return true in this case. */
43559 bool
43560 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43561 {
43562 HOST_WIDE_INT val;
43563
43564 if (!CONST_INT_P (*loc))
43565 return false;
43566
43567 switch (mode)
43568 {
43569 case DImode:
43570 /* DImode x86_64 constants must fit in 32 bits. */
43571 gcc_assert (x86_64_immediate_operand (*loc, mode));
43572
43573 mode = SImode;
43574 break;
43575
43576 case SImode:
43577 case HImode:
43578 case QImode:
43579 break;
43580
43581 default:
43582 gcc_unreachable ();
43583 }
43584
43585 /* Avoid overflows. */
43586 if (mode_signbit_p (mode, *loc))
43587 return false;
43588
43589 val = INTVAL (*loc);
43590
43591 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
43592 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
43593 if ((val < 0 && val != -128)
43594 || val == 128)
43595 {
43596 *loc = GEN_INT (-val);
43597 return true;
43598 }
43599
43600 return false;
43601 }
43602
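/* Worked trace of the heuristic above (illustrative): (const_int -4) in
   SImode is rewritten to 4 and the function returns true, letting the
   caller emit "subl $4, %eax" instead of "addl $-4, %eax"; (const_int 128)
   is likewise rewritten to -128, which fits a sign-extended 8-bit
   immediate while +128 does not.  (const_int 4) and (const_int -128) are
   left alone.  */
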
43603 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43604 optabs would emit if we didn't have TFmode patterns. */
43605
43606 void
43607 x86_emit_floatuns (rtx operands[2])
43608 {
43609 rtx_code_label *neglab, *donelab;
43610 rtx i0, i1, f0, in, out;
43611 machine_mode mode, inmode;
43612
43613 inmode = GET_MODE (operands[1]);
43614 gcc_assert (inmode == SImode || inmode == DImode);
43615
43616 out = operands[0];
43617 in = force_reg (inmode, operands[1]);
43618 mode = GET_MODE (out);
43619 neglab = gen_label_rtx ();
43620 donelab = gen_label_rtx ();
43621 f0 = gen_reg_rtx (mode);
43622
43623 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43624
43625 expand_float (out, in, 0);
43626
43627 emit_jump_insn (gen_jump (donelab));
43628 emit_barrier ();
43629
43630 emit_label (neglab);
43631
43632 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43633 1, OPTAB_DIRECT);
43634 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43635 1, OPTAB_DIRECT);
43636 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43637
43638 expand_float (f0, i0, 0);
43639
43640 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
43641
43642 emit_label (donelab);
43643 }
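
/* The sequence emitted above is the usual unsigned-to-float idiom; an
   equivalent C sketch for the DImode case (illustrative) is:

     double floatuns (unsigned long long u)
     {
       if ((long long) u >= 0)
         return (double) (long long) u;
       // Halve, keeping the low bit so the final rounding stays correct.
       unsigned long long half = (u >> 1) | (u & 1);
       return 2.0 * (double) (long long) half;
     }
*/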
43644 \f
43645 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43646 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43647 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43648 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43649
43650 /* Get a vector mode of the same size as the original but with elements
43651 twice as wide. This is only guaranteed to apply to integral vectors. */
43652
43653 static inline machine_mode
43654 get_mode_wider_vector (machine_mode o)
43655 {
43656 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43657 machine_mode n = GET_MODE_WIDER_MODE (o);
43658 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43659 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43660 return n;
43661 }
43662
43663 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
43664 fill target with val via vec_duplicate. */
43665
43666 static bool
43667 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43668 {
43669 bool ok;
43670 rtx_insn *insn;
43671 rtx dup;
43672
43673 /* First attempt to recognize VAL as-is. */
43674 dup = gen_rtx_VEC_DUPLICATE (mode, val);
43675 insn = emit_insn (gen_rtx_SET (target, dup));
43676 if (recog_memoized (insn) < 0)
43677 {
43678 rtx_insn *seq;
43679 /* If that fails, force VAL into a register. */
43680
43681 start_sequence ();
43682 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
43683 seq = get_insns ();
43684 end_sequence ();
43685 if (seq)
43686 emit_insn_before (seq, insn);
43687
43688 ok = recog_memoized (insn) >= 0;
43689 gcc_assert (ok);
43690 }
43691 return true;
43692 }
43693
43694 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43695 with all elements equal to VAR. Return true if successful. */
43696
43697 static bool
43698 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
43699 rtx target, rtx val)
43700 {
43701 bool ok;
43702
43703 switch (mode)
43704 {
43705 case V2SImode:
43706 case V2SFmode:
43707 if (!mmx_ok)
43708 return false;
43709 /* FALLTHRU */
43710
43711 case V4DFmode:
43712 case V4DImode:
43713 case V8SFmode:
43714 case V8SImode:
43715 case V2DFmode:
43716 case V2DImode:
43717 case V4SFmode:
43718 case V4SImode:
43719 case V16SImode:
43720 case V8DImode:
43721 case V16SFmode:
43722 case V8DFmode:
43723 return ix86_vector_duplicate_value (mode, target, val);
43724
43725 case V4HImode:
43726 if (!mmx_ok)
43727 return false;
43728 if (TARGET_SSE || TARGET_3DNOW_A)
43729 {
43730 rtx x;
43731
43732 val = gen_lowpart (SImode, val);
43733 x = gen_rtx_TRUNCATE (HImode, val);
43734 x = gen_rtx_VEC_DUPLICATE (mode, x);
43735 emit_insn (gen_rtx_SET (target, x));
43736 return true;
43737 }
43738 goto widen;
43739
43740 case V8QImode:
43741 if (!mmx_ok)
43742 return false;
43743 goto widen;
43744
43745 case V8HImode:
43746 if (TARGET_AVX2)
43747 return ix86_vector_duplicate_value (mode, target, val);
43748
43749 if (TARGET_SSE2)
43750 {
43751 struct expand_vec_perm_d dperm;
43752 rtx tmp1, tmp2;
43753
43754 permute:
43755 memset (&dperm, 0, sizeof (dperm));
43756 dperm.target = target;
43757 dperm.vmode = mode;
43758 dperm.nelt = GET_MODE_NUNITS (mode);
43759 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
43760 dperm.one_operand_p = true;
43761
43762 /* Extend to SImode using a paradoxical SUBREG. */
43763 tmp1 = gen_reg_rtx (SImode);
43764 emit_move_insn (tmp1, gen_lowpart (SImode, val));
43765
43766 /* Insert the SImode value as low element of a V4SImode vector. */
43767 tmp2 = gen_reg_rtx (V4SImode);
43768 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
43769 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
43770
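	  /* Note: dperm.perm[] was zeroed by the memset above, so the
	     permutation selects element 0 for every result lane, i.e. it
	     broadcasts the value just inserted into op0.  */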
43771 ok = (expand_vec_perm_1 (&dperm)
43772 || expand_vec_perm_broadcast_1 (&dperm));
43773 gcc_assert (ok);
43774 return ok;
43775 }
43776 goto widen;
43777
43778 case V16QImode:
43779 if (TARGET_AVX2)
43780 return ix86_vector_duplicate_value (mode, target, val);
43781
43782 if (TARGET_SSE2)
43783 goto permute;
43784 goto widen;
43785
43786 widen:
43787 /* Replicate the value once into the next wider mode and recurse. */
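      /* For example, starting from V16QImode this builds (val << 8) | val
	 in HImode and then broadcasts that value into a V8HImode vector.  */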
43788 {
43789 machine_mode smode, wsmode, wvmode;
43790 rtx x;
43791
43792 smode = GET_MODE_INNER (mode);
43793 wvmode = get_mode_wider_vector (mode);
43794 wsmode = GET_MODE_INNER (wvmode);
43795
43796 val = convert_modes (wsmode, smode, val, true);
43797 x = expand_simple_binop (wsmode, ASHIFT, val,
43798 GEN_INT (GET_MODE_BITSIZE (smode)),
43799 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43800 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
43801
43802 x = gen_reg_rtx (wvmode);
43803 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
43804 gcc_assert (ok);
43805 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
43806 return ok;
43807 }
43808
43809 case V16HImode:
43810 case V32QImode:
43811 if (TARGET_AVX2)
43812 return ix86_vector_duplicate_value (mode, target, val);
43813 else
43814 {
43815 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
43816 rtx x = gen_reg_rtx (hvmode);
43817
43818 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43819 gcc_assert (ok);
43820
43821 x = gen_rtx_VEC_CONCAT (mode, x, x);
43822 emit_insn (gen_rtx_SET (target, x));
43823 }
43824 return true;
43825
43826 case V64QImode:
43827 case V32HImode:
43828 if (TARGET_AVX512BW)
43829 return ix86_vector_duplicate_value (mode, target, val);
43830 else
43831 {
43832 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
43833 rtx x = gen_reg_rtx (hvmode);
43834
43835 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43836 gcc_assert (ok);
43837
43838 x = gen_rtx_VEC_CONCAT (mode, x, x);
43839 emit_insn (gen_rtx_SET (target, x));
43840 }
43841 return true;
43842
43843 default:
43844 return false;
43845 }
43846 }
43847
43848 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43849 whose ONE_VAR element is VAR, and other elements are zero. Return true
43850 if successful. */
43851
43852 static bool
43853 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
43854 rtx target, rtx var, int one_var)
43855 {
43856 machine_mode vsimode;
43857 rtx new_target;
43858 rtx x, tmp;
43859 bool use_vector_set = false;
43860
43861 switch (mode)
43862 {
43863 case V2DImode:
43864 /* For SSE4.1, we normally use vector set. But if the second
43865 element is zero and inter-unit moves are OK, we use movq
43866 instead. */
43867 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
43868 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
43869 && one_var == 0));
43870 break;
43871 case V16QImode:
43872 case V4SImode:
43873 case V4SFmode:
43874 use_vector_set = TARGET_SSE4_1;
43875 break;
43876 case V8HImode:
43877 use_vector_set = TARGET_SSE2;
43878 break;
43879 case V4HImode:
43880 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
43881 break;
43882 case V32QImode:
43883 case V16HImode:
43884 case V8SImode:
43885 case V8SFmode:
43886 case V4DFmode:
43887 use_vector_set = TARGET_AVX;
43888 break;
43889 case V4DImode:
43890 /* Use ix86_expand_vector_set in 64bit mode only. */
43891 use_vector_set = TARGET_AVX && TARGET_64BIT;
43892 break;
43893 default:
43894 break;
43895 }
43896
43897 if (use_vector_set)
43898 {
43899 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
43900 var = force_reg (GET_MODE_INNER (mode), var);
43901 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43902 return true;
43903 }
43904
43905 switch (mode)
43906 {
43907 case V2SFmode:
43908 case V2SImode:
43909 if (!mmx_ok)
43910 return false;
43911 /* FALLTHRU */
43912
43913 case V2DFmode:
43914 case V2DImode:
43915 if (one_var != 0)
43916 return false;
43917 var = force_reg (GET_MODE_INNER (mode), var);
43918 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
43919 emit_insn (gen_rtx_SET (target, x));
43920 return true;
43921
43922 case V4SFmode:
43923 case V4SImode:
43924 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
43925 new_target = gen_reg_rtx (mode);
43926 else
43927 new_target = target;
43928 var = force_reg (GET_MODE_INNER (mode), var);
43929 x = gen_rtx_VEC_DUPLICATE (mode, var);
43930 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
43931 emit_insn (gen_rtx_SET (new_target, x));
43932 if (one_var != 0)
43933 {
43934 /* We need to shuffle the value to the correct position, so
43935 create a new pseudo to store the intermediate result. */
43936
43937 /* With SSE2, we can use the integer shuffle insns. */
43938 if (mode != V4SFmode && TARGET_SSE2)
43939 {
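	      /* For example, with one_var == 2 the selector below is
		 (1, 1, 0, 1), turning (X, 0, 0, 0) into (0, 0, X, 0).  */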
43940 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
43941 const1_rtx,
43942 GEN_INT (one_var == 1 ? 0 : 1),
43943 GEN_INT (one_var == 2 ? 0 : 1),
43944 GEN_INT (one_var == 3 ? 0 : 1)));
43945 if (target != new_target)
43946 emit_move_insn (target, new_target);
43947 return true;
43948 }
43949
43950 /* Otherwise convert the intermediate result to V4SFmode and
43951 use the SSE1 shuffle instructions. */
43952 if (mode != V4SFmode)
43953 {
43954 tmp = gen_reg_rtx (V4SFmode);
43955 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
43956 }
43957 else
43958 tmp = new_target;
43959
43960 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
43961 const1_rtx,
43962 GEN_INT (one_var == 1 ? 0 : 1),
43963 GEN_INT (one_var == 2 ? 0+4 : 1+4),
43964 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
43965
43966 if (mode != V4SFmode)
43967 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
43968 else if (tmp != target)
43969 emit_move_insn (target, tmp);
43970 }
43971 else if (target != new_target)
43972 emit_move_insn (target, new_target);
43973 return true;
43974
43975 case V8HImode:
43976 case V16QImode:
43977 vsimode = V4SImode;
43978 goto widen;
43979 case V4HImode:
43980 case V8QImode:
43981 if (!mmx_ok)
43982 return false;
43983 vsimode = V2SImode;
43984 goto widen;
43985 widen:
43986 if (one_var != 0)
43987 return false;
43988
43989 /* Zero extend the variable element to SImode and recurse. */
43990 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
43991
43992 x = gen_reg_rtx (vsimode);
43993 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
43994 var, one_var))
43995 gcc_unreachable ();
43996
43997 emit_move_insn (target, gen_lowpart (mode, x));
43998 return true;
43999
44000 default:
44001 return false;
44002 }
44003 }
44004
44005 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
44006 consisting of the values in VALS. It is known that all elements
44007 except ONE_VAR are constants. Return true if successful. */
44008
44009 static bool
44010 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
44011 rtx target, rtx vals, int one_var)
44012 {
44013 rtx var = XVECEXP (vals, 0, one_var);
44014 machine_mode wmode;
44015 rtx const_vec, x;
44016
44017 const_vec = copy_rtx (vals);
44018 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
44019 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
44020
44021 switch (mode)
44022 {
44023 case V2DFmode:
44024 case V2DImode:
44025 case V2SFmode:
44026 case V2SImode:
44027 /* For the two element vectors, it's just as easy to use
44028 the general case. */
44029 return false;
44030
44031 case V4DImode:
44032 /* Use ix86_expand_vector_set in 64bit mode only. */
44033 if (!TARGET_64BIT)
44034 return false;
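      /* FALLTHRU */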
44035 case V4DFmode:
44036 case V8SFmode:
44037 case V8SImode:
44038 case V16HImode:
44039 case V32QImode:
44040 case V4SFmode:
44041 case V4SImode:
44042 case V8HImode:
44043 case V4HImode:
44044 break;
44045
44046 case V16QImode:
44047 if (TARGET_SSE4_1)
44048 break;
44049 wmode = V8HImode;
44050 goto widen;
44051 case V8QImode:
44052 wmode = V4HImode;
44053 goto widen;
44054 widen:
44055 /* There's no way to set one QImode entry easily. Combine
44056 the variable value with its adjacent constant value, and
44057 promote to an HImode set. */
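      /* For example, with one_var == 5 (an odd index) the pair is elements
	 4 and 5: the HImode value becomes (var << 8) | (vals[4] & 0xff),
	 stored as HImode element 2 of the widened vector.  */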
44058 x = XVECEXP (vals, 0, one_var ^ 1);
44059 if (one_var & 1)
44060 {
44061 var = convert_modes (HImode, QImode, var, true);
44062 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
44063 NULL_RTX, 1, OPTAB_LIB_WIDEN);
44064 x = GEN_INT (INTVAL (x) & 0xff);
44065 }
44066 else
44067 {
44068 var = convert_modes (HImode, QImode, var, true);
44069 x = gen_int_mode (INTVAL (x) << 8, HImode);
44070 }
44071 if (x != const0_rtx)
44072 var = expand_simple_binop (HImode, IOR, var, x, var,
44073 1, OPTAB_LIB_WIDEN);
44074
44075 x = gen_reg_rtx (wmode);
44076 emit_move_insn (x, gen_lowpart (wmode, const_vec));
44077 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
44078
44079 emit_move_insn (target, gen_lowpart (mode, x));
44080 return true;
44081
44082 default:
44083 return false;
44084 }
44085
44086 emit_move_insn (target, const_vec);
44087 ix86_expand_vector_set (mmx_ok, target, var, one_var);
44088 return true;
44089 }
44090
44091 /* A subroutine of ix86_expand_vector_init_general. Use vector
44092 concatenate to handle the most general case: all values variable,
44093 and none identical. */
44094
44095 static void
44096 ix86_expand_vector_init_concat (machine_mode mode,
44097 rtx target, rtx *ops, int n)
44098 {
44099 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
44100 rtx first[16], second[8], third[4];
44101 rtvec v;
44102 int i, j;
44103
44104 switch (n)
44105 {
44106 case 2:
44107 switch (mode)
44108 {
44109 case V16SImode:
44110 cmode = V8SImode;
44111 break;
44112 case V16SFmode:
44113 cmode = V8SFmode;
44114 break;
44115 case V8DImode:
44116 cmode = V4DImode;
44117 break;
44118 case V8DFmode:
44119 cmode = V4DFmode;
44120 break;
44121 case V8SImode:
44122 cmode = V4SImode;
44123 break;
44124 case V8SFmode:
44125 cmode = V4SFmode;
44126 break;
44127 case V4DImode:
44128 cmode = V2DImode;
44129 break;
44130 case V4DFmode:
44131 cmode = V2DFmode;
44132 break;
44133 case V4SImode:
44134 cmode = V2SImode;
44135 break;
44136 case V4SFmode:
44137 cmode = V2SFmode;
44138 break;
44139 case V2DImode:
44140 cmode = DImode;
44141 break;
44142 case V2SImode:
44143 cmode = SImode;
44144 break;
44145 case V2DFmode:
44146 cmode = DFmode;
44147 break;
44148 case V2SFmode:
44149 cmode = SFmode;
44150 break;
44151 default:
44152 gcc_unreachable ();
44153 }
44154
44155 if (!register_operand (ops[1], cmode))
44156 ops[1] = force_reg (cmode, ops[1]);
44157 if (!register_operand (ops[0], cmode))
44158 ops[0] = force_reg (cmode, ops[0]);
44159 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
44160 ops[1])));
44161 break;
44162
44163 case 4:
44164 switch (mode)
44165 {
44166 case V4DImode:
44167 cmode = V2DImode;
44168 break;
44169 case V4DFmode:
44170 cmode = V2DFmode;
44171 break;
44172 case V4SImode:
44173 cmode = V2SImode;
44174 break;
44175 case V4SFmode:
44176 cmode = V2SFmode;
44177 break;
44178 default:
44179 gcc_unreachable ();
44180 }
44181 goto half;
44182
44183 case 8:
44184 switch (mode)
44185 {
44186 case V8DImode:
44187 cmode = V2DImode;
44188 hmode = V4DImode;
44189 break;
44190 case V8DFmode:
44191 cmode = V2DFmode;
44192 hmode = V4DFmode;
44193 break;
44194 case V8SImode:
44195 cmode = V2SImode;
44196 hmode = V4SImode;
44197 break;
44198 case V8SFmode:
44199 cmode = V2SFmode;
44200 hmode = V4SFmode;
44201 break;
44202 default:
44203 gcc_unreachable ();
44204 }
44205 goto half;
44206
44207 case 16:
44208 switch (mode)
44209 {
44210 case V16SImode:
44211 cmode = V2SImode;
44212 hmode = V4SImode;
44213 gmode = V8SImode;
44214 break;
44215 case V16SFmode:
44216 cmode = V2SFmode;
44217 hmode = V4SFmode;
44218 gmode = V8SFmode;
44219 break;
44220 default:
44221 gcc_unreachable ();
44222 }
44223 goto half;
44224
44225 half:
44226 /* FIXME: We process inputs backward to help RA. PR 36222. */
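      /* For example, with n == 8 and V8SFmode the eight scalar operands are
	 first combined pairwise into four V2SFmode registers, those into two
	 V4SFmode registers, and finally into the V8SFmode target.  */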
44227 i = n - 1;
44228 j = (n >> 1) - 1;
44229 for (; i > 0; i -= 2, j--)
44230 {
44231 first[j] = gen_reg_rtx (cmode);
44232 v = gen_rtvec (2, ops[i - 1], ops[i]);
44233 ix86_expand_vector_init (false, first[j],
44234 gen_rtx_PARALLEL (cmode, v));
44235 }
44236
44237 n >>= 1;
44238 if (n > 4)
44239 {
44240 gcc_assert (hmode != VOIDmode);
44241 gcc_assert (gmode != VOIDmode);
44242 for (i = j = 0; i < n; i += 2, j++)
44243 {
44244 second[j] = gen_reg_rtx (hmode);
44245 ix86_expand_vector_init_concat (hmode, second [j],
44246 &first [i], 2);
44247 }
44248 n >>= 1;
44249 for (i = j = 0; i < n; i += 2, j++)
44250 {
44251 third[j] = gen_reg_rtx (gmode);
44252 ix86_expand_vector_init_concat (gmode, third[j],
44253 &second[i], 2);
44254 }
44255 n >>= 1;
44256 ix86_expand_vector_init_concat (mode, target, third, n);
44257 }
44258 else if (n > 2)
44259 {
44260 gcc_assert (hmode != VOIDmode);
44261 for (i = j = 0; i < n; i += 2, j++)
44262 {
44263 second[j] = gen_reg_rtx (hmode);
44264 ix86_expand_vector_init_concat (hmode, second [j],
44265 &first [i], 2);
44266 }
44267 n >>= 1;
44268 ix86_expand_vector_init_concat (mode, target, second, n);
44269 }
44270 else
44271 ix86_expand_vector_init_concat (mode, target, first, n);
44272 break;
44273
44274 default:
44275 gcc_unreachable ();
44276 }
44277 }
44278
44279 /* A subroutine of ix86_expand_vector_init_general. Use vector
44280 interleave to handle the most general case: all values variable,
44281 and none identical. */
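/* Roughly: each loop iteration below packs two adjacent scalar elements into
   the low part of a fresh vector, and the interleave-low steps then zip those
   vectors together pairwise until the full result is assembled.  */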
44282
44283 static void
44284 ix86_expand_vector_init_interleave (machine_mode mode,
44285 rtx target, rtx *ops, int n)
44286 {
44287 machine_mode first_imode, second_imode, third_imode, inner_mode;
44288 int i, j;
44289 rtx op0, op1;
44290 rtx (*gen_load_even) (rtx, rtx, rtx);
44291 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44292 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
44293
44294 switch (mode)
44295 {
44296 case V8HImode:
44297 gen_load_even = gen_vec_setv8hi;
44298 gen_interleave_first_low = gen_vec_interleave_lowv4si;
44299 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44300 inner_mode = HImode;
44301 first_imode = V4SImode;
44302 second_imode = V2DImode;
44303 third_imode = VOIDmode;
44304 break;
44305 case V16QImode:
44306 gen_load_even = gen_vec_setv16qi;
44307 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44308 gen_interleave_second_low = gen_vec_interleave_lowv4si;
44309 inner_mode = QImode;
44310 first_imode = V8HImode;
44311 second_imode = V4SImode;
44312 third_imode = V2DImode;
44313 break;
44314 default:
44315 gcc_unreachable ();
44316 }
44317
44318 for (i = 0; i < n; i++)
44319 {
44320 /* Extend the odd element to SImode using a paradoxical SUBREG. */
44321 op0 = gen_reg_rtx (SImode);
44322 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44323
44324 /* Insert the SImode value as low element of V4SImode vector. */
44325 op1 = gen_reg_rtx (V4SImode);
44326 op0 = gen_rtx_VEC_MERGE (V4SImode,
44327 gen_rtx_VEC_DUPLICATE (V4SImode,
44328 op0),
44329 CONST0_RTX (V4SImode),
44330 const1_rtx);
44331 emit_insn (gen_rtx_SET (op1, op0));
44332
44333 /* Cast the V4SImode vector back to a vector in the original mode. */
44334 op0 = gen_reg_rtx (mode);
44335 emit_move_insn (op0, gen_lowpart (mode, op1));
44336
44337 /* Load even elements into the second position. */
44338 emit_insn (gen_load_even (op0,
44339 force_reg (inner_mode,
44340 ops [i + i + 1]),
44341 const1_rtx));
44342
44343 /* Cast vector to FIRST_IMODE vector. */
44344 ops[i] = gen_reg_rtx (first_imode);
44345 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44346 }
44347
44348 /* Interleave low FIRST_IMODE vectors. */
44349 for (i = j = 0; i < n; i += 2, j++)
44350 {
44351 op0 = gen_reg_rtx (first_imode);
44352 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44353
44354 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44355 ops[j] = gen_reg_rtx (second_imode);
44356 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44357 }
44358
44359 /* Interleave low SECOND_IMODE vectors. */
44360 switch (second_imode)
44361 {
44362 case V4SImode:
44363 for (i = j = 0; i < n / 2; i += 2, j++)
44364 {
44365 op0 = gen_reg_rtx (second_imode);
44366 emit_insn (gen_interleave_second_low (op0, ops[i],
44367 ops[i + 1]));
44368
44369 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44370 vector. */
44371 ops[j] = gen_reg_rtx (third_imode);
44372 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
44373 }
44374 second_imode = V2DImode;
44375 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44376 /* FALLTHRU */
44377
44378 case V2DImode:
44379 op0 = gen_reg_rtx (second_imode);
44380 emit_insn (gen_interleave_second_low (op0, ops[0],
44381 ops[1]));
44382
44383 /* Cast the SECOND_IMODE vector back to a vector in the original
44384 mode. */
44385 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
44386 break;
44387
44388 default:
44389 gcc_unreachable ();
44390 }
44391 }
44392
44393 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
44394 all values variable, and none identical. */
44395
44396 static void
44397 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44398 rtx target, rtx vals)
44399 {
44400 rtx ops[64], op0, op1, op2, op3, op4, op5;
44401 machine_mode half_mode = VOIDmode;
44402 machine_mode quarter_mode = VOIDmode;
44403 int n, i;
44404
44405 switch (mode)
44406 {
44407 case V2SFmode:
44408 case V2SImode:
44409 if (!mmx_ok && !TARGET_SSE)
44410 break;
44411 /* FALLTHRU */
44412
44413 case V16SImode:
44414 case V16SFmode:
44415 case V8DFmode:
44416 case V8DImode:
44417 case V8SFmode:
44418 case V8SImode:
44419 case V4DFmode:
44420 case V4DImode:
44421 case V4SFmode:
44422 case V4SImode:
44423 case V2DFmode:
44424 case V2DImode:
44425 n = GET_MODE_NUNITS (mode);
44426 for (i = 0; i < n; i++)
44427 ops[i] = XVECEXP (vals, 0, i);
44428 ix86_expand_vector_init_concat (mode, target, ops, n);
44429 return;
44430
44431 case V32QImode:
44432 half_mode = V16QImode;
44433 goto half;
44434
44435 case V16HImode:
44436 half_mode = V8HImode;
44437 goto half;
44438
44439 half:
44440 n = GET_MODE_NUNITS (mode);
44441 for (i = 0; i < n; i++)
44442 ops[i] = XVECEXP (vals, 0, i);
44443 op0 = gen_reg_rtx (half_mode);
44444 op1 = gen_reg_rtx (half_mode);
44445 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44446 n >> 2);
44447 ix86_expand_vector_init_interleave (half_mode, op1,
44448 &ops [n >> 1], n >> 2);
44449 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
44450 return;
44451
44452 case V64QImode:
44453 quarter_mode = V16QImode;
44454 half_mode = V32QImode;
44455 goto quarter;
44456
44457 case V32HImode:
44458 quarter_mode = V8HImode;
44459 half_mode = V16HImode;
44460 goto quarter;
44461
44462 quarter:
44463 n = GET_MODE_NUNITS (mode);
44464 for (i = 0; i < n; i++)
44465 ops[i] = XVECEXP (vals, 0, i);
44466 op0 = gen_reg_rtx (quarter_mode);
44467 op1 = gen_reg_rtx (quarter_mode);
44468 op2 = gen_reg_rtx (quarter_mode);
44469 op3 = gen_reg_rtx (quarter_mode);
44470 op4 = gen_reg_rtx (half_mode);
44471 op5 = gen_reg_rtx (half_mode);
44472 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44473 n >> 3);
44474 ix86_expand_vector_init_interleave (quarter_mode, op1,
44475 &ops [n >> 2], n >> 3);
44476 ix86_expand_vector_init_interleave (quarter_mode, op2,
44477 &ops [n >> 1], n >> 3);
44478 ix86_expand_vector_init_interleave (quarter_mode, op3,
44479 &ops [(n >> 1) | (n >> 2)], n >> 3);
44480 emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44481 emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44482 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
44483 return;
44484
44485 case V16QImode:
44486 if (!TARGET_SSE4_1)
44487 break;
44488 /* FALLTHRU */
44489
44490 case V8HImode:
44491 if (!TARGET_SSE2)
44492 break;
44493
44494 /* Don't use ix86_expand_vector_init_interleave if we can't
44495 move from GPR to SSE register directly. */
44496 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44497 break;
44498
44499 n = GET_MODE_NUNITS (mode);
44500 for (i = 0; i < n; i++)
44501 ops[i] = XVECEXP (vals, 0, i);
44502 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44503 return;
44504
44505 case V4HImode:
44506 case V8QImode:
44507 break;
44508
44509 default:
44510 gcc_unreachable ();
44511 }
44512
44513 {
44514 int i, j, n_elts, n_words, n_elt_per_word;
44515 machine_mode inner_mode;
44516 rtx words[4], shift;
44517
44518 inner_mode = GET_MODE_INNER (mode);
44519 n_elts = GET_MODE_NUNITS (mode);
44520 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44521 n_elt_per_word = n_elts / n_words;
44522 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
44523
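    /* For example, for V4HImode with word_mode == SImode this builds two
       words; word 0 ends up as (elt1 << 16) | elt0, matching the vector's
       little-endian element layout.  */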
44524 for (i = 0; i < n_words; ++i)
44525 {
44526 rtx word = NULL_RTX;
44527
44528 for (j = 0; j < n_elt_per_word; ++j)
44529 {
44530 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44531 elt = convert_modes (word_mode, inner_mode, elt, true);
44532
44533 if (j == 0)
44534 word = elt;
44535 else
44536 {
44537 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44538 word, 1, OPTAB_LIB_WIDEN);
44539 word = expand_simple_binop (word_mode, IOR, word, elt,
44540 word, 1, OPTAB_LIB_WIDEN);
44541 }
44542 }
44543
44544 words[i] = word;
44545 }
44546
44547 if (n_words == 1)
44548 emit_move_insn (target, gen_lowpart (mode, words[0]));
44549 else if (n_words == 2)
44550 {
44551 rtx tmp = gen_reg_rtx (mode);
44552 emit_clobber (tmp);
44553 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44554 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44555 emit_move_insn (target, tmp);
44556 }
44557 else if (n_words == 4)
44558 {
44559 rtx tmp = gen_reg_rtx (V4SImode);
44560 gcc_assert (word_mode == SImode);
44561 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44562 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44563 emit_move_insn (target, gen_lowpart (mode, tmp));
44564 }
44565 else
44566 gcc_unreachable ();
44567 }
44568 }
44569
44570 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44571 instructions unless MMX_OK is true. */
44572
44573 void
44574 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44575 {
44576 machine_mode mode = GET_MODE (target);
44577 machine_mode inner_mode = GET_MODE_INNER (mode);
44578 int n_elts = GET_MODE_NUNITS (mode);
44579 int n_var = 0, one_var = -1;
44580 bool all_same = true, all_const_zero = true;
44581 int i;
44582 rtx x;
44583
44584 for (i = 0; i < n_elts; ++i)
44585 {
44586 x = XVECEXP (vals, 0, i);
44587 if (!(CONST_SCALAR_INT_P (x)
44588 || CONST_DOUBLE_P (x)
44589 || CONST_FIXED_P (x)))
44590 n_var++, one_var = i;
44591 else if (x != CONST0_RTX (inner_mode))
44592 all_const_zero = false;
44593 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44594 all_same = false;
44595 }
44596
44597 /* Constants are best loaded from the constant pool. */
44598 if (n_var == 0)
44599 {
44600 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44601 return;
44602 }
44603
44604 /* If all values are identical, broadcast the value. */
44605 if (all_same
44606 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44607 XVECEXP (vals, 0, 0)))
44608 return;
44609
44610 /* Values where only one field is non-constant are best loaded from
44611 the pool and overwritten via move later. */
44612 if (n_var == 1)
44613 {
44614 if (all_const_zero
44615 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44616 XVECEXP (vals, 0, one_var),
44617 one_var))
44618 return;
44619
44620 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
44621 return;
44622 }
44623
44624 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
44625 }
44626
44627 void
44628 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44629 {
44630 machine_mode mode = GET_MODE (target);
44631 machine_mode inner_mode = GET_MODE_INNER (mode);
44632 machine_mode half_mode;
44633 bool use_vec_merge = false;
44634 rtx tmp;
44635 static rtx (*gen_extract[6][2]) (rtx, rtx)
44636 = {
44637 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44638 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44639 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44640 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44641 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44642 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44643 };
44644 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44645 = {
44646 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44647 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44648 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44649 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44650 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44651 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
44652 };
44653 int i, j, n;
44654
44655 switch (mode)
44656 {
44657 case V2SFmode:
44658 case V2SImode:
44659 if (mmx_ok)
44660 {
44661 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44662 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44663 if (elt == 0)
44664 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44665 else
44666 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44667 emit_insn (gen_rtx_SET (target, tmp));
44668 return;
44669 }
44670 break;
44671
44672 case V2DImode:
44673 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
44674 if (use_vec_merge)
44675 break;
44676
44677 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44678 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
44679 if (elt == 0)
44680 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44681 else
44682 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44683 emit_insn (gen_rtx_SET (target, tmp));
44684 return;
44685
44686 case V2DFmode:
44687 {
44688 rtx op0, op1;
44689
44690 /* For the two element vectors, we implement a VEC_CONCAT with
44691 the extraction of the other element. */
44692
44693 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
44694 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
44695
44696 if (elt == 0)
44697 op0 = val, op1 = tmp;
44698 else
44699 op0 = tmp, op1 = val;
44700
44701 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
44702 emit_insn (gen_rtx_SET (target, tmp));
44703 }
44704 return;
44705
44706 case V4SFmode:
44707 use_vec_merge = TARGET_SSE4_1;
44708 if (use_vec_merge)
44709 break;
44710
44711 switch (elt)
44712 {
44713 case 0:
44714 use_vec_merge = true;
44715 break;
44716
44717 case 1:
44718 /* tmp = target = A B C D */
44719 tmp = copy_to_reg (target);
44720 /* target = A A B B */
44721 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
44722 /* target = X A B B */
44723 ix86_expand_vector_set (false, target, val, 0);
44724 /* target = A X C D */
44725 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44726 const1_rtx, const0_rtx,
44727 GEN_INT (2+4), GEN_INT (3+4)));
44728 return;
44729
44730 case 2:
44731 /* tmp = target = A B C D */
44732 tmp = copy_to_reg (target);
44733 /* tmp = X B C D */
44734 ix86_expand_vector_set (false, tmp, val, 0);
44735 /* target = A B X D */
44736 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44737 const0_rtx, const1_rtx,
44738 GEN_INT (0+4), GEN_INT (3+4)));
44739 return;
44740
44741 case 3:
44742 /* tmp = target = A B C D */
44743 tmp = copy_to_reg (target);
44744 /* tmp = X B C D */
44745 ix86_expand_vector_set (false, tmp, val, 0);
44746 /* target = A B C X */
44747 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44748 const0_rtx, const1_rtx,
44749 GEN_INT (2+4), GEN_INT (0+4)));
44750 return;
44751
44752 default:
44753 gcc_unreachable ();
44754 }
44755 break;
44756
44757 case V4SImode:
44758 use_vec_merge = TARGET_SSE4_1;
44759 if (use_vec_merge)
44760 break;
44761
44762 /* Element 0 handled by vec_merge below. */
44763 if (elt == 0)
44764 {
44765 use_vec_merge = true;
44766 break;
44767 }
44768
44769 if (TARGET_SSE2)
44770 {
44771 /* With SSE2, use integer shuffles to swap element 0 and ELT,
44772 store into element 0, then shuffle them back. */
44773
44774 rtx order[4];
44775
44776 order[0] = GEN_INT (elt);
44777 order[1] = const1_rtx;
44778 order[2] = const2_rtx;
44779 order[3] = GEN_INT (3);
44780 order[elt] = const0_rtx;
44781
44782 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44783 order[1], order[2], order[3]));
44784
44785 ix86_expand_vector_set (false, target, val, 0);
44786
44787 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44788 order[1], order[2], order[3]));
44789 }
44790 else
44791 {
44792 /* For SSE1, we have to reuse the V4SF code. */
44793 rtx t = gen_reg_rtx (V4SFmode);
44794 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
44795 emit_move_insn (target, gen_lowpart (mode, t));
44796 }
44797 return;
44798
44799 case V8HImode:
44800 use_vec_merge = TARGET_SSE2;
44801 break;
44802 case V4HImode:
44803 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44804 break;
44805
44806 case V16QImode:
44807 use_vec_merge = TARGET_SSE4_1;
44808 break;
44809
44810 case V8QImode:
44811 break;
44812
44813 case V32QImode:
44814 half_mode = V16QImode;
44815 j = 0;
44816 n = 16;
44817 goto half;
44818
44819 case V16HImode:
44820 half_mode = V8HImode;
44821 j = 1;
44822 n = 8;
44823 goto half;
44824
44825 case V8SImode:
44826 half_mode = V4SImode;
44827 j = 2;
44828 n = 4;
44829 goto half;
44830
44831 case V4DImode:
44832 half_mode = V2DImode;
44833 j = 3;
44834 n = 2;
44835 goto half;
44836
44837 case V8SFmode:
44838 half_mode = V4SFmode;
44839 j = 4;
44840 n = 4;
44841 goto half;
44842
44843 case V4DFmode:
44844 half_mode = V2DFmode;
44845 j = 5;
44846 n = 2;
44847 goto half;
44848
44849 half:
44850 /* Compute offset. */
44851 i = elt / n;
44852 elt %= n;
44853
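      /* For example, for V8SImode and elt == 6 this selects the high
	 V4SImode half (i == 1) and sets element 2 within it.  */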
44854 gcc_assert (i <= 1);
44855
44856 /* Extract the half. */
44857 tmp = gen_reg_rtx (half_mode);
44858 emit_insn (gen_extract[j][i] (tmp, target));
44859
44860 /* Put val in tmp at elt. */
44861 ix86_expand_vector_set (false, tmp, val, elt);
44862
44863 /* Put it back. */
44864 emit_insn (gen_insert[j][i] (target, target, tmp));
44865 return;
44866
44867 case V8DFmode:
44868 if (TARGET_AVX512F)
44869 {
44870 tmp = gen_reg_rtx (mode);
44871 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
44872 emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
44873 force_reg (QImode, GEN_INT (1 << elt))));
44874 return;
44875 }
44876 else
44877 break;
44878 case V8DImode:
44879 if (TARGET_AVX512F)
44880 {
44881 tmp = gen_reg_rtx (mode);
44882 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
44883 emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
44884 force_reg (QImode, GEN_INT (1 << elt))));
44885 return;
44886 }
44887 else
44888 break;
44889 case V16SFmode:
44890 if (TARGET_AVX512F)
44891 {
44892 tmp = gen_reg_rtx (mode);
44893 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
44894 emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
44895 force_reg (HImode, GEN_INT (1 << elt))));
44896 return;
44897 }
44898 else
44899 break;
44900 case V16SImode:
44901 if (TARGET_AVX512F)
44902 {
44903 tmp = gen_reg_rtx (mode);
44904 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
44905 emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
44906 force_reg (HImode, GEN_INT (1 << elt))));
44907 return;
44908 }
44909 else
44910 break;
44911 case V32HImode:
44912 if (TARGET_AVX512F && TARGET_AVX512BW)
44913 {
44914 tmp = gen_reg_rtx (mode);
44915 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
44916 emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
44917 force_reg (SImode, GEN_INT (1 << elt))));
44918 return;
44919 }
44920 else
44921 break;
44922 case V64QImode:
44923 if (TARGET_AVX512F && TARGET_AVX512BW)
44924 {
44925 tmp = gen_reg_rtx (mode);
44926 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
44927 emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
44928 force_reg (DImode, GEN_INT (1 << elt))));
44929 return;
44930 }
44931 else
44932 break;
44933
44934 default:
44935 break;
44936 }
44937
44938 if (use_vec_merge)
44939 {
44940 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
44941 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
44942 emit_insn (gen_rtx_SET (target, tmp));
44943 }
44944 else
44945 {
44946 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
44947
44948 emit_move_insn (mem, target);
44949
44950 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
44951 emit_move_insn (tmp, val);
44952
44953 emit_move_insn (target, mem);
44954 }
44955 }
44956
44957 void
44958 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
44959 {
44960 machine_mode mode = GET_MODE (vec);
44961 machine_mode inner_mode = GET_MODE_INNER (mode);
44962 bool use_vec_extr = false;
44963 rtx tmp;
44964
44965 switch (mode)
44966 {
44967 case V2SImode:
44968 case V2SFmode:
44969 if (!mmx_ok)
44970 break;
44971 /* FALLTHRU */
44972
44973 case V2DFmode:
44974 case V2DImode:
44975 use_vec_extr = true;
44976 break;
44977
44978 case V4SFmode:
44979 use_vec_extr = TARGET_SSE4_1;
44980 if (use_vec_extr)
44981 break;
44982
44983 switch (elt)
44984 {
44985 case 0:
44986 tmp = vec;
44987 break;
44988
44989 case 1:
44990 case 3:
44991 tmp = gen_reg_rtx (mode);
44992 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
44993 GEN_INT (elt), GEN_INT (elt),
44994 GEN_INT (elt+4), GEN_INT (elt+4)));
44995 break;
44996
44997 case 2:
44998 tmp = gen_reg_rtx (mode);
44999 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
45000 break;
45001
45002 default:
45003 gcc_unreachable ();
45004 }
45005 vec = tmp;
45006 use_vec_extr = true;
45007 elt = 0;
45008 break;
45009
45010 case V4SImode:
45011 use_vec_extr = TARGET_SSE4_1;
45012 if (use_vec_extr)
45013 break;
45014
45015 if (TARGET_SSE2)
45016 {
45017 switch (elt)
45018 {
45019 case 0:
45020 tmp = vec;
45021 break;
45022
45023 case 1:
45024 case 3:
45025 tmp = gen_reg_rtx (mode);
45026 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
45027 GEN_INT (elt), GEN_INT (elt),
45028 GEN_INT (elt), GEN_INT (elt)));
45029 break;
45030
45031 case 2:
45032 tmp = gen_reg_rtx (mode);
45033 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
45034 break;
45035
45036 default:
45037 gcc_unreachable ();
45038 }
45039 vec = tmp;
45040 use_vec_extr = true;
45041 elt = 0;
45042 }
45043 else
45044 {
45045 /* For SSE1, we have to reuse the V4SF code. */
45046 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
45047 gen_lowpart (V4SFmode, vec), elt);
45048 return;
45049 }
45050 break;
45051
45052 case V8HImode:
45053 use_vec_extr = TARGET_SSE2;
45054 break;
45055 case V4HImode:
45056 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45057 break;
45058
45059 case V16QImode:
45060 use_vec_extr = TARGET_SSE4_1;
45061 break;
45062
45063 case V8SFmode:
45064 if (TARGET_AVX)
45065 {
45066 tmp = gen_reg_rtx (V4SFmode);
45067 if (elt < 4)
45068 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
45069 else
45070 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
45071 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45072 return;
45073 }
45074 break;
45075
45076 case V4DFmode:
45077 if (TARGET_AVX)
45078 {
45079 tmp = gen_reg_rtx (V2DFmode);
45080 if (elt < 2)
45081 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
45082 else
45083 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
45084 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45085 return;
45086 }
45087 break;
45088
45089 case V32QImode:
45090 if (TARGET_AVX)
45091 {
45092 tmp = gen_reg_rtx (V16QImode);
45093 if (elt < 16)
45094 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
45095 else
45096 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
45097 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45098 return;
45099 }
45100 break;
45101
45102 case V16HImode:
45103 if (TARGET_AVX)
45104 {
45105 tmp = gen_reg_rtx (V8HImode);
45106 if (elt < 8)
45107 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
45108 else
45109 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
45110 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45111 return;
45112 }
45113 break;
45114
45115 case V8SImode:
45116 if (TARGET_AVX)
45117 {
45118 tmp = gen_reg_rtx (V4SImode);
45119 if (elt < 4)
45120 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
45121 else
45122 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
45123 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45124 return;
45125 }
45126 break;
45127
45128 case V4DImode:
45129 if (TARGET_AVX)
45130 {
45131 tmp = gen_reg_rtx (V2DImode);
45132 if (elt < 2)
45133 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45134 else
45135 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45136 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45137 return;
45138 }
45139 break;
45140
45141 case V32HImode:
45142 if (TARGET_AVX512BW)
45143 {
45144 tmp = gen_reg_rtx (V16HImode);
45145 if (elt < 16)
45146 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45147 else
45148 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45149 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45150 return;
45151 }
45152 break;
45153
45154 case V64QImode:
45155 if (TARGET_AVX512BW)
45156 {
45157 tmp = gen_reg_rtx (V32QImode);
45158 if (elt < 32)
45159 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45160 else
45161 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45162 ix86_expand_vector_extract (false, target, tmp, elt & 31);
45163 return;
45164 }
45165 break;
45166
45167 case V16SFmode:
45168 tmp = gen_reg_rtx (V8SFmode);
45169 if (elt < 8)
45170 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45171 else
45172 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45173 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45174 return;
45175
45176 case V8DFmode:
45177 tmp = gen_reg_rtx (V4DFmode);
45178 if (elt < 4)
45179 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45180 else
45181 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45182 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45183 return;
45184
45185 case V16SImode:
45186 tmp = gen_reg_rtx (V8SImode);
45187 if (elt < 8)
45188 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45189 else
45190 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45191 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45192 return;
45193
45194 case V8DImode:
45195 tmp = gen_reg_rtx (V4DImode);
45196 if (elt < 4)
45197 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45198 else
45199 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45200 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45201 return;
45202
45203 case V8QImode:
45204 /* ??? Could extract the appropriate HImode element and shift. */
45205 default:
45206 break;
45207 }
45208
45209 if (use_vec_extr)
45210 {
45211 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45212 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45213
45214 /* Let the rtl optimizers know about the zero extension performed. */
45215 if (inner_mode == QImode || inner_mode == HImode)
45216 {
45217 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45218 target = gen_lowpart (SImode, target);
45219 }
45220
45221 emit_insn (gen_rtx_SET (target, tmp));
45222 }
45223 else
45224 {
45225 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45226
45227 emit_move_insn (mem, vec);
45228
45229 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45230 emit_move_insn (target, tmp);
45231 }
45232 }
45233
45234 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45235 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45236 The upper bits of DEST are undefined, though they shouldn't cause
45237 exceptions (some bits from src or all zeros are ok). */
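/* For example, for a V4SFmode SRC and I == 128 this emits movhlps, copying
   elements 2 and 3 of SRC into elements 0 and 1 of DEST.  */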
45238
45239 static void
45240 emit_reduc_half (rtx dest, rtx src, int i)
45241 {
45242 rtx tem, d = dest;
45243 switch (GET_MODE (src))
45244 {
45245 case V4SFmode:
45246 if (i == 128)
45247 tem = gen_sse_movhlps (dest, src, src);
45248 else
45249 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45250 GEN_INT (1 + 4), GEN_INT (1 + 4));
45251 break;
45252 case V2DFmode:
45253 tem = gen_vec_interleave_highv2df (dest, src, src);
45254 break;
45255 case V16QImode:
45256 case V8HImode:
45257 case V4SImode:
45258 case V2DImode:
45259 d = gen_reg_rtx (V1TImode);
45260 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
45261 GEN_INT (i / 2));
45262 break;
45263 case V8SFmode:
45264 if (i == 256)
45265 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45266 else
45267 tem = gen_avx_shufps256 (dest, src, src,
45268 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45269 break;
45270 case V4DFmode:
45271 if (i == 256)
45272 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45273 else
45274 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
45275 break;
45276 case V32QImode:
45277 case V16HImode:
45278 case V8SImode:
45279 case V4DImode:
45280 if (i == 256)
45281 {
45282 if (GET_MODE (dest) != V4DImode)
45283 d = gen_reg_rtx (V4DImode);
45284 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45285 gen_lowpart (V4DImode, src),
45286 const1_rtx);
45287 }
45288 else
45289 {
45290 d = gen_reg_rtx (V2TImode);
45291 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
45292 GEN_INT (i / 2));
45293 }
45294 break;
45295 case V64QImode:
45296 case V32HImode:
45297 case V16SImode:
45298 case V16SFmode:
45299 case V8DImode:
45300 case V8DFmode:
45301 if (i > 128)
45302 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45303 gen_lowpart (V16SImode, src),
45304 gen_lowpart (V16SImode, src),
45305 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45306 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45307 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45308 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45309 GEN_INT (0xC), GEN_INT (0xD),
45310 GEN_INT (0xE), GEN_INT (0xF),
45311 GEN_INT (0x10), GEN_INT (0x11),
45312 GEN_INT (0x12), GEN_INT (0x13),
45313 GEN_INT (0x14), GEN_INT (0x15),
45314 GEN_INT (0x16), GEN_INT (0x17));
45315 else
45316 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45317 gen_lowpart (V16SImode, src),
45318 GEN_INT (i == 128 ? 0x2 : 0x1),
45319 GEN_INT (0x3),
45320 GEN_INT (0x3),
45321 GEN_INT (0x3),
45322 GEN_INT (i == 128 ? 0x6 : 0x5),
45323 GEN_INT (0x7),
45324 GEN_INT (0x7),
45325 GEN_INT (0x7),
45326 GEN_INT (i == 128 ? 0xA : 0x9),
45327 GEN_INT (0xB),
45328 GEN_INT (0xB),
45329 GEN_INT (0xB),
45330 GEN_INT (i == 128 ? 0xE : 0xD),
45331 GEN_INT (0xF),
45332 GEN_INT (0xF),
45333 GEN_INT (0xF));
45334 break;
45335 default:
45336 gcc_unreachable ();
45337 }
45338 emit_insn (tem);
45339 if (d != dest)
45340 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45341 }
45342
45343 /* Expand a vector reduction. FN is the binary pattern to reduce;
45344 DEST is the destination; IN is the input vector. */
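/* For example, for a V8HImode input (other than the UMIN case handled
   specially below) the halving loop runs with i = 128, 64 and 32, folding
   the vector in half three times; the final fold writes directly into DEST.  */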
45345
45346 void
45347 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45348 {
45349 rtx half, dst, vec = in;
45350 machine_mode mode = GET_MODE (in);
45351 int i;
45352
45353 /* SSE4.1 has a special instruction for V8HImode UMIN reduction. */
45354 if (TARGET_SSE4_1
45355 && mode == V8HImode
45356 && fn == gen_uminv8hi3)
45357 {
45358 emit_insn (gen_sse4_1_phminposuw (dest, in));
45359 return;
45360 }
45361
45362 for (i = GET_MODE_BITSIZE (mode);
45363 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45364 i >>= 1)
45365 {
45366 half = gen_reg_rtx (mode);
45367 emit_reduc_half (half, vec, i);
45368 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45369 dst = dest;
45370 else
45371 dst = gen_reg_rtx (mode);
45372 emit_insn (fn (dst, half, vec));
45373 vec = dst;
45374 }
45375 }
45376 \f
45377 /* Target hook for scalar_mode_supported_p. */
45378 static bool
45379 ix86_scalar_mode_supported_p (machine_mode mode)
45380 {
45381 if (DECIMAL_FLOAT_MODE_P (mode))
45382 return default_decimal_float_supported_p ();
45383 else if (mode == TFmode)
45384 return true;
45385 else
45386 return default_scalar_mode_supported_p (mode);
45387 }
45388
45389 /* Implements target hook vector_mode_supported_p. */
45390 static bool
45391 ix86_vector_mode_supported_p (machine_mode mode)
45392 {
45393 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45394 return true;
45395 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45396 return true;
45397 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45398 return true;
45399 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45400 return true;
45401 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45402 return true;
45403 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45404 return true;
45405 return false;
45406 }
45407
45408 /* Implement target hook libgcc_floating_mode_supported_p. */
45409 static bool
45410 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
45411 {
45412 switch (mode)
45413 {
45414 case SFmode:
45415 case DFmode:
45416 case XFmode:
45417 return true;
45418
45419 case TFmode:
45420 #ifdef IX86_NO_LIBGCC_TFMODE
45421 return false;
45422 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45423 return TARGET_LONG_DOUBLE_128;
45424 #else
45425 return true;
45426 #endif
45427
45428 default:
45429 return false;
45430 }
45431 }
45432
45433 /* Target hook for c_mode_for_suffix. */
45434 static machine_mode
45435 ix86_c_mode_for_suffix (char suffix)
45436 {
45437 if (suffix == 'q')
45438 return TFmode;
45439 if (suffix == 'w')
45440 return XFmode;
45441
45442 return VOIDmode;
45443 }
45444
45445 /* Worker function for TARGET_MD_ASM_ADJUST.
45446
45447 We do this in the new i386 backend to maintain source compatibility
45448 with the old cc0-based compiler. */
45449
45450 static rtx_insn *
45451 ix86_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
45452 vec<const char *> &/*constraints*/,
45453 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
45454 {
45455 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
45456 clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
45457
45458 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
45459 SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);
45460
45461 return NULL;
45462 }
45463
45464 /* Implements the targetm.asm.encode_section_info hook. */
45465
45466 static void ATTRIBUTE_UNUSED
45467 ix86_encode_section_info (tree decl, rtx rtl, int first)
45468 {
45469 default_encode_section_info (decl, rtl, first);
45470
45471 if (ix86_in_large_data_p (decl))
45472 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45473 }
45474
45475 /* Worker function for REVERSE_CONDITION. */
45476
45477 enum rtx_code
45478 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45479 {
45480 return (mode != CCFPmode && mode != CCFPUmode
45481 ? reverse_condition (code)
45482 : reverse_condition_maybe_unordered (code));
45483 }
45484
45485 /* Output code to perform an x87 FP register move, from OPERANDS[1]
45486 to OPERANDS[0]. */
45487
45488 const char *
45489 output_387_reg_move (rtx insn, rtx *operands)
45490 {
45491 if (REG_P (operands[0]))
45492 {
45493 if (REG_P (operands[1])
45494 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45495 {
45496 if (REGNO (operands[0]) == FIRST_STACK_REG)
45497 return output_387_ffreep (operands, 0);
45498 return "fstp\t%y0";
45499 }
45500 if (STACK_TOP_P (operands[0]))
45501 return "fld%Z1\t%y1";
45502 return "fst\t%y0";
45503 }
45504 else if (MEM_P (operands[0]))
45505 {
45506 gcc_assert (REG_P (operands[1]));
45507 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45508 return "fstp%Z0\t%y0";
45509 else
45510 {
45511 /* There is no non-popping store to memory for XFmode.
45512 So if we need one, follow the store with a load. */
45513 if (GET_MODE (operands[0]) == XFmode)
45514 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45515 else
45516 return "fst%Z0\t%y0";
45517 }
45518 }
45519 else
45520 gcc_unreachable();
45521 }
45522
45523 /* Output code to perform a conditional jump to LABEL if the C2 flag in
45524 the FP status register is set. */
45525
45526 void
45527 ix86_emit_fp_unordered_jump (rtx label)
45528 {
45529 rtx reg = gen_reg_rtx (HImode);
45530 rtx temp;
45531
45532 emit_insn (gen_x86_fnstsw_1 (reg));
45533
45534 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
45535 {
45536 emit_insn (gen_x86_sahf_1 (reg));
45537
45538 temp = gen_rtx_REG (CCmode, FLAGS_REG);
45539 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
45540 }
45541 else
45542 {
45543 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45544
45545 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45546 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45547 }
45548
45549 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45550 gen_rtx_LABEL_REF (VOIDmode, label),
45551 pc_rtx);
45552 temp = gen_rtx_SET (pc_rtx, temp);
45553
45554 emit_jump_insn (temp);
45555 predict_jump (REG_BR_PROB_BASE * 10 / 100);
45556 }
45557
45558 /* Output code to perform a log1p XFmode calculation. */
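/* Note: the 0.29289... constant below is 1 - sqrt(2)/2, the bound within
   which fyl2xp1 is documented to be usable directly; for larger |op1| the
   code falls back to fyl2x on 1.0 + op1.  */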
45559
45560 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45561 {
45562 rtx_code_label *label1 = gen_label_rtx ();
45563 rtx_code_label *label2 = gen_label_rtx ();
45564
45565 rtx tmp = gen_reg_rtx (XFmode);
45566 rtx tmp2 = gen_reg_rtx (XFmode);
45567 rtx test;
45568
45569 emit_insn (gen_absxf2 (tmp, op1));
45570 test = gen_rtx_GE (VOIDmode, tmp,
45571 CONST_DOUBLE_FROM_REAL_VALUE (
45572 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45573 XFmode));
45574 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
45575
45576 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45577 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45578 emit_jump (label2);
45579
45580 emit_label (label1);
45581 emit_move_insn (tmp, CONST1_RTX (XFmode));
45582 emit_insn (gen_addxf3 (tmp, op1, tmp));
45583 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45584 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45585
45586 emit_label (label2);
45587 }
45588
45589 /* Emit code for round calculation. */
45590 void ix86_emit_i387_round (rtx op0, rtx op1)
45591 {
45592 machine_mode inmode = GET_MODE (op1);
45593 machine_mode outmode = GET_MODE (op0);
45594 rtx e1, e2, res, tmp, tmp1, half;
45595 rtx scratch = gen_reg_rtx (HImode);
45596 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45597 rtx_code_label *jump_label = gen_label_rtx ();
45598 rtx insn;
45599 rtx (*gen_abs) (rtx, rtx);
45600 rtx (*gen_neg) (rtx, rtx);
45601
45602 switch (inmode)
45603 {
45604 case SFmode:
45605 gen_abs = gen_abssf2;
45606 break;
45607 case DFmode:
45608 gen_abs = gen_absdf2;
45609 break;
45610 case XFmode:
45611 gen_abs = gen_absxf2;
45612 break;
45613 default:
45614 gcc_unreachable ();
45615 }
45616
45617 switch (outmode)
45618 {
45619 case SFmode:
45620 gen_neg = gen_negsf2;
45621 break;
45622 case DFmode:
45623 gen_neg = gen_negdf2;
45624 break;
45625 case XFmode:
45626 gen_neg = gen_negxf2;
45627 break;
45628 case HImode:
45629 gen_neg = gen_neghi2;
45630 break;
45631 case SImode:
45632 gen_neg = gen_negsi2;
45633 break;
45634 case DImode:
45635 gen_neg = gen_negdi2;
45636 break;
45637 default:
45638 gcc_unreachable ();
45639 }
45640
45641 e1 = gen_reg_rtx (inmode);
45642 e2 = gen_reg_rtx (inmode);
45643 res = gen_reg_rtx (outmode);
45644
45645 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
45646
45647 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
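  /* For example, round (-2.5) is computed as -(floor (2.5 + 0.5)) = -3,
     i.e. halfway cases round away from zero.  */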
45648
45649 /* scratch = fxam(op1) */
45650 emit_insn (gen_rtx_SET (scratch,
45651 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
45652 UNSPEC_FXAM)));
45653 /* e1 = fabs(op1) */
45654 emit_insn (gen_abs (e1, op1));
45655
45656 /* e2 = e1 + 0.5 */
45657 half = force_reg (inmode, half);
45658 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (inmode, e1, half)));
45659
45660 /* res = floor(e2) */
45661 if (inmode != XFmode)
45662 {
45663 tmp1 = gen_reg_rtx (XFmode);
45664
45665 emit_insn (gen_rtx_SET (tmp1, gen_rtx_FLOAT_EXTEND (XFmode, e2)));
45666 }
45667 else
45668 tmp1 = e2;
45669
45670 switch (outmode)
45671 {
45672 case SFmode:
45673 case DFmode:
45674 {
45675 rtx tmp0 = gen_reg_rtx (XFmode);
45676
45677 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
45678
45679 emit_insn (gen_rtx_SET (res,
45680 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
45681 UNSPEC_TRUNC_NOOP)));
45682 }
45683 break;
45684 case XFmode:
45685 emit_insn (gen_frndintxf2_floor (res, tmp1));
45686 break;
45687 case HImode:
45688 emit_insn (gen_lfloorxfhi2 (res, tmp1));
45689 break;
45690 case SImode:
45691 emit_insn (gen_lfloorxfsi2 (res, tmp1));
45692 break;
45693 case DImode:
45694 emit_insn (gen_lfloorxfdi2 (res, tmp1));
45695 break;
45696 default:
45697 gcc_unreachable ();
45698 }
45699
45700 /* flags = signbit(a) */
45701 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
45702
45703 /* if (flags) then res = -res */
45704 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
45705 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
45706 gen_rtx_LABEL_REF (VOIDmode, jump_label),
45707 pc_rtx);
45708 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
45709 predict_jump (REG_BR_PROB_BASE * 50 / 100);
45710 JUMP_LABEL (insn) = jump_label;
45711
45712 emit_insn (gen_neg (res, res));
45713
45714 emit_label (jump_label);
45715 LABEL_NUSES (jump_label) = 1;
45716
45717 emit_move_insn (op0, res);
45718 }
45719
45720 /* Output code to perform a Newton-Raphson approximation of a single precision
45721 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
45722
45723 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
45724 {
45725 rtx x0, x1, e0, e1;
45726
45727 x0 = gen_reg_rtx (mode);
45728 e0 = gen_reg_rtx (mode);
45729 e1 = gen_reg_rtx (mode);
45730 x1 = gen_reg_rtx (mode);
45731
45732 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
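  /* This is one Newton-Raphson step x1 = x0 * (2 - b * x0) with x0 = rcp(b),
     rewritten as (x0 + x0) - (b * x0 * x0) to avoid loading the constant 2.0.  */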
45733
45734 b = force_reg (mode, b);
45735
45736 /* x0 = rcp(b) estimate */
45737 if (mode == V16SFmode || mode == V8DFmode)
45738 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45739 UNSPEC_RCP14)));
45740 else
45741 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45742 UNSPEC_RCP)));
45743
45744 /* e0 = x0 * b */
45745 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
45746
45747 /* e0 = x0 * e0 */
45748 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
45749
45750 /* e1 = x0 + x0 */
45751 emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
45752
45753 /* x1 = e1 - e0 */
45754 emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
45755
45756 /* res = a * x1 */
45757 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
45758 }
45759
45760 /* Output code to perform a Newton-Raphson approximation of a
45761 single precision floating point [reciprocal] square root. */
45762
45763 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
45764 bool recip)
45765 {
45766 rtx x0, e0, e1, e2, e3, mthree, mhalf;
45767 REAL_VALUE_TYPE r;
45768 int unspec;
45769
45770 x0 = gen_reg_rtx (mode);
45771 e0 = gen_reg_rtx (mode);
45772 e1 = gen_reg_rtx (mode);
45773 e2 = gen_reg_rtx (mode);
45774 e3 = gen_reg_rtx (mode);
45775
45776 real_from_integer (&r, VOIDmode, -3, SIGNED);
45777 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45778
45779 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
45780 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45781 unspec = UNSPEC_RSQRT;
45782
45783 if (VECTOR_MODE_P (mode))
45784 {
45785 mthree = ix86_build_const_vector (mode, true, mthree);
45786 mhalf = ix86_build_const_vector (mode, true, mhalf);
45787 /* There is no 512-bit rsqrt. There is however rsqrt14. */
45788 if (GET_MODE_SIZE (mode) == 64)
45789 unspec = UNSPEC_RSQRT14;
45790 }
45791
45792 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
45793 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
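  /* Both are one Newton-Raphson step for rsqrt, x1 = 0.5 * x0 * (3 - a * x0 * x0),
     written with a factored-out -0.5; multiplying the rsqrt iterate by a then
     yields sqrt(a) = a * rsqrt(a).  */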
45794
45795 a = force_reg (mode, a);
45796
45797 /* x0 = rsqrt(a) estimate */
45798 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45799 unspec)));
45800
45801 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent NaN for sqrt (0.0). */
45802 if (!recip)
45803 {
45804 rtx zero, mask;
45805
45806 zero = gen_reg_rtx (mode);
45807 mask = gen_reg_rtx (mode);
45808
45809 zero = force_reg (mode, CONST0_RTX(mode));
45810
45811 /* Handle masked compare. */
45812 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
45813 {
45814 mask = gen_reg_rtx (HImode);
45815 /* Imm value 0x4 corresponds to not-equal comparison. */
45816 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
45817 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
45818 }
45819 else
45820 {
45821 emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a)));
45822
45823 emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask)));
45824 }
45825 }
45826
45827 /* e0 = x0 * a */
45828 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
45829 /* e1 = e0 * x0 */
45830 emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
45831
45832 /* e2 = e1 - 3. */
45833 mthree = force_reg (mode, mthree);
45834 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
45835
45836 mhalf = force_reg (mode, mhalf);
45837 if (recip)
45838 /* e3 = -.5 * x0 */
45839 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
45840 else
45841 /* e3 = -.5 * e0 */
45842 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
45843 /* ret = e2 * e3 */
45844 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
45845 }
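
/* As a rough scalar sketch of the sequence emitted above (illustrative
   names only; approx_rsqrt stands for the hardware rsqrtss/rsqrt14
   estimate, and the zero filtering is omitted):

	float swsqrt_sketch (float a, int recip)
	{
	  float x0 = approx_rsqrt (a);
	  float e0 = x0 * a;
	  float e1 = e0 * x0;
	  float e2 = e1 + -3.0f;
	  float e3 = (recip ? x0 : e0) * -0.5f;
	  return e2 * e3;
	}

   For the reciprocal case this is -0.5 * x0 * (a * x0 * x0 - 3.0), one
   Newton-Raphson step towards 1/sqrt(a); using e0 = a * x0 instead of x0
   in the last multiply yields the sqrt(a) approximation.  */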
45846
45847 #ifdef TARGET_SOLARIS
45848 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
45849
45850 static void
45851 i386_solaris_elf_named_section (const char *name, unsigned int flags,
45852 tree decl)
45853 {
45854 /* With Binutils 2.15, the "@unwind" marker must be specified on
45855 every occurrence of the ".eh_frame" section, not just the first
45856 one. */
45857 if (TARGET_64BIT
45858 && strcmp (name, ".eh_frame") == 0)
45859 {
45860 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
45861 flags & SECTION_WRITE ? "aw" : "a");
45862 return;
45863 }
45864
45865 #ifndef USE_GAS
45866 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
45867 {
45868 solaris_elf_asm_comdat_section (name, flags, decl);
45869 return;
45870 }
45871 #endif
45872
45873 default_elf_asm_named_section (name, flags, decl);
45874 }
45875 #endif /* TARGET_SOLARIS */
45876
45877 /* Return the mangling of TYPE if it is an extended fundamental type. */
45878
45879 static const char *
45880 ix86_mangle_type (const_tree type)
45881 {
45882 type = TYPE_MAIN_VARIANT (type);
45883
45884 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
45885 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
45886 return NULL;
45887
45888 switch (TYPE_MODE (type))
45889 {
45890 case TFmode:
45891 /* __float128 is "g". */
45892 return "g";
45893 case XFmode:
45894 /* "long double" or __float80 is "e". */
45895 return "e";
45896 default:
45897 return NULL;
45898 }
45899 }
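
/* For example, with the Itanium C++ ABI mangling GCC uses, a function
   declared as "void f (__float128)" mangles to "_Z1fg" and
   "void f (long double)" to "_Z1fe".  */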
45900
45901 /* For 32-bit code we can save the PIC register setup by using the
45902 __stack_chk_fail_local hidden function instead of calling
45903 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
45904 register, so it is better to call __stack_chk_fail directly. */
45905
45906 static tree ATTRIBUTE_UNUSED
45907 ix86_stack_protect_fail (void)
45908 {
45909 return TARGET_64BIT
45910 ? default_external_stack_protect_fail ()
45911 : default_hidden_stack_protect_fail ();
45912 }
45913
45914 /* Select a format to encode pointers in exception handling data. CODE
45915 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
45916 true if the symbol may be affected by dynamic relocations.
45917
45918 ??? All x86 object file formats are capable of representing this.
45919 After all, the relocation needed is the same as for the call insn.
45920 Whether or not a particular assembler allows us to enter such, I
45921 guess we'll have to see. */
45922 int
45923 asm_preferred_eh_data_format (int code, int global)
45924 {
45925 if (flag_pic)
45926 {
45927 int type = DW_EH_PE_sdata8;
45928 if (!TARGET_64BIT
45929 || ix86_cmodel == CM_SMALL_PIC
45930 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
45931 type = DW_EH_PE_sdata4;
45932 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
45933 }
45934 if (ix86_cmodel == CM_SMALL
45935 || (ix86_cmodel == CM_MEDIUM && code))
45936 return DW_EH_PE_udata4;
45937 return DW_EH_PE_absptr;
45938 }
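
/* As a worked example of the selection above: for -fPIC x86_64 code with
   the small PIC model, a reference to a preemptible global symbol is
   encoded as DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4, a
   local code label as DW_EH_PE_pcrel | DW_EH_PE_sdata4, while non-PIC
   small-model code simply uses DW_EH_PE_udata4.  */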
45939 \f
45940 /* Expand copysign from SIGN to the positive value ABS_VALUE,
45941 storing the result in RESULT. If MASK is non-null, it must be a mask
45942 that masks out the sign bit. */
45943 static void
45944 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
45945 {
45946 machine_mode mode = GET_MODE (sign);
45947 rtx sgn = gen_reg_rtx (mode);
45948 if (mask == NULL_RTX)
45949 {
45950 machine_mode vmode;
45951
45952 if (mode == SFmode)
45953 vmode = V4SFmode;
45954 else if (mode == DFmode)
45955 vmode = V2DFmode;
45956 else
45957 vmode = mode;
45958
45959 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
45960 if (!VECTOR_MODE_P (mode))
45961 {
45962 /* We need to generate a scalar mode mask in this case. */
45963 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45964 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45965 mask = gen_reg_rtx (mode);
45966 emit_insn (gen_rtx_SET (mask, tmp));
45967 }
45968 }
45969 else
45970 mask = gen_rtx_NOT (mode, mask);
45971 emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign)));
45972 emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn)));
45973 }
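
/* In scalar terms the AND/IOR pair above computes (illustrative
   pseudo-code, where sign_mask has only the sign bit set):

	result = abs_value | (sign & sign_mask);

   i.e. the magnitude bits of ABS_VALUE combined with the sign bit of
   SIGN.  A caller-supplied MASK is the fabs mask with the sign bit
   clear, so its complement is used to extract the sign bit instead.  */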
45974
45975 /* Expand fabs (OP0) and return a new rtx that holds the result. The
45976 mask for masking out the sign-bit is stored in *SMASK, if that is
45977 non-null. */
45978 static rtx
45979 ix86_expand_sse_fabs (rtx op0, rtx *smask)
45980 {
45981 machine_mode vmode, mode = GET_MODE (op0);
45982 rtx xa, mask;
45983
45984 xa = gen_reg_rtx (mode);
45985 if (mode == SFmode)
45986 vmode = V4SFmode;
45987 else if (mode == DFmode)
45988 vmode = V2DFmode;
45989 else
45990 vmode = mode;
45991 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
45992 if (!VECTOR_MODE_P (mode))
45993 {
45994 /* We need to generate a scalar mode mask in this case. */
45995 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45996 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45997 mask = gen_reg_rtx (mode);
45998 emit_insn (gen_rtx_SET (mask, tmp));
45999 }
46000 emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask)));
46001
46002 if (smask)
46003 *smask = mask;
46004
46005 return xa;
46006 }
46007
46008 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
46009 swapping the operands if SWAP_OPERANDS is true. The expanded
46010 code is a forward jump to a newly created label in case the
46011 comparison is true. The generated label rtx is returned. */
46012 static rtx_code_label *
46013 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
46014 bool swap_operands)
46015 {
46016 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
46017 rtx_code_label *label;
46018 rtx tmp;
46019
46020 if (swap_operands)
46021 std::swap (op0, op1);
46022
46023 label = gen_label_rtx ();
46024 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
46025 emit_insn (gen_rtx_SET (tmp, gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
46026 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
46027 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
46028 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
46029 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
46030 JUMP_LABEL (tmp) = label;
46031
46032 return label;
46033 }
46034
46035 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
46036 using comparison code CODE. Operands are swapped for the comparison if
46037 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
46038 static rtx
46039 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
46040 bool swap_operands)
46041 {
46042 rtx (*insn)(rtx, rtx, rtx, rtx);
46043 machine_mode mode = GET_MODE (op0);
46044 rtx mask = gen_reg_rtx (mode);
46045
46046 if (swap_operands)
46047 std::swap (op0, op1);
46048
46049 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
46050
46051 emit_insn (insn (mask, op0, op1,
46052 gen_rtx_fmt_ee (code, mode, op0, op1)));
46053 return mask;
46054 }
46055
46056 /* Generate and return an rtx of mode MODE holding 2**n, where n is the
46057 number of mantissa bits of MODE, which must be DFmode or SFmode. */
46058 static rtx
46059 ix86_gen_TWO52 (machine_mode mode)
46060 {
46061 REAL_VALUE_TYPE TWO52r;
46062 rtx TWO52;
46063
46064 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
46065 TWO52 = const_double_from_real_value (TWO52r, mode);
46066 TWO52 = force_reg (mode, TWO52);
46067
46068 return TWO52;
46069 }
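
/* This constant is used for the classic add-and-subtract rounding trick:
   for a non-negative double x < 2**52, (x + 2**52) - 2**52 pushes the
   fraction bits out of the significand, so the result is x rounded to an
   integer in the current rounding mode.  A scalar sketch, assuming the
   default round-to-nearest-even mode and no re-association:

	double two52_round_sketch (double x)
	{
	  return (x + 0x1p52) - 0x1p52;
	}
   */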
46070
46071 /* Expand SSE sequence for computing lround from OP1 storing
46072 into OP0. */
46073 void
46074 ix86_expand_lround (rtx op0, rtx op1)
46075 {
46076 /* C code for the stuff we're doing below:
46077 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
46078 return (long)tmp;
46079 */
46080 machine_mode mode = GET_MODE (op1);
46081 const struct real_format *fmt;
46082 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46083 rtx adj;
46084
46085 /* load nextafter (0.5, 0.0) */
46086 fmt = REAL_MODE_FORMAT (mode);
46087 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46088 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46089
46090 /* adj = copysign (0.5, op1) */
46091 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
46092 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
46093
46094 /* adj = op1 + adj */
46095 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
46096
46097 /* op0 = (imode)adj */
46098 expand_fix (op0, adj, 0);
46099 }
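
/* Why nextafter (0.5, 0.0) rather than plain 0.5: for the largest double
   below 0.5 (i.e. 0.5 - 2**-54), adding exactly 0.5 would round up to
   1.0 under round-to-nearest-even and lround would return 1 instead of
   the expected 0.  With pred_half = 0.5 - 2**-54 the sum is 1 - 2**-53,
   which is exact and truncates to 0.  The constant is computed as
   dconsthalf - 2**(-p - 1) from the precision p of the mode's format.  */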
46100
46101 /* Expand SSE2 sequence for computing lfloor or lceil (depending on
46102 DO_FLOOR) from OP1, storing the result into OP0. */
46103 void
46104 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
46105 {
46106 /* C code for the stuff we're doing below (for do_floor):
46107 xi = (long)op1;
46108 xi -= (double)xi > op1 ? 1 : 0;
46109 return xi;
46110 */
46111 machine_mode fmode = GET_MODE (op1);
46112 machine_mode imode = GET_MODE (op0);
46113 rtx ireg, freg, tmp;
46114 rtx_code_label *label;
46115
46116 /* reg = (long)op1 */
46117 ireg = gen_reg_rtx (imode);
46118 expand_fix (ireg, op1, 0);
46119
46120 /* freg = (double)reg */
46121 freg = gen_reg_rtx (fmode);
46122 expand_float (freg, ireg, 0);
46123
46124 /* ireg = (freg > op1) ? ireg - 1 : ireg */
46125 label = ix86_expand_sse_compare_and_jump (UNLE,
46126 freg, op1, !do_floor);
46127 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46128 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46129 emit_move_insn (ireg, tmp);
46130
46131 emit_label (label);
46132 LABEL_NUSES (label) = 1;
46133
46134 emit_move_insn (op0, ireg);
46135 }
46136
46137 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46138 result in OPERAND0. */
46139 void
46140 ix86_expand_rint (rtx operand0, rtx operand1)
46141 {
46142 /* C code for the stuff we're doing below:
46143 xa = fabs (operand1);
46144 if (!isless (xa, 2**52))
46145 return operand1;
46146 xa = xa + 2**52 - 2**52;
46147 return copysign (xa, operand1);
46148 */
46149 machine_mode mode = GET_MODE (operand0);
46150 rtx res, xa, TWO52, mask;
46151 rtx_code_label *label;
46152
46153 res = gen_reg_rtx (mode);
46154 emit_move_insn (res, operand1);
46155
46156 /* xa = abs (operand1) */
46157 xa = ix86_expand_sse_fabs (res, &mask);
46158
46159 /* if (!isless (xa, TWO52)) goto label; */
46160 TWO52 = ix86_gen_TWO52 (mode);
46161 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46162
46163 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46164 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46165
46166 ix86_sse_copysign_to_positive (res, xa, res, mask);
46167
46168 emit_label (label);
46169 LABEL_NUSES (label) = 1;
46170
46171 emit_move_insn (operand0, res);
46172 }
46173
46174 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46175 into OPERAND0. */
46176 void
46177 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46178 {
46179 /* C code for the stuff we expand below.
46180 double xa = fabs (x), x2;
46181 if (!isless (xa, TWO52))
46182 return x;
46183 xa = xa + TWO52 - TWO52;
46184 x2 = copysign (xa, x);
46185 Compensate. Floor:
46186 if (x2 > x)
46187 x2 -= 1;
46188 Compensate. Ceil:
46189 if (x2 < x)
46190 x2 -= -1;
46191 return x2;
46192 */
46193 machine_mode mode = GET_MODE (operand0);
46194 rtx xa, TWO52, tmp, one, res, mask;
46195 rtx_code_label *label;
46196
46197 TWO52 = ix86_gen_TWO52 (mode);
46198
46199 /* Temporary for holding the result, initialized to the input
46200 operand to ease control flow. */
46201 res = gen_reg_rtx (mode);
46202 emit_move_insn (res, operand1);
46203
46204 /* xa = abs (operand1) */
46205 xa = ix86_expand_sse_fabs (res, &mask);
46206
46207 /* if (!isless (xa, TWO52)) goto label; */
46208 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46209
46210 /* xa = xa + TWO52 - TWO52; */
46211 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46212 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46213
46214 /* xa = copysign (xa, operand1) */
46215 ix86_sse_copysign_to_positive (xa, xa, res, mask);
46216
46217 /* generate 1.0 or -1.0 */
46218 one = force_reg (mode,
46219 const_double_from_real_value (do_floor
46220 ? dconst1 : dconstm1, mode));
46221
46222 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46223 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46224 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46225 /* We always need to subtract here to preserve signed zero. */
46226 tmp = expand_simple_binop (mode, MINUS,
46227 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46228 emit_move_insn (res, tmp);
46229
46230 emit_label (label);
46231 LABEL_NUSES (label) = 1;
46232
46233 emit_move_insn (operand0, res);
46234 }
46235
46236 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46237 into OPERAND0. */
46238 void
46239 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46240 {
46241 /* C code for the stuff we expand below.
46242 double xa = fabs (x), x2;
46243 if (!isless (xa, TWO52))
46244 return x;
46245 x2 = (double)(long)x;
46246 Compensate. Floor:
46247 if (x2 > x)
46248 x2 -= 1;
46249 Compensate. Ceil:
46250 if (x2 < x)
46251 x2 += 1;
46252 if (HONOR_SIGNED_ZEROS (mode))
46253 return copysign (x2, x);
46254 return x2;
46255 */
46256 machine_mode mode = GET_MODE (operand0);
46257 rtx xa, xi, TWO52, tmp, one, res, mask;
46258 rtx_code_label *label;
46259
46260 TWO52 = ix86_gen_TWO52 (mode);
46261
46262 /* Temporary for holding the result, initialized to the input
46263 operand to ease control flow. */
46264 res = gen_reg_rtx (mode);
46265 emit_move_insn (res, operand1);
46266
46267 /* xa = abs (operand1) */
46268 xa = ix86_expand_sse_fabs (res, &mask);
46269
46270 /* if (!isless (xa, TWO52)) goto label; */
46271 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46272
46273 /* xa = (double)(long)x */
46274 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46275 expand_fix (xi, res, 0);
46276 expand_float (xa, xi, 0);
46277
46278 /* generate 1.0 */
46279 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46280
46281 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46282 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46283 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46284 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46285 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46286 emit_move_insn (res, tmp);
46287
46288 if (HONOR_SIGNED_ZEROS (mode))
46289 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46290
46291 emit_label (label);
46292 LABEL_NUSES (label) = 1;
46293
46294 emit_move_insn (operand0, res);
46295 }
46296
46297 /* Expand SSE sequence for computing round from OPERAND1, storing
46298 the result into OPERAND0. This sequence works without relying on DImode
46299 truncation via cvttsd2siq, which is only available on 64-bit targets. */
46300 void
46301 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46302 {
46303 /* C code for the stuff we expand below.
46304 double xa = fabs (x), xa2, x2;
46305 if (!isless (xa, TWO52))
46306 return x;
46307 Using the absolute value and copying back sign makes
46308 -0.0 -> -0.0 correct.
46309 xa2 = xa + TWO52 - TWO52;
46310 Compensate.
46311 dxa = xa2 - xa;
46312 if (dxa <= -0.5)
46313 xa2 += 1;
46314 else if (dxa > 0.5)
46315 xa2 -= 1;
46316 x2 = copysign (xa2, x);
46317 return x2;
46318 */
46319 machine_mode mode = GET_MODE (operand0);
46320 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46321 rtx_code_label *label;
46322
46323 TWO52 = ix86_gen_TWO52 (mode);
46324
46325 /* Temporary for holding the result, initialized to the input
46326 operand to ease control flow. */
46327 res = gen_reg_rtx (mode);
46328 emit_move_insn (res, operand1);
46329
46330 /* xa = abs (operand1) */
46331 xa = ix86_expand_sse_fabs (res, &mask);
46332
46333 /* if (!isless (xa, TWO52)) goto label; */
46334 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46335
46336 /* xa2 = xa + TWO52 - TWO52; */
46337 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46338 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46339
46340 /* dxa = xa2 - xa; */
46341 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46342
46343 /* generate 0.5, 1.0 and -0.5 */
46344 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46345 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46346 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46347 0, OPTAB_DIRECT);
46348
46349 /* Compensate. */
46350 tmp = gen_reg_rtx (mode);
46351 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46352 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46353 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46354 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46355 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46356 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46357 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46358 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46359
46360 /* res = copysign (xa2, operand1) */
46361 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46362
46363 emit_label (label);
46364 LABEL_NUSES (label) = 1;
46365
46366 emit_move_insn (operand0, res);
46367 }
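
/* A worked example of the compensation above, assuming the default
   round-to-nearest-even mode: for x = 2.5, xa2 = 2.0 (the TWO52 trick
   rounds the tie to even) and dxa = -0.5, so the second compare fires
   and xa2 becomes 3.0, giving the round-half-away-from-zero result that
   round () requires.  For x = 2.4, xa2 = 2.0 and dxa = -0.4, and neither
   compensation fires.  */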
46368
46369 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46370 into OPERAND0. */
46371 void
46372 ix86_expand_trunc (rtx operand0, rtx operand1)
46373 {
46374 /* C code for SSE variant we expand below.
46375 double xa = fabs (x), x2;
46376 if (!isless (xa, TWO52))
46377 return x;
46378 x2 = (double)(long)x;
46379 if (HONOR_SIGNED_ZEROS (mode))
46380 return copysign (x2, x);
46381 return x2;
46382 */
46383 machine_mode mode = GET_MODE (operand0);
46384 rtx xa, xi, TWO52, res, mask;
46385 rtx_code_label *label;
46386
46387 TWO52 = ix86_gen_TWO52 (mode);
46388
46389 /* Temporary for holding the result, initialized to the input
46390 operand to ease control flow. */
46391 res = gen_reg_rtx (mode);
46392 emit_move_insn (res, operand1);
46393
46394 /* xa = abs (operand1) */
46395 xa = ix86_expand_sse_fabs (res, &mask);
46396
46397 /* if (!isless (xa, TWO52)) goto label; */
46398 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46399
46400 /* x = (double)(long)x */
46401 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46402 expand_fix (xi, res, 0);
46403 expand_float (res, xi, 0);
46404
46405 if (HONOR_SIGNED_ZEROS (mode))
46406 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46407
46408 emit_label (label);
46409 LABEL_NUSES (label) = 1;
46410
46411 emit_move_insn (operand0, res);
46412 }
46413
46414 /* Expand SSE sequence for computing trunc from OPERAND1 storing the
46415 result into OPERAND0, without relying on DImode truncation. */
46416 void
46417 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46418 {
46419 machine_mode mode = GET_MODE (operand0);
46420 rtx xa, mask, TWO52, one, res, smask, tmp;
46421 rtx_code_label *label;
46422
46423 /* C code for SSE variant we expand below.
46424 double xa = fabs (x), x2;
46425 if (!isless (xa, TWO52))
46426 return x;
46427 xa2 = xa + TWO52 - TWO52;
46428 Compensate:
46429 if (xa2 > xa)
46430 xa2 -= 1.0;
46431 x2 = copysign (xa2, x);
46432 return x2;
46433 */
46434
46435 TWO52 = ix86_gen_TWO52 (mode);
46436
46437 /* Temporary for holding the result, initialized to the input
46438 operand to ease control flow. */
46439 res = gen_reg_rtx (mode);
46440 emit_move_insn (res, operand1);
46441
46442 /* xa = abs (operand1) */
46443 xa = ix86_expand_sse_fabs (res, &smask);
46444
46445 /* if (!isless (xa, TWO52)) goto label; */
46446 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46447
46448 /* res = xa + TWO52 - TWO52; */
46449 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46450 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46451 emit_move_insn (res, tmp);
46452
46453 /* generate 1.0 */
46454 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46455
46456 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46457 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46458 emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
46459 tmp = expand_simple_binop (mode, MINUS,
46460 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46461 emit_move_insn (res, tmp);
46462
46463 /* res = copysign (res, operand1) */
46464 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46465
46466 emit_label (label);
46467 LABEL_NUSES (label) = 1;
46468
46469 emit_move_insn (operand0, res);
46470 }
46471
46472 /* Expand SSE sequence for computing round from OPERAND1 storing
46473 into OPERAND0. */
46474 void
46475 ix86_expand_round (rtx operand0, rtx operand1)
46476 {
46477 /* C code for the stuff we're doing below:
46478 double xa = fabs (x);
46479 if (!isless (xa, TWO52))
46480 return x;
46481 xa = (double)(long)(xa + nextafter (0.5, 0.0));
46482 return copysign (xa, x);
46483 */
46484 machine_mode mode = GET_MODE (operand0);
46485 rtx res, TWO52, xa, xi, half, mask;
46486 rtx_code_label *label;
46487 const struct real_format *fmt;
46488 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46489
46490 /* Temporary for holding the result, initialized to the input
46491 operand to ease control flow. */
46492 res = gen_reg_rtx (mode);
46493 emit_move_insn (res, operand1);
46494
46495 TWO52 = ix86_gen_TWO52 (mode);
46496 xa = ix86_expand_sse_fabs (res, &mask);
46497 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46498
46499 /* load nextafter (0.5, 0.0) */
46500 fmt = REAL_MODE_FORMAT (mode);
46501 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46502 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46503
46504 /* xa = xa + 0.5 */
46505 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46506 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46507
46508 /* xa = (double)(int64_t)xa */
46509 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46510 expand_fix (xi, xa, 0);
46511 expand_float (xa, xi, 0);
46512
46513 /* res = copysign (xa, operand1) */
46514 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46515
46516 emit_label (label);
46517 LABEL_NUSES (label) = 1;
46518
46519 emit_move_insn (operand0, res);
46520 }
46521
46522 /* Expand SSE sequence for computing round
46523 from OP1 storing into OP0 using sse4 round insn. */
46524 void
46525 ix86_expand_round_sse4 (rtx op0, rtx op1)
46526 {
46527 machine_mode mode = GET_MODE (op0);
46528 rtx e1, e2, res, half;
46529 const struct real_format *fmt;
46530 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46531 rtx (*gen_copysign) (rtx, rtx, rtx);
46532 rtx (*gen_round) (rtx, rtx, rtx);
46533
46534 switch (mode)
46535 {
46536 case SFmode:
46537 gen_copysign = gen_copysignsf3;
46538 gen_round = gen_sse4_1_roundsf2;
46539 break;
46540 case DFmode:
46541 gen_copysign = gen_copysigndf3;
46542 gen_round = gen_sse4_1_rounddf2;
46543 break;
46544 default:
46545 gcc_unreachable ();
46546 }
46547
46548 /* round (a) = trunc (a + copysign (0.5, a)) */
46549
46550 /* load nextafter (0.5, 0.0) */
46551 fmt = REAL_MODE_FORMAT (mode);
46552 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46553 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46554 half = const_double_from_real_value (pred_half, mode);
46555
46556 /* e1 = copysign (0.5, op1) */
46557 e1 = gen_reg_rtx (mode);
46558 emit_insn (gen_copysign (e1, half, op1));
46559
46560 /* e2 = op1 + e1 */
46561 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46562
46563 /* res = trunc (e2) */
46564 res = gen_reg_rtx (mode);
46565 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46566
46567 emit_move_insn (op0, res);
46568 }
46569 \f
46570
46571 /* Table of valid machine attributes. */
46572 static const struct attribute_spec ix86_attribute_table[] =
46573 {
46574 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46575 affects_type_identity } */
46576 /* Stdcall attribute says callee is responsible for popping arguments
46577 if they are not variable. */
46578 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46579 true },
46580 /* Fastcall attribute says callee is responsible for popping arguments
46581 if they are not variable. */
46582 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46583 true },
46584 /* Thiscall attribute says callee is responsible for popping arguments
46585 if they are not variable. */
46586 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46587 true },
46588 /* Cdecl attribute says the callee is a normal C declaration */
46589 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46590 true },
46591 /* Regparm attribute specifies how many integer arguments are to be
46592 passed in registers. */
46593 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
46594 true },
46595 /* Sseregparm attribute says we are using x86_64 calling conventions
46596 for FP arguments. */
46597 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46598 true },
46599 /* The transactional memory builtins are implicitly regparm or fastcall
46600 depending on the ABI. Override the generic do-nothing attribute that
46601 these builtins were declared with. */
46602 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
46603 true },
46604 /* force_align_arg_pointer says this function realigns the stack at entry. */
46605 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46606 false, true, true, ix86_handle_cconv_attribute, false },
46607 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46608 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
46609 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
46610 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
46611 false },
46612 #endif
46613 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46614 false },
46615 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46616 false },
46617 #ifdef SUBTARGET_ATTRIBUTE_TABLE
46618 SUBTARGET_ATTRIBUTE_TABLE,
46619 #endif
46620 /* ms_abi and sysv_abi calling convention function attributes. */
46621 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46622 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46623 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
46624 false },
46625 { "callee_pop_aggregate_return", 1, 1, false, true, true,
46626 ix86_handle_callee_pop_aggregate_return, true },
46627 /* End element. */
46628 { NULL, 0, 0, false, false, false, NULL, false }
46629 };
46630
46631 /* Implement targetm.vectorize.builtin_vectorization_cost. */
46632 static int
46633 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
46634 tree vectype, int)
46635 {
46636 unsigned elements;
46637
46638 switch (type_of_cost)
46639 {
46640 case scalar_stmt:
46641 return ix86_cost->scalar_stmt_cost;
46642
46643 case scalar_load:
46644 return ix86_cost->scalar_load_cost;
46645
46646 case scalar_store:
46647 return ix86_cost->scalar_store_cost;
46648
46649 case vector_stmt:
46650 return ix86_cost->vec_stmt_cost;
46651
46652 case vector_load:
46653 return ix86_cost->vec_align_load_cost;
46654
46655 case vector_store:
46656 return ix86_cost->vec_store_cost;
46657
46658 case vec_to_scalar:
46659 return ix86_cost->vec_to_scalar_cost;
46660
46661 case scalar_to_vec:
46662 return ix86_cost->scalar_to_vec_cost;
46663
46664 case unaligned_load:
46665 case unaligned_store:
46666 return ix86_cost->vec_unalign_load_cost;
46667
46668 case cond_branch_taken:
46669 return ix86_cost->cond_taken_branch_cost;
46670
46671 case cond_branch_not_taken:
46672 return ix86_cost->cond_not_taken_branch_cost;
46673
46674 case vec_perm:
46675 case vec_promote_demote:
46676 return ix86_cost->vec_stmt_cost;
46677
46678 case vec_construct:
46679 elements = TYPE_VECTOR_SUBPARTS (vectype);
46680 return ix86_cost->vec_stmt_cost * (elements / 2 + 1);
46681
46682 default:
46683 gcc_unreachable ();
46684 }
46685 }
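
/* For example, a vec_construct of a 4-element vector is costed here at
   (4 / 2 + 1) = 3 times vec_stmt_cost.  */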
46686
46687 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
46688 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
46689 insn every time. */
46690
46691 static GTY(()) rtx_insn *vselect_insn;
46692
46693 /* Initialize vselect_insn. */
46694
46695 static void
46696 init_vselect_insn (void)
46697 {
46698 unsigned i;
46699 rtx x;
46700
46701 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
46702 for (i = 0; i < MAX_VECT_LEN; ++i)
46703 XVECEXP (x, 0, i) = const0_rtx;
46704 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
46705 const0_rtx), x);
46706 x = gen_rtx_SET (const0_rtx, x);
46707 start_sequence ();
46708 vselect_insn = emit_insn (x);
46709 end_sequence ();
46710 }
46711
46712 /* Construct (set target (vec_select op0 (parallel perm))) and
46713 return true if that's a valid instruction in the active ISA. */
46714
46715 static bool
46716 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
46717 unsigned nelt, bool testing_p)
46718 {
46719 unsigned int i;
46720 rtx x, save_vconcat;
46721 int icode;
46722
46723 if (vselect_insn == NULL_RTX)
46724 init_vselect_insn ();
46725
46726 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
46727 PUT_NUM_ELEM (XVEC (x, 0), nelt);
46728 for (i = 0; i < nelt; ++i)
46729 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
46730 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46731 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
46732 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
46733 SET_DEST (PATTERN (vselect_insn)) = target;
46734 icode = recog_memoized (vselect_insn);
46735
46736 if (icode >= 0 && !testing_p)
46737 emit_insn (copy_rtx (PATTERN (vselect_insn)));
46738
46739 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
46740 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
46741 INSN_CODE (vselect_insn) = -1;
46742
46743 return icode >= 0;
46744 }
46745
46746 /* Similar, but generate a vec_concat from op0 and op1 as well. */
46747
46748 static bool
46749 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
46750 const unsigned char *perm, unsigned nelt,
46751 bool testing_p)
46752 {
46753 machine_mode v2mode;
46754 rtx x;
46755 bool ok;
46756
46757 if (vselect_insn == NULL_RTX)
46758 init_vselect_insn ();
46759
46760 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
46761 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46762 PUT_MODE (x, v2mode);
46763 XEXP (x, 0) = op0;
46764 XEXP (x, 1) = op1;
46765 ok = expand_vselect (target, x, perm, nelt, testing_p);
46766 XEXP (x, 0) = const0_rtx;
46767 XEXP (x, 1) = const0_rtx;
46768 return ok;
46769 }
46770
46771 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46772 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
46773
46774 static bool
46775 expand_vec_perm_blend (struct expand_vec_perm_d *d)
46776 {
46777 machine_mode vmode = d->vmode;
46778 unsigned i, mask, nelt = d->nelt;
46779 rtx target, op0, op1, x;
46780 rtx rperm[32], vperm;
46781
46782 if (d->one_operand_p)
46783 return false;
46784 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
46785 && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
46786 ;
46787 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
46788 ;
46789 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
46790 ;
46791 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
46792 ;
46793 else
46794 return false;
46795
46796 /* This is a blend, not a permute. Elements must stay in their
46797 respective lanes. */
46798 for (i = 0; i < nelt; ++i)
46799 {
46800 unsigned e = d->perm[i];
46801 if (!(e == i || e == i + nelt))
46802 return false;
46803 }
46804
46805 if (d->testing_p)
46806 return true;
46807
46808 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
46809 decision should be extracted elsewhere, so that we only try that
46810 sequence once all budget==3 options have been tried. */
46811 target = d->target;
46812 op0 = d->op0;
46813 op1 = d->op1;
46814 mask = 0;
46815
46816 switch (vmode)
46817 {
46818 case V8DFmode:
46819 case V16SFmode:
46820 case V4DFmode:
46821 case V8SFmode:
46822 case V2DFmode:
46823 case V4SFmode:
46824 case V8HImode:
46825 case V8SImode:
46826 case V32HImode:
46827 case V64QImode:
46828 case V16SImode:
46829 case V8DImode:
46830 for (i = 0; i < nelt; ++i)
46831 mask |= (d->perm[i] >= nelt) << i;
46832 break;
46833
46834 case V2DImode:
46835 for (i = 0; i < 2; ++i)
46836 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
46837 vmode = V8HImode;
46838 goto do_subreg;
46839
46840 case V4SImode:
46841 for (i = 0; i < 4; ++i)
46842 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46843 vmode = V8HImode;
46844 goto do_subreg;
46845
46846 case V16QImode:
46847 /* See if bytes move in pairs so we can use pblendw with
46848 an immediate argument, rather than pblendvb with a vector
46849 argument. */
46850 for (i = 0; i < 16; i += 2)
46851 if (d->perm[i] + 1 != d->perm[i + 1])
46852 {
46853 use_pblendvb:
46854 for (i = 0; i < nelt; ++i)
46855 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
46856
46857 finish_pblendvb:
46858 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
46859 vperm = force_reg (vmode, vperm);
46860
46861 if (GET_MODE_SIZE (vmode) == 16)
46862 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
46863 else
46864 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
46865 if (target != d->target)
46866 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46867 return true;
46868 }
46869
46870 for (i = 0; i < 8; ++i)
46871 mask |= (d->perm[i * 2] >= 16) << i;
46872 vmode = V8HImode;
46873 /* FALLTHRU */
46874
46875 do_subreg:
46876 target = gen_reg_rtx (vmode);
46877 op0 = gen_lowpart (vmode, op0);
46878 op1 = gen_lowpart (vmode, op1);
46879 break;
46880
46881 case V32QImode:
46882 /* See if bytes move in pairs. If not, vpblendvb must be used. */
46883 for (i = 0; i < 32; i += 2)
46884 if (d->perm[i] + 1 != d->perm[i + 1])
46885 goto use_pblendvb;
46886 /* See if bytes move in quadruplets. If yes, vpblendd
46887 with immediate can be used. */
46888 for (i = 0; i < 32; i += 4)
46889 if (d->perm[i] + 2 != d->perm[i + 2])
46890 break;
46891 if (i < 32)
46892 {
46893 /* See if bytes move the same in both lanes. If yes,
46894 vpblendw with immediate can be used. */
46895 for (i = 0; i < 16; i += 2)
46896 if (d->perm[i] + 16 != d->perm[i + 16])
46897 goto use_pblendvb;
46898
46899 /* Use vpblendw. */
46900 for (i = 0; i < 16; ++i)
46901 mask |= (d->perm[i * 2] >= 32) << i;
46902 vmode = V16HImode;
46903 goto do_subreg;
46904 }
46905
46906 /* Use vpblendd. */
46907 for (i = 0; i < 8; ++i)
46908 mask |= (d->perm[i * 4] >= 32) << i;
46909 vmode = V8SImode;
46910 goto do_subreg;
46911
46912 case V16HImode:
46913 /* See if words move in pairs. If yes, vpblendd can be used. */
46914 for (i = 0; i < 16; i += 2)
46915 if (d->perm[i] + 1 != d->perm[i + 1])
46916 break;
46917 if (i < 16)
46918 {
46919 /* See if words move the same in both lanes. If not,
46920 vpblendvb must be used. */
46921 for (i = 0; i < 8; i++)
46922 if (d->perm[i] + 8 != d->perm[i + 8])
46923 {
46924 /* Use vpblendvb. */
46925 for (i = 0; i < 32; ++i)
46926 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
46927
46928 vmode = V32QImode;
46929 nelt = 32;
46930 target = gen_reg_rtx (vmode);
46931 op0 = gen_lowpart (vmode, op0);
46932 op1 = gen_lowpart (vmode, op1);
46933 goto finish_pblendvb;
46934 }
46935
46936 /* Use vpblendw. */
46937 for (i = 0; i < 16; ++i)
46938 mask |= (d->perm[i] >= 16) << i;
46939 break;
46940 }
46941
46942 /* Use vpblendd. */
46943 for (i = 0; i < 8; ++i)
46944 mask |= (d->perm[i * 2] >= 16) << i;
46945 vmode = V8SImode;
46946 goto do_subreg;
46947
46948 case V4DImode:
46949 /* Use vpblendd. */
46950 for (i = 0; i < 4; ++i)
46951 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46952 vmode = V8SImode;
46953 goto do_subreg;
46954
46955 default:
46956 gcc_unreachable ();
46957 }
46958
46959 /* This matches five different patterns with the different modes. */
46960 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
46961 x = gen_rtx_SET (target, x);
46962 emit_insn (x);
46963 if (target != d->target)
46964 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46965
46966 return true;
46967 }
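
/* A worked example of the immediate construction above: for a V4SFmode
   blend with perm = { 0, 5, 2, 7 }, elements 1 and 3 come from op1, so
   mask = (1 << 1) | (1 << 3) = 0xa, which corresponds to a blendps with
   immediate 0xa.  */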
46968
46969 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46970 in terms of the variable form of vpermilps.
46971
46972 Note that we will have already failed the immediate input vpermilps,
46973 which requires that the high and low part shuffle be identical; the
46974 variable form doesn't require that. */
46975
46976 static bool
46977 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
46978 {
46979 rtx rperm[8], vperm;
46980 unsigned i;
46981
46982 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
46983 return false;
46984
46985 /* We can only permute within the 128-bit lane. */
46986 for (i = 0; i < 8; ++i)
46987 {
46988 unsigned e = d->perm[i];
46989 if (i < 4 ? e >= 4 : e < 4)
46990 return false;
46991 }
46992
46993 if (d->testing_p)
46994 return true;
46995
46996 for (i = 0; i < 8; ++i)
46997 {
46998 unsigned e = d->perm[i];
46999
47000 /* Within each 128-bit lane, the elements of op0 are numbered
47001 from 0 and the elements of op1 are numbered from 4. */
47002 if (e >= 8 + 4)
47003 e -= 8;
47004 else if (e >= 4)
47005 e -= 4;
47006
47007 rperm[i] = GEN_INT (e);
47008 }
47009
47010 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
47011 vperm = force_reg (V8SImode, vperm);
47012 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
47013
47014 return true;
47015 }
47016
47017 /* Return true if permutation D can be performed as a VMODE permutation
47018 instead. */
47019
47020 static bool
47021 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
47022 {
47023 unsigned int i, j, chunk;
47024
47025 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
47026 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
47027 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
47028 return false;
47029
47030 if (GET_MODE_NUNITS (vmode) >= d->nelt)
47031 return true;
47032
47033 chunk = d->nelt / GET_MODE_NUNITS (vmode);
47034 for (i = 0; i < d->nelt; i += chunk)
47035 if (d->perm[i] & (chunk - 1))
47036 return false;
47037 else
47038 for (j = 1; j < chunk; ++j)
47039 if (d->perm[i] + j != d->perm[i + j])
47040 return false;
47041
47042 return true;
47043 }
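
/* For example, the V16QImode permutation { 4, 5, 6, 7, 0, 1, 2, 3,
   12, 13, 14, 15, 8, 9, 10, 11 } only moves aligned 4-byte chunks, so it
   is also expressible as the V4SImode permutation { 1, 0, 3, 2 } and the
   wider mode can be used instead.  */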
47044
47045 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47046 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
47047
47048 static bool
47049 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
47050 {
47051 unsigned i, nelt, eltsz, mask;
47052 unsigned char perm[64];
47053 machine_mode vmode = V16QImode;
47054 rtx rperm[64], vperm, target, op0, op1;
47055
47056 nelt = d->nelt;
47057
47058 if (!d->one_operand_p)
47059 {
47060 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
47061 {
47062 if (TARGET_AVX2
47063 && valid_perm_using_mode_p (V2TImode, d))
47064 {
47065 if (d->testing_p)
47066 return true;
47067
47068 /* Use vperm2i128 insn. The pattern uses
47069 V4DImode instead of V2TImode. */
47070 target = d->target;
47071 if (d->vmode != V4DImode)
47072 target = gen_reg_rtx (V4DImode);
47073 op0 = gen_lowpart (V4DImode, d->op0);
47074 op1 = gen_lowpart (V4DImode, d->op1);
47075 rperm[0]
47076 = GEN_INT ((d->perm[0] / (nelt / 2))
47077 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
47078 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
47079 if (target != d->target)
47080 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47081 return true;
47082 }
47083 return false;
47084 }
47085 }
47086 else
47087 {
47088 if (GET_MODE_SIZE (d->vmode) == 16)
47089 {
47090 if (!TARGET_SSSE3)
47091 return false;
47092 }
47093 else if (GET_MODE_SIZE (d->vmode) == 32)
47094 {
47095 if (!TARGET_AVX2)
47096 return false;
47097
47098 /* V4DImode should be already handled through
47099 expand_vselect by vpermq instruction. */
47100 gcc_assert (d->vmode != V4DImode);
47101
47102 vmode = V32QImode;
47103 if (d->vmode == V8SImode
47104 || d->vmode == V16HImode
47105 || d->vmode == V32QImode)
47106 {
47107 /* First see if vpermq can be used for
47108 V8SImode/V16HImode/V32QImode. */
47109 if (valid_perm_using_mode_p (V4DImode, d))
47110 {
47111 for (i = 0; i < 4; i++)
47112 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47113 if (d->testing_p)
47114 return true;
47115 target = gen_reg_rtx (V4DImode);
47116 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47117 perm, 4, false))
47118 {
47119 emit_move_insn (d->target,
47120 gen_lowpart (d->vmode, target));
47121 return true;
47122 }
47123 return false;
47124 }
47125
47126 /* Next see if vpermd can be used. */
47127 if (valid_perm_using_mode_p (V8SImode, d))
47128 vmode = V8SImode;
47129 }
47130 /* Or if vpermps can be used. */
47131 else if (d->vmode == V8SFmode)
47132 vmode = V8SImode;
47133
47134 if (vmode == V32QImode)
47135 {
47136 /* vpshufb only works within 128-bit lanes; it is not
47137 possible to shuffle bytes between the lanes. */
47138 for (i = 0; i < nelt; ++i)
47139 if ((d->perm[i] ^ i) & (nelt / 2))
47140 return false;
47141 }
47142 }
47143 else if (GET_MODE_SIZE (d->vmode) == 64)
47144 {
47145 if (!TARGET_AVX512BW)
47146 return false;
47147
47148 /* If vpermq didn't work, vpshufb won't work either. */
47149 if (d->vmode == V8DFmode || d->vmode == V8DImode)
47150 return false;
47151
47152 vmode = V64QImode;
47153 if (d->vmode == V16SImode
47154 || d->vmode == V32HImode
47155 || d->vmode == V64QImode)
47156 {
47157 /* First see if vpermq can be used for
47158 V16SImode/V32HImode/V64QImode. */
47159 if (valid_perm_using_mode_p (V8DImode, d))
47160 {
47161 for (i = 0; i < 8; i++)
47162 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47163 if (d->testing_p)
47164 return true;
47165 target = gen_reg_rtx (V8DImode);
47166 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47167 perm, 8, false))
47168 {
47169 emit_move_insn (d->target,
47170 gen_lowpart (d->vmode, target));
47171 return true;
47172 }
47173 return false;
47174 }
47175
47176 /* Next see if vpermd can be used. */
47177 if (valid_perm_using_mode_p (V16SImode, d))
47178 vmode = V16SImode;
47179 }
47180 /* Or if vpermps can be used. */
47181 else if (d->vmode == V16SFmode)
47182 vmode = V16SImode;
47183 if (vmode == V64QImode)
47184 {
47185 /* vpshufb only works within 128-bit lanes; it is not
47186 possible to shuffle bytes between the lanes. */
47187 for (i = 0; i < nelt; ++i)
47188 if ((d->perm[i] ^ i) & (nelt / 4))
47189 return false;
47190 }
47191 }
47192 else
47193 return false;
47194 }
47195
47196 if (d->testing_p)
47197 return true;
47198
47199 if (vmode == V8SImode)
47200 for (i = 0; i < 8; ++i)
47201 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47202 else if (vmode == V16SImode)
47203 for (i = 0; i < 16; ++i)
47204 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
47205 else
47206 {
47207 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47208 if (!d->one_operand_p)
47209 mask = 2 * nelt - 1;
47210 else if (vmode == V16QImode)
47211 mask = nelt - 1;
47212 else if (vmode == V64QImode)
47213 mask = nelt / 4 - 1;
47214 else
47215 mask = nelt / 2 - 1;
47216
47217 for (i = 0; i < nelt; ++i)
47218 {
47219 unsigned j, e = d->perm[i] & mask;
47220 for (j = 0; j < eltsz; ++j)
47221 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47222 }
47223 }
47224
47225 vperm = gen_rtx_CONST_VECTOR (vmode,
47226 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47227 vperm = force_reg (vmode, vperm);
47228
47229 target = d->target;
47230 if (d->vmode != vmode)
47231 target = gen_reg_rtx (vmode);
47232 op0 = gen_lowpart (vmode, d->op0);
47233 if (d->one_operand_p)
47234 {
47235 if (vmode == V16QImode)
47236 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47237 else if (vmode == V32QImode)
47238 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47239 else if (vmode == V64QImode)
47240 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47241 else if (vmode == V8SFmode)
47242 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47243 else if (vmode == V8SImode)
47244 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47245 else if (vmode == V16SFmode)
47246 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47247 else if (vmode == V16SImode)
47248 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47249 else
47250 gcc_unreachable ();
47251 }
47252 else
47253 {
47254 op1 = gen_lowpart (vmode, d->op1);
47255 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47256 }
47257 if (target != d->target)
47258 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47259
47260 return true;
47261 }
47262
47263 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
47264 in a single instruction. */
47265
47266 static bool
47267 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47268 {
47269 unsigned i, nelt = d->nelt;
47270 unsigned char perm2[MAX_VECT_LEN];
47271
47272 /* Check plain VEC_SELECT first, because AVX has instructions that could
47273 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47274 input where SEL+CONCAT may not. */
47275 if (d->one_operand_p)
47276 {
47277 int mask = nelt - 1;
47278 bool identity_perm = true;
47279 bool broadcast_perm = true;
47280
47281 for (i = 0; i < nelt; i++)
47282 {
47283 perm2[i] = d->perm[i] & mask;
47284 if (perm2[i] != i)
47285 identity_perm = false;
47286 if (perm2[i])
47287 broadcast_perm = false;
47288 }
47289
47290 if (identity_perm)
47291 {
47292 if (!d->testing_p)
47293 emit_move_insn (d->target, d->op0);
47294 return true;
47295 }
47296 else if (broadcast_perm && TARGET_AVX2)
47297 {
47298 /* Use vpbroadcast{b,w,d}. */
47299 rtx (*gen) (rtx, rtx) = NULL;
47300 switch (d->vmode)
47301 {
47302 case V64QImode:
47303 if (TARGET_AVX512BW)
47304 gen = gen_avx512bw_vec_dupv64qi_1;
47305 break;
47306 case V32QImode:
47307 gen = gen_avx2_pbroadcastv32qi_1;
47308 break;
47309 case V32HImode:
47310 if (TARGET_AVX512BW)
47311 gen = gen_avx512bw_vec_dupv32hi_1;
47312 break;
47313 case V16HImode:
47314 gen = gen_avx2_pbroadcastv16hi_1;
47315 break;
47316 case V16SImode:
47317 if (TARGET_AVX512F)
47318 gen = gen_avx512f_vec_dupv16si_1;
47319 break;
47320 case V8SImode:
47321 gen = gen_avx2_pbroadcastv8si_1;
47322 break;
47323 case V16QImode:
47324 gen = gen_avx2_pbroadcastv16qi;
47325 break;
47326 case V8HImode:
47327 gen = gen_avx2_pbroadcastv8hi;
47328 break;
47329 case V16SFmode:
47330 if (TARGET_AVX512F)
47331 gen = gen_avx512f_vec_dupv16sf_1;
47332 break;
47333 case V8SFmode:
47334 gen = gen_avx2_vec_dupv8sf_1;
47335 break;
47336 case V8DFmode:
47337 if (TARGET_AVX512F)
47338 gen = gen_avx512f_vec_dupv8df_1;
47339 break;
47340 case V8DImode:
47341 if (TARGET_AVX512F)
47342 gen = gen_avx512f_vec_dupv8di_1;
47343 break;
47344 /* For other modes prefer other shuffles this function creates. */
47345 default: break;
47346 }
47347 if (gen != NULL)
47348 {
47349 if (!d->testing_p)
47350 emit_insn (gen (d->target, d->op0));
47351 return true;
47352 }
47353 }
47354
47355 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47356 return true;
47357
47358 /* There are plenty of patterns in sse.md that are written for
47359 SEL+CONCAT and are not replicated for a single op. Perhaps
47360 that should be changed, to avoid the nastiness here. */
47361
47362 /* Recognize interleave style patterns, which means incrementing
47363 every other permutation operand. */
47364 for (i = 0; i < nelt; i += 2)
47365 {
47366 perm2[i] = d->perm[i] & mask;
47367 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47368 }
47369 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47370 d->testing_p))
47371 return true;
47372
47373 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
47374 if (nelt >= 4)
47375 {
47376 for (i = 0; i < nelt; i += 4)
47377 {
47378 perm2[i + 0] = d->perm[i + 0] & mask;
47379 perm2[i + 1] = d->perm[i + 1] & mask;
47380 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47381 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47382 }
47383
47384 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47385 d->testing_p))
47386 return true;
47387 }
47388 }
47389
47390 /* Finally, try the fully general two operand permute. */
47391 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47392 d->testing_p))
47393 return true;
47394
47395 /* Recognize interleave style patterns with reversed operands. */
47396 if (!d->one_operand_p)
47397 {
47398 for (i = 0; i < nelt; ++i)
47399 {
47400 unsigned e = d->perm[i];
47401 if (e >= nelt)
47402 e -= nelt;
47403 else
47404 e += nelt;
47405 perm2[i] = e;
47406 }
47407
47408 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47409 d->testing_p))
47410 return true;
47411 }
47412
47413 /* Try the SSE4.1 blend variable merge instructions. */
47414 if (expand_vec_perm_blend (d))
47415 return true;
47416
47417 /* Try one of the AVX vpermil variable permutations. */
47418 if (expand_vec_perm_vpermil (d))
47419 return true;
47420
47421 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47422 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47423 if (expand_vec_perm_pshufb (d))
47424 return true;
47425
47426 /* Try the AVX2 vpalignr instruction. */
47427 if (expand_vec_perm_palignr (d, true))
47428 return true;
47429
47430 /* Try the AVX512F vpermi2 instructions. */
47431 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47432 return true;
47433
47434 return false;
47435 }
47436
47437 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47438 in terms of a pair of pshuflw + pshufhw instructions. */
47439
47440 static bool
47441 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47442 {
47443 unsigned char perm2[MAX_VECT_LEN];
47444 unsigned i;
47445 bool ok;
47446
47447 if (d->vmode != V8HImode || !d->one_operand_p)
47448 return false;
47449
47450 /* The two permutations only operate in 64-bit lanes. */
47451 for (i = 0; i < 4; ++i)
47452 if (d->perm[i] >= 4)
47453 return false;
47454 for (i = 4; i < 8; ++i)
47455 if (d->perm[i] < 4)
47456 return false;
47457
47458 if (d->testing_p)
47459 return true;
47460
47461 /* Emit the pshuflw. */
47462 memcpy (perm2, d->perm, 4);
47463 for (i = 4; i < 8; ++i)
47464 perm2[i] = i;
47465 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47466 gcc_assert (ok);
47467
47468 /* Emit the pshufhw. */
47469 memcpy (perm2 + 4, d->perm + 4, 4);
47470 for (i = 0; i < 4; ++i)
47471 perm2[i] = i;
47472 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47473 gcc_assert (ok);
47474
47475 return true;
47476 }
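
/* For example, the V8HImode permutation { 2, 1, 0, 3, 5, 4, 7, 6 }
   (words 0-3 shuffled within the low quadword, words 4-7 within the
   high quadword) expands to a pshuflw with { 2, 1, 0, 3, 4, 5, 6, 7 }
   followed by a pshufhw with { 0, 1, 2, 3, 5, 4, 7, 6 } on its result.  */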
47477
47478 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47479 the permutation using the SSSE3 palignr instruction. This succeeds
47480 when all of the elements in PERM fit within one vector and we merely
47481 need to shift them down so that a single vector permutation has a
47482 chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
47483 the vpalignr instruction itself can perform the requested permutation. */
47484
47485 static bool
47486 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47487 {
47488 unsigned i, nelt = d->nelt;
47489 unsigned min, max, minswap, maxswap;
47490 bool in_order, ok, swap = false;
47491 rtx shift, target;
47492 struct expand_vec_perm_d dcopy;
47493
47494 /* Even with AVX, palignr only operates on 128-bit vectors;
47495 with AVX2, palignr operates on both 128-bit lanes. */
47496 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47497 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47498 return false;
47499
47500 min = 2 * nelt;
47501 max = 0;
47502 minswap = 2 * nelt;
47503 maxswap = 0;
47504 for (i = 0; i < nelt; ++i)
47505 {
47506 unsigned e = d->perm[i];
47507 unsigned eswap = d->perm[i] ^ nelt;
47508 if (GET_MODE_SIZE (d->vmode) == 32)
47509 {
47510 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47511 eswap = e ^ (nelt / 2);
47512 }
47513 if (e < min)
47514 min = e;
47515 if (e > max)
47516 max = e;
47517 if (eswap < minswap)
47518 minswap = eswap;
47519 if (eswap > maxswap)
47520 maxswap = eswap;
47521 }
47522 if (min == 0
47523 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47524 {
47525 if (d->one_operand_p
47526 || minswap == 0
47527 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47528 ? nelt / 2 : nelt))
47529 return false;
47530 swap = true;
47531 min = minswap;
47532 max = maxswap;
47533 }
47534
47535 /* Given that we have SSSE3, we know we'll be able to implement the
47536 single operand permutation after the palignr with pshufb for
47537 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47538 first. */
47539 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47540 return true;
47541
47542 dcopy = *d;
47543 if (swap)
47544 {
47545 dcopy.op0 = d->op1;
47546 dcopy.op1 = d->op0;
47547 for (i = 0; i < nelt; ++i)
47548 dcopy.perm[i] ^= nelt;
47549 }
47550
47551 in_order = true;
47552 for (i = 0; i < nelt; ++i)
47553 {
47554 unsigned e = dcopy.perm[i];
47555 if (GET_MODE_SIZE (d->vmode) == 32
47556 && e >= nelt
47557 && (e & (nelt / 2 - 1)) < min)
47558 e = e - min - (nelt / 2);
47559 else
47560 e = e - min;
47561 if (e != i)
47562 in_order = false;
47563 dcopy.perm[i] = e;
47564 }
47565 dcopy.one_operand_p = true;
47566
47567 if (single_insn_only_p && !in_order)
47568 return false;
47569
47570 /* For AVX2, test whether we can permute the result in one instruction. */
47571 if (d->testing_p)
47572 {
47573 if (in_order)
47574 return true;
47575 dcopy.op1 = dcopy.op0;
47576 return expand_vec_perm_1 (&dcopy);
47577 }
47578
47579 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
47580 if (GET_MODE_SIZE (d->vmode) == 16)
47581 {
47582 target = gen_reg_rtx (TImode);
47583 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47584 gen_lowpart (TImode, dcopy.op0), shift));
47585 }
47586 else
47587 {
47588 target = gen_reg_rtx (V2TImode);
47589 emit_insn (gen_avx2_palignrv2ti (target,
47590 gen_lowpart (V2TImode, dcopy.op1),
47591 gen_lowpart (V2TImode, dcopy.op0),
47592 shift));
47593 }
47594
47595 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47596
47597 /* Test for the degenerate case where the alignment by itself
47598 produces the desired permutation. */
47599 if (in_order)
47600 {
47601 emit_move_insn (d->target, dcopy.op0);
47602 return true;
47603 }
47604
47605 ok = expand_vec_perm_1 (&dcopy);
47606 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47607
47608 return ok;
47609 }
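
/* A worked example: for the two-operand V4SImode permutation
   { 3, 4, 5, 6 } the used element range is [3, 6], so a palignr that
   shifts the concatenated operands down by 3 elements (96 bits) already
   leaves the result in order (dcopy.perm becomes { 0, 1, 2, 3 }), and no
   second permutation is needed.  */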
47610
47611 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47612 the permutation using the SSE4_1 pblendv instruction. Potentially
47613 reduces the permutation from 2 pshufb insns and an or to 1 pshufb and 1 pblendv. */
47614
47615 static bool
47616 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47617 {
47618 unsigned i, which, nelt = d->nelt;
47619 struct expand_vec_perm_d dcopy, dcopy1;
47620 machine_mode vmode = d->vmode;
47621 bool ok;
47622
47623 /* Use the same checks as in expand_vec_perm_blend. */
47624 if (d->one_operand_p)
47625 return false;
47626 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47627 ;
47628 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47629 ;
47630 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47631 ;
47632 else
47633 return false;
47634
47635 /* Figure out where the permutation elements do not stay in their
47636 respective lanes, and which operand they come from. */
47637 for (i = 0, which = 0; i < nelt; ++i)
47638 {
47639 unsigned e = d->perm[i];
47640 if (e != i)
47641 which |= (e < nelt ? 1 : 2);
47642 }
47643 /* We can pblend the part where elements do not stay in their
47644 respective lanes only when these elements all come from one
47645 half of the permutation.
47646 {0 1 8 3 4 5 9 7} is ok as 8, 9 are not at their respective
47647 lanes, but both 8 and 9 >= 8;
47648 {0 1 8 3 4 5 2 7} is not ok as 2 and 8 are not at their
47649 respective lanes and 8 >= 8, but 2 is not. */
47650 if (which != 1 && which != 2)
47651 return false;
47652 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
47653 return true;
47654
47655 /* First we apply a one-operand permutation to the part where
47656 elements do not stay in their respective lanes. */
47657 dcopy = *d;
47658 if (which == 2)
47659 dcopy.op0 = dcopy.op1 = d->op1;
47660 else
47661 dcopy.op0 = dcopy.op1 = d->op0;
47662 if (!d->testing_p)
47663 dcopy.target = gen_reg_rtx (vmode);
47664 dcopy.one_operand_p = true;
47665
47666 for (i = 0; i < nelt; ++i)
47667 dcopy.perm[i] = d->perm[i] & (nelt - 1);
47668
47669 ok = expand_vec_perm_1 (&dcopy);
47670 if (GET_MODE_SIZE (vmode) != 16 && !ok)
47671 return false;
47672 else
47673 gcc_assert (ok);
47674 if (d->testing_p)
47675 return true;
47676
47677 /* Next we put permuted elements into their positions. */
47678 dcopy1 = *d;
47679 if (which == 2)
47680 dcopy1.op1 = dcopy.target;
47681 else
47682 dcopy1.op0 = dcopy.target;
47683
47684 for (i = 0; i < nelt; ++i)
47685 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
47686
47687 ok = expand_vec_perm_blend (&dcopy1);
47688 gcc_assert (ok);
47689
47690 return true;
47691 }
47692
47693 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
47694
47695 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47696 a two vector permutation into a single vector permutation by using
47697 an interleave operation to merge the vectors. */
47698
47699 static bool
47700 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
47701 {
47702 struct expand_vec_perm_d dremap, dfinal;
47703 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
47704 unsigned HOST_WIDE_INT contents;
47705 unsigned char remap[2 * MAX_VECT_LEN];
47706 rtx_insn *seq;
47707 bool ok, same_halves = false;
47708
47709 if (GET_MODE_SIZE (d->vmode) == 16)
47710 {
47711 if (d->one_operand_p)
47712 return false;
47713 }
47714 else if (GET_MODE_SIZE (d->vmode) == 32)
47715 {
47716 if (!TARGET_AVX)
47717 return false;
47718 /* For 32-byte modes allow even d->one_operand_p.
47719 The lack of cross-lane shuffling in some instructions
47720 might prevent a single insn shuffle. */
47721 dfinal = *d;
47722 dfinal.testing_p = true;
47723 /* If expand_vec_perm_interleave3 can expand this into
47724 a 3 insn sequence, give up and let it be expanded as
47725 a 3 insn sequence. While that is one insn longer,
47726 it doesn't need a memory operand, and in the common
47727 case where the interleave low and interleave high
47728 permutations with the same operands are adjacent, the
47729 pair needs only 4 insns after CSE. */
47730 if (expand_vec_perm_interleave3 (&dfinal))
47731 return false;
47732 }
47733 else
47734 return false;
47735
47736 /* Examine from whence the elements come. */
47737 contents = 0;
47738 for (i = 0; i < nelt; ++i)
47739 contents |= HOST_WIDE_INT_1U << d->perm[i];
47740
47741 memset (remap, 0xff, sizeof (remap));
47742 dremap = *d;
47743
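/* remap[e] will hold the position at which original element e ends up
   after the dremap shuffle chosen below; 0xff marks elements that the
   permutation never references. */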
47744 if (GET_MODE_SIZE (d->vmode) == 16)
47745 {
47746 unsigned HOST_WIDE_INT h1, h2, h3, h4;
47747
47748 /* Split the two input vectors into 4 halves. */
47749 h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
47750 h2 = h1 << nelt2;
47751 h3 = h2 << nelt2;
47752 h4 = h3 << nelt2;
47753
47754 /* If the elements are from the low halves, use interleave low; similarly
47755 for interleave high. If the elements are from mismatched halves, we
47756 can use shufps for V4SF/V4SI or do a DImode shuffle. */
47757 if ((contents & (h1 | h3)) == contents)
47758 {
47759 /* punpckl* */
47760 for (i = 0; i < nelt2; ++i)
47761 {
47762 remap[i] = i * 2;
47763 remap[i + nelt] = i * 2 + 1;
47764 dremap.perm[i * 2] = i;
47765 dremap.perm[i * 2 + 1] = i + nelt;
47766 }
47767 if (!TARGET_SSE2 && d->vmode == V4SImode)
47768 dremap.vmode = V4SFmode;
47769 }
47770 else if ((contents & (h2 | h4)) == contents)
47771 {
47772 /* punpckh* */
47773 for (i = 0; i < nelt2; ++i)
47774 {
47775 remap[i + nelt2] = i * 2;
47776 remap[i + nelt + nelt2] = i * 2 + 1;
47777 dremap.perm[i * 2] = i + nelt2;
47778 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
47779 }
47780 if (!TARGET_SSE2 && d->vmode == V4SImode)
47781 dremap.vmode = V4SFmode;
47782 }
47783 else if ((contents & (h1 | h4)) == contents)
47784 {
47785 /* shufps */
47786 for (i = 0; i < nelt2; ++i)
47787 {
47788 remap[i] = i;
47789 remap[i + nelt + nelt2] = i + nelt2;
47790 dremap.perm[i] = i;
47791 dremap.perm[i + nelt2] = i + nelt + nelt2;
47792 }
47793 if (nelt != 4)
47794 {
47795 /* shufpd */
47796 dremap.vmode = V2DImode;
47797 dremap.nelt = 2;
47798 dremap.perm[0] = 0;
47799 dremap.perm[1] = 3;
47800 }
47801 }
47802 else if ((contents & (h2 | h3)) == contents)
47803 {
47804 /* shufps */
47805 for (i = 0; i < nelt2; ++i)
47806 {
47807 remap[i + nelt2] = i;
47808 remap[i + nelt] = i + nelt2;
47809 dremap.perm[i] = i + nelt2;
47810 dremap.perm[i + nelt2] = i + nelt;
47811 }
47812 if (nelt != 4)
47813 {
47814 /* shufpd */
47815 dremap.vmode = V2DImode;
47816 dremap.nelt = 2;
47817 dremap.perm[0] = 1;
47818 dremap.perm[1] = 2;
47819 }
47820 }
47821 else
47822 return false;
47823 }
47824 else
47825 {
47826 unsigned int nelt4 = nelt / 4, nzcnt = 0;
47827 unsigned HOST_WIDE_INT q[8];
47828 unsigned int nonzero_halves[4];
47829
47830 /* Split the two input vectors into 8 quarters. */
47831 q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
47832 for (i = 1; i < 8; ++i)
47833 q[i] = q[0] << (nelt4 * i);
47834 for (i = 0; i < 4; ++i)
47835 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
47836 {
47837 nonzero_halves[nzcnt] = i;
47838 ++nzcnt;
47839 }
47840
47841 if (nzcnt == 1)
47842 {
47843 gcc_assert (d->one_operand_p);
47844 nonzero_halves[1] = nonzero_halves[0];
47845 same_halves = true;
47846 }
47847 else if (d->one_operand_p)
47848 {
47849 gcc_assert (nonzero_halves[0] == 0);
47850 gcc_assert (nonzero_halves[1] == 1);
47851 }
47852
47853 if (nzcnt <= 2)
47854 {
47855 if (d->perm[0] / nelt2 == nonzero_halves[1])
47856 {
47857 /* Attempt to increase the likelihood that dfinal
47858 shuffle will be intra-lane. */
47859 char tmph = nonzero_halves[0];
47860 nonzero_halves[0] = nonzero_halves[1];
47861 nonzero_halves[1] = tmph;
47862 }
47863
47864 /* vperm2f128 or vperm2i128. */
47865 for (i = 0; i < nelt2; ++i)
47866 {
47867 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
47868 remap[i + nonzero_halves[0] * nelt2] = i;
47869 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
47870 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
47871 }
47872
47873 if (d->vmode != V8SFmode
47874 && d->vmode != V4DFmode
47875 && d->vmode != V8SImode)
47876 {
47877 dremap.vmode = V8SImode;
47878 dremap.nelt = 8;
47879 for (i = 0; i < 4; ++i)
47880 {
47881 dremap.perm[i] = i + nonzero_halves[0] * 4;
47882 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
47883 }
47884 }
47885 }
47886 else if (d->one_operand_p)
47887 return false;
47888 else if (TARGET_AVX2
47889 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
47890 {
47891 /* vpunpckl* */
47892 for (i = 0; i < nelt4; ++i)
47893 {
47894 remap[i] = i * 2;
47895 remap[i + nelt] = i * 2 + 1;
47896 remap[i + nelt2] = i * 2 + nelt2;
47897 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
47898 dremap.perm[i * 2] = i;
47899 dremap.perm[i * 2 + 1] = i + nelt;
47900 dremap.perm[i * 2 + nelt2] = i + nelt2;
47901 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
47902 }
47903 }
47904 else if (TARGET_AVX2
47905 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
47906 {
47907 /* vpunpckh* */
47908 for (i = 0; i < nelt4; ++i)
47909 {
47910 remap[i + nelt4] = i * 2;
47911 remap[i + nelt + nelt4] = i * 2 + 1;
47912 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
47913 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
47914 dremap.perm[i * 2] = i + nelt4;
47915 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
47916 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
47917 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
47918 }
47919 }
47920 else
47921 return false;
47922 }
47923
47924 /* Use the remapping array set up above to move the elements from their
47925 swizzled locations into their final destinations. */
47926 dfinal = *d;
47927 for (i = 0; i < nelt; ++i)
47928 {
47929 unsigned e = remap[d->perm[i]];
47930 gcc_assert (e < nelt);
47931 /* If same_halves is true, both halves of the remapped vector are the
47932 same. Avoid cross-lane accesses if possible. */
47933 if (same_halves && i >= nelt2)
47934 {
47935 gcc_assert (e < nelt2);
47936 dfinal.perm[i] = e + nelt2;
47937 }
47938 else
47939 dfinal.perm[i] = e;
47940 }
47941 if (!d->testing_p)
47942 {
47943 dremap.target = gen_reg_rtx (dremap.vmode);
47944 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47945 }
47946 dfinal.op1 = dfinal.op0;
47947 dfinal.one_operand_p = true;
47948
47949 /* Test if the final remap can be done with a single insn. For V4SFmode or
47950 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
47951 start_sequence ();
47952 ok = expand_vec_perm_1 (&dfinal);
47953 seq = get_insns ();
47954 end_sequence ();
47955
47956 if (!ok)
47957 return false;
47958
47959 if (d->testing_p)
47960 return true;
47961
47962 if (dremap.vmode != dfinal.vmode)
47963 {
47964 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
47965 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
47966 }
47967
47968 ok = expand_vec_perm_1 (&dremap);
47969 gcc_assert (ok);
47970
47971 emit_insn (seq);
47972 return true;
47973 }
47974
47975 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47976 a single vector cross-lane permutation into vpermq followed
47977 by any of the single insn permutations. */
47978
47979 static bool
47980 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
47981 {
47982 struct expand_vec_perm_d dremap, dfinal;
47983 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
47984 unsigned contents[2];
47985 bool ok;
47986
47987 if (!(TARGET_AVX2
47988 && (d->vmode == V32QImode || d->vmode == V16HImode)
47989 && d->one_operand_p))
47990 return false;
47991
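/* contents[0] and contents[1] record which 64-bit quarters of the input
   are referenced by the low and high halves of the permutation.  Each
   half may use at most two quarters, since the vpermq below can place
   only two quarters into each half of its result. */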
47992 contents[0] = 0;
47993 contents[1] = 0;
47994 for (i = 0; i < nelt2; ++i)
47995 {
47996 contents[0] |= 1u << (d->perm[i] / nelt4);
47997 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
47998 }
47999
48000 for (i = 0; i < 2; ++i)
48001 {
48002 unsigned int cnt = 0;
48003 for (j = 0; j < 4; ++j)
48004 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
48005 return false;
48006 }
48007
48008 if (d->testing_p)
48009 return true;
48010
48011 dremap = *d;
48012 dremap.vmode = V4DImode;
48013 dremap.nelt = 4;
48014 dremap.target = gen_reg_rtx (V4DImode);
48015 dremap.op0 = gen_lowpart (V4DImode, d->op0);
48016 dremap.op1 = dremap.op0;
48017 dremap.one_operand_p = true;
48018 for (i = 0; i < 2; ++i)
48019 {
48020 unsigned int cnt = 0;
48021 for (j = 0; j < 4; ++j)
48022 if ((contents[i] & (1u << j)) != 0)
48023 dremap.perm[2 * i + cnt++] = j;
48024 for (; cnt < 2; ++cnt)
48025 dremap.perm[2 * i + cnt] = 0;
48026 }
48027
48028 dfinal = *d;
48029 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48030 dfinal.op1 = dfinal.op0;
48031 dfinal.one_operand_p = true;
48032 for (i = 0, j = 0; i < nelt; ++i)
48033 {
48034 if (i == nelt2)
48035 j = 2;
48036 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
48037 if ((d->perm[i] / nelt4) == dremap.perm[j])
48038 ;
48039 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
48040 dfinal.perm[i] |= nelt4;
48041 else
48042 gcc_unreachable ();
48043 }
48044
48045 ok = expand_vec_perm_1 (&dremap);
48046 gcc_assert (ok);
48047
48048 ok = expand_vec_perm_1 (&dfinal);
48049 gcc_assert (ok);
48050
48051 return true;
48052 }
48053
48054 /* A subroutine of ix86_expand_vec_perm_const_1. Try to expand
48055 a vector permutation using two instructions, vperm2f128 resp.
48056 vperm2i128 followed by any single in-lane permutation. */
48057
48058 static bool
48059 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
48060 {
48061 struct expand_vec_perm_d dfirst, dsecond;
48062 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
48063 bool ok;
48064
48065 if (!TARGET_AVX
48066 || GET_MODE_SIZE (d->vmode) != 32
48067 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
48068 return false;
48069
48070 dsecond = *d;
48071 dsecond.one_operand_p = false;
48072 dsecond.testing_p = true;
48073
48074 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
48075 immediate. For perm < 16 the second permutation uses
48076 d->op0 as first operand, for perm >= 16 it uses d->op1
48077 as first operand. The second operand is the result of
48078 vperm2[fi]128. */
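/* For example, with V4DFmode operands { 0 1 2 3 } and { 4 5 6 7 },
   perm & 3 selects one of the four 128-bit lanes { 0 1 } { 2 3 }
   { 4 5 } { 6 7 } for the low half of the vperm2[fi]128 result and
   (perm >> 2) & 3 selects one for the high half; perm == 6 gives the
   immediate 0x12 and the intermediate vector { 4 5 2 3 }. */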
48079 for (perm = 0; perm < 32; perm++)
48080 {
48081 /* Ignore permutations which do not move anything cross-lane. */
48082 if (perm < 16)
48083 {
48084 /* The second shuffle for e.g. V4DFmode has
48085 0123 and ABCD operands.
48086 Ignore AB23, as 23 is already in the second lane
48087 of the first operand. */
48088 if ((perm & 0xc) == (1 << 2)) continue;
48089 /* And 01CD, as 01 is in the first lane of the first
48090 operand. */
48091 if ((perm & 3) == 0) continue;
48092 /* And 4567, as then the vperm2[fi]128 doesn't change
48093 anything on the original 4567 second operand. */
48094 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
48095 }
48096 else
48097 {
48098 /* The second shuffle for e.g. V4DFmode has
48099 4567 and ABCD operands.
48100 Ignore AB67, as 67 is already in the second lane
48101 of the first operand. */
48102 if ((perm & 0xc) == (3 << 2)) continue;
48103 /* And 45CD, as 45 is in the first lane of the first
48104 operand. */
48105 if ((perm & 3) == 2) continue;
48106 /* And 0123, as then the vperm2[fi]128 doesn't change
48107 anything on the original 0123 first operand. */
48108 if ((perm & 0xf) == (1 << 2)) continue;
48109 }
48110
48111 for (i = 0; i < nelt; i++)
48112 {
48113 j = d->perm[i] / nelt2;
48114 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48115 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48116 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48117 dsecond.perm[i] = d->perm[i] & (nelt - 1);
48118 else
48119 break;
48120 }
48121
48122 if (i == nelt)
48123 {
48124 start_sequence ();
48125 ok = expand_vec_perm_1 (&dsecond);
48126 end_sequence ();
48127 }
48128 else
48129 ok = false;
48130
48131 if (ok)
48132 {
48133 if (d->testing_p)
48134 return true;
48135
48136 /* Found a usable second shuffle. dfirst will be
48137 vperm2f128 on d->op0 and d->op1. */
48138 dsecond.testing_p = false;
48139 dfirst = *d;
48140 dfirst.target = gen_reg_rtx (d->vmode);
48141 for (i = 0; i < nelt; i++)
48142 dfirst.perm[i] = (i & (nelt2 - 1))
48143 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48144
48145 canonicalize_perm (&dfirst);
48146 ok = expand_vec_perm_1 (&dfirst);
48147 gcc_assert (ok);
48148
48149 /* And dsecond is some single insn shuffle, taking
48150 d->op0 and result of vperm2f128 (if perm < 16) or
48151 d->op1 and result of vperm2f128 (otherwise). */
48152 if (perm >= 16)
48153 dsecond.op0 = dsecond.op1;
48154 dsecond.op1 = dfirst.target;
48155
48156 ok = expand_vec_perm_1 (&dsecond);
48157 gcc_assert (ok);
48158
48159 return true;
48160 }
48161
48162 /* For one operand, the only useful vperm2f128 permutation is 0x01
48163 aka lanes swap. */
48164 if (d->one_operand_p)
48165 return false;
48166 }
48167
48168 return false;
48169 }
48170
48171 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
48172 a two vector permutation using 2 intra-lane interleave insns
48173 and cross-lane shuffle for 32-byte vectors. */
48174
48175 static bool
48176 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48177 {
48178 unsigned i, nelt;
48179 rtx (*gen) (rtx, rtx, rtx);
48180
48181 if (d->one_operand_p)
48182 return false;
48183 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48184 ;
48185 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
48186 ;
48187 else
48188 return false;
48189
48190 nelt = d->nelt;
48191 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48192 return false;
48193 for (i = 0; i < nelt; i += 2)
48194 if (d->perm[i] != d->perm[0] + i / 2
48195 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
48196 return false;
48197
48198 if (d->testing_p)
48199 return true;
48200
48201 switch (d->vmode)
48202 {
48203 case V32QImode:
48204 if (d->perm[0])
48205 gen = gen_vec_interleave_highv32qi;
48206 else
48207 gen = gen_vec_interleave_lowv32qi;
48208 break;
48209 case V16HImode:
48210 if (d->perm[0])
48211 gen = gen_vec_interleave_highv16hi;
48212 else
48213 gen = gen_vec_interleave_lowv16hi;
48214 break;
48215 case V8SImode:
48216 if (d->perm[0])
48217 gen = gen_vec_interleave_highv8si;
48218 else
48219 gen = gen_vec_interleave_lowv8si;
48220 break;
48221 case V4DImode:
48222 if (d->perm[0])
48223 gen = gen_vec_interleave_highv4di;
48224 else
48225 gen = gen_vec_interleave_lowv4di;
48226 break;
48227 case V8SFmode:
48228 if (d->perm[0])
48229 gen = gen_vec_interleave_highv8sf;
48230 else
48231 gen = gen_vec_interleave_lowv8sf;
48232 break;
48233 case V4DFmode:
48234 if (d->perm[0])
48235 gen = gen_vec_interleave_highv4df;
48236 else
48237 gen = gen_vec_interleave_lowv4df;
48238 break;
48239 default:
48240 gcc_unreachable ();
48241 }
48242
48243 emit_insn (gen (d->target, d->op0, d->op1));
48244 return true;
48245 }
48246
48247 /* A subroutine of ix86_expand_vec_perm_const_1. Try to implement
48248 a single vector permutation using a single intra-lane vector
48249 permutation, vperm2f128 swapping the lanes and vblend* insn blending
48250 the non-swapped and swapped vectors together. */
48251
48252 static bool
48253 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48254 {
48255 struct expand_vec_perm_d dfirst, dsecond;
48256 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48257 rtx_insn *seq;
48258 bool ok;
48259 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
48260
48261 if (!TARGET_AVX
48262 || TARGET_AVX2
48263 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48264 || !d->one_operand_p)
48265 return false;
48266
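/* dfirst places each requested element either at its final position or
   at the mirrored position in the other 128-bit lane; msk records which
   result elements must instead be taken from dsecond, the lane-swapped
   copy of dfirst, via the final vblend. */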
48267 dfirst = *d;
48268 for (i = 0; i < nelt; i++)
48269 dfirst.perm[i] = 0xff;
48270 for (i = 0, msk = 0; i < nelt; i++)
48271 {
48272 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48273 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48274 return false;
48275 dfirst.perm[j] = d->perm[i];
48276 if (j != i)
48277 msk |= (1 << i);
48278 }
48279 for (i = 0; i < nelt; i++)
48280 if (dfirst.perm[i] == 0xff)
48281 dfirst.perm[i] = i;
48282
48283 if (!d->testing_p)
48284 dfirst.target = gen_reg_rtx (dfirst.vmode);
48285
48286 start_sequence ();
48287 ok = expand_vec_perm_1 (&dfirst);
48288 seq = get_insns ();
48289 end_sequence ();
48290
48291 if (!ok)
48292 return false;
48293
48294 if (d->testing_p)
48295 return true;
48296
48297 emit_insn (seq);
48298
48299 dsecond = *d;
48300 dsecond.op0 = dfirst.target;
48301 dsecond.op1 = dfirst.target;
48302 dsecond.one_operand_p = true;
48303 dsecond.target = gen_reg_rtx (dsecond.vmode);
48304 for (i = 0; i < nelt; i++)
48305 dsecond.perm[i] = i ^ nelt2;
48306
48307 ok = expand_vec_perm_1 (&dsecond);
48308 gcc_assert (ok);
48309
48310 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48311 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48312 return true;
48313 }
48314
48315 /* A subroutine of ix86_expand_vec_perm_const_1. Implement a V4DF
48316 permutation using two vperm2f128, followed by a vshufpd insn blending
48317 the two vectors together. */
48318
48319 static bool
48320 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48321 {
48322 struct expand_vec_perm_d dfirst, dsecond, dthird;
48323 bool ok;
48324
48325 if (!TARGET_AVX || (d->vmode != V4DFmode))
48326 return false;
48327
48328 if (d->testing_p)
48329 return true;
48330
48331 dfirst = *d;
48332 dsecond = *d;
48333 dthird = *d;
48334
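/* dfirst brings the 128-bit lanes containing d->perm[0] and d->perm[2]
   into the low and high lanes of its result, dsecond does the same for
   d->perm[1] and d->perm[3], and dthird then selects the wanted double
   from each lane pair, which matches a vshufpd pattern. */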
48335 dfirst.perm[0] = (d->perm[0] & ~1);
48336 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48337 dfirst.perm[2] = (d->perm[2] & ~1);
48338 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48339 dsecond.perm[0] = (d->perm[1] & ~1);
48340 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48341 dsecond.perm[2] = (d->perm[3] & ~1);
48342 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48343 dthird.perm[0] = (d->perm[0] % 2);
48344 dthird.perm[1] = (d->perm[1] % 2) + 4;
48345 dthird.perm[2] = (d->perm[2] % 2) + 2;
48346 dthird.perm[3] = (d->perm[3] % 2) + 6;
48347
48348 dfirst.target = gen_reg_rtx (dfirst.vmode);
48349 dsecond.target = gen_reg_rtx (dsecond.vmode);
48350 dthird.op0 = dfirst.target;
48351 dthird.op1 = dsecond.target;
48352 dthird.one_operand_p = false;
48353
48354 canonicalize_perm (&dfirst);
48355 canonicalize_perm (&dsecond);
48356
48357 ok = expand_vec_perm_1 (&dfirst)
48358 && expand_vec_perm_1 (&dsecond)
48359 && expand_vec_perm_1 (&dthird);
48360
48361 gcc_assert (ok);
48362
48363 return true;
48364 }
48365
48366 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
48367 permutation with two pshufb insns and an ior. We should have already
48368 failed all two instruction sequences. */
48369
48370 static bool
48371 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48372 {
48373 rtx rperm[2][16], vperm, l, h, op, m128;
48374 unsigned int i, nelt, eltsz;
48375
48376 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48377 return false;
48378 gcc_assert (!d->one_operand_p);
48379
48380 if (d->testing_p)
48381 return true;
48382
48383 nelt = d->nelt;
48384 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48385
48386 /* Generate two permutation masks. If the required element is within
48387 the given vector it is shuffled into the proper lane. If the required
48388 element is in the other vector, force a zero into the lane by setting
48389 bit 7 in the permutation mask. */
48390 m128 = GEN_INT (-128);
48391 for (i = 0; i < nelt; ++i)
48392 {
48393 unsigned j, e = d->perm[i];
48394 unsigned which = (e >= nelt);
48395 if (e >= nelt)
48396 e -= nelt;
48397
48398 for (j = 0; j < eltsz; ++j)
48399 {
48400 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48401 rperm[1-which][i*eltsz + j] = m128;
48402 }
48403 }
48404
48405 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48406 vperm = force_reg (V16QImode, vperm);
48407
48408 l = gen_reg_rtx (V16QImode);
48409 op = gen_lowpart (V16QImode, d->op0);
48410 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
48411
48412 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48413 vperm = force_reg (V16QImode, vperm);
48414
48415 h = gen_reg_rtx (V16QImode);
48416 op = gen_lowpart (V16QImode, d->op1);
48417 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
48418
48419 op = d->target;
48420 if (d->vmode != V16QImode)
48421 op = gen_reg_rtx (V16QImode);
48422 emit_insn (gen_iorv16qi3 (op, l, h));
48423 if (op != d->target)
48424 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48425
48426 return true;
48427 }
48428
48429 /* Implement arbitrary permutation of a single V32QImode or V16HImode operand
48430 with two vpshufb insns, vpermq and vpor. We should have already failed
48431 all two or three instruction sequences. */
48432
48433 static bool
48434 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48435 {
48436 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48437 unsigned int i, nelt, eltsz;
48438
48439 if (!TARGET_AVX2
48440 || !d->one_operand_p
48441 || (d->vmode != V32QImode && d->vmode != V16HImode))
48442 return false;
48443
48444 if (d->testing_p)
48445 return true;
48446
48447 nelt = d->nelt;
48448 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48449
48450 /* Generate two permutation masks. If the required element is within
48451 the same lane, it is shuffled in. If the required element is from the
48452 other lane, force a zero by setting bit 7 in the permutation mask.
48453 The other mask has non-negative elements only where an element
48454 is requested from the other lane; there it is also moved to the other lane,
48455 so that the result of vpshufb can have its two V2TImode halves
48456 swapped. */
48457 m128 = GEN_INT (-128);
48458 for (i = 0; i < nelt; ++i)
48459 {
48460 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48461 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48462
48463 for (j = 0; j < eltsz; ++j)
48464 {
48465 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48466 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48467 }
48468 }
48469
48470 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48471 vperm = force_reg (V32QImode, vperm);
48472
48473 h = gen_reg_rtx (V32QImode);
48474 op = gen_lowpart (V32QImode, d->op0);
48475 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48476
48477 /* Swap the 128-bit lanes of h into hp. */
48478 hp = gen_reg_rtx (V4DImode);
48479 op = gen_lowpart (V4DImode, h);
48480 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48481 const1_rtx));
48482
48483 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48484 vperm = force_reg (V32QImode, vperm);
48485
48486 l = gen_reg_rtx (V32QImode);
48487 op = gen_lowpart (V32QImode, d->op0);
48488 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48489
48490 op = d->target;
48491 if (d->vmode != V32QImode)
48492 op = gen_reg_rtx (V32QImode);
48493 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48494 if (op != d->target)
48495 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48496
48497 return true;
48498 }
48499
48500 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48501 and extract-odd permutations of two V32QImode or V16HImode operands
48502 with two vpshufb insns, vpor and vpermq. We should have already
48503 failed all two or three instruction sequences. */
48504
48505 static bool
48506 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48507 {
48508 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48509 unsigned int i, nelt, eltsz;
48510
48511 if (!TARGET_AVX2
48512 || d->one_operand_p
48513 || (d->vmode != V32QImode && d->vmode != V16HImode))
48514 return false;
48515
48516 for (i = 0; i < d->nelt; ++i)
48517 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48518 return false;
48519
48520 if (d->testing_p)
48521 return true;
48522
48523 nelt = d->nelt;
48524 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48525
48526 /* Generate two permutation masks. In the first permutation mask
48527 the first quarter will contain indexes for the first half
48528 of the op0, the second quarter will contain bit 7 set, third quarter
48529 will contain indexes for the second half of the op0 and the
48530 last quarter bit 7 set. In the second permutation mask
48531 the first quarter will contain bit 7 set, the second quarter
48532 indexes for the first half of the op1, the third quarter bit 7 set
48533 and last quarter indexes for the second half of the op1.
48534 I.e. the first mask e.g. for V32QImode extract even will be:
48535 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48536 (all values masked with 0xf except for -128) and second mask
48537 for extract even will be
48538 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48539 m128 = GEN_INT (-128);
48540 for (i = 0; i < nelt; ++i)
48541 {
48542 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48543 unsigned which = d->perm[i] >= nelt;
48544 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48545
48546 for (j = 0; j < eltsz; ++j)
48547 {
48548 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48549 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48550 }
48551 }
48552
48553 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48554 vperm = force_reg (V32QImode, vperm);
48555
48556 l = gen_reg_rtx (V32QImode);
48557 op = gen_lowpart (V32QImode, d->op0);
48558 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48559
48560 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48561 vperm = force_reg (V32QImode, vperm);
48562
48563 h = gen_reg_rtx (V32QImode);
48564 op = gen_lowpart (V32QImode, d->op1);
48565 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48566
48567 ior = gen_reg_rtx (V32QImode);
48568 emit_insn (gen_iorv32qi3 (ior, l, h));
48569
48570 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
48571 op = gen_reg_rtx (V4DImode);
48572 ior = gen_lowpart (V4DImode, ior);
48573 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48574 const1_rtx, GEN_INT (3)));
48575 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48576
48577 return true;
48578 }
48579
48580 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48581 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48582 with two "and" and "pack" or two "shift" and "pack" insns. We should
48583 have already failed all two instruction sequences. */
48584
48585 static bool
48586 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48587 {
48588 rtx op, dop0, dop1, t, rperm[16];
48589 unsigned i, odd, c, s, nelt = d->nelt;
48590 bool end_perm = false;
48591 machine_mode half_mode;
48592 rtx (*gen_and) (rtx, rtx, rtx);
48593 rtx (*gen_pack) (rtx, rtx, rtx);
48594 rtx (*gen_shift) (rtx, rtx, rtx);
48595
48596 if (d->one_operand_p)
48597 return false;
48598
48599 switch (d->vmode)
48600 {
48601 case V8HImode:
48602 /* Required for "pack". */
48603 if (!TARGET_SSE4_1)
48604 return false;
48605 c = 0xffff;
48606 s = 16;
48607 half_mode = V4SImode;
48608 gen_and = gen_andv4si3;
48609 gen_pack = gen_sse4_1_packusdw;
48610 gen_shift = gen_lshrv4si3;
48611 break;
48612 case V16QImode:
48613 /* No check as all instructions are SSE2. */
48614 c = 0xff;
48615 s = 8;
48616 half_mode = V8HImode;
48617 gen_and = gen_andv8hi3;
48618 gen_pack = gen_sse2_packuswb;
48619 gen_shift = gen_lshrv8hi3;
48620 break;
48621 case V16HImode:
48622 if (!TARGET_AVX2)
48623 return false;
48624 c = 0xffff;
48625 s = 16;
48626 half_mode = V8SImode;
48627 gen_and = gen_andv8si3;
48628 gen_pack = gen_avx2_packusdw;
48629 gen_shift = gen_lshrv8si3;
48630 end_perm = true;
48631 break;
48632 case V32QImode:
48633 if (!TARGET_AVX2)
48634 return false;
48635 c = 0xff;
48636 s = 8;
48637 half_mode = V16HImode;
48638 gen_and = gen_andv16hi3;
48639 gen_pack = gen_avx2_packuswb;
48640 gen_shift = gen_lshrv16hi3;
48641 end_perm = true;
48642 break;
48643 default:
48644 /* Only for V8HI, V16QI, V16HI and V32QI modes is this approach more
48645 profitable than general shuffles. */
48646 return false;
48647 }
48648
48649 /* Check that permutation is even or odd. */
48650 odd = d->perm[0];
48651 if (odd > 1)
48652 return false;
48653
48654 for (i = 1; i < nelt; ++i)
48655 if (d->perm[i] != 2 * i + odd)
48656 return false;
48657
48658 if (d->testing_p)
48659 return true;
48660
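/* For the even permutation, mask off the high half of each wide element
   and pack; for the odd permutation, shift each wide element right so
   the odd narrow element lands in the low half, then pack. */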
48661 dop0 = gen_reg_rtx (half_mode);
48662 dop1 = gen_reg_rtx (half_mode);
48663 if (odd == 0)
48664 {
48665 for (i = 0; i < nelt / 2; i++)
48666 rperm[i] = GEN_INT (c);
48667 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
48668 t = force_reg (half_mode, t);
48669 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
48670 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
48671 }
48672 else
48673 {
48674 emit_insn (gen_shift (dop0,
48675 gen_lowpart (half_mode, d->op0),
48676 GEN_INT (s)));
48677 emit_insn (gen_shift (dop1,
48678 gen_lowpart (half_mode, d->op1),
48679 GEN_INT (s)));
48680 }
48681 /* In the AVX2 256-bit case we need to permute the pack result. */
48682 if (TARGET_AVX2 && end_perm)
48683 {
48684 op = gen_reg_rtx (d->vmode);
48685 t = gen_reg_rtx (V4DImode);
48686 emit_insn (gen_pack (op, dop0, dop1));
48687 emit_insn (gen_avx2_permv4di_1 (t,
48688 gen_lowpart (V4DImode, op),
48689 const0_rtx,
48690 const2_rtx,
48691 const1_rtx,
48692 GEN_INT (3)));
48693 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
48694 }
48695 else
48696 emit_insn (gen_pack (d->target, dop0, dop1));
48697
48698 return true;
48699 }
48700
48701 /* A subroutine of ix86_expand_vec_perm_const_1. Implement extract-even
48702 and extract-odd permutations. */
48703
48704 static bool
48705 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
48706 {
48707 rtx t1, t2, t3, t4, t5;
48708
48709 switch (d->vmode)
48710 {
48711 case V4DFmode:
48712 if (d->testing_p)
48713 break;
48714 t1 = gen_reg_rtx (V4DFmode);
48715 t2 = gen_reg_rtx (V4DFmode);
48716
48717 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48718 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
48719 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
48720
48721 /* Now an unpck[lh]pd will produce the result required. */
48722 if (odd)
48723 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
48724 else
48725 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
48726 emit_insn (t3);
48727 break;
48728
48729 case V8SFmode:
48730 {
48731 int mask = odd ? 0xdd : 0x88;
48732
48733 if (d->testing_p)
48734 break;
48735 t1 = gen_reg_rtx (V8SFmode);
48736 t2 = gen_reg_rtx (V8SFmode);
48737 t3 = gen_reg_rtx (V8SFmode);
48738
48739 /* Shuffle within the 128-bit lanes to produce:
48740 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
48741 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
48742 GEN_INT (mask)));
48743
48744 /* Shuffle the lanes around to produce:
48745 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
48746 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
48747 GEN_INT (0x3)));
48748
48749 /* Shuffle within the 128-bit lanes to produce:
48750 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
48751 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
48752
48753 /* Shuffle within the 128-bit lanes to produce:
48754 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
48755 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
48756
48757 /* Shuffle the lanes around to produce:
48758 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
48759 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
48760 GEN_INT (0x20)));
48761 }
48762 break;
48763
48764 case V2DFmode:
48765 case V4SFmode:
48766 case V2DImode:
48767 case V4SImode:
48768 /* These are always directly implementable by expand_vec_perm_1. */
48769 gcc_unreachable ();
48770
48771 case V8HImode:
48772 if (TARGET_SSE4_1)
48773 return expand_vec_perm_even_odd_pack (d);
48774 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
48775 return expand_vec_perm_pshufb2 (d);
48776 else
48777 {
48778 if (d->testing_p)
48779 break;
48780 /* We need 2*log2(N)-1 operations to achieve odd/even
48781 with interleave. */
48782 t1 = gen_reg_rtx (V8HImode);
48783 t2 = gen_reg_rtx (V8HImode);
48784 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
48785 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
48786 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
48787 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
48788 if (odd)
48789 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
48790 else
48791 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
48792 emit_insn (t3);
48793 }
48794 break;
48795
48796 case V16QImode:
48797 return expand_vec_perm_even_odd_pack (d);
48798
48799 case V16HImode:
48800 case V32QImode:
48801 return expand_vec_perm_even_odd_pack (d);
48802
48803 case V4DImode:
48804 if (!TARGET_AVX2)
48805 {
48806 struct expand_vec_perm_d d_copy = *d;
48807 d_copy.vmode = V4DFmode;
48808 if (d->testing_p)
48809 d_copy.target = gen_lowpart (V4DFmode, d->target);
48810 else
48811 d_copy.target = gen_reg_rtx (V4DFmode);
48812 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
48813 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
48814 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48815 {
48816 if (!d->testing_p)
48817 emit_move_insn (d->target,
48818 gen_lowpart (V4DImode, d_copy.target));
48819 return true;
48820 }
48821 return false;
48822 }
48823
48824 if (d->testing_p)
48825 break;
48826
48827 t1 = gen_reg_rtx (V4DImode);
48828 t2 = gen_reg_rtx (V4DImode);
48829
48830 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48831 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
48832 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
48833
48834 /* Now a vpunpck[lh]qdq will produce the result required. */
48835 if (odd)
48836 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
48837 else
48838 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
48839 emit_insn (t3);
48840 break;
48841
48842 case V8SImode:
48843 if (!TARGET_AVX2)
48844 {
48845 struct expand_vec_perm_d d_copy = *d;
48846 d_copy.vmode = V8SFmode;
48847 if (d->testing_p)
48848 d_copy.target = gen_lowpart (V8SFmode, d->target);
48849 else
48850 d_copy.target = gen_reg_rtx (V8SFmode);
48851 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
48852 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
48853 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48854 {
48855 if (!d->testing_p)
48856 emit_move_insn (d->target,
48857 gen_lowpart (V8SImode, d_copy.target));
48858 return true;
48859 }
48860 return false;
48861 }
48862
48863 if (d->testing_p)
48864 break;
48865
48866 t1 = gen_reg_rtx (V8SImode);
48867 t2 = gen_reg_rtx (V8SImode);
48868 t3 = gen_reg_rtx (V4DImode);
48869 t4 = gen_reg_rtx (V4DImode);
48870 t5 = gen_reg_rtx (V4DImode);
48871
48872 /* Shuffle the lanes around into
48873 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
48874 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
48875 gen_lowpart (V4DImode, d->op1),
48876 GEN_INT (0x20)));
48877 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
48878 gen_lowpart (V4DImode, d->op1),
48879 GEN_INT (0x31)));
48880
48881 /* Swap the 2nd and 3rd position in each lane into
48882 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
48883 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
48884 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48885 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
48886 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48887
48888 /* Now a vpunpck[lh]qdq will produce
48889 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
48890 if (odd)
48891 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
48892 gen_lowpart (V4DImode, t2));
48893 else
48894 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
48895 gen_lowpart (V4DImode, t2));
48896 emit_insn (t3);
48897 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
48898 break;
48899
48900 default:
48901 gcc_unreachable ();
48902 }
48903
48904 return true;
48905 }
48906
48907 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
48908 extract-even and extract-odd permutations. */
48909
48910 static bool
48911 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
48912 {
48913 unsigned i, odd, nelt = d->nelt;
48914
48915 odd = d->perm[0];
48916 if (odd != 0 && odd != 1)
48917 return false;
48918
48919 for (i = 1; i < nelt; ++i)
48920 if (d->perm[i] != 2 * i + odd)
48921 return false;
48922
48923 return expand_vec_perm_even_odd_1 (d, odd);
48924 }
48925
48926 /* A subroutine of ix86_expand_vec_perm_const_1. Implement broadcast
48927 permutations. We assume that expand_vec_perm_1 has already failed. */
48928
48929 static bool
48930 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
48931 {
48932 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
48933 machine_mode vmode = d->vmode;
48934 unsigned char perm2[4];
48935 rtx op0 = d->op0, dest;
48936 bool ok;
48937
48938 switch (vmode)
48939 {
48940 case V4DFmode:
48941 case V8SFmode:
48942 /* These are special-cased in sse.md so that we can optionally
48943 use the vbroadcast instruction. They expand to two insns
48944 if the input happens to be in a register. */
48945 gcc_unreachable ();
48946
48947 case V2DFmode:
48948 case V2DImode:
48949 case V4SFmode:
48950 case V4SImode:
48951 /* These are always implementable using standard shuffle patterns. */
48952 gcc_unreachable ();
48953
48954 case V8HImode:
48955 case V16QImode:
48956 /* These can be implemented via interleave. We save one insn by
48957 stopping once we have promoted to V4SImode and then using pshufd. */
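/* E.g. to broadcast element 5 of a V8HImode vector, one
   vec_interleave_highv8hi of the operand with itself yields
   { h4 h4 h5 h5 h6 h6 h7 h7 }, and broadcasting V4SImode element 1
   of that with pshufd replicates h5 across the whole vector. */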
48958 if (d->testing_p)
48959 return true;
48960 do
48961 {
48962 rtx dest;
48963 rtx (*gen) (rtx, rtx, rtx)
48964 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
48965 : gen_vec_interleave_lowv8hi;
48966
48967 if (elt >= nelt2)
48968 {
48969 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
48970 : gen_vec_interleave_highv8hi;
48971 elt -= nelt2;
48972 }
48973 nelt2 /= 2;
48974
48975 dest = gen_reg_rtx (vmode);
48976 emit_insn (gen (dest, op0, op0));
48977 vmode = get_mode_wider_vector (vmode);
48978 op0 = gen_lowpart (vmode, dest);
48979 }
48980 while (vmode != V4SImode);
48981
48982 memset (perm2, elt, 4);
48983 dest = gen_reg_rtx (V4SImode);
48984 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
48985 gcc_assert (ok);
48986 if (!d->testing_p)
48987 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
48988 return true;
48989
48990 case V64QImode:
48991 case V32QImode:
48992 case V16HImode:
48993 case V8SImode:
48994 case V4DImode:
48995 /* For AVX2 broadcasts of the first element vpbroadcast* or
48996 vpermq should be used by expand_vec_perm_1. */
48997 gcc_assert (!TARGET_AVX2 || d->perm[0]);
48998 return false;
48999
49000 default:
49001 gcc_unreachable ();
49002 }
49003 }
49004
49005 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
49006 broadcast permutations. */
49007
49008 static bool
49009 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
49010 {
49011 unsigned i, elt, nelt = d->nelt;
49012
49013 if (!d->one_operand_p)
49014 return false;
49015
49016 elt = d->perm[0];
49017 for (i = 1; i < nelt; ++i)
49018 if (d->perm[i] != elt)
49019 return false;
49020
49021 return expand_vec_perm_broadcast_1 (d);
49022 }
49023
49024 /* Implement arbitrary permutations of two V64QImode operands
49025 with 2 vpermi2w, 2 vpshufb and one vpor instruction. */
49026 static bool
49027 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
49028 {
49029 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
49030 return false;
49031
49032 if (d->testing_p)
49033 return true;
49034
49035 struct expand_vec_perm_d ds[2];
49036 rtx rperm[128], vperm, target0, target1;
49037 unsigned int i, nelt;
49038 machine_mode vmode;
49039
49040 nelt = d->nelt;
49041 vmode = V64QImode;
49042
49043 for (i = 0; i < 2; i++)
49044 {
49045 ds[i] = *d;
49046 ds[i].vmode = V32HImode;
49047 ds[i].nelt = 32;
49048 ds[i].target = gen_reg_rtx (V32HImode);
49049 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
49050 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
49051 }
49052
49053 /* Prepare the permutations such that the first one (ds[0]) takes
49054 care of putting the even bytes into the right positions or one
49055 position higher, and the second one (ds[1]) takes care of
49056 putting the odd bytes into the right positions or one position
49057 below. */
49058
49059 for (i = 0; i < nelt; i++)
49060 {
49061 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
49062 if (i & 1)
49063 {
49064 rperm[i] = constm1_rtx;
49065 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49066 }
49067 else
49068 {
49069 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49070 rperm[i + 64] = constm1_rtx;
49071 }
49072 }
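/* Each vpshufb mask selects, for the byte positions its half of the
   permutation handles, the low or high byte of the word that the
   vpermi2w above placed at the corresponding word position, and zeroes
   every other position (-1, i.e. bit 7 set); the final vpor merges
   the two results. */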
49073
49074 bool ok = expand_vec_perm_1 (&ds[0]);
49075 gcc_assert (ok);
49076 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
49077
49078 ok = expand_vec_perm_1 (&ds[1]);
49079 gcc_assert (ok);
49080 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
49081
49082 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
49083 vperm = force_reg (vmode, vperm);
49084 target0 = gen_reg_rtx (V64QImode);
49085 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
49086
49087 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
49088 vperm = force_reg (vmode, vperm);
49089 target1 = gen_reg_rtx (V64QImode);
49090 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
49091
49092 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
49093 return true;
49094 }
49095
49096 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
49097 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
49098 all the shorter instruction sequences. */
49099
49100 static bool
49101 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
49102 {
49103 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
49104 unsigned int i, nelt, eltsz;
49105 bool used[4];
49106
49107 if (!TARGET_AVX2
49108 || d->one_operand_p
49109 || (d->vmode != V32QImode && d->vmode != V16HImode))
49110 return false;
49111
49112 if (d->testing_p)
49113 return true;
49114
49115 nelt = d->nelt;
49116 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
49117
49118 /* Generate 4 permutation masks. If the required element is within
49119 the same lane, it is shuffled in. If the required element is from the
49120 other lane, force a zero by setting bit 7 in the permutation mask.
49121 The other mask has non-negative elements only where an element
49122 is requested from the other lane; there it is also moved to the other lane,
49123 so that the result of vpshufb can have its two V2TImode halves
49124 swapped. */
49125 m128 = GEN_INT (-128);
49126 for (i = 0; i < 32; ++i)
49127 {
49128 rperm[0][i] = m128;
49129 rperm[1][i] = m128;
49130 rperm[2][i] = m128;
49131 rperm[3][i] = m128;
49132 }
49133 used[0] = false;
49134 used[1] = false;
49135 used[2] = false;
49136 used[3] = false;
49137 for (i = 0; i < nelt; ++i)
49138 {
49139 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
49140 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
49141 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
49142
49143 for (j = 0; j < eltsz; ++j)
49144 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
49145 used[which] = true;
49146 }
49147
49148 for (i = 0; i < 2; ++i)
49149 {
49150 if (!used[2 * i + 1])
49151 {
49152 h[i] = NULL_RTX;
49153 continue;
49154 }
49155 vperm = gen_rtx_CONST_VECTOR (V32QImode,
49156 gen_rtvec_v (32, rperm[2 * i + 1]));
49157 vperm = force_reg (V32QImode, vperm);
49158 h[i] = gen_reg_rtx (V32QImode);
49159 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49160 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
49161 }
49162
49163 /* Swap the 128-bit lanes of h[X]. */
49164 for (i = 0; i < 2; ++i)
49165 {
49166 if (h[i] == NULL_RTX)
49167 continue;
49168 op = gen_reg_rtx (V4DImode);
49169 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
49170 const2_rtx, GEN_INT (3), const0_rtx,
49171 const1_rtx));
49172 h[i] = gen_lowpart (V32QImode, op);
49173 }
49174
49175 for (i = 0; i < 2; ++i)
49176 {
49177 if (!used[2 * i])
49178 {
49179 l[i] = NULL_RTX;
49180 continue;
49181 }
49182 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49183 vperm = force_reg (V32QImode, vperm);
49184 l[i] = gen_reg_rtx (V32QImode);
49185 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49186 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
49187 }
49188
49189 for (i = 0; i < 2; ++i)
49190 {
49191 if (h[i] && l[i])
49192 {
49193 op = gen_reg_rtx (V32QImode);
49194 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
49195 l[i] = op;
49196 }
49197 else if (h[i])
49198 l[i] = h[i];
49199 }
49200
49201 gcc_assert (l[0] && l[1]);
49202 op = d->target;
49203 if (d->vmode != V32QImode)
49204 op = gen_reg_rtx (V32QImode);
49205 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49206 if (op != d->target)
49207 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49208 return true;
49209 }
49210
49211 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49212 With all of the interface bits taken care of, perform the expansion
49213 in D and return true on success. */
49214
49215 static bool
49216 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
49217 {
49218 /* Try a single instruction expansion. */
49219 if (expand_vec_perm_1 (d))
49220 return true;
49221
49222 /* Try sequences of two instructions. */
49223
49224 if (expand_vec_perm_pshuflw_pshufhw (d))
49225 return true;
49226
49227 if (expand_vec_perm_palignr (d, false))
49228 return true;
49229
49230 if (expand_vec_perm_interleave2 (d))
49231 return true;
49232
49233 if (expand_vec_perm_broadcast (d))
49234 return true;
49235
49236 if (expand_vec_perm_vpermq_perm_1 (d))
49237 return true;
49238
49239 if (expand_vec_perm_vperm2f128 (d))
49240 return true;
49241
49242 if (expand_vec_perm_pblendv (d))
49243 return true;
49244
49245 /* Try sequences of three instructions. */
49246
49247 if (expand_vec_perm_even_odd_pack (d))
49248 return true;
49249
49250 if (expand_vec_perm_2vperm2f128_vshuf (d))
49251 return true;
49252
49253 if (expand_vec_perm_pshufb2 (d))
49254 return true;
49255
49256 if (expand_vec_perm_interleave3 (d))
49257 return true;
49258
49259 if (expand_vec_perm_vperm2f128_vblend (d))
49260 return true;
49261
49262 /* Try sequences of four instructions. */
49263
49264 if (expand_vec_perm_vpshufb2_vpermq (d))
49265 return true;
49266
49267 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49268 return true;
49269
49270 if (expand_vec_perm_vpermi2_vpshub2 (d))
49271 return true;
49272
49273 /* ??? Look for narrow permutations whose element orderings would
49274 allow the promotion to a wider mode. */
49275
49276 /* ??? Look for sequences of interleave or a wider permute that place
49277 the data into the correct lanes for a half-vector shuffle like
49278 pshuf[lh]w or vpermilps. */
49279
49280 /* ??? Look for sequences of interleave that produce the desired results.
49281 The combinatorics of punpck[lh] get pretty ugly... */
49282
49283 if (expand_vec_perm_even_odd (d))
49284 return true;
49285
49286 /* Even longer sequences. */
49287 if (expand_vec_perm_vpshufb4_vpermq2 (d))
49288 return true;
49289
49290 return false;
49291 }
49292
49293 /* If a permutation only uses one operand, make it clear. Returns true
49294 if the permutation references both operands. */
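/* For instance, with nelt == 4 the selector { 4 5 6 7 } references only
   the second operand; it is rewritten to { 0 1 2 3 } with op0 = op1 and
   one_operand_p set, and the function returns false. */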
49295
49296 static bool
49297 canonicalize_perm (struct expand_vec_perm_d *d)
49298 {
49299 int i, which, nelt = d->nelt;
49300
49301 for (i = which = 0; i < nelt; ++i)
49302 which |= (d->perm[i] < nelt ? 1 : 2);
49303
49304 d->one_operand_p = true;
49305 switch (which)
49306 {
49307 default:
49308 gcc_unreachable();
49309
49310 case 3:
49311 if (!rtx_equal_p (d->op0, d->op1))
49312 {
49313 d->one_operand_p = false;
49314 break;
49315 }
49316 /* The elements of PERM do not suggest that only the first operand
49317 is used, but both operands are identical. Allow easier matching
49318 of the permutation by folding the permutation into the single
49319 input vector. */
49320 /* FALLTHRU */
49321
49322 case 2:
49323 for (i = 0; i < nelt; ++i)
49324 d->perm[i] &= nelt - 1;
49325 d->op0 = d->op1;
49326 break;
49327
49328 case 1:
49329 d->op1 = d->op0;
49330 break;
49331 }
49332
49333 return (which == 3);
49334 }
49335
49336 bool
49337 ix86_expand_vec_perm_const (rtx operands[4])
49338 {
49339 struct expand_vec_perm_d d;
49340 unsigned char perm[MAX_VECT_LEN];
49341 int i, nelt;
49342 bool two_args;
49343 rtx sel;
49344
49345 d.target = operands[0];
49346 d.op0 = operands[1];
49347 d.op1 = operands[2];
49348 sel = operands[3];
49349
49350 d.vmode = GET_MODE (d.target);
49351 gcc_assert (VECTOR_MODE_P (d.vmode));
49352 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49353 d.testing_p = false;
49354
49355 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49356 gcc_assert (XVECLEN (sel, 0) == nelt);
49357 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
49358
49359 for (i = 0; i < nelt; ++i)
49360 {
49361 rtx e = XVECEXP (sel, 0, i);
49362 int ei = INTVAL (e) & (2 * nelt - 1);
49363 d.perm[i] = ei;
49364 perm[i] = ei;
49365 }
49366
49367 two_args = canonicalize_perm (&d);
49368
49369 if (ix86_expand_vec_perm_const_1 (&d))
49370 return true;
49371
49372 /* If the selector says both arguments are needed, but the operands are the
49373 same, the above tried to expand with one_operand_p and flattened selector.
49374 If that didn't work, retry without one_operand_p; we succeeded with that
49375 during testing. */
49376 if (two_args && d.one_operand_p)
49377 {
49378 d.one_operand_p = false;
49379 memcpy (d.perm, perm, sizeof (perm));
49380 return ix86_expand_vec_perm_const_1 (&d);
49381 }
49382
49383 return false;
49384 }
49385
49386 /* Implement targetm.vectorize.vec_perm_const_ok. */
49387
49388 static bool
49389 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49390 const unsigned char *sel)
49391 {
49392 struct expand_vec_perm_d d;
49393 unsigned int i, nelt, which;
49394 bool ret;
49395
49396 d.vmode = vmode;
49397 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49398 d.testing_p = true;
49399
49400 /* Given sufficient ISA support we can just return true here
49401 for selected vector modes. */
49402 switch (d.vmode)
49403 {
49404 case V16SFmode:
49405 case V16SImode:
49406 case V8DImode:
49407 case V8DFmode:
49408 if (TARGET_AVX512F)
49409 /* All implementable with a single vpermi2 insn. */
49410 return true;
49411 break;
49412 case V32HImode:
49413 if (TARGET_AVX512BW)
49414 /* All implementable with a single vpermi2 insn. */
49415 return true;
49416 break;
49417 case V64QImode:
49418 if (TARGET_AVX512BW)
49419 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
49420 return true;
49421 break;
49422 case V8SImode:
49423 case V8SFmode:
49424 case V4DFmode:
49425 case V4DImode:
49426 if (TARGET_AVX512VL)
49427 /* All implementable with a single vpermi2 insn. */
49428 return true;
49429 break;
49430 case V16HImode:
49431 if (TARGET_AVX2)
49432 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49433 return true;
49434 break;
49435 case V32QImode:
49436 if (TARGET_AVX2)
49437 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49438 return true;
49439 break;
49440 case V4SImode:
49441 case V4SFmode:
49442 case V8HImode:
49443 case V16QImode:
49444 /* All implementable with a single vpperm insn. */
49445 if (TARGET_XOP)
49446 return true;
49447 /* All implementable with 2 pshufb + 1 ior. */
49448 if (TARGET_SSSE3)
49449 return true;
49450 break;
49451 case V2DImode:
49452 case V2DFmode:
49453 /* All implementable with shufpd or unpck[lh]pd. */
49454 return true;
49455 default:
49456 return false;
49457 }
49458
49459 /* Extract the values from the vector CST into the permutation
49460 array in D. */
49461 memcpy (d.perm, sel, nelt);
49462 for (i = which = 0; i < nelt; ++i)
49463 {
49464 unsigned char e = d.perm[i];
49465 gcc_assert (e < 2 * nelt);
49466 which |= (e < nelt ? 1 : 2);
49467 }
49468
49469 /* For all elements from second vector, fold the elements to first. */
49470 if (which == 2)
49471 for (i = 0; i < nelt; ++i)
49472 d.perm[i] -= nelt;
49473
49474 /* Check whether the mask can be applied to the vector type. */
49475 d.one_operand_p = (which != 3);
49476
49477 /* Implementable with shufps or pshufd. */
49478 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49479 return true;
49480
49481 /* Otherwise we have to go through the motions and see if we can
49482 figure out how to generate the requested permutation. */
49483 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49484 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49485 if (!d.one_operand_p)
49486 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49487
49488 start_sequence ();
49489 ret = ix86_expand_vec_perm_const_1 (&d);
49490 end_sequence ();
49491
49492 return ret;
49493 }
49494
49495 void
49496 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49497 {
49498 struct expand_vec_perm_d d;
49499 unsigned i, nelt;
49500
49501 d.target = targ;
49502 d.op0 = op0;
49503 d.op1 = op1;
49504 d.vmode = GET_MODE (targ);
49505 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49506 d.one_operand_p = false;
49507 d.testing_p = false;
49508
49509 for (i = 0; i < nelt; ++i)
49510 d.perm[i] = i * 2 + odd;
49511
49512 /* We'll either be able to implement the permutation directly... */
49513 if (expand_vec_perm_1 (&d))
49514 return;
49515
49516 /* ... or we use the special-case patterns. */
49517 expand_vec_perm_even_odd_1 (&d, odd);
49518 }
49519
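/* Interleave the elements of OP0 and OP1 into TARG, taking them from the
low halves (HIGH_P false) or the high halves (HIGH_P true). */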
49520 static void
49521 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49522 {
49523 struct expand_vec_perm_d d;
49524 unsigned i, nelt, base;
49525 bool ok;
49526
49527 d.target = targ;
49528 d.op0 = op0;
49529 d.op1 = op1;
49530 d.vmode = GET_MODE (targ);
49531 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49532 d.one_operand_p = false;
49533 d.testing_p = false;
49534
49535 base = high_p ? nelt / 2 : 0;
49536 for (i = 0; i < nelt / 2; ++i)
49537 {
49538 d.perm[i * 2] = i + base;
49539 d.perm[i * 2 + 1] = i + base + nelt;
49540 }
49541
49542 /* Note that for AVX this isn't one instruction. */
49543 ok = ix86_expand_vec_perm_const_1 (&d);
49544 gcc_assert (ok);
49545 }
49546
49547
49548 /* Expand a vector operation CODE for a V*QImode in terms of the
49549 same operation on V*HImode. */
49550
49551 void
49552 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49553 {
49554 machine_mode qimode = GET_MODE (dest);
49555 machine_mode himode;
49556 rtx (*gen_il) (rtx, rtx, rtx);
49557 rtx (*gen_ih) (rtx, rtx, rtx);
49558 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49559 struct expand_vec_perm_d d;
49560 bool ok, full_interleave;
49561 bool uns_p = false;
49562 int i;
49563
49564 switch (qimode)
49565 {
49566 case V16QImode:
49567 himode = V8HImode;
49568 gen_il = gen_vec_interleave_lowv16qi;
49569 gen_ih = gen_vec_interleave_highv16qi;
49570 break;
49571 case V32QImode:
49572 himode = V16HImode;
49573 gen_il = gen_avx2_interleave_lowv32qi;
49574 gen_ih = gen_avx2_interleave_highv32qi;
49575 break;
49576 case V64QImode:
49577 himode = V32HImode;
49578 gen_il = gen_avx512bw_interleave_lowv64qi;
49579 gen_ih = gen_avx512bw_interleave_highv64qi;
49580 break;
49581 default:
49582 gcc_unreachable ();
49583 }
49584
49585 op2_l = op2_h = op2;
49586 switch (code)
49587 {
49588 case MULT:
49589 /* Unpack data such that we've got a source byte in each low byte of
49590 each word. We don't care what goes into the high byte of each word.
49591 Rather than trying to get zero in there, most convenient is to let
49592 it be a copy of the low byte. */
49593 op2_l = gen_reg_rtx (qimode);
49594 op2_h = gen_reg_rtx (qimode);
49595 emit_insn (gen_il (op2_l, op2, op2));
49596 emit_insn (gen_ih (op2_h, op2, op2));
49597 /* FALLTHRU */
49598
49599 op1_l = gen_reg_rtx (qimode);
49600 op1_h = gen_reg_rtx (qimode);
49601 emit_insn (gen_il (op1_l, op1, op1));
49602 emit_insn (gen_ih (op1_h, op1, op1));
49603 full_interleave = qimode == V16QImode;
49604 break;
49605
49606 case ASHIFT:
49607 case LSHIFTRT:
49608 uns_p = true;
49609 /* FALLTHRU */
49610 case ASHIFTRT:
49611 op1_l = gen_reg_rtx (himode);
49612 op1_h = gen_reg_rtx (himode);
49613 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
49614 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
49615 full_interleave = true;
49616 break;
49617 default:
49618 gcc_unreachable ();
49619 }
49620
49621 /* Perform the operation. */
49622 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
49623 1, OPTAB_DIRECT);
49624 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
49625 1, OPTAB_DIRECT);
49626 gcc_assert (res_l && res_h);
49627
49628 /* Merge the data back into the right place. */
49629 d.target = dest;
49630 d.op0 = gen_lowpart (qimode, res_l);
49631 d.op1 = gen_lowpart (qimode, res_h);
49632 d.vmode = qimode;
49633 d.nelt = GET_MODE_NUNITS (qimode);
49634 d.one_operand_p = false;
49635 d.testing_p = false;
49636
49637 if (full_interleave)
49638 {
49639 /* For SSE2, we used a full interleave, so the desired
49640 results are in the even elements. */
49641 for (i = 0; i < 64; ++i)
49642 d.perm[i] = i * 2;
49643 }
49644 else
49645 {
49646 /* For AVX, the interleave used above was not cross-lane. So the
49647 extraction is of the even elements, but with the second and third quarters
49648 swapped. Happily, that is even one insn shorter than plain even extraction. */
49649 for (i = 0; i < 64; ++i)
49650 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
49651 }
49652
49653 ok = ix86_expand_vec_perm_const_1 (&d);
49654 gcc_assert (ok);
49655
49656 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49657 gen_rtx_fmt_ee (code, qimode, op1, op2));
49658 }
49659
49660 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
49661 if op is CONST_VECTOR with all odd elements equal to their
49662 preceding element. */
49663
49664 static bool
49665 const_vector_equal_evenodd_p (rtx op)
49666 {
49667 machine_mode mode = GET_MODE (op);
49668 int i, nunits = GET_MODE_NUNITS (mode);
49669 if (GET_CODE (op) != CONST_VECTOR
49670 || nunits != CONST_VECTOR_NUNITS (op))
49671 return false;
49672 for (i = 0; i < nunits; i += 2)
49673 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
49674 return false;
49675 return true;
49676 }
49677
49678 void
49679 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
49680 bool uns_p, bool odd_p)
49681 {
49682 machine_mode mode = GET_MODE (op1);
49683 machine_mode wmode = GET_MODE (dest);
49684 rtx x;
49685 rtx orig_op1 = op1, orig_op2 = op2;
49686
49687 if (!nonimmediate_operand (op1, mode))
49688 op1 = force_reg (mode, op1);
49689 if (!nonimmediate_operand (op2, mode))
49690 op2 = force_reg (mode, op2);
49691
49692 /* We only play even/odd games with vectors of SImode. */
49693 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
49694
49695 /* If we're looking for the odd results, shift those members down to
49696 the even slots. For some CPUs this is faster than a PSHUFD. */
49697 if (odd_p)
49698 {
49699 /* For XOP use vpmacsdqh, but only for smult, as it is only
49700 signed. */
49701 if (TARGET_XOP && mode == V4SImode && !uns_p)
49702 {
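/* vpmacsdqh multiplies the odd-numbered signed dwords and adds the last
operand, so pass a zero accumulator. */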
49703 x = force_reg (wmode, CONST0_RTX (wmode));
49704 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
49705 return;
49706 }
49707
49708 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
49709 if (!const_vector_equal_evenodd_p (orig_op1))
49710 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
49711 x, NULL, 1, OPTAB_DIRECT);
49712 if (!const_vector_equal_evenodd_p (orig_op2))
49713 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
49714 x, NULL, 1, OPTAB_DIRECT);
49715 op1 = gen_lowpart (mode, op1);
49716 op2 = gen_lowpart (mode, op2);
49717 }
49718
49719 if (mode == V16SImode)
49720 {
49721 if (uns_p)
49722 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
49723 else
49724 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
49725 }
49726 else if (mode == V8SImode)
49727 {
49728 if (uns_p)
49729 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
49730 else
49731 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
49732 }
49733 else if (uns_p)
49734 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
49735 else if (TARGET_SSE4_1)
49736 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
49737 else
49738 {
49739 rtx s1, s2, t0, t1, t2;
49740
49741 /* The easiest way to implement this without PMULDQ is to go through
49742 the motions as if we are performing a full 64-bit multiply, except
49743 that we need to do less shuffling of the elements. */
49744
49745 /* Compute the sign-extension, aka highparts, of the two operands. */
49746 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49747 op1, pc_rtx, pc_rtx);
49748 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49749 op2, pc_rtx, pc_rtx);
49750
49751 /* Multiply LO(A) * HI(B), and vice-versa. */
49752 t1 = gen_reg_rtx (wmode);
49753 t2 = gen_reg_rtx (wmode);
49754 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
49755 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
49756
49757 /* Multiply LO(A) * LO(B). */
49758 t0 = gen_reg_rtx (wmode);
49759 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
49760
49761 /* Combine and shift the highparts into place. */
49762 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
49763 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
49764 1, OPTAB_DIRECT);
49765
49766 /* Combine high and low parts. */
49767 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
49768 return;
49769 }
49770 emit_insn (x);
49771 }
49772
49773 void
49774 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
49775 bool uns_p, bool high_p)
49776 {
49777 machine_mode wmode = GET_MODE (dest);
49778 machine_mode mode = GET_MODE (op1);
49779 rtx t1, t2, t3, t4, mask;
49780
49781 switch (mode)
49782 {
49783 case V4SImode:
49784 t1 = gen_reg_rtx (mode);
49785 t2 = gen_reg_rtx (mode);
49786 if (TARGET_XOP && !uns_p)
49787 {
49788 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
49789 shuffle the elements once so that all elements are in the right
49790 place for immediate use: { A C B D }. */
49791 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
49792 const1_rtx, GEN_INT (3)));
49793 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
49794 const1_rtx, GEN_INT (3)));
49795 }
49796 else
49797 {
49798 /* Put the elements into place for the multiply. */
49799 ix86_expand_vec_interleave (t1, op1, op1, high_p);
49800 ix86_expand_vec_interleave (t2, op2, op2, high_p);
49801 high_p = false;
49802 }
49803 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
49804 break;
49805
49806 case V8SImode:
49807 /* Shuffle the elements between the lanes. After this we
49808 have { A B E F | C D G H } for each operand. */
49809 t1 = gen_reg_rtx (V4DImode);
49810 t2 = gen_reg_rtx (V4DImode);
49811 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
49812 const0_rtx, const2_rtx,
49813 const1_rtx, GEN_INT (3)));
49814 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
49815 const0_rtx, const2_rtx,
49816 const1_rtx, GEN_INT (3)));
49817
49818 /* Shuffle the elements within the lanes. After this we
49819 have { A A B B | C C D D } or { E E F F | G G H H }. */
49820 t3 = gen_reg_rtx (V8SImode);
49821 t4 = gen_reg_rtx (V8SImode);
49822 mask = GEN_INT (high_p
49823 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
49824 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
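/* The pshufd immediate packs four 2-bit element selectors; these values
select { 2, 2, 3, 3 } for the high halves or { 0, 0, 1, 1 } for the low
halves within each lane. */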
49825 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
49826 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
49827
49828 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
49829 break;
49830
49831 case V8HImode:
49832 case V16HImode:
49833 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
49834 uns_p, OPTAB_DIRECT);
49835 t2 = expand_binop (mode,
49836 uns_p ? umul_highpart_optab : smul_highpart_optab,
49837 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
49838 gcc_assert (t1 && t2);
49839
49840 t3 = gen_reg_rtx (mode);
49841 ix86_expand_vec_interleave (t3, t1, t2, high_p);
49842 emit_move_insn (dest, gen_lowpart (wmode, t3));
49843 break;
49844
49845 case V16QImode:
49846 case V32QImode:
49847 case V32HImode:
49848 case V16SImode:
49849 case V64QImode:
49850 t1 = gen_reg_rtx (wmode);
49851 t2 = gen_reg_rtx (wmode);
49852 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
49853 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
49854
49855 emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2)));
49856 break;
49857
49858 default:
49859 gcc_unreachable ();
49860 }
49861 }
49862
49863 void
49864 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
49865 {
49866 rtx res_1, res_2, res_3, res_4;
49867
49868 res_1 = gen_reg_rtx (V4SImode);
49869 res_2 = gen_reg_rtx (V4SImode);
49870 res_3 = gen_reg_rtx (V2DImode);
49871 res_4 = gen_reg_rtx (V2DImode);
49872 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
49873 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
49874
49875 /* Move the results in element 2 down to element 1; we don't care
49876 what goes in elements 2 and 3. Then we can merge the parts
49877 back together with an interleave.
49878
49879 Note that two other sequences were tried:
49880 (1) Use interleaves at the start instead of psrldq, which allows
49881 us to use a single shufps to merge things back at the end.
49882 (2) Use shufps here to combine the two vectors, then pshufd to
49883 put the elements in the correct order.
49884 In both cases the cost of the reformatting stall was too high
49885 and the overall sequence slower. */
49886
49887 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
49888 const0_rtx, const2_rtx,
49889 const0_rtx, const0_rtx));
49890 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
49891 const0_rtx, const2_rtx,
49892 const0_rtx, const0_rtx));
49893 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
49894
49895 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
49896 }
49897
49898 void
49899 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
49900 {
49901 machine_mode mode = GET_MODE (op0);
49902 rtx t1, t2, t3, t4, t5, t6;
49903
49904 if (TARGET_AVX512DQ && mode == V8DImode)
49905 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
49906 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
49907 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
49908 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
49909 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
49910 else if (TARGET_XOP && mode == V2DImode)
49911 {
49912 /* op1: A,B,C,D, op2: E,F,G,H */
49913 op1 = gen_lowpart (V4SImode, op1);
49914 op2 = gen_lowpart (V4SImode, op2);
49915
49916 t1 = gen_reg_rtx (V4SImode);
49917 t2 = gen_reg_rtx (V4SImode);
49918 t3 = gen_reg_rtx (V2DImode);
49919 t4 = gen_reg_rtx (V2DImode);
49920
49921 /* t1: B,A,D,C */
49922 emit_insn (gen_sse2_pshufd_1 (t1, op1,
49923 GEN_INT (1),
49924 GEN_INT (0),
49925 GEN_INT (3),
49926 GEN_INT (2)));
49927
49928 /* t2: (B*E),(A*F),(D*G),(C*H) */
49929 emit_insn (gen_mulv4si3 (t2, t1, op2));
49930
49931 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
49932 emit_insn (gen_xop_phadddq (t3, t2));
49933
49934 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
49935 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
49936
49937 /* Multiply lower parts and add all. */
49938 t5 = gen_reg_rtx (V2DImode);
49939 emit_insn (gen_vec_widen_umult_even_v4si (t5,
49940 gen_lowpart (V4SImode, op1),
49941 gen_lowpart (V4SImode, op2)));
49942 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
49943
49944 }
49945 else
49946 {
49947 machine_mode nmode;
49948 rtx (*umul) (rtx, rtx, rtx);
49949
49950 if (mode == V2DImode)
49951 {
49952 umul = gen_vec_widen_umult_even_v4si;
49953 nmode = V4SImode;
49954 }
49955 else if (mode == V4DImode)
49956 {
49957 umul = gen_vec_widen_umult_even_v8si;
49958 nmode = V8SImode;
49959 }
49960 else if (mode == V8DImode)
49961 {
49962 umul = gen_vec_widen_umult_even_v16si;
49963 nmode = V16SImode;
49964 }
49965 else
49966 gcc_unreachable ();
49967
49968
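/* Each 64-bit product is computed as lo1*lo2 + ((hi1*lo2 + hi2*lo1) << 32);
the hi1*hi2 term only contributes above bit 63 and is dropped. */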
49969 /* Multiply low parts. */
49970 t1 = gen_reg_rtx (mode);
49971 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
49972
49973 /* Shift input vectors right 32 bits so we can multiply high parts. */
49974 t6 = GEN_INT (32);
49975 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
49976 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
49977
49978 /* Multiply high parts by low parts. */
49979 t4 = gen_reg_rtx (mode);
49980 t5 = gen_reg_rtx (mode);
49981 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
49982 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
49983
49984 /* Combine and shift the highparts back. */
49985 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
49986 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
49987
49988 /* Combine high and low parts. */
49989 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
49990 }
49991
49992 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49993 gen_rtx_MULT (mode, op1, op2));
49994 }
49995
49996 /* Return 1 if control transfer instruction INSN
49997 should be encoded with bnd prefix.
49998 If insn is NULL then return 1 when control
49999 transfer instructions should be prefixed with
50000 bnd by default for current function. */
50001
50002 bool
50003 ix86_bnd_prefixed_insn_p (rtx insn)
50004 {
50005 /* For call insns check special flag. */
50006 if (insn && CALL_P (insn))
50007 {
50008 rtx call = get_call_rtx_from (insn);
50009 if (call)
50010 return CALL_EXPR_WITH_BOUNDS_P (call);
50011 }
50012
50013 /* All other insns are prefixed only if function is instrumented. */
50014 return chkp_function_instrumented_p (current_function_decl);
50015 }
50016
50017 /* Calculate integer abs() using only SSE2 instructions. */
50018
50019 void
50020 ix86_expand_sse2_abs (rtx target, rtx input)
50021 {
50022 machine_mode mode = GET_MODE (target);
50023 rtx tmp0, tmp1, x;
50024
50025 switch (mode)
50026 {
50027 /* For 32-bit signed integer X, the best way to calculate the absolute
50028 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
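/* E.g. with W = 32 and X = -5: the mask X >> 31 is -1,
and ((-5 ^ -1) - (-1)) = 4 + 1 = 5. */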
50029 case V4SImode:
50030 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
50031 GEN_INT (GET_MODE_BITSIZE
50032 (GET_MODE_INNER (mode)) - 1),
50033 NULL, 0, OPTAB_DIRECT);
50034 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
50035 NULL, 0, OPTAB_DIRECT);
50036 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
50037 target, 0, OPTAB_DIRECT);
50038 break;
50039
50040 /* For 16-bit signed integer X, the best way to calculate the absolute
50041 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
50042 case V8HImode:
50043 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50044
50045 x = expand_simple_binop (mode, SMAX, tmp0, input,
50046 target, 0, OPTAB_DIRECT);
50047 break;
50048
50049 /* For 8-bit signed integer X, the best way to calculate the absolute
50050 value of X is min ((unsigned char) X, (unsigned char) (-X)),
50051 as SSE2 provides the PMINUB insn. */
50052 case V16QImode:
50053 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50054
50055 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
50056 target, 0, OPTAB_DIRECT);
50057 break;
50058
50059 default:
50060 gcc_unreachable ();
50061 }
50062
50063 if (x != target)
50064 emit_move_insn (target, x);
50065 }
50066
50067 /* Expand an insert into a vector register through pinsr insn.
50068 Return true if successful. */
50069
50070 bool
50071 ix86_expand_pinsr (rtx *operands)
50072 {
50073 rtx dst = operands[0];
50074 rtx src = operands[3];
50075
50076 unsigned int size = INTVAL (operands[1]);
50077 unsigned int pos = INTVAL (operands[2]);
50078
50079 if (GET_CODE (dst) == SUBREG)
50080 {
50081 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
50082 dst = SUBREG_REG (dst);
50083 }
50084
50085 if (GET_CODE (src) == SUBREG)
50086 src = SUBREG_REG (src);
50087
50088 switch (GET_MODE (dst))
50089 {
50090 case V16QImode:
50091 case V8HImode:
50092 case V4SImode:
50093 case V2DImode:
50094 {
50095 machine_mode srcmode, dstmode;
50096 rtx (*pinsr)(rtx, rtx, rtx, rtx);
50097
50098 srcmode = mode_for_size (size, MODE_INT, 0);
50099
50100 switch (srcmode)
50101 {
50102 case QImode:
50103 if (!TARGET_SSE4_1)
50104 return false;
50105 dstmode = V16QImode;
50106 pinsr = gen_sse4_1_pinsrb;
50107 break;
50108
50109 case HImode:
50110 if (!TARGET_SSE2)
50111 return false;
50112 dstmode = V8HImode;
50113 pinsr = gen_sse2_pinsrw;
50114 break;
50115
50116 case SImode:
50117 if (!TARGET_SSE4_1)
50118 return false;
50119 dstmode = V4SImode;
50120 pinsr = gen_sse4_1_pinsrd;
50121 break;
50122
50123 case DImode:
50124 gcc_assert (TARGET_64BIT);
50125 if (!TARGET_SSE4_1)
50126 return false;
50127 dstmode = V2DImode;
50128 pinsr = gen_sse4_1_pinsrq;
50129 break;
50130
50131 default:
50132 return false;
50133 }
50134
50135 rtx d = dst;
50136 if (GET_MODE (dst) != dstmode)
50137 d = gen_reg_rtx (dstmode);
50138 src = gen_lowpart (srcmode, src);
50139
50140 pos /= size;
50141
50142 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
50143 GEN_INT (1 << pos)));
50144 if (d != dst)
50145 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50146 return true;
50147 }
50148
50149 default:
50150 return false;
50151 }
50152 }
50153 \f
50154 /* This function returns the calling-ABI-specific va_list type node
50155 for FNDECL. */
50156
50157 static tree
50158 ix86_fn_abi_va_list (tree fndecl)
50159 {
50160 if (!TARGET_64BIT)
50161 return va_list_type_node;
50162 gcc_assert (fndecl != NULL_TREE);
50163
50164 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
50165 return ms_va_list_type_node;
50166 else
50167 return sysv_va_list_type_node;
50168 }
50169
50170 /* Returns the canonical va_list type specified by TYPE. If there
50171 is no valid TYPE provided, it returns NULL_TREE. */
50172
50173 static tree
50174 ix86_canonical_va_list_type (tree type)
50175 {
50176 tree wtype, htype;
50177
50178 /* Resolve references and pointers to va_list type. */
50179 if (TREE_CODE (type) == MEM_REF)
50180 type = TREE_TYPE (type);
50181 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
50182 type = TREE_TYPE (type);
50183 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
50184 type = TREE_TYPE (type);
50185
50186 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
50187 {
50188 wtype = va_list_type_node;
50189 gcc_assert (wtype != NULL_TREE);
50190 htype = type;
50191 if (TREE_CODE (wtype) == ARRAY_TYPE)
50192 {
50193 /* If va_list is an array type, the argument may have decayed
50194 to a pointer type, e.g. by being passed to another function.
50195 In that case, unwrap both types so that we can compare the
50196 underlying records. */
50197 if (TREE_CODE (htype) == ARRAY_TYPE
50198 || POINTER_TYPE_P (htype))
50199 {
50200 wtype = TREE_TYPE (wtype);
50201 htype = TREE_TYPE (htype);
50202 }
50203 }
50204 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50205 return va_list_type_node;
50206 wtype = sysv_va_list_type_node;
50207 gcc_assert (wtype != NULL_TREE);
50208 htype = type;
50209 if (TREE_CODE (wtype) == ARRAY_TYPE)
50210 {
50211 /* If va_list is an array type, the argument may have decayed
50212 to a pointer type, e.g. by being passed to another function.
50213 In that case, unwrap both types so that we can compare the
50214 underlying records. */
50215 if (TREE_CODE (htype) == ARRAY_TYPE
50216 || POINTER_TYPE_P (htype))
50217 {
50218 wtype = TREE_TYPE (wtype);
50219 htype = TREE_TYPE (htype);
50220 }
50221 }
50222 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50223 return sysv_va_list_type_node;
50224 wtype = ms_va_list_type_node;
50225 gcc_assert (wtype != NULL_TREE);
50226 htype = type;
50227 if (TREE_CODE (wtype) == ARRAY_TYPE)
50228 {
50229 /* If va_list is an array type, the argument may have decayed
50230 to a pointer type, e.g. by being passed to another function.
50231 In that case, unwrap both types so that we can compare the
50232 underlying records. */
50233 if (TREE_CODE (htype) == ARRAY_TYPE
50234 || POINTER_TYPE_P (htype))
50235 {
50236 wtype = TREE_TYPE (wtype);
50237 htype = TREE_TYPE (htype);
50238 }
50239 }
50240 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50241 return ms_va_list_type_node;
50242 return NULL_TREE;
50243 }
50244 return std_canonical_va_list_type (type);
50245 }
50246
50247 /* Iterate through the target-specific builtin types for va_list.
50248 IDX denotes the iterator, *PTREE is set to the result type of
50249 the va_list builtin, and *PNAME to its internal type.
50250 Returns zero if there is no element for this index, otherwise
50251 IDX should be increased upon the next call.
50252 Note, do not iterate a base builtin's name like __builtin_va_list.
50253 Used from c_common_nodes_and_builtins. */
50254
50255 static int
50256 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50257 {
50258 if (TARGET_64BIT)
50259 {
50260 switch (idx)
50261 {
50262 default:
50263 break;
50264
50265 case 0:
50266 *ptree = ms_va_list_type_node;
50267 *pname = "__builtin_ms_va_list";
50268 return 1;
50269
50270 case 1:
50271 *ptree = sysv_va_list_type_node;
50272 *pname = "__builtin_sysv_va_list";
50273 return 1;
50274 }
50275 }
50276
50277 return 0;
50278 }
50279
50280 #undef TARGET_SCHED_DISPATCH
50281 #define TARGET_SCHED_DISPATCH has_dispatch
50282 #undef TARGET_SCHED_DISPATCH_DO
50283 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50284 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50285 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50286 #undef TARGET_SCHED_REORDER
50287 #define TARGET_SCHED_REORDER ix86_sched_reorder
50288 #undef TARGET_SCHED_ADJUST_PRIORITY
50289 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50290 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50291 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50292 ix86_dependencies_evaluation_hook
50293
50294 /* The size of the dispatch window is the total number of bytes of
50295 object code allowed in a window. */
50296 #define DISPATCH_WINDOW_SIZE 16
50297
50298 /* Number of dispatch windows considered for scheduling. */
50299 #define MAX_DISPATCH_WINDOWS 3
50300
50301 /* Maximum number of instructions in a window. */
50302 #define MAX_INSN 4
50303
50304 /* Maximum number of immediate operands in a window. */
50305 #define MAX_IMM 4
50306
50307 /* Maximum number of immediate bits allowed in a window. */
50308 #define MAX_IMM_SIZE 128
50309
50310 /* Maximum number of 32 bit immediates allowed in a window. */
50311 #define MAX_IMM_32 4
50312
50313 /* Maximum number of 64 bit immediates allowed in a window. */
50314 #define MAX_IMM_64 2
50315
50316 /* Maximum total of loads or prefetches allowed in a window. */
50317 #define MAX_LOAD 2
50318
50319 /* Maximum total of stores allowed in a window. */
50320 #define MAX_STORE 1
50321
50322 #undef BIG
50323 #define BIG 100
50324
50325
50326 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
50327 enum dispatch_group {
50328 disp_no_group = 0,
50329 disp_load,
50330 disp_store,
50331 disp_load_store,
50332 disp_prefetch,
50333 disp_imm,
50334 disp_imm_32,
50335 disp_imm_64,
50336 disp_branch,
50337 disp_cmp,
50338 disp_jcc,
50339 disp_last
50340 };
50341
50342 /* Number of allowable groups in a dispatch window. It is an array
50343 indexed by dispatch_group enum. 100 is used as a big number,
50344 because the number of these kinds of operations does not have any
50345 effect in a dispatch window, but we need them for other reasons in
50346 the table. */
50347 static unsigned int num_allowable_groups[disp_last] = {
50348 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
50349 };
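/* The entries above follow enum dispatch_group: no_group, load, store,
load_store, prefetch, imm, imm_32, imm_64, branch, cmp, jcc. */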
50350
50351 char group_name[disp_last + 1][16] = {
50352 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50353 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50354 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50355 };
50356
50357 /* Instruction path. */
50358 enum insn_path {
50359 no_path = 0,
50360 path_single, /* Single micro op. */
50361 path_double, /* Double micro op. */
50362 path_multi, /* Instructions with more than 2 micro ops. */
50363 last_path
50364 };
50365
50366 /* sched_insn_info defines a window to the instructions scheduled in
50367 the basic block. It contains a pointer to the insn_info table and
50368 the instruction scheduled.
50369
50370 Windows are allocated for each basic block and are linked
50371 together. */
50372 typedef struct sched_insn_info_s {
50373 rtx insn;
50374 enum dispatch_group group;
50375 enum insn_path path;
50376 int byte_len;
50377 int imm_bytes;
50378 } sched_insn_info;
50379
50380 /* Linked list of dispatch windows. This is a two-way list of
50381 dispatch windows of a basic block. It contains information about
50382 the number of uops in the window and the total number of
50383 instructions and of bytes in the object code for this dispatch
50384 window. */
50385 typedef struct dispatch_windows_s {
50386 int num_insn; /* Number of insn in the window. */
50387 int num_uops; /* Number of uops in the window. */
50388 int window_size; /* Number of bytes in the window. */
50389 int window_num; /* Window number, 0 or 1. */
50390 int num_imm; /* Number of immediates in an insn. */
50391 int num_imm_32; /* Number of 32 bit immediates in an insn. */
50392 int num_imm_64; /* Number of 64 bit immediates in an insn. */
50393 int imm_size; /* Total immediates in the window. */
50394 int num_loads; /* Total memory loads in the window. */
50395 int num_stores; /* Total memory stores in the window. */
50396 int violation; /* Violation exists in window. */
50397 sched_insn_info *window; /* Pointer to the window. */
50398 struct dispatch_windows_s *next;
50399 struct dispatch_windows_s *prev;
50400 } dispatch_windows;
50401
50402 /* Immediate values used in an insn. */
50403 typedef struct imm_info_s
50404 {
50405 int imm;
50406 int imm32;
50407 int imm64;
50408 } imm_info;
50409
50410 static dispatch_windows *dispatch_window_list;
50411 static dispatch_windows *dispatch_window_list1;
50412
50413 /* Get dispatch group of insn. */
50414
50415 static enum dispatch_group
50416 get_mem_group (rtx_insn *insn)
50417 {
50418 enum attr_memory memory;
50419
50420 if (INSN_CODE (insn) < 0)
50421 return disp_no_group;
50422 memory = get_attr_memory (insn);
50423 if (memory == MEMORY_STORE)
50424 return disp_store;
50425
50426 if (memory == MEMORY_LOAD)
50427 return disp_load;
50428
50429 if (memory == MEMORY_BOTH)
50430 return disp_load_store;
50431
50432 return disp_no_group;
50433 }
50434
50435 /* Return true if insn is a compare instruction. */
50436
50437 static bool
50438 is_cmp (rtx_insn *insn)
50439 {
50440 enum attr_type type;
50441
50442 type = get_attr_type (insn);
50443 return (type == TYPE_TEST
50444 || type == TYPE_ICMP
50445 || type == TYPE_FCMP
50446 || GET_CODE (PATTERN (insn)) == COMPARE);
50447 }
50448
50449 /* Return true if a dispatch violation was encountered. */
50450
50451 static bool
50452 dispatch_violation (void)
50453 {
50454 if (dispatch_window_list->next)
50455 return dispatch_window_list->next->violation;
50456 return dispatch_window_list->violation;
50457 }
50458
50459 /* Return true if insn is a branch instruction. */
50460
50461 static bool
50462 is_branch (rtx_insn *insn)
50463 {
50464 return (CALL_P (insn) || JUMP_P (insn));
50465 }
50466
50467 /* Return true if insn is a prefetch instruction. */
50468
50469 static bool
50470 is_prefetch (rtx_insn *insn)
50471 {
50472 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
50473 }
50474
50475 /* This function initializes a dispatch window and the list container holding a
50476 pointer to the window. */
50477
50478 static void
50479 init_window (int window_num)
50480 {
50481 int i;
50482 dispatch_windows *new_list;
50483
50484 if (window_num == 0)
50485 new_list = dispatch_window_list;
50486 else
50487 new_list = dispatch_window_list1;
50488
50489 new_list->num_insn = 0;
50490 new_list->num_uops = 0;
50491 new_list->window_size = 0;
50492 new_list->next = NULL;
50493 new_list->prev = NULL;
50494 new_list->window_num = window_num;
50495 new_list->num_imm = 0;
50496 new_list->num_imm_32 = 0;
50497 new_list->num_imm_64 = 0;
50498 new_list->imm_size = 0;
50499 new_list->num_loads = 0;
50500 new_list->num_stores = 0;
50501 new_list->violation = false;
50502
50503 for (i = 0; i < MAX_INSN; i++)
50504 {
50505 new_list->window[i].insn = NULL;
50506 new_list->window[i].group = disp_no_group;
50507 new_list->window[i].path = no_path;
50508 new_list->window[i].byte_len = 0;
50509 new_list->window[i].imm_bytes = 0;
50510 }
50511 return;
50512 }
50513
50514 /* This function allocates and initializes a dispatch window and the
50515 list container holding a pointer to the window. */
50516
50517 static dispatch_windows *
50518 allocate_window (void)
50519 {
50520 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
50521 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
50522
50523 return new_list;
50524 }
50525
50526 /* This routine initializes the dispatch scheduling information. It
50527 initiates building dispatch scheduler tables and constructs the
50528 first dispatch window. */
50529
50530 static void
50531 init_dispatch_sched (void)
50532 {
50533 /* Allocate a dispatch list and a window. */
50534 dispatch_window_list = allocate_window ();
50535 dispatch_window_list1 = allocate_window ();
50536 init_window (0);
50537 init_window (1);
50538 }
50539
50540 /* This function returns true if a branch is detected. End of a basic block
50541 does not have to be a branch, but here we assume only branches end a
50542 window. */
50543
50544 static bool
50545 is_end_basic_block (enum dispatch_group group)
50546 {
50547 return group == disp_branch;
50548 }
50549
50550 /* This function is called when the end of a window processing is reached. */
50551
50552 static void
50553 process_end_window (void)
50554 {
50555 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
50556 if (dispatch_window_list->next)
50557 {
50558 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
50559 gcc_assert (dispatch_window_list->window_size
50560 + dispatch_window_list1->window_size <= 48);
50561 init_window (1);
50562 }
50563 init_window (0);
50564 }
50565
50566 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
50567 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
50568 for 48 bytes of instructions. Note that these windows are not dispatch
50569 windows whose size is DISPATCH_WINDOW_SIZE. */
50570
50571 static dispatch_windows *
50572 allocate_next_window (int window_num)
50573 {
50574 if (window_num == 0)
50575 {
50576 if (dispatch_window_list->next)
50577 init_window (1);
50578 init_window (0);
50579 return dispatch_window_list;
50580 }
50581
50582 dispatch_window_list->next = dispatch_window_list1;
50583 dispatch_window_list1->prev = dispatch_window_list;
50584
50585 return dispatch_window_list1;
50586 }
50587
50588 /* Compute number of immediate operands of an instruction. */
50589
50590 static void
50591 find_constant (rtx in_rtx, imm_info *imm_values)
50592 {
50593 if (INSN_P (in_rtx))
50594 in_rtx = PATTERN (in_rtx);
50595 subrtx_iterator::array_type array;
50596 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
50597 if (const_rtx x = *iter)
50598 switch (GET_CODE (x))
50599 {
50600 case CONST:
50601 case SYMBOL_REF:
50602 case CONST_INT:
50603 (imm_values->imm)++;
50604 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
50605 (imm_values->imm32)++;
50606 else
50607 (imm_values->imm64)++;
50608 break;
50609
50610 case CONST_DOUBLE:
50611 case CONST_WIDE_INT:
50612 (imm_values->imm)++;
50613 (imm_values->imm64)++;
50614 break;
50615
50616 case CODE_LABEL:
50617 if (LABEL_KIND (x) == LABEL_NORMAL)
50618 {
50619 (imm_values->imm)++;
50620 (imm_values->imm32)++;
50621 }
50622 break;
50623
50624 default:
50625 break;
50626 }
50627 }
50628
50629 /* Return total size of immediate operands of an instruction along with number
50630 of corresponding immediate-operands. It initializes its parameters to zero
50631 before calling FIND_CONSTANT.
50632 INSN is the input instruction. IMM is the total of immediates.
50633 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
50634 bit immediates. */
50635
50636 static int
50637 get_num_immediates (rtx_insn *insn, int *imm, int *imm32, int *imm64)
50638 {
50639 imm_info imm_values = {0, 0, 0};
50640
50641 find_constant (insn, &imm_values);
50642 *imm = imm_values.imm;
50643 *imm32 = imm_values.imm32;
50644 *imm64 = imm_values.imm64;
50645 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
50646 }
50647
50648 /* This function indicates whether INSN has at least one immediate
50649 operand. */
50650
50651 static bool
50652 has_immediate (rtx_insn *insn)
50653 {
50654 int num_imm_operand;
50655 int num_imm32_operand;
50656 int num_imm64_operand;
50657
50658 if (insn)
50659 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50660 &num_imm64_operand);
50661 return false;
50662 }
50663
50664 /* Return single or double path for instructions. */
50665
50666 static enum insn_path
50667 get_insn_path (rtx_insn *insn)
50668 {
50669 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
50670
50671 if ((int)path == 0)
50672 return path_single;
50673
50674 if ((int)path == 1)
50675 return path_double;
50676
50677 return path_multi;
50678 }
50679
50680 /* Return insn dispatch group. */
50681
50682 static enum dispatch_group
50683 get_insn_group (rtx_insn *insn)
50684 {
50685 enum dispatch_group group = get_mem_group (insn);
50686 if (group)
50687 return group;
50688
50689 if (is_branch (insn))
50690 return disp_branch;
50691
50692 if (is_cmp (insn))
50693 return disp_cmp;
50694
50695 if (has_immediate (insn))
50696 return disp_imm;
50697
50698 if (is_prefetch (insn))
50699 return disp_prefetch;
50700
50701 return disp_no_group;
50702 }
50703
50704 /* Count number of GROUP restricted instructions in a dispatch
50705 window WINDOW_LIST. */
50706
50707 static int
50708 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
50709 {
50710 enum dispatch_group group = get_insn_group (insn);
50711 int imm_size;
50712 int num_imm_operand;
50713 int num_imm32_operand;
50714 int num_imm64_operand;
50715
50716 if (group == disp_no_group)
50717 return 0;
50718
50719 if (group == disp_imm)
50720 {
50721 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50722 &num_imm64_operand);
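/* Adding this insn's immediates must not overflow the per-window limits on
immediate count, on the mix of 32-bit and 64-bit immediates, or on total
immediate bytes; otherwise report BIG. */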
50723 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
50724 || num_imm_operand + window_list->num_imm > MAX_IMM
50725 || (num_imm32_operand > 0
50726 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
50727 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
50728 || (num_imm64_operand > 0
50729 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
50730 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
50731 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
50732 && num_imm64_operand > 0
50733 && ((window_list->num_imm_64 > 0
50734 && window_list->num_insn >= 2)
50735 || window_list->num_insn >= 3)))
50736 return BIG;
50737
50738 return 1;
50739 }
50740
50741 if ((group == disp_load_store
50742 && (window_list->num_loads >= MAX_LOAD
50743 || window_list->num_stores >= MAX_STORE))
50744 || ((group == disp_load
50745 || group == disp_prefetch)
50746 && window_list->num_loads >= MAX_LOAD)
50747 || (group == disp_store
50748 && window_list->num_stores >= MAX_STORE))
50749 return BIG;
50750
50751 return 1;
50752 }
50753
50754 /* This function returns true if insn satisfies dispatch rules on the
50755 last window scheduled. */
50756
50757 static bool
50758 fits_dispatch_window (rtx_insn *insn)
50759 {
50760 dispatch_windows *window_list = dispatch_window_list;
50761 dispatch_windows *window_list_next = dispatch_window_list->next;
50762 unsigned int num_restrict;
50763 enum dispatch_group group = get_insn_group (insn);
50764 enum insn_path path = get_insn_path (insn);
50765 int sum;
50766
50767 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
50768 instructions should be given the lowest priority in the
50769 scheduling process in Haifa scheduler to make sure they will be
50770 scheduled in the same dispatch window as the reference to them. */
50771 if (group == disp_jcc || group == disp_cmp)
50772 return false;
50773
50774 /* Check nonrestricted. */
50775 if (group == disp_no_group || group == disp_branch)
50776 return true;
50777
50778 /* Get last dispatch window. */
50779 if (window_list_next)
50780 window_list = window_list_next;
50781
50782 if (window_list->window_num == 1)
50783 {
50784 sum = window_list->prev->window_size + window_list->window_size;
50785
50786 if (sum == 32
50787 || (min_insn_size (insn) + sum) >= 48)
50788 /* Window 1 is full. Go for next window. */
50789 return true;
50790 }
50791
50792 num_restrict = count_num_restricted (insn, window_list);
50793
50794 if (num_restrict > num_allowable_groups[group])
50795 return false;
50796
50797 /* See if it fits in the first window. */
50798 if (window_list->window_num == 0)
50799 {
50800 /* The first window should have only single and double path
50801 uops. */
50802 if (path == path_double
50803 && (window_list->num_uops + 2) > MAX_INSN)
50804 return false;
50805 else if (path != path_single)
50806 return false;
50807 }
50808 return true;
50809 }
50810
50811 /* Add an instruction INSN with NUM_UOPS micro-operations to the
50812 dispatch window WINDOW_LIST. */
50813
50814 static void
50815 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
50816 {
50817 int byte_len = min_insn_size (insn);
50818 int num_insn = window_list->num_insn;
50819 int imm_size;
50820 sched_insn_info *window = window_list->window;
50821 enum dispatch_group group = get_insn_group (insn);
50822 enum insn_path path = get_insn_path (insn);
50823 int num_imm_operand;
50824 int num_imm32_operand;
50825 int num_imm64_operand;
50826
50827 if (!window_list->violation && group != disp_cmp
50828 && !fits_dispatch_window (insn))
50829 window_list->violation = true;
50830
50831 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50832 &num_imm64_operand);
50833
50834 /* Initialize window with new instruction. */
50835 window[num_insn].insn = insn;
50836 window[num_insn].byte_len = byte_len;
50837 window[num_insn].group = group;
50838 window[num_insn].path = path;
50839 window[num_insn].imm_bytes = imm_size;
50840
50841 window_list->window_size += byte_len;
50842 window_list->num_insn = num_insn + 1;
50843 window_list->num_uops = window_list->num_uops + num_uops;
50844 window_list->imm_size += imm_size;
50845 window_list->num_imm += num_imm_operand;
50846 window_list->num_imm_32 += num_imm32_operand;
50847 window_list->num_imm_64 += num_imm64_operand;
50848
50849 if (group == disp_store)
50850 window_list->num_stores += 1;
50851 else if (group == disp_load
50852 || group == disp_prefetch)
50853 window_list->num_loads += 1;
50854 else if (group == disp_load_store)
50855 {
50856 window_list->num_stores += 1;
50857 window_list->num_loads += 1;
50858 }
50859 }
50860
50861 /* Adds a scheduled instruction, INSN, to the current dispatch window.
50862 If the total bytes of instructions or the number of instructions in
50863 the window exceeds the allowed maximum, it allocates a new window.
50864
50865 static void
50866 add_to_dispatch_window (rtx_insn *insn)
50867 {
50868 int byte_len;
50869 dispatch_windows *window_list;
50870 dispatch_windows *next_list;
50871 dispatch_windows *window0_list;
50872 enum insn_path path;
50873 enum dispatch_group insn_group;
50874 bool insn_fits;
50875 int num_insn;
50876 int num_uops;
50877 int window_num;
50878 int insn_num_uops;
50879 int sum;
50880
50881 if (INSN_CODE (insn) < 0)
50882 return;
50883
50884 byte_len = min_insn_size (insn);
50885 window_list = dispatch_window_list;
50886 next_list = window_list->next;
50887 path = get_insn_path (insn);
50888 insn_group = get_insn_group (insn);
50889
50890 /* Get the last dispatch window. */
50891 if (next_list)
50892 window_list = dispatch_window_list->next;
50893
50894 if (path == path_single)
50895 insn_num_uops = 1;
50896 else if (path == path_double)
50897 insn_num_uops = 2;
50898 else
50899 insn_num_uops = (int) path;
50900
50901 /* If the current window is full, get a new window.
50902 Window number zero is full if MAX_INSN uops are scheduled in it.
50903 Window number one is full if window zero's bytes plus window
50904 one's bytes equal 32, or if adding the bytes of the new instruction
50905 makes the total greater than 48, or if it already has MAX_INSN
50906 instructions in it. */
50907 num_insn = window_list->num_insn;
50908 num_uops = window_list->num_uops;
50909 window_num = window_list->window_num;
50910 insn_fits = fits_dispatch_window (insn);
50911
50912 if (num_insn >= MAX_INSN
50913 || num_uops + insn_num_uops > MAX_INSN
50914 || !(insn_fits))
50915 {
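/* Switch to the other window (0 <-> 1). */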
50916 window_num = ~window_num & 1;
50917 window_list = allocate_next_window (window_num);
50918 }
50919
50920 if (window_num == 0)
50921 {
50922 add_insn_window (insn, window_list, insn_num_uops);
50923 if (window_list->num_insn >= MAX_INSN
50924 && insn_group == disp_branch)
50925 {
50926 process_end_window ();
50927 return;
50928 }
50929 }
50930 else if (window_num == 1)
50931 {
50932 window0_list = window_list->prev;
50933 sum = window0_list->window_size + window_list->window_size;
50934 if (sum == 32
50935 || (byte_len + sum) >= 48)
50936 {
50937 process_end_window ();
50938 window_list = dispatch_window_list;
50939 }
50940
50941 add_insn_window (insn, window_list, insn_num_uops);
50942 }
50943 else
50944 gcc_unreachable ();
50945
50946 if (is_end_basic_block (insn_group))
50947 {
50948 /* End of basic block is reached; do end-basic-block processing. */
50949 process_end_window ();
50950 return;
50951 }
50952 }
50953
50954 /* Print the dispatch window, WINDOW_NUM, to FILE. */
50955
50956 DEBUG_FUNCTION static void
50957 debug_dispatch_window_file (FILE *file, int window_num)
50958 {
50959 dispatch_windows *list;
50960 int i;
50961
50962 if (window_num == 0)
50963 list = dispatch_window_list;
50964 else
50965 list = dispatch_window_list1;
50966
50967 fprintf (file, "Window #%d:\n", list->window_num);
50968 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
50969 list->num_insn, list->num_uops, list->window_size);
50970 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
50971 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
50972
50973 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
50974 list->num_stores);
50975 fprintf (file, " insn info:\n");
50976
50977 for (i = 0; i < MAX_INSN; i++)
50978 {
50979 if (!list->window[i].insn)
50980 break;
50981 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
50982 i, group_name[list->window[i].group],
50983 i, (void *)list->window[i].insn,
50984 i, list->window[i].path,
50985 i, list->window[i].byte_len,
50986 i, list->window[i].imm_bytes);
50987 }
50988 }
50989
50990 /* Print to stdout a dispatch window. */
50991
50992 DEBUG_FUNCTION void
50993 debug_dispatch_window (int window_num)
50994 {
50995 debug_dispatch_window_file (stdout, window_num);
50996 }
50997
50998 /* Print INSN dispatch information to FILE. */
50999
51000 DEBUG_FUNCTION static void
51001 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
51002 {
51003 int byte_len;
51004 enum insn_path path;
51005 enum dispatch_group group;
51006 int imm_size;
51007 int num_imm_operand;
51008 int num_imm32_operand;
51009 int num_imm64_operand;
51010
51011 if (INSN_CODE (insn) < 0)
51012 return;
51013
51014 byte_len = min_insn_size (insn);
51015 path = get_insn_path (insn);
51016 group = get_insn_group (insn);
51017 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51018 &num_imm64_operand);
51019
51020 fprintf (file, " insn info:\n");
51021 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
51022 group_name[group], path, byte_len);
51023 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51024 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
51025 }
51026
51027 /* Print to STDERR the status of the ready list with respect to
51028 dispatch windows. */
51029
51030 DEBUG_FUNCTION void
51031 debug_ready_dispatch (void)
51032 {
51033 int i;
51034 int no_ready = number_in_ready ();
51035
51036 fprintf (stdout, "Number of ready: %d\n", no_ready);
51037
51038 for (i = 0; i < no_ready; i++)
51039 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
51040 }
51041
51042 /* This routine is the driver of the dispatch scheduler. */
51043
51044 static void
51045 do_dispatch (rtx_insn *insn, int mode)
51046 {
51047 if (mode == DISPATCH_INIT)
51048 init_dispatch_sched ();
51049 else if (mode == ADD_TO_DISPATCH_WINDOW)
51050 add_to_dispatch_window (insn);
51051 }
51052
51053 /* Return TRUE if Dispatch Scheduling is supported. */
51054
51055 static bool
51056 has_dispatch (rtx_insn *insn, int action)
51057 {
51058 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
51059 && flag_dispatch_scheduler)
51060 switch (action)
51061 {
51062 default:
51063 return false;
51064
51065 case IS_DISPATCH_ON:
51066 return true;
51067 break;
51068
51069 case IS_CMP:
51070 return is_cmp (insn);
51071
51072 case DISPATCH_VIOLATION:
51073 return dispatch_violation ();
51074
51075 case FITS_DISPATCH_WINDOW:
51076 return fits_dispatch_window (insn);
51077 }
51078
51079 return false;
51080 }
51081
51082 /* Implementation of reassociation_width target hook used by
51083 reassoc phase to identify parallelism level in reassociated
51084 tree. The statement's tree_code is passed in OPC. The arguments'
51085 type is passed in MODE.
51086
51087 Currently parallel reassociation is enabled for Atom
51088 processors only and we set reassociation width to be 2
51089 because Atom may issue up to 2 instructions per cycle.
51090
51091 Return value should be fixed if parallel reassociation is
51092 enabled for other processors. */
51093
51094 static int
51095 ix86_reassociation_width (unsigned int, machine_mode mode)
51096 {
51097 /* Vector part. */
51098 if (VECTOR_MODE_P (mode))
51099 {
51100 if (TARGET_VECTOR_PARALLEL_EXECUTION)
51101 return 2;
51102 else
51103 return 1;
51104 }
51105
51106 /* Scalar part. */
51107 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
51108 return 2;
51109 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
51110 return 2;
51111 else
51112 return 1;
51113 }
51114
51115 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
51116 place emms and femms instructions. */
51117
51118 static machine_mode
51119 ix86_preferred_simd_mode (machine_mode mode)
51120 {
51121 if (!TARGET_SSE)
51122 return word_mode;
51123
51124 switch (mode)
51125 {
51126 case QImode:
51127 return TARGET_AVX512BW ? V64QImode :
51128 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
51129 case HImode:
51130 return TARGET_AVX512BW ? V32HImode :
51131 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
51132 case SImode:
51133 return TARGET_AVX512F ? V16SImode :
51134 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
51135 case DImode:
51136 return TARGET_AVX512F ? V8DImode :
51137 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
51138
51139 case SFmode:
51140 if (TARGET_AVX512F)
51141 return V16SFmode;
51142 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51143 return V8SFmode;
51144 else
51145 return V4SFmode;
51146
51147 case DFmode:
51148 if (!TARGET_VECTORIZE_DOUBLE)
51149 return word_mode;
51150 else if (TARGET_AVX512F)
51151 return V8DFmode;
51152 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51153 return V4DFmode;
51154 else if (TARGET_SSE2)
51155 return V2DFmode;
51156 /* FALLTHRU */
51157
51158 default:
51159 return word_mode;
51160 }
51161 }
51162
51163 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
51164 vectors. If AVX512F is enabled then try vectorizing with 512bit,
51165 256bit and 128bit vectors. */
51166
51167 static unsigned int
51168 ix86_autovectorize_vector_sizes (void)
51169 {
51170 return TARGET_AVX512F ? 64 | 32 | 16 :
51171 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
51172 }
51173
51174 \f
51175
51176 /* Return class of registers which could be used for pseudo of MODE
51177 and of class RCLASS for spilling instead of memory. Return NO_REGS
51178 if it is not possible or non-profitable. */
51179 static reg_class_t
51180 ix86_spill_class (reg_class_t rclass, machine_mode mode)
51181 {
51182 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
51183 && (mode == SImode || (TARGET_64BIT && mode == DImode))
51184 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
51185 return ALL_SSE_REGS;
51186 return NO_REGS;
51187 }
51188
51189 /* Implement targetm.vectorize.init_cost. */
51190
51191 static void *
51192 ix86_init_cost (struct loop *)
51193 {
51194 unsigned *cost = XNEWVEC (unsigned, 3);
51195 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
51196 return cost;
51197 }
51198
51199 /* Implement targetm.vectorize.add_stmt_cost. */
51200
51201 static unsigned
51202 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51203 struct _stmt_vec_info *stmt_info, int misalign,
51204 enum vect_cost_model_location where)
51205 {
51206 unsigned *cost = (unsigned *) data;
51207 unsigned retval = 0;
51208
51209 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51210 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51211
51212 /* Statements in an inner loop relative to the loop being
51213 vectorized are weighted more heavily. The value here is
51214 arbitrary and could potentially be improved with analysis. */
51215 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51216 count *= 50; /* FIXME. */
51217
51218 retval = (unsigned) (count * stmt_cost);
51219
51220 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
51221 for Silvermont, as it has an out-of-order integer pipeline and can execute
51222 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
51223 if (TARGET_SILVERMONT || TARGET_INTEL)
51224 if (stmt_info && stmt_info->stmt)
51225 {
51226 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
51227 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
51228 retval = (retval * 17) / 10;
51229 }
51230
51231 cost[where] += retval;
51232
51233 return retval;
51234 }
51235
51236 /* Implement targetm.vectorize.finish_cost. */
51237
51238 static void
51239 ix86_finish_cost (void *data, unsigned *prologue_cost,
51240 unsigned *body_cost, unsigned *epilogue_cost)
51241 {
51242 unsigned *cost = (unsigned *) data;
51243 *prologue_cost = cost[vect_prologue];
51244 *body_cost = cost[vect_body];
51245 *epilogue_cost = cost[vect_epilogue];
51246 }
51247
51248 /* Implement targetm.vectorize.destroy_cost_data. */
51249
51250 static void
51251 ix86_destroy_cost_data (void *data)
51252 {
51253 free (data);
51254 }
51255
51256 /* Validate target specific memory model bits in VAL. */
51257
51258 static unsigned HOST_WIDE_INT
51259 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51260 {
51261 enum memmodel model = memmodel_from_int (val);
51262 bool strong;
51263
51264 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51265 |MEMMODEL_MASK)
51266 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51267 {
51268 warning (OPT_Winvalid_memory_model,
51269 "Unknown architecture specific memory model");
51270 return MEMMODEL_SEQ_CST;
51271 }
51272 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
51273 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
51274 {
51275 warning (OPT_Winvalid_memory_model,
51276 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51277 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
51278 }
51279 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
51280 {
51281 warning (OPT_Winvalid_memory_model,
51282 "HLE_RELEASE not used with RELEASE or stronger memory model");
51283 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
51284 }
51285 return val;
51286 }
51287
51288 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51289 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51290 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
51291 or number of vecsize_mangle variants that should be emitted. */
51292
51293 static int
51294 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51295 struct cgraph_simd_clone *clonei,
51296 tree base_type, int num)
51297 {
51298 int ret = 1;
51299
51300 if (clonei->simdlen
51301 && (clonei->simdlen < 2
51302 || clonei->simdlen > 16
51303 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51304 {
51305 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51306 "unsupported simdlen %d", clonei->simdlen);
51307 return 0;
51308 }
51309
51310 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51311 if (TREE_CODE (ret_type) != VOID_TYPE)
51312 switch (TYPE_MODE (ret_type))
51313 {
51314 case QImode:
51315 case HImode:
51316 case SImode:
51317 case DImode:
51318 case SFmode:
51319 case DFmode:
51320 /* case SCmode: */
51321 /* case DCmode: */
51322 break;
51323 default:
51324 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51325 "unsupported return type %qT for simd\n", ret_type);
51326 return 0;
51327 }
51328
51329 tree t;
51330 int i;
51331
51332 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51333 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
51334 switch (TYPE_MODE (TREE_TYPE (t)))
51335 {
51336 case QImode:
51337 case HImode:
51338 case SImode:
51339 case DImode:
51340 case SFmode:
51341 case DFmode:
51342 /* case SCmode: */
51343 /* case DCmode: */
51344 break;
51345 default:
51346 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51347 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
51348 return 0;
51349 }
51350
51351 if (clonei->cilk_elemental)
51352 {
51353       /* Parse the processor clause here.  If not present, default to 'b'.  */
51354 clonei->vecsize_mangle = 'b';
51355 }
51356 else if (!TREE_PUBLIC (node->decl))
51357 {
51358       /* If the function isn't exported, we can pick just one ISA
51359 	 for the clones.  */
51360 if (TARGET_AVX2)
51361 clonei->vecsize_mangle = 'd';
51362 else if (TARGET_AVX)
51363 clonei->vecsize_mangle = 'c';
51364 else
51365 clonei->vecsize_mangle = 'b';
51366 ret = 1;
51367 }
51368 else
51369 {
51370 clonei->vecsize_mangle = "bcd"[num];
51371 ret = 3;
51372 }
51373 switch (clonei->vecsize_mangle)
51374 {
51375 case 'b':
51376 clonei->vecsize_int = 128;
51377 clonei->vecsize_float = 128;
51378 break;
51379 case 'c':
51380 clonei->vecsize_int = 128;
51381 clonei->vecsize_float = 256;
51382 break;
51383 case 'd':
51384 clonei->vecsize_int = 256;
51385 clonei->vecsize_float = 256;
51386 break;
51387 }
51388 if (clonei->simdlen == 0)
51389 {
51390 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51391 clonei->simdlen = clonei->vecsize_int;
51392 else
51393 clonei->simdlen = clonei->vecsize_float;
51394 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51395 if (clonei->simdlen > 16)
51396 clonei->simdlen = 16;
51397 }
51398 return ret;
51399 }
51400
51401 /* Add target attribute to SIMD clone NODE if needed. */
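/* For example, if the translation unit is compiled with only -msse2, the
   'd' (AVX2) clone created above would lack its ISA, so the code below in
   effect applies target("avx2") to that clone before it is compiled;
   clones whose ISA is already enabled are left alone.  */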
51402
51403 static void
51404 ix86_simd_clone_adjust (struct cgraph_node *node)
51405 {
51406 const char *str = NULL;
51407 gcc_assert (node->decl == cfun->decl);
51408 switch (node->simdclone->vecsize_mangle)
51409 {
51410 case 'b':
51411 if (!TARGET_SSE2)
51412 str = "sse2";
51413 break;
51414 case 'c':
51415 if (!TARGET_AVX)
51416 str = "avx";
51417 break;
51418 case 'd':
51419 if (!TARGET_AVX2)
51420 str = "avx2";
51421 break;
51422 default:
51423 gcc_unreachable ();
51424 }
51425 if (str == NULL)
51426 return;
51427 push_cfun (NULL);
51428 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51429 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
51430 gcc_assert (ok);
51431 pop_cfun ();
51432 ix86_reset_previous_fndecl ();
51433 ix86_set_current_function (node->decl);
51434 }
51435
51436 /* If SIMD clone NODE can't be used in a vectorized loop
51437    in the current function, return -1; otherwise return the badness of using
51438    it (0 if it is most desirable from the vecsize_mangle point of view, 1
51439    slightly less desirable, etc.).  */
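/* Example derived from the cases below: with -mavx2 in effect the 'b' clone
   gets badness 2, 'c' gets 1 and 'd' gets 0, so the AVX2 variant is
   preferred; with plain -msse2 only 'b' is usable (badness 0) and 'c' and
   'd' return -1.  */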
51440
51441 static int
51442 ix86_simd_clone_usable (struct cgraph_node *node)
51443 {
51444 switch (node->simdclone->vecsize_mangle)
51445 {
51446 case 'b':
51447 if (!TARGET_SSE2)
51448 return -1;
51449 if (!TARGET_AVX)
51450 return 0;
51451 return TARGET_AVX2 ? 2 : 1;
51452 case 'c':
51453 if (!TARGET_AVX)
51454 return -1;
51455 return TARGET_AVX2 ? 1 : 0;
51456 break;
51457 case 'd':
51458 if (!TARGET_AVX2)
51459 return -1;
51460 return 0;
51461 default:
51462 gcc_unreachable ();
51463 }
51464 }
51465
51466 /* This function adjusts the unroll factor based on
51467    the hardware capabilities.  For example, bdver3 has
51468    a loop buffer which makes unrolling of smaller
51469    loops less important.  This function decides the
51470    unroll factor using the number of memory references
51471    (the value 32 is used) as a heuristic.  */
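/* Worked example of the heuristic below: a loop body with eight word-sized
   memory references gives mem_count == 8, so the function returns
   32 / 8 == 4 in place of the generic NUNROLL; loops with no memory
   references, or with more than 32 of them, keep NUNROLL.  */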
51472
51473 static unsigned
51474 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
51475 {
51476 basic_block *bbs;
51477 rtx_insn *insn;
51478 unsigned i;
51479 unsigned mem_count = 0;
51480
51481 if (!TARGET_ADJUST_UNROLL)
51482 return nunroll;
51483
51484 /* Count the number of memory references within the loop body.
51485 This value determines the unrolling factor for bdver3 and bdver4
51486 architectures. */
51487 subrtx_iterator::array_type array;
51488 bbs = get_loop_body (loop);
51489 for (i = 0; i < loop->num_nodes; i++)
51490 FOR_BB_INSNS (bbs[i], insn)
51491 if (NONDEBUG_INSN_P (insn))
51492 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
51493 if (const_rtx x = *iter)
51494 if (MEM_P (x))
51495 {
51496 machine_mode mode = GET_MODE (x);
51497 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
51498 if (n_words > 4)
51499 mem_count += 2;
51500 else
51501 mem_count += 1;
51502 }
51503 free (bbs);
51504
51505   if (mem_count && mem_count <= 32)
51506     return 32 / mem_count;
51507
51508 return nunroll;
51509 }
51510
51511
51512 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
51513
51514 static bool
51515 ix86_float_exceptions_rounding_supported_p (void)
51516 {
51517 /* For x87 floating point with standard excess precision handling,
51518 there is no adddf3 pattern (since x87 floating point only has
51519      XFmode operations), so the default hook implementation gets this
51520 wrong. */
51521 return TARGET_80387 || TARGET_SSE_MATH;
51522 }
51523
51524 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
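/* A rough user-level sketch of what the TARGET_SSE_MATH half below builds
   (hedged: the real result is a GENERIC tree, and the x87 half does the
   analogous sequence with fnstenv/fnclex/fnstsw/fldenv):

     unsigned int orig = __builtin_ia32_stmxcsr ();     hold: save MXCSR,
     unsigned int mod = (orig | 0x1f80) & 0xffffffc0;   mask all exceptions
     __builtin_ia32_ldmxcsr (mod);                      and clear the flags
     ...
     __builtin_ia32_ldmxcsr (mod);                      clear: reload MOD
     ...
     int exc = __builtin_ia32_stmxcsr ();               update: read flags,
     __builtin_ia32_ldmxcsr (orig);                     restore MXCSR and
     __atomic_feraiseexcept (exc);                      re-raise via the
                                                        BUILT_IN_ATOMIC_FERAISEEXCEPT
                                                        helper.  */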
51525
51526 static void
51527 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
51528 {
51529 if (!TARGET_80387 && !TARGET_SSE_MATH)
51530 return;
51531 tree exceptions_var = create_tmp_var (integer_type_node);
51532 if (TARGET_80387)
51533 {
51534 tree fenv_index_type = build_index_type (size_int (6));
51535 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
51536 tree fenv_var = create_tmp_var (fenv_type);
51537 mark_addressable (fenv_var);
51538 tree fenv_ptr = build_pointer_type (fenv_type);
51539 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
51540 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
51541 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
51542 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
51543 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
51544 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
51545 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
51546 tree hold_fnclex = build_call_expr (fnclex, 0);
51547 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
51548 hold_fnclex);
51549 *clear = build_call_expr (fnclex, 0);
51550 tree sw_var = create_tmp_var (short_unsigned_type_node);
51551 tree fnstsw_call = build_call_expr (fnstsw, 0);
51552 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
51553 sw_var, fnstsw_call);
51554 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
51555 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
51556 exceptions_var, exceptions_x87);
51557 *update = build2 (COMPOUND_EXPR, integer_type_node,
51558 sw_mod, update_mod);
51559 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
51560 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
51561 }
51562 if (TARGET_SSE_MATH)
51563 {
51564 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node);
51565 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node);
51566 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
51567 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
51568 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
51569 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
51570 mxcsr_orig_var, stmxcsr_hold_call);
51571 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
51572 mxcsr_orig_var,
51573 build_int_cst (unsigned_type_node, 0x1f80));
51574 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
51575 build_int_cst (unsigned_type_node, 0xffffffc0));
51576 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
51577 mxcsr_mod_var, hold_mod_val);
51578 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51579 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
51580 hold_assign_orig, hold_assign_mod);
51581 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
51582 ldmxcsr_hold_call);
51583 if (*hold)
51584 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
51585 else
51586 *hold = hold_all;
51587 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51588 if (*clear)
51589 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
51590 ldmxcsr_clear_call);
51591 else
51592 *clear = ldmxcsr_clear_call;
51593       tree stmxcsr_update_call = build_call_expr (stmxcsr, 0);
51594       tree exceptions_sse = fold_convert (integer_type_node,
51595 					  stmxcsr_update_call);
51596 if (*update)
51597 {
51598 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
51599 exceptions_var, exceptions_sse);
51600 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
51601 exceptions_var, exceptions_mod);
51602 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
51603 exceptions_assign);
51604 }
51605 else
51606 *update = build2 (MODIFY_EXPR, integer_type_node,
51607 exceptions_var, exceptions_sse);
51608 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
51609 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51610 ldmxcsr_update_call);
51611 }
51612 tree atomic_feraiseexcept
51613 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
51614 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
51615 1, exceptions_var);
51616 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51617 atomic_feraiseexcept_call);
51618 }
51619
51620 /* Return the mode to be used for bounds, or VOIDmode
51621    if bounds are not supported.  */
51622
51623 static enum machine_mode
51624 ix86_mpx_bound_mode ()
51625 {
51626   /* Do not support the pointer checker if MPX
51627      is not enabled.  */
51628 if (!TARGET_MPX)
51629 {
51630 if (flag_check_pointer_bounds)
51631 warning (0, "Pointer Checker requires MPX support on this target."
51632 " Use -mmpx options to enable MPX.");
51633 return VOIDmode;
51634 }
51635
51636 return BNDmode;
51637 }
51638
51639 /* Return a constant used to statically initialize constant bounds.
51640
51641 This function is used to create special bound values. For now
51642 only INIT bounds and NONE bounds are expected. More special
51643 values may be added later. */
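/* Concretely, given the argument pairs asserted below: INIT bounds are
   requested with LB == 0, UB == -1 and become the constant pair {0, 0},
   while NONE bounds (LB == -1, UB == 0) become {-1, -1}.  The second
   element is the complemented upper bound, matching the BIT_NOT_EXPR
   applied in ix86_initialize_bounds below.  */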
51644
51645 static tree
51646 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
51647 {
51648 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
51649 : build_zero_cst (pointer_sized_int_node);
51650 tree high = ub ? build_zero_cst (pointer_sized_int_node)
51651 : build_minus_one_cst (pointer_sized_int_node);
51652
51653 /* This function is supposed to be used to create INIT and
51654 NONE bounds only. */
51655 gcc_assert ((lb == 0 && ub == -1)
51656 || (lb == -1 && ub == 0));
51657
51658 return build_complex (NULL, low, high);
51659 }
51660
51661 /* Generate a list of statements STMTS to initialize pointer bounds
51662 variable VAR with bounds LB and UB. Return the number of generated
51663 statements. */
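/* In effect the two statements appended below amount to (writing
   pointer_sized_int_node as uintptr_t purely for illustration):

     ((uintptr_t *) &VAR)[0] = LB;
     ((uintptr_t *) &VAR)[1] = ~UB;

   i.e. the lower bound is stored as-is and the upper bound is stored in
   complemented form.  */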
51664
51665 static int
51666 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
51667 {
51668 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
51669 tree lhs, modify, var_p;
51670
51671 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
51672 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
51673
51674 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
51675 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
51676 append_to_statement_list (modify, stmts);
51677
51678 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
51679 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
51680 TYPE_SIZE_UNIT (pointer_sized_int_node)));
51681 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
51682 append_to_statement_list (modify, stmts);
51683
51684 return 2;
51685 }
51686
51687 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
51688 /* For i386, a common symbol is local only for non-PIE binaries.  For
51689    x86-64, a common symbol is local only for non-PIE binaries or if the
51690    linker supports copy relocations in PIE binaries.  */
51691
51692 static bool
51693 ix86_binds_local_p (const_tree exp)
51694 {
51695 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
51696 (!flag_pic
51697 || (TARGET_64BIT
51698 && HAVE_LD_PIE_COPYRELOC != 0)));
51699 }
51700 #endif
51701
51702 /* If MEM is in the form of [base+offset], extract the two parts of the
51703    address into BASE and OFFSET and return true; otherwise return false.  */
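/* For example, an address of the form (plus (reg R) (const_int 8)) yields
   BASE == (reg R) and OFFSET == (const_int 8); a bare register or
   SYMBOL_REF yields that rtx with OFFSET == const0_rtx; anything else
   makes the function return false.  */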
51704
51705 static bool
51706 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
51707 {
51708 rtx addr;
51709
51710 gcc_assert (MEM_P (mem));
51711
51712 addr = XEXP (mem, 0);
51713
51714 if (GET_CODE (addr) == CONST)
51715 addr = XEXP (addr, 0);
51716
51717 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
51718 {
51719 *base = addr;
51720 *offset = const0_rtx;
51721 return true;
51722 }
51723
51724 if (GET_CODE (addr) == PLUS
51725 && (REG_P (XEXP (addr, 0))
51726 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
51727 && CONST_INT_P (XEXP (addr, 1)))
51728 {
51729 *base = XEXP (addr, 0);
51730 *offset = XEXP (addr, 1);
51731 return true;
51732 }
51733
51734 return false;
51735 }
51736
51737 /* Given OPERANDS of consecutive load/store instructions, check whether we
51738    can merge them into a move multiple.  LOAD is true if they are load
51739    instructions.  MODE is the mode of the memory operands.  */
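/* Sketch of the check below: for SImode the function accepts, e.g., memory
   operands at 4(%esp) and 8(%esp) (same base, with the second offset equal
   to the first plus GET_MODE_SIZE (SImode)), and it additionally requires
   the two register operands to be the same register.  */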
51740
51741 bool
51742 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
51743 enum machine_mode mode)
51744 {
51745 HOST_WIDE_INT offval_1, offval_2, msize;
51746 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
51747
51748 if (load)
51749 {
51750 mem_1 = operands[1];
51751 mem_2 = operands[3];
51752 reg_1 = operands[0];
51753 reg_2 = operands[2];
51754 }
51755 else
51756 {
51757 mem_1 = operands[0];
51758 mem_2 = operands[2];
51759 reg_1 = operands[1];
51760 reg_2 = operands[3];
51761 }
51762
51763 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
51764
51765 if (REGNO (reg_1) != REGNO (reg_2))
51766 return false;
51767
51768 /* Check if the addresses are in the form of [base+offset]. */
51769 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
51770 return false;
51771 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
51772 return false;
51773
51774 /* Check if the bases are the same. */
51775 if (!rtx_equal_p (base_1, base_2))
51776 return false;
51777
51778 offval_1 = INTVAL (offset_1);
51779 offval_2 = INTVAL (offset_2);
51780 msize = GET_MODE_SIZE (mode);
51781   /* Check if mem_1 is adjacent to mem_2 and mem_1 has the lower address.  */
51782 if (offval_1 + msize != offval_2)
51783 return false;
51784
51785 return true;
51786 }
51787
51788 /* Initialize the GCC target structure. */
51789 #undef TARGET_RETURN_IN_MEMORY
51790 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
51791
51792 #undef TARGET_LEGITIMIZE_ADDRESS
51793 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
51794
51795 #undef TARGET_ATTRIBUTE_TABLE
51796 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
51797 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
51798 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
51799 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51800 # undef TARGET_MERGE_DECL_ATTRIBUTES
51801 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
51802 #endif
51803
51804 #undef TARGET_COMP_TYPE_ATTRIBUTES
51805 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
51806
51807 #undef TARGET_INIT_BUILTINS
51808 #define TARGET_INIT_BUILTINS ix86_init_builtins
51809 #undef TARGET_BUILTIN_DECL
51810 #define TARGET_BUILTIN_DECL ix86_builtin_decl
51811 #undef TARGET_EXPAND_BUILTIN
51812 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
51813
51814 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
51815 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
51816 ix86_builtin_vectorized_function
51817
51818 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
51819 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
51820
51821 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
51822 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
51823
51824 #undef TARGET_VECTORIZE_BUILTIN_GATHER
51825 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
51826
51827 #undef TARGET_BUILTIN_RECIPROCAL
51828 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
51829
51830 #undef TARGET_ASM_FUNCTION_EPILOGUE
51831 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
51832
51833 #undef TARGET_ENCODE_SECTION_INFO
51834 #ifndef SUBTARGET_ENCODE_SECTION_INFO
51835 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
51836 #else
51837 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
51838 #endif
51839
51840 #undef TARGET_ASM_OPEN_PAREN
51841 #define TARGET_ASM_OPEN_PAREN ""
51842 #undef TARGET_ASM_CLOSE_PAREN
51843 #define TARGET_ASM_CLOSE_PAREN ""
51844
51845 #undef TARGET_ASM_BYTE_OP
51846 #define TARGET_ASM_BYTE_OP ASM_BYTE
51847
51848 #undef TARGET_ASM_ALIGNED_HI_OP
51849 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
51850 #undef TARGET_ASM_ALIGNED_SI_OP
51851 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
51852 #ifdef ASM_QUAD
51853 #undef TARGET_ASM_ALIGNED_DI_OP
51854 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
51855 #endif
51856
51857 #undef TARGET_PROFILE_BEFORE_PROLOGUE
51858 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
51859
51860 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
51861 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
51862
51863 #undef TARGET_ASM_UNALIGNED_HI_OP
51864 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
51865 #undef TARGET_ASM_UNALIGNED_SI_OP
51866 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
51867 #undef TARGET_ASM_UNALIGNED_DI_OP
51868 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
51869
51870 #undef TARGET_PRINT_OPERAND
51871 #define TARGET_PRINT_OPERAND ix86_print_operand
51872 #undef TARGET_PRINT_OPERAND_ADDRESS
51873 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
51874 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
51875 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
51876 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
51877 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
51878
51879 #undef TARGET_SCHED_INIT_GLOBAL
51880 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
51881 #undef TARGET_SCHED_ADJUST_COST
51882 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
51883 #undef TARGET_SCHED_ISSUE_RATE
51884 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
51885 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
51886 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
51887 ia32_multipass_dfa_lookahead
51888 #undef TARGET_SCHED_MACRO_FUSION_P
51889 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
51890 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
51891 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
51892
51893 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
51894 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
51895
51896 #undef TARGET_MEMMODEL_CHECK
51897 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
51898
51899 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
51900 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
51901
51902 #ifdef HAVE_AS_TLS
51903 #undef TARGET_HAVE_TLS
51904 #define TARGET_HAVE_TLS true
51905 #endif
51906 #undef TARGET_CANNOT_FORCE_CONST_MEM
51907 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
51908 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
51909 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
51910
51911 #undef TARGET_DELEGITIMIZE_ADDRESS
51912 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
51913
51914 #undef TARGET_MS_BITFIELD_LAYOUT_P
51915 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
51916
51917 #if TARGET_MACHO
51918 #undef TARGET_BINDS_LOCAL_P
51919 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
51920 #else
51921 #undef TARGET_BINDS_LOCAL_P
51922 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
51923 #endif
51924 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51925 #undef TARGET_BINDS_LOCAL_P
51926 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
51927 #endif
51928
51929 #undef TARGET_ASM_OUTPUT_MI_THUNK
51930 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
51931 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
51932 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
51933
51934 #undef TARGET_ASM_FILE_START
51935 #define TARGET_ASM_FILE_START x86_file_start
51936
51937 #undef TARGET_OPTION_OVERRIDE
51938 #define TARGET_OPTION_OVERRIDE ix86_option_override
51939
51940 #undef TARGET_REGISTER_MOVE_COST
51941 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
51942 #undef TARGET_MEMORY_MOVE_COST
51943 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
51944 #undef TARGET_RTX_COSTS
51945 #define TARGET_RTX_COSTS ix86_rtx_costs
51946 #undef TARGET_ADDRESS_COST
51947 #define TARGET_ADDRESS_COST ix86_address_cost
51948
51949 #undef TARGET_FIXED_CONDITION_CODE_REGS
51950 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
51951 #undef TARGET_CC_MODES_COMPATIBLE
51952 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
51953
51954 #undef TARGET_MACHINE_DEPENDENT_REORG
51955 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
51956
51957 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
51958 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
51959
51960 #undef TARGET_BUILD_BUILTIN_VA_LIST
51961 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
51962
51963 #undef TARGET_FOLD_BUILTIN
51964 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
51965
51966 #undef TARGET_COMPARE_VERSION_PRIORITY
51967 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
51968
51969 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
51970 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
51971 ix86_generate_version_dispatcher_body
51972
51973 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
51974 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
51975 ix86_get_function_versions_dispatcher
51976
51977 #undef TARGET_ENUM_VA_LIST_P
51978 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
51979
51980 #undef TARGET_FN_ABI_VA_LIST
51981 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
51982
51983 #undef TARGET_CANONICAL_VA_LIST_TYPE
51984 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
51985
51986 #undef TARGET_EXPAND_BUILTIN_VA_START
51987 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
51988
51989 #undef TARGET_MD_ASM_ADJUST
51990 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
51991
51992 #undef TARGET_PROMOTE_PROTOTYPES
51993 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
51994 #undef TARGET_SETUP_INCOMING_VARARGS
51995 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
51996 #undef TARGET_MUST_PASS_IN_STACK
51997 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
51998 #undef TARGET_FUNCTION_ARG_ADVANCE
51999 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
52000 #undef TARGET_FUNCTION_ARG
52001 #define TARGET_FUNCTION_ARG ix86_function_arg
52002 #undef TARGET_INIT_PIC_REG
52003 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
52004 #undef TARGET_USE_PSEUDO_PIC_REG
52005 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
52006 #undef TARGET_FUNCTION_ARG_BOUNDARY
52007 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
52008 #undef TARGET_PASS_BY_REFERENCE
52009 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
52010 #undef TARGET_INTERNAL_ARG_POINTER
52011 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
52012 #undef TARGET_UPDATE_STACK_BOUNDARY
52013 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
52014 #undef TARGET_GET_DRAP_RTX
52015 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
52016 #undef TARGET_STRICT_ARGUMENT_NAMING
52017 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
52018 #undef TARGET_STATIC_CHAIN
52019 #define TARGET_STATIC_CHAIN ix86_static_chain
52020 #undef TARGET_TRAMPOLINE_INIT
52021 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
52022 #undef TARGET_RETURN_POPS_ARGS
52023 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
52024
52025 #undef TARGET_LEGITIMATE_COMBINED_INSN
52026 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
52027
52028 #undef TARGET_ASAN_SHADOW_OFFSET
52029 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
52030
52031 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
52032 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
52033
52034 #undef TARGET_SCALAR_MODE_SUPPORTED_P
52035 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
52036
52037 #undef TARGET_VECTOR_MODE_SUPPORTED_P
52038 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
52039
52040 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
52041 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
52042 ix86_libgcc_floating_mode_supported_p
52043
52044 #undef TARGET_C_MODE_FOR_SUFFIX
52045 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
52046
52047 #ifdef HAVE_AS_TLS
52048 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
52049 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
52050 #endif
52051
52052 #ifdef SUBTARGET_INSERT_ATTRIBUTES
52053 #undef TARGET_INSERT_ATTRIBUTES
52054 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
52055 #endif
52056
52057 #undef TARGET_MANGLE_TYPE
52058 #define TARGET_MANGLE_TYPE ix86_mangle_type
52059
52060 #if !TARGET_MACHO
52061 #undef TARGET_STACK_PROTECT_FAIL
52062 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
52063 #endif
52064
52065 #undef TARGET_FUNCTION_VALUE
52066 #define TARGET_FUNCTION_VALUE ix86_function_value
52067
52068 #undef TARGET_FUNCTION_VALUE_REGNO_P
52069 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
52070
52071 #undef TARGET_PROMOTE_FUNCTION_MODE
52072 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
52073
52074 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
52075 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
52076
52077 #undef TARGET_MEMBER_TYPE_FORCES_BLK
52078 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
52079
52080 #undef TARGET_INSTANTIATE_DECLS
52081 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
52082
52083 #undef TARGET_SECONDARY_RELOAD
52084 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
52085
52086 #undef TARGET_CLASS_MAX_NREGS
52087 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
52088
52089 #undef TARGET_PREFERRED_RELOAD_CLASS
52090 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
52091 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
52092 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
52093 #undef TARGET_CLASS_LIKELY_SPILLED_P
52094 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
52095
52096 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
52097 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
52098 ix86_builtin_vectorization_cost
52099 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
52100 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
52101 ix86_vectorize_vec_perm_const_ok
52102 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
52103 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
52104 ix86_preferred_simd_mode
52105 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
52106 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
52107 ix86_autovectorize_vector_sizes
52108 #undef TARGET_VECTORIZE_INIT_COST
52109 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
52110 #undef TARGET_VECTORIZE_ADD_STMT_COST
52111 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
52112 #undef TARGET_VECTORIZE_FINISH_COST
52113 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
52114 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
52115 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
52116
52117 #undef TARGET_SET_CURRENT_FUNCTION
52118 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
52119
52120 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
52121 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
52122
52123 #undef TARGET_OPTION_SAVE
52124 #define TARGET_OPTION_SAVE ix86_function_specific_save
52125
52126 #undef TARGET_OPTION_RESTORE
52127 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
52128
52129 #undef TARGET_OPTION_POST_STREAM_IN
52130 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
52131
52132 #undef TARGET_OPTION_PRINT
52133 #define TARGET_OPTION_PRINT ix86_function_specific_print
52134
52135 #undef TARGET_OPTION_FUNCTION_VERSIONS
52136 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
52137
52138 #undef TARGET_CAN_INLINE_P
52139 #define TARGET_CAN_INLINE_P ix86_can_inline_p
52140
52141 #undef TARGET_EXPAND_TO_RTL_HOOK
52142 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
52143
52144 #undef TARGET_LEGITIMATE_ADDRESS_P
52145 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
52146
52147 #undef TARGET_LRA_P
52148 #define TARGET_LRA_P hook_bool_void_true
52149
52150 #undef TARGET_REGISTER_PRIORITY
52151 #define TARGET_REGISTER_PRIORITY ix86_register_priority
52152
52153 #undef TARGET_REGISTER_USAGE_LEVELING_P
52154 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
52155
52156 #undef TARGET_LEGITIMATE_CONSTANT_P
52157 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
52158
52159 #undef TARGET_FRAME_POINTER_REQUIRED
52160 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
52161
52162 #undef TARGET_CAN_ELIMINATE
52163 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
52164
52165 #undef TARGET_EXTRA_LIVE_ON_ENTRY
52166 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
52167
52168 #undef TARGET_ASM_CODE_END
52169 #define TARGET_ASM_CODE_END ix86_code_end
52170
52171 #undef TARGET_CONDITIONAL_REGISTER_USAGE
52172 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
52173
52174 #if TARGET_MACHO
52175 #undef TARGET_INIT_LIBFUNCS
52176 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
52177 #endif
52178
52179 #undef TARGET_LOOP_UNROLL_ADJUST
52180 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
52181
52182 #undef TARGET_SPILL_CLASS
52183 #define TARGET_SPILL_CLASS ix86_spill_class
52184
52185 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
52186 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
52187 ix86_simd_clone_compute_vecsize_and_simdlen
52188
52189 #undef TARGET_SIMD_CLONE_ADJUST
52190 #define TARGET_SIMD_CLONE_ADJUST \
52191 ix86_simd_clone_adjust
52192
52193 #undef TARGET_SIMD_CLONE_USABLE
52194 #define TARGET_SIMD_CLONE_USABLE \
52195 ix86_simd_clone_usable
52196
52197 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
52198 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
52199 ix86_float_exceptions_rounding_supported_p
52200
52201 #undef TARGET_MODE_EMIT
52202 #define TARGET_MODE_EMIT ix86_emit_mode_set
52203
52204 #undef TARGET_MODE_NEEDED
52205 #define TARGET_MODE_NEEDED ix86_mode_needed
52206
52207 #undef TARGET_MODE_AFTER
52208 #define TARGET_MODE_AFTER ix86_mode_after
52209
52210 #undef TARGET_MODE_ENTRY
52211 #define TARGET_MODE_ENTRY ix86_mode_entry
52212
52213 #undef TARGET_MODE_EXIT
52214 #define TARGET_MODE_EXIT ix86_mode_exit
52215
52216 #undef TARGET_MODE_PRIORITY
52217 #define TARGET_MODE_PRIORITY ix86_mode_priority
52218
52219 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
52220 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
52221
52222 #undef TARGET_LOAD_BOUNDS_FOR_ARG
52223 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
52224
52225 #undef TARGET_STORE_BOUNDS_FOR_ARG
52226 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
52227
52228 #undef TARGET_LOAD_RETURNED_BOUNDS
52229 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
52230
52231 #undef TARGET_STORE_RETURNED_BOUNDS
52232 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
52233
52234 #undef TARGET_CHKP_BOUND_MODE
52235 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
52236
52237 #undef TARGET_BUILTIN_CHKP_FUNCTION
52238 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
52239
52240 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
52241 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
52242
52243 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
52244 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
52245
52246 #undef TARGET_CHKP_INITIALIZE_BOUNDS
52247 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
52248
52249 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
52250 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
52251
52252 #undef TARGET_OFFLOAD_OPTIONS
52253 #define TARGET_OFFLOAD_OPTIONS \
52254 ix86_offload_options
52255
52256 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
52257 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
52258
52259 struct gcc_target targetm = TARGET_INITIALIZER;
52260 \f
52261 #include "gt-i386.h"