Properly handle -miamcu and -march=lakemont
gcc.git: gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "cfghooks.h"
28 #include "cfgloop.h"
29 #include "df.h"
30 #include "tm_p.h"
31 #include "stringpool.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "regs.h"
35 #include "emit-rtl.h"
36 #include "recog.h"
37 #include "cgraph.h"
38 #include "diagnostic.h"
39 #include "cfgbuild.h"
40 #include "alias.h"
41 #include "fold-const.h"
42 #include "attribs.h"
43 #include "calls.h"
44 #include "stor-layout.h"
45 #include "varasm.h"
46 #include "output.h"
47 #include "insn-attr.h"
48 #include "flags.h"
49 #include "except.h"
50 #include "explow.h"
51 #include "expr.h"
52 #include "cfgrtl.h"
53 #include "common/common-target.h"
54 #include "langhooks.h"
55 #include "reload.h"
56 #include "gimplify.h"
57 #include "dwarf2.h"
58 #include "tm-constrs.h"
59 #include "params.h"
60 #include "cselib.h"
61 #include "sched-int.h"
62 #include "opts.h"
63 #include "tree-pass.h"
64 #include "context.h"
65 #include "pass_manager.h"
66 #include "target-globals.h"
67 #include "tree-vectorizer.h"
68 #include "shrink-wrap.h"
69 #include "builtins.h"
70 #include "rtl-iter.h"
71 #include "tree-iterator.h"
72 #include "tree-chkp.h"
73 #include "rtl-chkp.h"
74 #include "dbgcnt.h"
75
76 /* This file should be included last. */
77 #include "target-def.h"
78
79 static rtx legitimize_dllimport_symbol (rtx, bool);
80 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
81 static rtx legitimize_pe_coff_symbol (rtx, bool);
82
83 #ifndef CHECK_STACK_LIMIT
84 #define CHECK_STACK_LIMIT (-1)
85 #endif
86
87 /* Return the index of the given mode in the mult and division cost tables.  */
88 #define MODE_INDEX(mode) \
89 ((mode) == QImode ? 0 \
90 : (mode) == HImode ? 1 \
91 : (mode) == SImode ? 2 \
92 : (mode) == DImode ? 3 \
93 : 4)
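/* A hedged illustration (not necessarily the exact call sites elsewhere in
   this file): the per-mode multiply and divide cost arrays below have five
   entries (QI, HI, SI, DI, other) and are meant to be indexed as, e.g.,
   cost->mult_init[MODE_INDEX (mode)], so index 4 is the catch-all "other"
   slot for modes wider than DImode.  */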
94
95 /* Processor costs (relative to an add) */
96 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
97 #define COSTS_N_BYTES(N) ((N) * 2)
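/* Spelling out the arithmetic implied by the comment above: with
   COSTS_N_INSNS (N) == (N) * 4 and a 2-byte addition, COSTS_N_BYTES (2) == 4
   == COSTS_N_INSNS (1), so a 2-byte add in the size tables below carries the
   same weight as a single instruction does in the speed tables.  */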
98
99 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
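/* How to read the stringop_algs tables below (a sketch of the layout; the
   authoritative definition is struct stringop_algs in i386.h): each CPU has
   a two-element array, entry [0] used when generating 32-bit code and [1]
   when generating 64-bit code; the first field gives the algorithm for
   blocks of unknown size, and each {max, alg, noalign} triple handles blocks
   up to "max" bytes, with max == -1 covering everything larger.
   DUMMY_STRINGOP_ALGS simply fills the slot a given tuning never consults,
   e.g. the 64-bit slot of 32-bit-only CPUs.  */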
100
101 static stringop_algs ix86_size_memcpy[2] = {
102 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
103 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
104 static stringop_algs ix86_size_memset[2] = {
105 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
106 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
107
108 const
109 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
110 COSTS_N_BYTES (2), /* cost of an add instruction */
111 COSTS_N_BYTES (3), /* cost of a lea instruction */
112 COSTS_N_BYTES (2), /* variable shift costs */
113 COSTS_N_BYTES (3), /* constant shift costs */
114 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
115 COSTS_N_BYTES (3), /* HI */
116 COSTS_N_BYTES (3), /* SI */
117 COSTS_N_BYTES (3), /* DI */
118 COSTS_N_BYTES (5)}, /* other */
119 0, /* cost of multiply per each bit set */
120 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
121 COSTS_N_BYTES (3), /* HI */
122 COSTS_N_BYTES (3), /* SI */
123 COSTS_N_BYTES (3), /* DI */
124 COSTS_N_BYTES (5)}, /* other */
125 COSTS_N_BYTES (3), /* cost of movsx */
126 COSTS_N_BYTES (3), /* cost of movzx */
127 0, /* "large" insn */
128 2, /* MOVE_RATIO */
129 2, /* cost for loading QImode using movzbl */
130 {2, 2, 2}, /* cost of loading integer registers
131 in QImode, HImode and SImode.
132 Relative to reg-reg move (2). */
133 {2, 2, 2}, /* cost of storing integer registers */
134 2, /* cost of reg,reg fld/fst */
135 {2, 2, 2}, /* cost of loading fp registers
136 in SFmode, DFmode and XFmode */
137 {2, 2, 2}, /* cost of storing fp registers
138 in SFmode, DFmode and XFmode */
139 3, /* cost of moving MMX register */
140 {3, 3}, /* cost of loading MMX registers
141 in SImode and DImode */
142 {3, 3}, /* cost of storing MMX registers
143 in SImode and DImode */
144 3, /* cost of moving SSE register */
145 {3, 3, 3}, /* cost of loading SSE registers
146 in SImode, DImode and TImode */
147 {3, 3, 3}, /* cost of storing SSE registers
148 in SImode, DImode and TImode */
149 3, /* MMX or SSE register to integer */
150 0, /* size of l1 cache */
151 0, /* size of l2 cache */
152 0, /* size of prefetch block */
153 0, /* number of parallel prefetches */
154 2, /* Branch cost */
155 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
156 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
157 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
158 COSTS_N_BYTES (2), /* cost of FABS instruction. */
159 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
160 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
161 ix86_size_memcpy,
162 ix86_size_memset,
163 1, /* scalar_stmt_cost. */
164 1, /* scalar load_cost. */
165 1, /* scalar_store_cost. */
166 1, /* vec_stmt_cost. */
167 1, /* vec_to_scalar_cost. */
168 1, /* scalar_to_vec_cost. */
169 1, /* vec_align_load_cost. */
170 1, /* vec_unalign_load_cost. */
171 1, /* vec_store_cost. */
172 1, /* cond_taken_branch_cost. */
173 1, /* cond_not_taken_branch_cost. */
174 };
175
176 /* Processor costs (relative to an add) */
177 static stringop_algs i386_memcpy[2] = {
178 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
179 DUMMY_STRINGOP_ALGS};
180 static stringop_algs i386_memset[2] = {
181 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
182 DUMMY_STRINGOP_ALGS};
183
184 static const
185 struct processor_costs i386_cost = { /* 386 specific costs */
186 COSTS_N_INSNS (1), /* cost of an add instruction */
187 COSTS_N_INSNS (1), /* cost of a lea instruction */
188 COSTS_N_INSNS (3), /* variable shift costs */
189 COSTS_N_INSNS (2), /* constant shift costs */
190 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
191 COSTS_N_INSNS (6), /* HI */
192 COSTS_N_INSNS (6), /* SI */
193 COSTS_N_INSNS (6), /* DI */
194 COSTS_N_INSNS (6)}, /* other */
195 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
196 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
197 COSTS_N_INSNS (23), /* HI */
198 COSTS_N_INSNS (23), /* SI */
199 COSTS_N_INSNS (23), /* DI */
200 COSTS_N_INSNS (23)}, /* other */
201 COSTS_N_INSNS (3), /* cost of movsx */
202 COSTS_N_INSNS (2), /* cost of movzx */
203 15, /* "large" insn */
204 3, /* MOVE_RATIO */
205 4, /* cost for loading QImode using movzbl */
206 {2, 4, 2}, /* cost of loading integer registers
207 in QImode, HImode and SImode.
208 Relative to reg-reg move (2). */
209 {2, 4, 2}, /* cost of storing integer registers */
210 2, /* cost of reg,reg fld/fst */
211 {8, 8, 8}, /* cost of loading fp registers
212 in SFmode, DFmode and XFmode */
213 {8, 8, 8}, /* cost of storing fp registers
214 in SFmode, DFmode and XFmode */
215 2, /* cost of moving MMX register */
216 {4, 8}, /* cost of loading MMX registers
217 in SImode and DImode */
218 {4, 8}, /* cost of storing MMX registers
219 in SImode and DImode */
220 2, /* cost of moving SSE register */
221 {4, 8, 16}, /* cost of loading SSE registers
222 in SImode, DImode and TImode */
223 {4, 8, 16}, /* cost of storing SSE registers
224 in SImode, DImode and TImode */
225 3, /* MMX or SSE register to integer */
226 0, /* size of l1 cache */
227 0, /* size of l2 cache */
228 0, /* size of prefetch block */
229 0, /* number of parallel prefetches */
230 1, /* Branch cost */
231 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
232 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
233 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
234 COSTS_N_INSNS (22), /* cost of FABS instruction. */
235 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
236 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
237 i386_memcpy,
238 i386_memset,
239 1, /* scalar_stmt_cost. */
240 1, /* scalar load_cost. */
241 1, /* scalar_store_cost. */
242 1, /* vec_stmt_cost. */
243 1, /* vec_to_scalar_cost. */
244 1, /* scalar_to_vec_cost. */
245 1, /* vec_align_load_cost. */
246 2, /* vec_unalign_load_cost. */
247 1, /* vec_store_cost. */
248 3, /* cond_taken_branch_cost. */
249 1, /* cond_not_taken_branch_cost. */
250 };
251
252 static stringop_algs i486_memcpy[2] = {
253 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
254 DUMMY_STRINGOP_ALGS};
255 static stringop_algs i486_memset[2] = {
256 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
257 DUMMY_STRINGOP_ALGS};
258
259 static const
260 struct processor_costs i486_cost = { /* 486 specific costs */
261 COSTS_N_INSNS (1), /* cost of an add instruction */
262 COSTS_N_INSNS (1), /* cost of a lea instruction */
263 COSTS_N_INSNS (3), /* variable shift costs */
264 COSTS_N_INSNS (2), /* constant shift costs */
265 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
266 COSTS_N_INSNS (12), /* HI */
267 COSTS_N_INSNS (12), /* SI */
268 COSTS_N_INSNS (12), /* DI */
269 COSTS_N_INSNS (12)}, /* other */
270 1, /* cost of multiply per each bit set */
271 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
272 COSTS_N_INSNS (40), /* HI */
273 COSTS_N_INSNS (40), /* SI */
274 COSTS_N_INSNS (40), /* DI */
275 COSTS_N_INSNS (40)}, /* other */
276 COSTS_N_INSNS (3), /* cost of movsx */
277 COSTS_N_INSNS (2), /* cost of movzx */
278 15, /* "large" insn */
279 3, /* MOVE_RATIO */
280 4, /* cost for loading QImode using movzbl */
281 {2, 4, 2}, /* cost of loading integer registers
282 in QImode, HImode and SImode.
283 Relative to reg-reg move (2). */
284 {2, 4, 2}, /* cost of storing integer registers */
285 2, /* cost of reg,reg fld/fst */
286 {8, 8, 8}, /* cost of loading fp registers
287 in SFmode, DFmode and XFmode */
288 {8, 8, 8}, /* cost of storing fp registers
289 in SFmode, DFmode and XFmode */
290 2, /* cost of moving MMX register */
291 {4, 8}, /* cost of loading MMX registers
292 in SImode and DImode */
293 {4, 8}, /* cost of storing MMX registers
294 in SImode and DImode */
295 2, /* cost of moving SSE register */
296 {4, 8, 16}, /* cost of loading SSE registers
297 in SImode, DImode and TImode */
298 {4, 8, 16}, /* cost of storing SSE registers
299 in SImode, DImode and TImode */
300 3, /* MMX or SSE register to integer */
301 4, /* size of l1 cache. 486 has 8kB cache
302 shared for code and data, so 4kB is
303 not really precise. */
304 4, /* size of l2 cache */
305 0, /* size of prefetch block */
306 0, /* number of parallel prefetches */
307 1, /* Branch cost */
308 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
309 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
310 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
311 COSTS_N_INSNS (3), /* cost of FABS instruction. */
312 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
313 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
314 i486_memcpy,
315 i486_memset,
316 1, /* scalar_stmt_cost. */
317 1, /* scalar load_cost. */
318 1, /* scalar_store_cost. */
319 1, /* vec_stmt_cost. */
320 1, /* vec_to_scalar_cost. */
321 1, /* scalar_to_vec_cost. */
322 1, /* vec_align_load_cost. */
323 2, /* vec_unalign_load_cost. */
324 1, /* vec_store_cost. */
325 3, /* cond_taken_branch_cost. */
326 1, /* cond_not_taken_branch_cost. */
327 };
328
329 static stringop_algs pentium_memcpy[2] = {
330 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
331 DUMMY_STRINGOP_ALGS};
332 static stringop_algs pentium_memset[2] = {
333 {libcall, {{-1, rep_prefix_4_byte, false}}},
334 DUMMY_STRINGOP_ALGS};
335
336 static const
337 struct processor_costs pentium_cost = {
338 COSTS_N_INSNS (1), /* cost of an add instruction */
339 COSTS_N_INSNS (1), /* cost of a lea instruction */
340 COSTS_N_INSNS (4), /* variable shift costs */
341 COSTS_N_INSNS (1), /* constant shift costs */
342 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
343 COSTS_N_INSNS (11), /* HI */
344 COSTS_N_INSNS (11), /* SI */
345 COSTS_N_INSNS (11), /* DI */
346 COSTS_N_INSNS (11)}, /* other */
347 0, /* cost of multiply per each bit set */
348 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
349 COSTS_N_INSNS (25), /* HI */
350 COSTS_N_INSNS (25), /* SI */
351 COSTS_N_INSNS (25), /* DI */
352 COSTS_N_INSNS (25)}, /* other */
353 COSTS_N_INSNS (3), /* cost of movsx */
354 COSTS_N_INSNS (2), /* cost of movzx */
355 8, /* "large" insn */
356 6, /* MOVE_RATIO */
357 6, /* cost for loading QImode using movzbl */
358 {2, 4, 2}, /* cost of loading integer registers
359 in QImode, HImode and SImode.
360 Relative to reg-reg move (2). */
361 {2, 4, 2}, /* cost of storing integer registers */
362 2, /* cost of reg,reg fld/fst */
363 {2, 2, 6}, /* cost of loading fp registers
364 in SFmode, DFmode and XFmode */
365 {4, 4, 6}, /* cost of storing fp registers
366 in SFmode, DFmode and XFmode */
367 8, /* cost of moving MMX register */
368 {8, 8}, /* cost of loading MMX registers
369 in SImode and DImode */
370 {8, 8}, /* cost of storing MMX registers
371 in SImode and DImode */
372 2, /* cost of moving SSE register */
373 {4, 8, 16}, /* cost of loading SSE registers
374 in SImode, DImode and TImode */
375 {4, 8, 16}, /* cost of storing SSE registers
376 in SImode, DImode and TImode */
377 3, /* MMX or SSE register to integer */
378 8, /* size of l1 cache. */
379 8, /* size of l2 cache */
380 0, /* size of prefetch block */
381 0, /* number of parallel prefetches */
382 2, /* Branch cost */
383 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
384 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
385 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
386 COSTS_N_INSNS (1), /* cost of FABS instruction. */
387 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
388 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
389 pentium_memcpy,
390 pentium_memset,
391 1, /* scalar_stmt_cost. */
392 1, /* scalar load_cost. */
393 1, /* scalar_store_cost. */
394 1, /* vec_stmt_cost. */
395 1, /* vec_to_scalar_cost. */
396 1, /* scalar_to_vec_cost. */
397 1, /* vec_align_load_cost. */
398 2, /* vec_unalign_load_cost. */
399 1, /* vec_store_cost. */
400 3, /* cond_taken_branch_cost. */
401 1, /* cond_not_taken_branch_cost. */
402 };
403
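/* Costs for -march=lakemont (the subject of this change).  The table below
   reuses pentium_memcpy and pentium_memset and otherwise closely follows
   pentium_cost, the main differences being the lea cost (set one unit above
   an add, which presumably biases expansion toward add), the variable shift
   cost and MOVE_RATIO.  */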
404 static const
405 struct processor_costs lakemont_cost = {
406 COSTS_N_INSNS (1), /* cost of an add instruction */
407 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
408 COSTS_N_INSNS (1), /* variable shift costs */
409 COSTS_N_INSNS (1), /* constant shift costs */
410 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
411 COSTS_N_INSNS (11), /* HI */
412 COSTS_N_INSNS (11), /* SI */
413 COSTS_N_INSNS (11), /* DI */
414 COSTS_N_INSNS (11)}, /* other */
415 0, /* cost of multiply per each bit set */
416 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
417 COSTS_N_INSNS (25), /* HI */
418 COSTS_N_INSNS (25), /* SI */
419 COSTS_N_INSNS (25), /* DI */
420 COSTS_N_INSNS (25)}, /* other */
421 COSTS_N_INSNS (3), /* cost of movsx */
422 COSTS_N_INSNS (2), /* cost of movzx */
423 8, /* "large" insn */
424 9, /* MOVE_RATIO */
425 6, /* cost for loading QImode using movzbl */
426 {2, 4, 2}, /* cost of loading integer registers
427 in QImode, HImode and SImode.
428 Relative to reg-reg move (2). */
429 {2, 4, 2}, /* cost of storing integer registers */
430 2, /* cost of reg,reg fld/fst */
431 {2, 2, 6}, /* cost of loading fp registers
432 in SFmode, DFmode and XFmode */
433 {4, 4, 6}, /* cost of storing fp registers
434 in SFmode, DFmode and XFmode */
435 8, /* cost of moving MMX register */
436 {8, 8}, /* cost of loading MMX registers
437 in SImode and DImode */
438 {8, 8}, /* cost of storing MMX registers
439 in SImode and DImode */
440 2, /* cost of moving SSE register */
441 {4, 8, 16}, /* cost of loading SSE registers
442 in SImode, DImode and TImode */
443 {4, 8, 16}, /* cost of storing SSE registers
444 in SImode, DImode and TImode */
445 3, /* MMX or SSE register to integer */
446 8, /* size of l1 cache. */
447 8, /* size of l2 cache */
448 0, /* size of prefetch block */
449 0, /* number of parallel prefetches */
450 2, /* Branch cost */
451 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
452 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
453 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
454 COSTS_N_INSNS (1), /* cost of FABS instruction. */
455 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
456 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
457 pentium_memcpy,
458 pentium_memset,
459 1, /* scalar_stmt_cost. */
460 1, /* scalar load_cost. */
461 1, /* scalar_store_cost. */
462 1, /* vec_stmt_cost. */
463 1, /* vec_to_scalar_cost. */
464 1, /* scalar_to_vec_cost. */
465 1, /* vec_align_load_cost. */
466 2, /* vec_unalign_load_cost. */
467 1, /* vec_store_cost. */
468 3, /* cond_taken_branch_cost. */
469 1, /* cond_not_taken_branch_cost. */
470 };
471
472 /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes
473    (we ensure the alignment).  For small blocks an inline loop is still a
474    noticeable win; for bigger blocks either rep movsl or rep movsb is the
475    way to go.  Rep movsb apparently has a more expensive startup time in the
476    CPU, but after 4K the difference is down in the noise.  */
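/* Read concretely (following the layout sketch near DUMMY_STRINGOP_ALGS):
   in the 32-bit pentiumpro_memcpy entry below, blocks up to 128 bytes use an
   inline loop, up to 1024 bytes an unrolled loop, up to 8192 bytes rep movsl,
   anything larger rep movsb, and blocks of unknown size default to
   rep movsl.  */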
477 static stringop_algs pentiumpro_memcpy[2] = {
478 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
479 {8192, rep_prefix_4_byte, false},
480 {-1, rep_prefix_1_byte, false}}},
481 DUMMY_STRINGOP_ALGS};
482 static stringop_algs pentiumpro_memset[2] = {
483 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
484 {8192, rep_prefix_4_byte, false},
485 {-1, libcall, false}}},
486 DUMMY_STRINGOP_ALGS};
487 static const
488 struct processor_costs pentiumpro_cost = {
489 COSTS_N_INSNS (1), /* cost of an add instruction */
490 COSTS_N_INSNS (1), /* cost of a lea instruction */
491 COSTS_N_INSNS (1), /* variable shift costs */
492 COSTS_N_INSNS (1), /* constant shift costs */
493 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
494 COSTS_N_INSNS (4), /* HI */
495 COSTS_N_INSNS (4), /* SI */
496 COSTS_N_INSNS (4), /* DI */
497 COSTS_N_INSNS (4)}, /* other */
498 0, /* cost of multiply per each bit set */
499 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
500 COSTS_N_INSNS (17), /* HI */
501 COSTS_N_INSNS (17), /* SI */
502 COSTS_N_INSNS (17), /* DI */
503 COSTS_N_INSNS (17)}, /* other */
504 COSTS_N_INSNS (1), /* cost of movsx */
505 COSTS_N_INSNS (1), /* cost of movzx */
506 8, /* "large" insn */
507 6, /* MOVE_RATIO */
508 2, /* cost for loading QImode using movzbl */
509 {4, 4, 4}, /* cost of loading integer registers
510 in QImode, HImode and SImode.
511 Relative to reg-reg move (2). */
512 {2, 2, 2}, /* cost of storing integer registers */
513 2, /* cost of reg,reg fld/fst */
514 {2, 2, 6}, /* cost of loading fp registers
515 in SFmode, DFmode and XFmode */
516 {4, 4, 6}, /* cost of storing fp registers
517 in SFmode, DFmode and XFmode */
518 2, /* cost of moving MMX register */
519 {2, 2}, /* cost of loading MMX registers
520 in SImode and DImode */
521 {2, 2}, /* cost of storing MMX registers
522 in SImode and DImode */
523 2, /* cost of moving SSE register */
524 {2, 2, 8}, /* cost of loading SSE registers
525 in SImode, DImode and TImode */
526 {2, 2, 8}, /* cost of storing SSE registers
527 in SImode, DImode and TImode */
528 3, /* MMX or SSE register to integer */
529 8, /* size of l1 cache. */
530 256, /* size of l2 cache */
531 32, /* size of prefetch block */
532 6, /* number of parallel prefetches */
533 2, /* Branch cost */
534 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
535 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
536 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
537 COSTS_N_INSNS (2), /* cost of FABS instruction. */
538 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
539 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
540 pentiumpro_memcpy,
541 pentiumpro_memset,
542 1, /* scalar_stmt_cost. */
543 1, /* scalar load_cost. */
544 1, /* scalar_store_cost. */
545 1, /* vec_stmt_cost. */
546 1, /* vec_to_scalar_cost. */
547 1, /* scalar_to_vec_cost. */
548 1, /* vec_align_load_cost. */
549 2, /* vec_unalign_load_cost. */
550 1, /* vec_store_cost. */
551 3, /* cond_taken_branch_cost. */
552 1, /* cond_not_taken_branch_cost. */
553 };
554
555 static stringop_algs geode_memcpy[2] = {
556 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
557 DUMMY_STRINGOP_ALGS};
558 static stringop_algs geode_memset[2] = {
559 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
560 DUMMY_STRINGOP_ALGS};
561 static const
562 struct processor_costs geode_cost = {
563 COSTS_N_INSNS (1), /* cost of an add instruction */
564 COSTS_N_INSNS (1), /* cost of a lea instruction */
565 COSTS_N_INSNS (2), /* variable shift costs */
566 COSTS_N_INSNS (1), /* constant shift costs */
567 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
568 COSTS_N_INSNS (4), /* HI */
569 COSTS_N_INSNS (7), /* SI */
570 COSTS_N_INSNS (7), /* DI */
571 COSTS_N_INSNS (7)}, /* other */
572 0, /* cost of multiply per each bit set */
573 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
574 COSTS_N_INSNS (23), /* HI */
575 COSTS_N_INSNS (39), /* SI */
576 COSTS_N_INSNS (39), /* DI */
577 COSTS_N_INSNS (39)}, /* other */
578 COSTS_N_INSNS (1), /* cost of movsx */
579 COSTS_N_INSNS (1), /* cost of movzx */
580 8, /* "large" insn */
581 4, /* MOVE_RATIO */
582 1, /* cost for loading QImode using movzbl */
583 {1, 1, 1}, /* cost of loading integer registers
584 in QImode, HImode and SImode.
585 Relative to reg-reg move (2). */
586 {1, 1, 1}, /* cost of storing integer registers */
587 1, /* cost of reg,reg fld/fst */
588 {1, 1, 1}, /* cost of loading fp registers
589 in SFmode, DFmode and XFmode */
590 {4, 6, 6}, /* cost of storing fp registers
591 in SFmode, DFmode and XFmode */
592
593 1, /* cost of moving MMX register */
594 {1, 1}, /* cost of loading MMX registers
595 in SImode and DImode */
596 {1, 1}, /* cost of storing MMX registers
597 in SImode and DImode */
598 1, /* cost of moving SSE register */
599 {1, 1, 1}, /* cost of loading SSE registers
600 in SImode, DImode and TImode */
601 {1, 1, 1}, /* cost of storing SSE registers
602 in SImode, DImode and TImode */
603 1, /* MMX or SSE register to integer */
604 64, /* size of l1 cache. */
605 128, /* size of l2 cache. */
606 32, /* size of prefetch block */
607 1, /* number of parallel prefetches */
608 1, /* Branch cost */
609 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
610 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
611 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
612 COSTS_N_INSNS (1), /* cost of FABS instruction. */
613 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
614 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
615 geode_memcpy,
616 geode_memset,
617 1, /* scalar_stmt_cost. */
618 1, /* scalar load_cost. */
619 1, /* scalar_store_cost. */
620 1, /* vec_stmt_cost. */
621 1, /* vec_to_scalar_cost. */
622 1, /* scalar_to_vec_cost. */
623 1, /* vec_align_load_cost. */
624 2, /* vec_unalign_load_cost. */
625 1, /* vec_store_cost. */
626 3, /* cond_taken_branch_cost. */
627 1, /* cond_not_taken_branch_cost. */
628 };
629
630 static stringop_algs k6_memcpy[2] = {
631 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
632 DUMMY_STRINGOP_ALGS};
633 static stringop_algs k6_memset[2] = {
634 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
635 DUMMY_STRINGOP_ALGS};
636 static const
637 struct processor_costs k6_cost = {
638 COSTS_N_INSNS (1), /* cost of an add instruction */
639 COSTS_N_INSNS (2), /* cost of a lea instruction */
640 COSTS_N_INSNS (1), /* variable shift costs */
641 COSTS_N_INSNS (1), /* constant shift costs */
642 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
643 COSTS_N_INSNS (3), /* HI */
644 COSTS_N_INSNS (3), /* SI */
645 COSTS_N_INSNS (3), /* DI */
646 COSTS_N_INSNS (3)}, /* other */
647 0, /* cost of multiply per each bit set */
648 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
649 COSTS_N_INSNS (18), /* HI */
650 COSTS_N_INSNS (18), /* SI */
651 COSTS_N_INSNS (18), /* DI */
652 COSTS_N_INSNS (18)}, /* other */
653 COSTS_N_INSNS (2), /* cost of movsx */
654 COSTS_N_INSNS (2), /* cost of movzx */
655 8, /* "large" insn */
656 4, /* MOVE_RATIO */
657 3, /* cost for loading QImode using movzbl */
658 {4, 5, 4}, /* cost of loading integer registers
659 in QImode, HImode and SImode.
660 Relative to reg-reg move (2). */
661 {2, 3, 2}, /* cost of storing integer registers */
662 4, /* cost of reg,reg fld/fst */
663 {6, 6, 6}, /* cost of loading fp registers
664 in SFmode, DFmode and XFmode */
665 {4, 4, 4}, /* cost of storing fp registers
666 in SFmode, DFmode and XFmode */
667 2, /* cost of moving MMX register */
668 {2, 2}, /* cost of loading MMX registers
669 in SImode and DImode */
670 {2, 2}, /* cost of storing MMX registers
671 in SImode and DImode */
672 2, /* cost of moving SSE register */
673 {2, 2, 8}, /* cost of loading SSE registers
674 in SImode, DImode and TImode */
675 {2, 2, 8}, /* cost of storing SSE registers
676 in SImode, DImode and TImode */
677 6, /* MMX or SSE register to integer */
678 32, /* size of l1 cache. */
679 32, /* size of l2 cache. Some models
680 have integrated l2 cache, but
681 optimizing for k6 is not important
682 enough to worry about that. */
683 32, /* size of prefetch block */
684 1, /* number of parallel prefetches */
685 1, /* Branch cost */
686 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
687 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
688 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
689 COSTS_N_INSNS (2), /* cost of FABS instruction. */
690 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
691 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
692 k6_memcpy,
693 k6_memset,
694 1, /* scalar_stmt_cost. */
695 1, /* scalar load_cost. */
696 1, /* scalar_store_cost. */
697 1, /* vec_stmt_cost. */
698 1, /* vec_to_scalar_cost. */
699 1, /* scalar_to_vec_cost. */
700 1, /* vec_align_load_cost. */
701 2, /* vec_unalign_load_cost. */
702 1, /* vec_store_cost. */
703 3, /* cond_taken_branch_cost. */
704 1, /* cond_not_taken_branch_cost. */
705 };
706
707 /* For some reason, Athlon deals better with the REP prefix (relative to
708    loops) than K8 does.  Alignment becomes important after 8 bytes for
709    memcpy and 128 bytes for memset.  */
710 static stringop_algs athlon_memcpy[2] = {
711 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
712 DUMMY_STRINGOP_ALGS};
713 static stringop_algs athlon_memset[2] = {
714 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
715 DUMMY_STRINGOP_ALGS};
716 static const
717 struct processor_costs athlon_cost = {
718 COSTS_N_INSNS (1), /* cost of an add instruction */
719 COSTS_N_INSNS (2), /* cost of a lea instruction */
720 COSTS_N_INSNS (1), /* variable shift costs */
721 COSTS_N_INSNS (1), /* constant shift costs */
722 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
723 COSTS_N_INSNS (5), /* HI */
724 COSTS_N_INSNS (5), /* SI */
725 COSTS_N_INSNS (5), /* DI */
726 COSTS_N_INSNS (5)}, /* other */
727 0, /* cost of multiply per each bit set */
728 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
729 COSTS_N_INSNS (26), /* HI */
730 COSTS_N_INSNS (42), /* SI */
731 COSTS_N_INSNS (74), /* DI */
732 COSTS_N_INSNS (74)}, /* other */
733 COSTS_N_INSNS (1), /* cost of movsx */
734 COSTS_N_INSNS (1), /* cost of movzx */
735 8, /* "large" insn */
736 9, /* MOVE_RATIO */
737 4, /* cost for loading QImode using movzbl */
738 {3, 4, 3}, /* cost of loading integer registers
739 in QImode, HImode and SImode.
740 Relative to reg-reg move (2). */
741 {3, 4, 3}, /* cost of storing integer registers */
742 4, /* cost of reg,reg fld/fst */
743 {4, 4, 12}, /* cost of loading fp registers
744 in SFmode, DFmode and XFmode */
745 {6, 6, 8}, /* cost of storing fp registers
746 in SFmode, DFmode and XFmode */
747 2, /* cost of moving MMX register */
748 {4, 4}, /* cost of loading MMX registers
749 in SImode and DImode */
750 {4, 4}, /* cost of storing MMX registers
751 in SImode and DImode */
752 2, /* cost of moving SSE register */
753 {4, 4, 6}, /* cost of loading SSE registers
754 in SImode, DImode and TImode */
755 {4, 4, 5}, /* cost of storing SSE registers
756 in SImode, DImode and TImode */
757 5, /* MMX or SSE register to integer */
758 64, /* size of l1 cache. */
759 256, /* size of l2 cache. */
760 64, /* size of prefetch block */
761 6, /* number of parallel prefetches */
762 5, /* Branch cost */
763 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
764 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
765 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
766 COSTS_N_INSNS (2), /* cost of FABS instruction. */
767 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
768 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
769 athlon_memcpy,
770 athlon_memset,
771 1, /* scalar_stmt_cost. */
772 1, /* scalar load_cost. */
773 1, /* scalar_store_cost. */
774 1, /* vec_stmt_cost. */
775 1, /* vec_to_scalar_cost. */
776 1, /* scalar_to_vec_cost. */
777 1, /* vec_align_load_cost. */
778 2, /* vec_unalign_load_cost. */
779 1, /* vec_store_cost. */
780 3, /* cond_taken_branch_cost. */
781 1, /* cond_not_taken_branch_cost. */
782 };
783
784 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
785    small blocks it is better to use a loop.  For large blocks, a libcall can
786    do nontemporal accesses and beat inlining considerably.  */
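/* For the 64-bit k8_memcpy entry below this means (again per the layout
   sketch above): blocks up to 16 bytes use an inline loop, up to 8192 bytes
   rep movsq, and larger or unknown-size blocks call the library memcpy.  */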
787 static stringop_algs k8_memcpy[2] = {
788 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
789 {-1, rep_prefix_4_byte, false}}},
790 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
791 {-1, libcall, false}}}};
792 static stringop_algs k8_memset[2] = {
793 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
794 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
795 {libcall, {{48, unrolled_loop, false},
796 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
797 static const
798 struct processor_costs k8_cost = {
799 COSTS_N_INSNS (1), /* cost of an add instruction */
800 COSTS_N_INSNS (2), /* cost of a lea instruction */
801 COSTS_N_INSNS (1), /* variable shift costs */
802 COSTS_N_INSNS (1), /* constant shift costs */
803 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
804 COSTS_N_INSNS (4), /* HI */
805 COSTS_N_INSNS (3), /* SI */
806 COSTS_N_INSNS (4), /* DI */
807 COSTS_N_INSNS (5)}, /* other */
808 0, /* cost of multiply per each bit set */
809 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
810 COSTS_N_INSNS (26), /* HI */
811 COSTS_N_INSNS (42), /* SI */
812 COSTS_N_INSNS (74), /* DI */
813 COSTS_N_INSNS (74)}, /* other */
814 COSTS_N_INSNS (1), /* cost of movsx */
815 COSTS_N_INSNS (1), /* cost of movzx */
816 8, /* "large" insn */
817 9, /* MOVE_RATIO */
818 4, /* cost for loading QImode using movzbl */
819 {3, 4, 3}, /* cost of loading integer registers
820 in QImode, HImode and SImode.
821 Relative to reg-reg move (2). */
822 {3, 4, 3}, /* cost of storing integer registers */
823 4, /* cost of reg,reg fld/fst */
824 {4, 4, 12}, /* cost of loading fp registers
825 in SFmode, DFmode and XFmode */
826 {6, 6, 8}, /* cost of storing fp registers
827 in SFmode, DFmode and XFmode */
828 2, /* cost of moving MMX register */
829 {3, 3}, /* cost of loading MMX registers
830 in SImode and DImode */
831 {4, 4}, /* cost of storing MMX registers
832 in SImode and DImode */
833 2, /* cost of moving SSE register */
834 {4, 3, 6}, /* cost of loading SSE registers
835 in SImode, DImode and TImode */
836 {4, 4, 5}, /* cost of storing SSE registers
837 in SImode, DImode and TImode */
838 5, /* MMX or SSE register to integer */
839 64, /* size of l1 cache. */
840 512, /* size of l2 cache. */
841 64, /* size of prefetch block */
842 /* New AMD processors never drop prefetches; if they cannot be performed
843    immediately, they are queued.  We set the number of simultaneous
844    prefetches to a large constant to reflect this (it is probably not a good
845    idea not to limit the number of prefetches at all, as their execution
846    also takes some time).  */
847 100, /* number of parallel prefetches */
848 3, /* Branch cost */
849 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
850 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
851 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
852 COSTS_N_INSNS (2), /* cost of FABS instruction. */
853 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
854 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
855
856 k8_memcpy,
857 k8_memset,
858 4, /* scalar_stmt_cost. */
859 2, /* scalar load_cost. */
860 2, /* scalar_store_cost. */
861 5, /* vec_stmt_cost. */
862 0, /* vec_to_scalar_cost. */
863 2, /* scalar_to_vec_cost. */
864 2, /* vec_align_load_cost. */
865 3, /* vec_unalign_load_cost. */
866 3, /* vec_store_cost. */
867 3, /* cond_taken_branch_cost. */
868 2, /* cond_not_taken_branch_cost. */
869 };
870
871 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
872    for very small blocks it is better to use a loop.  For large blocks, a
873    libcall can do nontemporal accesses and beat inlining considerably.  */
874 static stringop_algs amdfam10_memcpy[2] = {
875 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
876 {-1, rep_prefix_4_byte, false}}},
877 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
878 {-1, libcall, false}}}};
879 static stringop_algs amdfam10_memset[2] = {
880 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
881 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
882 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
883 {-1, libcall, false}}}};
884 struct processor_costs amdfam10_cost = {
885 COSTS_N_INSNS (1), /* cost of an add instruction */
886 COSTS_N_INSNS (2), /* cost of a lea instruction */
887 COSTS_N_INSNS (1), /* variable shift costs */
888 COSTS_N_INSNS (1), /* constant shift costs */
889 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
890 COSTS_N_INSNS (4), /* HI */
891 COSTS_N_INSNS (3), /* SI */
892 COSTS_N_INSNS (4), /* DI */
893 COSTS_N_INSNS (5)}, /* other */
894 0, /* cost of multiply per each bit set */
895 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
896 COSTS_N_INSNS (35), /* HI */
897 COSTS_N_INSNS (51), /* SI */
898 COSTS_N_INSNS (83), /* DI */
899 COSTS_N_INSNS (83)}, /* other */
900 COSTS_N_INSNS (1), /* cost of movsx */
901 COSTS_N_INSNS (1), /* cost of movzx */
902 8, /* "large" insn */
903 9, /* MOVE_RATIO */
904 4, /* cost for loading QImode using movzbl */
905 {3, 4, 3}, /* cost of loading integer registers
906 in QImode, HImode and SImode.
907 Relative to reg-reg move (2). */
908 {3, 4, 3}, /* cost of storing integer registers */
909 4, /* cost of reg,reg fld/fst */
910 {4, 4, 12}, /* cost of loading fp registers
911 in SFmode, DFmode and XFmode */
912 {6, 6, 8}, /* cost of storing fp registers
913 in SFmode, DFmode and XFmode */
914 2, /* cost of moving MMX register */
915 {3, 3}, /* cost of loading MMX registers
916 in SImode and DImode */
917 {4, 4}, /* cost of storing MMX registers
918 in SImode and DImode */
919 2, /* cost of moving SSE register */
920 {4, 4, 3}, /* cost of loading SSE registers
921 in SImode, DImode and TImode */
922 {4, 4, 5}, /* cost of storing SSE registers
923 in SImode, DImode and TImode */
924 3, /* MMX or SSE register to integer */
925 /* On K8:
926 MOVD reg64, xmmreg Double FSTORE 4
927 MOVD reg32, xmmreg Double FSTORE 4
928 On AMDFAM10:
929 MOVD reg64, xmmreg Double FADD 3
930 1/1 1/1
931 MOVD reg32, xmmreg Double FADD 3
932 1/1 1/1 */
933 64, /* size of l1 cache. */
934 512, /* size of l2 cache. */
935 64, /* size of prefetch block */
936 /* New AMD processors never drop prefetches; if they cannot be performed
937    immediately, they are queued.  We set the number of simultaneous
938    prefetches to a large constant to reflect this (it is probably not a good
939    idea not to limit the number of prefetches at all, as their execution
940    also takes some time).  */
941 100, /* number of parallel prefetches */
942 2, /* Branch cost */
943 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
944 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
945 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
946 COSTS_N_INSNS (2), /* cost of FABS instruction. */
947 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
948 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
949
950 amdfam10_memcpy,
951 amdfam10_memset,
952 4, /* scalar_stmt_cost. */
953 2, /* scalar load_cost. */
954 2, /* scalar_store_cost. */
955 6, /* vec_stmt_cost. */
956 0, /* vec_to_scalar_cost. */
957 2, /* scalar_to_vec_cost. */
958 2, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 2, /* vec_store_cost. */
961 2, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
963 };
964
965 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
966    very small blocks it is better to use a loop.  For large blocks, a libcall
967    can do nontemporal accesses and beat inlining considerably.  */
968 static stringop_algs bdver1_memcpy[2] = {
969 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
970 {-1, rep_prefix_4_byte, false}}},
971 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
972 {-1, libcall, false}}}};
973 static stringop_algs bdver1_memset[2] = {
974 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
975 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
976 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
977 {-1, libcall, false}}}};
978
979 const struct processor_costs bdver1_cost = {
980 COSTS_N_INSNS (1), /* cost of an add instruction */
981 COSTS_N_INSNS (1), /* cost of a lea instruction */
982 COSTS_N_INSNS (1), /* variable shift costs */
983 COSTS_N_INSNS (1), /* constant shift costs */
984 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
985 COSTS_N_INSNS (4), /* HI */
986 COSTS_N_INSNS (4), /* SI */
987 COSTS_N_INSNS (6), /* DI */
988 COSTS_N_INSNS (6)}, /* other */
989 0, /* cost of multiply per each bit set */
990 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
991 COSTS_N_INSNS (35), /* HI */
992 COSTS_N_INSNS (51), /* SI */
993 COSTS_N_INSNS (83), /* DI */
994 COSTS_N_INSNS (83)}, /* other */
995 COSTS_N_INSNS (1), /* cost of movsx */
996 COSTS_N_INSNS (1), /* cost of movzx */
997 8, /* "large" insn */
998 9, /* MOVE_RATIO */
999 4, /* cost for loading QImode using movzbl */
1000 {5, 5, 4}, /* cost of loading integer registers
1001 in QImode, HImode and SImode.
1002 Relative to reg-reg move (2). */
1003 {4, 4, 4}, /* cost of storing integer registers */
1004 2, /* cost of reg,reg fld/fst */
1005 {5, 5, 12}, /* cost of loading fp registers
1006 in SFmode, DFmode and XFmode */
1007 {4, 4, 8}, /* cost of storing fp registers
1008 in SFmode, DFmode and XFmode */
1009 2, /* cost of moving MMX register */
1010 {4, 4}, /* cost of loading MMX registers
1011 in SImode and DImode */
1012 {4, 4}, /* cost of storing MMX registers
1013 in SImode and DImode */
1014 2, /* cost of moving SSE register */
1015 {4, 4, 4}, /* cost of loading SSE registers
1016 in SImode, DImode and TImode */
1017 {4, 4, 4}, /* cost of storing SSE registers
1018 in SImode, DImode and TImode */
1019 2, /* MMX or SSE register to integer */
1020 /* On K8:
1021 MOVD reg64, xmmreg Double FSTORE 4
1022 MOVD reg32, xmmreg Double FSTORE 4
1023 On AMDFAM10:
1024 MOVD reg64, xmmreg Double FADD 3
1025 1/1 1/1
1026 MOVD reg32, xmmreg Double FADD 3
1027 1/1 1/1 */
1028 16, /* size of l1 cache. */
1029 2048, /* size of l2 cache. */
1030 64, /* size of prefetch block */
1031 /* New AMD processors never drop prefetches; if they cannot be performed
1032    immediately, they are queued.  We set the number of simultaneous
1033    prefetches to a large constant to reflect this (it is probably not a good
1034    idea not to limit the number of prefetches at all, as their execution
1035    also takes some time).  */
1036 100, /* number of parallel prefetches */
1037 2, /* Branch cost */
1038 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1039 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1040 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1041 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1042 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1043 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1044
1045 bdver1_memcpy,
1046 bdver1_memset,
1047 6, /* scalar_stmt_cost. */
1048 4, /* scalar load_cost. */
1049 4, /* scalar_store_cost. */
1050 6, /* vec_stmt_cost. */
1051 0, /* vec_to_scalar_cost. */
1052 2, /* scalar_to_vec_cost. */
1053 4, /* vec_align_load_cost. */
1054 4, /* vec_unalign_load_cost. */
1055 4, /* vec_store_cost. */
1056 4, /* cond_taken_branch_cost. */
1057 2, /* cond_not_taken_branch_cost. */
1058 };
1059
1060 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1061    very small blocks it is better to use a loop.  For large blocks, a libcall
1062    can do nontemporal accesses and beat inlining considerably.  */
1063
1064 static stringop_algs bdver2_memcpy[2] = {
1065 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1066 {-1, rep_prefix_4_byte, false}}},
1067 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1068 {-1, libcall, false}}}};
1069 static stringop_algs bdver2_memset[2] = {
1070 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1071 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1072 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1073 {-1, libcall, false}}}};
1074
1075 const struct processor_costs bdver2_cost = {
1076 COSTS_N_INSNS (1), /* cost of an add instruction */
1077 COSTS_N_INSNS (1), /* cost of a lea instruction */
1078 COSTS_N_INSNS (1), /* variable shift costs */
1079 COSTS_N_INSNS (1), /* constant shift costs */
1080 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1081 COSTS_N_INSNS (4), /* HI */
1082 COSTS_N_INSNS (4), /* SI */
1083 COSTS_N_INSNS (6), /* DI */
1084 COSTS_N_INSNS (6)}, /* other */
1085 0, /* cost of multiply per each bit set */
1086 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1087 COSTS_N_INSNS (35), /* HI */
1088 COSTS_N_INSNS (51), /* SI */
1089 COSTS_N_INSNS (83), /* DI */
1090 COSTS_N_INSNS (83)}, /* other */
1091 COSTS_N_INSNS (1), /* cost of movsx */
1092 COSTS_N_INSNS (1), /* cost of movzx */
1093 8, /* "large" insn */
1094 9, /* MOVE_RATIO */
1095 4, /* cost for loading QImode using movzbl */
1096 {5, 5, 4}, /* cost of loading integer registers
1097 in QImode, HImode and SImode.
1098 Relative to reg-reg move (2). */
1099 {4, 4, 4}, /* cost of storing integer registers */
1100 2, /* cost of reg,reg fld/fst */
1101 {5, 5, 12}, /* cost of loading fp registers
1102 in SFmode, DFmode and XFmode */
1103 {4, 4, 8}, /* cost of storing fp registers
1104 in SFmode, DFmode and XFmode */
1105 2, /* cost of moving MMX register */
1106 {4, 4}, /* cost of loading MMX registers
1107 in SImode and DImode */
1108 {4, 4}, /* cost of storing MMX registers
1109 in SImode and DImode */
1110 2, /* cost of moving SSE register */
1111 {4, 4, 4}, /* cost of loading SSE registers
1112 in SImode, DImode and TImode */
1113 {4, 4, 4}, /* cost of storing SSE registers
1114 in SImode, DImode and TImode */
1115 2, /* MMX or SSE register to integer */
1116 /* On K8:
1117 MOVD reg64, xmmreg Double FSTORE 4
1118 MOVD reg32, xmmreg Double FSTORE 4
1119 On AMDFAM10:
1120 MOVD reg64, xmmreg Double FADD 3
1121 1/1 1/1
1122 MOVD reg32, xmmreg Double FADD 3
1123 1/1 1/1 */
1124 16, /* size of l1 cache. */
1125 2048, /* size of l2 cache. */
1126 64, /* size of prefetch block */
1127 /* New AMD processors never drop prefetches; if they cannot be performed
1128    immediately, they are queued.  We set the number of simultaneous
1129    prefetches to a large constant to reflect this (it is probably not a good
1130    idea not to limit the number of prefetches at all, as their execution
1131    also takes some time).  */
1132 100, /* number of parallel prefetches */
1133 2, /* Branch cost */
1134 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1135 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1136 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1137 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1138 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1139 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1140
1141 bdver2_memcpy,
1142 bdver2_memset,
1143 6, /* scalar_stmt_cost. */
1144 4, /* scalar load_cost. */
1145 4, /* scalar_store_cost. */
1146 6, /* vec_stmt_cost. */
1147 0, /* vec_to_scalar_cost. */
1148 2, /* scalar_to_vec_cost. */
1149 4, /* vec_align_load_cost. */
1150 4, /* vec_unalign_load_cost. */
1151 4, /* vec_store_cost. */
1152 4, /* cond_taken_branch_cost. */
1153 2, /* cond_not_taken_branch_cost. */
1154 };
1155
1156
1157 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1158    very small blocks it is better to use a loop.  For large blocks, a libcall
1159    can do nontemporal accesses and beat inlining considerably.  */
1160 static stringop_algs bdver3_memcpy[2] = {
1161 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1162 {-1, rep_prefix_4_byte, false}}},
1163 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1164 {-1, libcall, false}}}};
1165 static stringop_algs bdver3_memset[2] = {
1166 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1167 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1168 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1169 {-1, libcall, false}}}};
1170 struct processor_costs bdver3_cost = {
1171 COSTS_N_INSNS (1), /* cost of an add instruction */
1172 COSTS_N_INSNS (1), /* cost of a lea instruction */
1173 COSTS_N_INSNS (1), /* variable shift costs */
1174 COSTS_N_INSNS (1), /* constant shift costs */
1175 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1176 COSTS_N_INSNS (4), /* HI */
1177 COSTS_N_INSNS (4), /* SI */
1178 COSTS_N_INSNS (6), /* DI */
1179 COSTS_N_INSNS (6)}, /* other */
1180 0, /* cost of multiply per each bit set */
1181 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1182 COSTS_N_INSNS (35), /* HI */
1183 COSTS_N_INSNS (51), /* SI */
1184 COSTS_N_INSNS (83), /* DI */
1185 COSTS_N_INSNS (83)}, /* other */
1186 COSTS_N_INSNS (1), /* cost of movsx */
1187 COSTS_N_INSNS (1), /* cost of movzx */
1188 8, /* "large" insn */
1189 9, /* MOVE_RATIO */
1190 4, /* cost for loading QImode using movzbl */
1191 {5, 5, 4}, /* cost of loading integer registers
1192 in QImode, HImode and SImode.
1193 Relative to reg-reg move (2). */
1194 {4, 4, 4}, /* cost of storing integer registers */
1195 2, /* cost of reg,reg fld/fst */
1196 {5, 5, 12}, /* cost of loading fp registers
1197 in SFmode, DFmode and XFmode */
1198 {4, 4, 8}, /* cost of storing fp registers
1199 in SFmode, DFmode and XFmode */
1200 2, /* cost of moving MMX register */
1201 {4, 4}, /* cost of loading MMX registers
1202 in SImode and DImode */
1203 {4, 4}, /* cost of storing MMX registers
1204 in SImode and DImode */
1205 2, /* cost of moving SSE register */
1206 {4, 4, 4}, /* cost of loading SSE registers
1207 in SImode, DImode and TImode */
1208 {4, 4, 4}, /* cost of storing SSE registers
1209 in SImode, DImode and TImode */
1210 2, /* MMX or SSE register to integer */
1211 16, /* size of l1 cache. */
1212 2048, /* size of l2 cache. */
1213 64, /* size of prefetch block */
1214 /* New AMD processors never drop prefetches; if they cannot be performed
1215    immediately, they are queued.  We set the number of simultaneous
1216    prefetches to a large constant to reflect this (it is probably not a good
1217    idea not to limit the number of prefetches at all, as their execution
1218    also takes some time).  */
1219 100, /* number of parallel prefetches */
1220 2, /* Branch cost */
1221 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1222 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1223 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1224 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1225 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1226 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1227
1228 bdver3_memcpy,
1229 bdver3_memset,
1230 6, /* scalar_stmt_cost. */
1231 4, /* scalar load_cost. */
1232 4, /* scalar_store_cost. */
1233 6, /* vec_stmt_cost. */
1234 0, /* vec_to_scalar_cost. */
1235 2, /* scalar_to_vec_cost. */
1236 4, /* vec_align_load_cost. */
1237 4, /* vec_unalign_load_cost. */
1238 4, /* vec_store_cost. */
1239 4, /* cond_taken_branch_cost. */
1240 2, /* cond_not_taken_branch_cost. */
1241 };
1242
1243 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1244    very small blocks it is better to use a loop.  For large blocks, a libcall
1245    can do nontemporal accesses and beat inlining considerably.  */
1246 static stringop_algs bdver4_memcpy[2] = {
1247 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1248 {-1, rep_prefix_4_byte, false}}},
1249 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1250 {-1, libcall, false}}}};
1251 static stringop_algs bdver4_memset[2] = {
1252 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1253 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1254 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1255 {-1, libcall, false}}}};
1256 struct processor_costs bdver4_cost = {
1257 COSTS_N_INSNS (1), /* cost of an add instruction */
1258 COSTS_N_INSNS (1), /* cost of a lea instruction */
1259 COSTS_N_INSNS (1), /* variable shift costs */
1260 COSTS_N_INSNS (1), /* constant shift costs */
1261 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1262 COSTS_N_INSNS (4), /* HI */
1263 COSTS_N_INSNS (4), /* SI */
1264 COSTS_N_INSNS (6), /* DI */
1265 COSTS_N_INSNS (6)}, /* other */
1266 0, /* cost of multiply per each bit set */
1267 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1268 COSTS_N_INSNS (35), /* HI */
1269 COSTS_N_INSNS (51), /* SI */
1270 COSTS_N_INSNS (83), /* DI */
1271 COSTS_N_INSNS (83)}, /* other */
1272 COSTS_N_INSNS (1), /* cost of movsx */
1273 COSTS_N_INSNS (1), /* cost of movzx */
1274 8, /* "large" insn */
1275 9, /* MOVE_RATIO */
1276 4, /* cost for loading QImode using movzbl */
1277 {5, 5, 4}, /* cost of loading integer registers
1278 in QImode, HImode and SImode.
1279 Relative to reg-reg move (2). */
1280 {4, 4, 4}, /* cost of storing integer registers */
1281 2, /* cost of reg,reg fld/fst */
1282 {5, 5, 12}, /* cost of loading fp registers
1283 in SFmode, DFmode and XFmode */
1284 {4, 4, 8}, /* cost of storing fp registers
1285 in SFmode, DFmode and XFmode */
1286 2, /* cost of moving MMX register */
1287 {4, 4}, /* cost of loading MMX registers
1288 in SImode and DImode */
1289 {4, 4}, /* cost of storing MMX registers
1290 in SImode and DImode */
1291 2, /* cost of moving SSE register */
1292 {4, 4, 4}, /* cost of loading SSE registers
1293 in SImode, DImode and TImode */
1294 {4, 4, 4}, /* cost of storing SSE registers
1295 in SImode, DImode and TImode */
1296 2, /* MMX or SSE register to integer */
1297 16, /* size of l1 cache. */
1298 2048, /* size of l2 cache. */
1299 64, /* size of prefetch block */
1300 /* New AMD processors never drop prefetches; if they cannot be performed
1301    immediately, they are queued.  We set the number of simultaneous
1302    prefetches to a large constant to reflect this (it is probably not a good
1303    idea not to limit the number of prefetches at all, as their execution
1304    also takes some time).  */
1305 100, /* number of parallel prefetches */
1306 2, /* Branch cost */
1307 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1308 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1309 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1310 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1311 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1312 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1313
1314 bdver4_memcpy,
1315 bdver4_memset,
1316 6, /* scalar_stmt_cost. */
1317 4, /* scalar load_cost. */
1318 4, /* scalar_store_cost. */
1319 6, /* vec_stmt_cost. */
1320 0, /* vec_to_scalar_cost. */
1321 2, /* scalar_to_vec_cost. */
1322 4, /* vec_align_load_cost. */
1323 4, /* vec_unalign_load_cost. */
1324 4, /* vec_store_cost. */
1325 4, /* cond_taken_branch_cost. */
1326 2, /* cond_not_taken_branch_cost. */
1327 };
1328
1329
1330 /* ZNVER1 has an optimized REP instruction for medium-sized blocks, but for
1331    very small blocks it is better to use a loop.  For large blocks, a libcall
1332    can do nontemporal accesses and beat inlining considerably.  */
1333 static stringop_algs znver1_memcpy[2] = {
1334 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1335 {-1, rep_prefix_4_byte, false}}},
1336 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1337 {-1, libcall, false}}}};
1338 static stringop_algs znver1_memset[2] = {
1339 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1340 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1341 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1342 {-1, libcall, false}}}};
1343 struct processor_costs znver1_cost = {
1344 COSTS_N_INSNS (1), /* cost of an add instruction. */
1345 COSTS_N_INSNS (1), /* cost of a lea instruction. */
1346 COSTS_N_INSNS (1), /* variable shift costs. */
1347 COSTS_N_INSNS (1), /* constant shift costs. */
1348 {COSTS_N_INSNS (4), /* cost of starting multiply for QI. */
1349 COSTS_N_INSNS (4), /* HI. */
1350 COSTS_N_INSNS (4), /* SI. */
1351 COSTS_N_INSNS (6), /* DI. */
1352 COSTS_N_INSNS (6)}, /* other. */
1353 0, /* cost of multiply per each bit
1354 set. */
1355 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI. */
1356 COSTS_N_INSNS (35), /* HI. */
1357 COSTS_N_INSNS (51), /* SI. */
1358 COSTS_N_INSNS (83), /* DI. */
1359 COSTS_N_INSNS (83)}, /* other. */
1360 COSTS_N_INSNS (1), /* cost of movsx. */
1361 COSTS_N_INSNS (1), /* cost of movzx. */
1362 8, /* "large" insn. */
1363 9, /* MOVE_RATIO. */
1364 4, /* cost for loading QImode using
1365 movzbl. */
1366 {5, 5, 4}, /* cost of loading integer registers
1367 in QImode, HImode and SImode.
1368 Relative to reg-reg move (2). */
1369 {4, 4, 4}, /* cost of storing integer
1370 registers. */
1371 2, /* cost of reg,reg fld/fst. */
1372 {5, 5, 12}, /* cost of loading fp registers
1373 in SFmode, DFmode and XFmode. */
1374 {4, 4, 8}, /* cost of storing fp registers
1375 in SFmode, DFmode and XFmode. */
1376 2, /* cost of moving MMX register. */
1377 {4, 4}, /* cost of loading MMX registers
1378 in SImode and DImode. */
1379 {4, 4}, /* cost of storing MMX registers
1380 in SImode and DImode. */
1381 2, /* cost of moving SSE register. */
1382 {4, 4, 4}, /* cost of loading SSE registers
1383 in SImode, DImode and TImode. */
1384 {4, 4, 4}, /* cost of storing SSE registers
1385 in SImode, DImode and TImode. */
1386 2, /* MMX or SSE register to integer. */
1387 32, /* size of l1 cache. */
1388 512, /* size of l2 cache. */
1389 64, /* size of prefetch block. */
1390 /* New AMD processors never drop prefetches; if they cannot be performed
1391    immediately, they are queued.  We set the number of simultaneous
1392    prefetches to a large constant to reflect this (it is probably not a good
1393    idea not to limit the number of prefetches at all, as their execution
1394    also takes some time).  */
1395 100, /* number of parallel prefetches. */
1396 2, /* Branch cost. */
1397 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1398 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1399 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1400 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1401 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1402 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1403
1404 znver1_memcpy,
1405 znver1_memset,
1406 6, /* scalar_stmt_cost. */
1407 4, /* scalar load_cost. */
1408 4, /* scalar_store_cost. */
1409 6, /* vec_stmt_cost. */
1410 0, /* vec_to_scalar_cost. */
1411 2, /* scalar_to_vec_cost. */
1412 4, /* vec_align_load_cost. */
1413 4, /* vec_unalign_load_cost. */
1414 4, /* vec_store_cost. */
1415 4, /* cond_taken_branch_cost. */
1416 2, /* cond_not_taken_branch_cost. */
1417 };
1418
1419 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1420    very small blocks it is better to use a loop.  For large blocks, a libcall
1421    can do nontemporal accesses and beat inlining considerably.  */
1422 static stringop_algs btver1_memcpy[2] = {
1423 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1424 {-1, rep_prefix_4_byte, false}}},
1425 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1426 {-1, libcall, false}}}};
1427 static stringop_algs btver1_memset[2] = {
1428 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1429 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1430 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1431 {-1, libcall, false}}}};
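/* Illustrative sketch (not part of the build): roughly how a stringop_algs
   table like the ones above is consulted.  The assumptions here are that
   element [0] of each pair is used for 32-bit code and element [1] for
   64-bit code, and that each {max, alg, noalign} entry selects ALG for block
   sizes up to MAX bytes, with -1 covering all larger sizes.  This is a
   simplified stand-in for the expander logic elsewhere in this file, not a
   copy of it.  */
#if 0
static enum stringop_alg
example_pick_memcpy_alg (const struct processor_costs *cost,
			 unsigned HOST_WIDE_INT size, bool is_64bit)
{
  /* Pick the 32-bit or 64-bit tuning table (assumed indexing).  */
  const stringop_algs *algs = &cost->memcpy[is_64bit];

  /* Walk the entries until SIZE fits, or the -1 catch-all entry is hit.  */
  for (int i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1
	|| size <= (unsigned HOST_WIDE_INT) algs->size[i].max)
      return algs->size[i].alg;

  return libcall;
}
#endif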
1432 const struct processor_costs btver1_cost = {
1433 COSTS_N_INSNS (1), /* cost of an add instruction */
1434 COSTS_N_INSNS (2), /* cost of a lea instruction */
1435 COSTS_N_INSNS (1), /* variable shift costs */
1436 COSTS_N_INSNS (1), /* constant shift costs */
1437 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1438 COSTS_N_INSNS (4), /* HI */
1439 COSTS_N_INSNS (3), /* SI */
1440 COSTS_N_INSNS (4), /* DI */
1441 COSTS_N_INSNS (5)}, /* other */
1442 0, /* cost of multiply per each bit set */
1443 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1444 COSTS_N_INSNS (35), /* HI */
1445 COSTS_N_INSNS (51), /* SI */
1446 COSTS_N_INSNS (83), /* DI */
1447 COSTS_N_INSNS (83)}, /* other */
1448 COSTS_N_INSNS (1), /* cost of movsx */
1449 COSTS_N_INSNS (1), /* cost of movzx */
1450 8, /* "large" insn */
1451 9, /* MOVE_RATIO */
1452 4, /* cost for loading QImode using movzbl */
1453 {3, 4, 3}, /* cost of loading integer registers
1454 in QImode, HImode and SImode.
1455 Relative to reg-reg move (2). */
1456 {3, 4, 3}, /* cost of storing integer registers */
1457 4, /* cost of reg,reg fld/fst */
1458 {4, 4, 12}, /* cost of loading fp registers
1459 in SFmode, DFmode and XFmode */
1460 {6, 6, 8}, /* cost of storing fp registers
1461 in SFmode, DFmode and XFmode */
1462 2, /* cost of moving MMX register */
1463 {3, 3}, /* cost of loading MMX registers
1464 in SImode and DImode */
1465 {4, 4}, /* cost of storing MMX registers
1466 in SImode and DImode */
1467 2, /* cost of moving SSE register */
1468 {4, 4, 3}, /* cost of loading SSE registers
1469 in SImode, DImode and TImode */
1470 {4, 4, 5}, /* cost of storing SSE registers
1471 in SImode, DImode and TImode */
1472 3, /* MMX or SSE register to integer */
1473 /* On K8:
1474 MOVD reg64, xmmreg Double FSTORE 4
1475 MOVD reg32, xmmreg Double FSTORE 4
1476 On AMDFAM10:
1477 MOVD reg64, xmmreg Double FADD 3
1478 1/1 1/1
1479 MOVD reg32, xmmreg Double FADD 3
1480 1/1 1/1 */
1481 32, /* size of l1 cache. */
1482 512, /* size of l2 cache. */
1483 64, /* size of prefetch block */
1484 100, /* number of parallel prefetches */
1485 2, /* Branch cost */
1486 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1487 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1488 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1489 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1490 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1491 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1492
1493 btver1_memcpy,
1494 btver1_memset,
1495 4, /* scalar_stmt_cost. */
1496 2, /* scalar load_cost. */
1497 2, /* scalar_store_cost. */
1498 6, /* vec_stmt_cost. */
1499 0, /* vec_to_scalar_cost. */
1500 2, /* scalar_to_vec_cost. */
1501 2, /* vec_align_load_cost. */
1502 2, /* vec_unalign_load_cost. */
1503 2, /* vec_store_cost. */
1504 2, /* cond_taken_branch_cost. */
1505 1, /* cond_not_taken_branch_cost. */
1506 };
1507
1508 static stringop_algs btver2_memcpy[2] = {
1509 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1510 {-1, rep_prefix_4_byte, false}}},
1511 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1512 {-1, libcall, false}}}};
1513 static stringop_algs btver2_memset[2] = {
1514 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1515 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1516 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1517 {-1, libcall, false}}}};
1518 const struct processor_costs btver2_cost = {
1519 COSTS_N_INSNS (1), /* cost of an add instruction */
1520 COSTS_N_INSNS (2), /* cost of a lea instruction */
1521 COSTS_N_INSNS (1), /* variable shift costs */
1522 COSTS_N_INSNS (1), /* constant shift costs */
1523 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1524 COSTS_N_INSNS (4), /* HI */
1525 COSTS_N_INSNS (3), /* SI */
1526 COSTS_N_INSNS (4), /* DI */
1527 COSTS_N_INSNS (5)}, /* other */
1528 0, /* cost of multiply per each bit set */
1529 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1530 COSTS_N_INSNS (35), /* HI */
1531 COSTS_N_INSNS (51), /* SI */
1532 COSTS_N_INSNS (83), /* DI */
1533 COSTS_N_INSNS (83)}, /* other */
1534 COSTS_N_INSNS (1), /* cost of movsx */
1535 COSTS_N_INSNS (1), /* cost of movzx */
1536 8, /* "large" insn */
1537 9, /* MOVE_RATIO */
1538 4, /* cost for loading QImode using movzbl */
1539 {3, 4, 3}, /* cost of loading integer registers
1540 in QImode, HImode and SImode.
1541 Relative to reg-reg move (2). */
1542 {3, 4, 3}, /* cost of storing integer registers */
1543 4, /* cost of reg,reg fld/fst */
1544 {4, 4, 12}, /* cost of loading fp registers
1545 in SFmode, DFmode and XFmode */
1546 {6, 6, 8}, /* cost of storing fp registers
1547 in SFmode, DFmode and XFmode */
1548 2, /* cost of moving MMX register */
1549 {3, 3}, /* cost of loading MMX registers
1550 in SImode and DImode */
1551 {4, 4}, /* cost of storing MMX registers
1552 in SImode and DImode */
1553 2, /* cost of moving SSE register */
1554 {4, 4, 3}, /* cost of loading SSE registers
1555 in SImode, DImode and TImode */
1556 {4, 4, 5}, /* cost of storing SSE registers
1557 in SImode, DImode and TImode */
1558 3, /* MMX or SSE register to integer */
1559 /* On K8:
1560 MOVD reg64, xmmreg Double FSTORE 4
1561 MOVD reg32, xmmreg Double FSTORE 4
1562 On AMDFAM10:
1563 MOVD reg64, xmmreg Double FADD 3
1564 1/1 1/1
1565 MOVD reg32, xmmreg Double FADD 3
1566 1/1 1/1 */
1567 32, /* size of l1 cache. */
1568 2048, /* size of l2 cache. */
1569 64, /* size of prefetch block */
1570 100, /* number of parallel prefetches */
1571 2, /* Branch cost */
1572 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1573 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1574 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1575 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1576 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1577 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1578 btver2_memcpy,
1579 btver2_memset,
1580 4, /* scalar_stmt_cost. */
1581 2, /* scalar load_cost. */
1582 2, /* scalar_store_cost. */
1583 6, /* vec_stmt_cost. */
1584 0, /* vec_to_scalar_cost. */
1585 2, /* scalar_to_vec_cost. */
1586 2, /* vec_align_load_cost. */
1587 2, /* vec_unalign_load_cost. */
1588 2, /* vec_store_cost. */
1589 2, /* cond_taken_branch_cost. */
1590 1, /* cond_not_taken_branch_cost. */
1591 };
1592
1593 static stringop_algs pentium4_memcpy[2] = {
1594 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1595 DUMMY_STRINGOP_ALGS};
1596 static stringop_algs pentium4_memset[2] = {
1597 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1598 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1599 DUMMY_STRINGOP_ALGS};
1600
1601 static const
1602 struct processor_costs pentium4_cost = {
1603 COSTS_N_INSNS (1), /* cost of an add instruction */
1604 COSTS_N_INSNS (3), /* cost of a lea instruction */
1605 COSTS_N_INSNS (4), /* variable shift costs */
1606 COSTS_N_INSNS (4), /* constant shift costs */
1607 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1608 COSTS_N_INSNS (15), /* HI */
1609 COSTS_N_INSNS (15), /* SI */
1610 COSTS_N_INSNS (15), /* DI */
1611 COSTS_N_INSNS (15)}, /* other */
1612 0, /* cost of multiply per each bit set */
1613 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1614 COSTS_N_INSNS (56), /* HI */
1615 COSTS_N_INSNS (56), /* SI */
1616 COSTS_N_INSNS (56), /* DI */
1617 COSTS_N_INSNS (56)}, /* other */
1618 COSTS_N_INSNS (1), /* cost of movsx */
1619 COSTS_N_INSNS (1), /* cost of movzx */
1620 16, /* "large" insn */
1621 6, /* MOVE_RATIO */
1622 2, /* cost for loading QImode using movzbl */
1623 {4, 5, 4}, /* cost of loading integer registers
1624 in QImode, HImode and SImode.
1625 Relative to reg-reg move (2). */
1626 {2, 3, 2}, /* cost of storing integer registers */
1627 2, /* cost of reg,reg fld/fst */
1628 {2, 2, 6}, /* cost of loading fp registers
1629 in SFmode, DFmode and XFmode */
1630 {4, 4, 6}, /* cost of storing fp registers
1631 in SFmode, DFmode and XFmode */
1632 2, /* cost of moving MMX register */
1633 {2, 2}, /* cost of loading MMX registers
1634 in SImode and DImode */
1635 {2, 2}, /* cost of storing MMX registers
1636 in SImode and DImode */
1637 12, /* cost of moving SSE register */
1638 {12, 12, 12}, /* cost of loading SSE registers
1639 in SImode, DImode and TImode */
1640 {2, 2, 8}, /* cost of storing SSE registers
1641 in SImode, DImode and TImode */
1642 10, /* MMX or SSE register to integer */
1643 8, /* size of l1 cache. */
1644 256, /* size of l2 cache. */
1645 64, /* size of prefetch block */
1646 6, /* number of parallel prefetches */
1647 2, /* Branch cost */
1648 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1649 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1650 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1651 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1652 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1653 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1654 pentium4_memcpy,
1655 pentium4_memset,
1656 1, /* scalar_stmt_cost. */
1657 1, /* scalar load_cost. */
1658 1, /* scalar_store_cost. */
1659 1, /* vec_stmt_cost. */
1660 1, /* vec_to_scalar_cost. */
1661 1, /* scalar_to_vec_cost. */
1662 1, /* vec_align_load_cost. */
1663 2, /* vec_unalign_load_cost. */
1664 1, /* vec_store_cost. */
1665 3, /* cond_taken_branch_cost. */
1666 1, /* cond_not_taken_branch_cost. */
1667 };
1668
1669 static stringop_algs nocona_memcpy[2] = {
1670 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1671 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1672 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1673
1674 static stringop_algs nocona_memset[2] = {
1675 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1676 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1677 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1678 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1679
1680 static const
1681 struct processor_costs nocona_cost = {
1682 COSTS_N_INSNS (1), /* cost of an add instruction */
1683 COSTS_N_INSNS (1), /* cost of a lea instruction */
1684 COSTS_N_INSNS (1), /* variable shift costs */
1685 COSTS_N_INSNS (1), /* constant shift costs */
1686 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1687 COSTS_N_INSNS (10), /* HI */
1688 COSTS_N_INSNS (10), /* SI */
1689 COSTS_N_INSNS (10), /* DI */
1690 COSTS_N_INSNS (10)}, /* other */
1691 0, /* cost of multiply per each bit set */
1692 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1693 COSTS_N_INSNS (66), /* HI */
1694 COSTS_N_INSNS (66), /* SI */
1695 COSTS_N_INSNS (66), /* DI */
1696 COSTS_N_INSNS (66)}, /* other */
1697 COSTS_N_INSNS (1), /* cost of movsx */
1698 COSTS_N_INSNS (1), /* cost of movzx */
1699 16, /* "large" insn */
1700 17, /* MOVE_RATIO */
1701 4, /* cost for loading QImode using movzbl */
1702 {4, 4, 4}, /* cost of loading integer registers
1703 in QImode, HImode and SImode.
1704 Relative to reg-reg move (2). */
1705 {4, 4, 4}, /* cost of storing integer registers */
1706 3, /* cost of reg,reg fld/fst */
1707 {12, 12, 12}, /* cost of loading fp registers
1708 in SFmode, DFmode and XFmode */
1709 {4, 4, 4}, /* cost of storing fp registers
1710 in SFmode, DFmode and XFmode */
1711 6, /* cost of moving MMX register */
1712 {12, 12}, /* cost of loading MMX registers
1713 in SImode and DImode */
1714 {12, 12}, /* cost of storing MMX registers
1715 in SImode and DImode */
1716 6, /* cost of moving SSE register */
1717 {12, 12, 12}, /* cost of loading SSE registers
1718 in SImode, DImode and TImode */
1719 {12, 12, 12}, /* cost of storing SSE registers
1720 in SImode, DImode and TImode */
1721 8, /* MMX or SSE register to integer */
1722 8, /* size of l1 cache. */
1723 1024, /* size of l2 cache. */
1724 64, /* size of prefetch block */
1725 8, /* number of parallel prefetches */
1726 1, /* Branch cost */
1727 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1728 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1729 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1730 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1731 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1732 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1733 nocona_memcpy,
1734 nocona_memset,
1735 1, /* scalar_stmt_cost. */
1736 1, /* scalar load_cost. */
1737 1, /* scalar_store_cost. */
1738 1, /* vec_stmt_cost. */
1739 1, /* vec_to_scalar_cost. */
1740 1, /* scalar_to_vec_cost. */
1741 1, /* vec_align_load_cost. */
1742 2, /* vec_unalign_load_cost. */
1743 1, /* vec_store_cost. */
1744 3, /* cond_taken_branch_cost. */
1745 1, /* cond_not_taken_branch_cost. */
1746 };
1747
1748 static stringop_algs atom_memcpy[2] = {
1749 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1750 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1751 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1752 static stringop_algs atom_memset[2] = {
1753 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1754 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1755 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1756 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1757 static const
1758 struct processor_costs atom_cost = {
1759 COSTS_N_INSNS (1), /* cost of an add instruction */
1760 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1761 COSTS_N_INSNS (1), /* variable shift costs */
1762 COSTS_N_INSNS (1), /* constant shift costs */
1763 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1764 COSTS_N_INSNS (4), /* HI */
1765 COSTS_N_INSNS (3), /* SI */
1766 COSTS_N_INSNS (4), /* DI */
1767 COSTS_N_INSNS (2)}, /* other */
1768 0, /* cost of multiply per each bit set */
1769 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1770 COSTS_N_INSNS (26), /* HI */
1771 COSTS_N_INSNS (42), /* SI */
1772 COSTS_N_INSNS (74), /* DI */
1773 COSTS_N_INSNS (74)}, /* other */
1774 COSTS_N_INSNS (1), /* cost of movsx */
1775 COSTS_N_INSNS (1), /* cost of movzx */
1776 8, /* "large" insn */
1777 17, /* MOVE_RATIO */
1778 4, /* cost for loading QImode using movzbl */
1779 {4, 4, 4}, /* cost of loading integer registers
1780 in QImode, HImode and SImode.
1781 Relative to reg-reg move (2). */
1782 {4, 4, 4}, /* cost of storing integer registers */
1783 4, /* cost of reg,reg fld/fst */
1784 {12, 12, 12}, /* cost of loading fp registers
1785 in SFmode, DFmode and XFmode */
1786 {6, 6, 8}, /* cost of storing fp registers
1787 in SFmode, DFmode and XFmode */
1788 2, /* cost of moving MMX register */
1789 {8, 8}, /* cost of loading MMX registers
1790 in SImode and DImode */
1791 {8, 8}, /* cost of storing MMX registers
1792 in SImode and DImode */
1793 2, /* cost of moving SSE register */
1794 {8, 8, 8}, /* cost of loading SSE registers
1795 in SImode, DImode and TImode */
1796 {8, 8, 8}, /* cost of storing SSE registers
1797 in SImode, DImode and TImode */
1798 5, /* MMX or SSE register to integer */
1799 32, /* size of l1 cache. */
1800 256, /* size of l2 cache. */
1801 64, /* size of prefetch block */
1802 6, /* number of parallel prefetches */
1803 3, /* Branch cost */
1804 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1805 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1806 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1807 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1808 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1809 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1810 atom_memcpy,
1811 atom_memset,
1812 1, /* scalar_stmt_cost. */
1813 1, /* scalar load_cost. */
1814 1, /* scalar_store_cost. */
1815 1, /* vec_stmt_cost. */
1816 1, /* vec_to_scalar_cost. */
1817 1, /* scalar_to_vec_cost. */
1818 1, /* vec_align_load_cost. */
1819 2, /* vec_unalign_load_cost. */
1820 1, /* vec_store_cost. */
1821 3, /* cond_taken_branch_cost. */
1822 1, /* cond_not_taken_branch_cost. */
1823 };
1824
1825 static stringop_algs slm_memcpy[2] = {
1826 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1827 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1828 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1829 static stringop_algs slm_memset[2] = {
1830 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1831 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1832 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1833 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1834 static const
1835 struct processor_costs slm_cost = {
1836 COSTS_N_INSNS (1), /* cost of an add instruction */
1837 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1838 COSTS_N_INSNS (1), /* variable shift costs */
1839 COSTS_N_INSNS (1), /* constant shift costs */
1840 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1841 COSTS_N_INSNS (3), /* HI */
1842 COSTS_N_INSNS (3), /* SI */
1843 COSTS_N_INSNS (4), /* DI */
1844 COSTS_N_INSNS (2)}, /* other */
1845 0, /* cost of multiply per each bit set */
1846 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1847 COSTS_N_INSNS (26), /* HI */
1848 COSTS_N_INSNS (42), /* SI */
1849 COSTS_N_INSNS (74), /* DI */
1850 COSTS_N_INSNS (74)}, /* other */
1851 COSTS_N_INSNS (1), /* cost of movsx */
1852 COSTS_N_INSNS (1), /* cost of movzx */
1853 8, /* "large" insn */
1854 17, /* MOVE_RATIO */
1855 4, /* cost for loading QImode using movzbl */
1856 {4, 4, 4}, /* cost of loading integer registers
1857 in QImode, HImode and SImode.
1858 Relative to reg-reg move (2). */
1859 {4, 4, 4}, /* cost of storing integer registers */
1860 4, /* cost of reg,reg fld/fst */
1861 {12, 12, 12}, /* cost of loading fp registers
1862 in SFmode, DFmode and XFmode */
1863 {6, 6, 8}, /* cost of storing fp registers
1864 in SFmode, DFmode and XFmode */
1865 2, /* cost of moving MMX register */
1866 {8, 8}, /* cost of loading MMX registers
1867 in SImode and DImode */
1868 {8, 8}, /* cost of storing MMX registers
1869 in SImode and DImode */
1870 2, /* cost of moving SSE register */
1871 {8, 8, 8}, /* cost of loading SSE registers
1872 in SImode, DImode and TImode */
1873 {8, 8, 8}, /* cost of storing SSE registers
1874 in SImode, DImode and TImode */
1875 5, /* MMX or SSE register to integer */
1876 32, /* size of l1 cache. */
1877 256, /* size of l2 cache. */
1878 64, /* size of prefetch block */
1879 6, /* number of parallel prefetches */
1880 3, /* Branch cost */
1881 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1882 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1883 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1884 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1885 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1886 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1887 slm_memcpy,
1888 slm_memset,
1889 1, /* scalar_stmt_cost. */
1890 1, /* scalar load_cost. */
1891 1, /* scalar_store_cost. */
1892 1, /* vec_stmt_cost. */
1893 4, /* vec_to_scalar_cost. */
1894 1, /* scalar_to_vec_cost. */
1895 1, /* vec_align_load_cost. */
1896 2, /* vec_unalign_load_cost. */
1897 1, /* vec_store_cost. */
1898 3, /* cond_taken_branch_cost. */
1899 1, /* cond_not_taken_branch_cost. */
1900 };
1901
1902 static stringop_algs intel_memcpy[2] = {
1903 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1904 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1905 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1906 static stringop_algs intel_memset[2] = {
1907 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1908 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1909 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1910 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1911 static const
1912 struct processor_costs intel_cost = {
1913 COSTS_N_INSNS (1), /* cost of an add instruction */
1914 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1915 COSTS_N_INSNS (1), /* variable shift costs */
1916 COSTS_N_INSNS (1), /* constant shift costs */
1917 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1918 COSTS_N_INSNS (3), /* HI */
1919 COSTS_N_INSNS (3), /* SI */
1920 COSTS_N_INSNS (4), /* DI */
1921 COSTS_N_INSNS (2)}, /* other */
1922 0, /* cost of multiply per each bit set */
1923 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1924 COSTS_N_INSNS (26), /* HI */
1925 COSTS_N_INSNS (42), /* SI */
1926 COSTS_N_INSNS (74), /* DI */
1927 COSTS_N_INSNS (74)}, /* other */
1928 COSTS_N_INSNS (1), /* cost of movsx */
1929 COSTS_N_INSNS (1), /* cost of movzx */
1930 8, /* "large" insn */
1931 17, /* MOVE_RATIO */
1932 4, /* cost for loading QImode using movzbl */
1933 {4, 4, 4}, /* cost of loading integer registers
1934 in QImode, HImode and SImode.
1935 Relative to reg-reg move (2). */
1936 {4, 4, 4}, /* cost of storing integer registers */
1937 4, /* cost of reg,reg fld/fst */
1938 {12, 12, 12}, /* cost of loading fp registers
1939 in SFmode, DFmode and XFmode */
1940 {6, 6, 8}, /* cost of storing fp registers
1941 in SFmode, DFmode and XFmode */
1942 2, /* cost of moving MMX register */
1943 {8, 8}, /* cost of loading MMX registers
1944 in SImode and DImode */
1945 {8, 8}, /* cost of storing MMX registers
1946 in SImode and DImode */
1947 2, /* cost of moving SSE register */
1948 {8, 8, 8}, /* cost of loading SSE registers
1949 in SImode, DImode and TImode */
1950 {8, 8, 8}, /* cost of storing SSE registers
1951 in SImode, DImode and TImode */
1952 5, /* MMX or SSE register to integer */
1953 32, /* size of l1 cache. */
1954 256, /* size of l2 cache. */
1955 64, /* size of prefetch block */
1956 6, /* number of parallel prefetches */
1957 3, /* Branch cost */
1958 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1959 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1960 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1961 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1962 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1963 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1964 intel_memcpy,
1965 intel_memset,
1966 1, /* scalar_stmt_cost. */
1967 1, /* scalar load_cost. */
1968 1, /* scalar_store_cost. */
1969 1, /* vec_stmt_cost. */
1970 4, /* vec_to_scalar_cost. */
1971 1, /* scalar_to_vec_cost. */
1972 1, /* vec_align_load_cost. */
1973 2, /* vec_unalign_load_cost. */
1974 1, /* vec_store_cost. */
1975 3, /* cond_taken_branch_cost. */
1976 1, /* cond_not_taken_branch_cost. */
1977 };
1978
1979 /* Generic should produce code tuned for Core i7 (and newer) and
1980 btver1 (and newer) chips. */
1981
1982 static stringop_algs generic_memcpy[2] = {
1983 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1984 {-1, libcall, false}}},
1985 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1986 {-1, libcall, false}}}};
1987 static stringop_algs generic_memset[2] = {
1988 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1989 {-1, libcall, false}}},
1990 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1991 {-1, libcall, false}}}};
1992 static const
1993 struct processor_costs generic_cost = {
1994 COSTS_N_INSNS (1), /* cost of an add instruction */
1995 /* On all chips taken into consideration, lea is 2 cycles or more. With
1996 this cost, however, our current implementation of synth_mult results in
1997 the use of unnecessary temporary registers, causing regressions on several
1998 SPECfp benchmarks. */
1999 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2000 COSTS_N_INSNS (1), /* variable shift costs */
2001 COSTS_N_INSNS (1), /* constant shift costs */
2002 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2003 COSTS_N_INSNS (4), /* HI */
2004 COSTS_N_INSNS (3), /* SI */
2005 COSTS_N_INSNS (4), /* DI */
2006 COSTS_N_INSNS (2)}, /* other */
2007 0, /* cost of multiply per each bit set */
2008 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2009 COSTS_N_INSNS (26), /* HI */
2010 COSTS_N_INSNS (42), /* SI */
2011 COSTS_N_INSNS (74), /* DI */
2012 COSTS_N_INSNS (74)}, /* other */
2013 COSTS_N_INSNS (1), /* cost of movsx */
2014 COSTS_N_INSNS (1), /* cost of movzx */
2015 8, /* "large" insn */
2016 17, /* MOVE_RATIO */
2017 4, /* cost for loading QImode using movzbl */
2018 {4, 4, 4}, /* cost of loading integer registers
2019 in QImode, HImode and SImode.
2020 Relative to reg-reg move (2). */
2021 {4, 4, 4}, /* cost of storing integer registers */
2022 4, /* cost of reg,reg fld/fst */
2023 {12, 12, 12}, /* cost of loading fp registers
2024 in SFmode, DFmode and XFmode */
2025 {6, 6, 8}, /* cost of storing fp registers
2026 in SFmode, DFmode and XFmode */
2027 2, /* cost of moving MMX register */
2028 {8, 8}, /* cost of loading MMX registers
2029 in SImode and DImode */
2030 {8, 8}, /* cost of storing MMX registers
2031 in SImode and DImode */
2032 2, /* cost of moving SSE register */
2033 {8, 8, 8}, /* cost of loading SSE registers
2034 in SImode, DImode and TImode */
2035 {8, 8, 8}, /* cost of storing SSE registers
2036 in SImode, DImode and TImode */
2037 5, /* MMX or SSE register to integer */
2038 32, /* size of l1 cache. */
2039 512, /* size of l2 cache. */
2040 64, /* size of prefetch block */
2041 6, /* number of parallel prefetches */
2042 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
2043 value is increased to the perhaps more appropriate value of 5. */
2044 3, /* Branch cost */
2045 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2046 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2047 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2048 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2049 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2050 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2051 generic_memcpy,
2052 generic_memset,
2053 1, /* scalar_stmt_cost. */
2054 1, /* scalar load_cost. */
2055 1, /* scalar_store_cost. */
2056 1, /* vec_stmt_cost. */
2057 1, /* vec_to_scalar_cost. */
2058 1, /* scalar_to_vec_cost. */
2059 1, /* vec_align_load_cost. */
2060 2, /* vec_unalign_load_cost. */
2061 1, /* vec_store_cost. */
2062 3, /* cond_taken_branch_cost. */
2063 1, /* cond_not_taken_branch_cost. */
2064 };
2065
2066 /* core_cost should produce code tuned for the Core family of CPUs. */
2067 static stringop_algs core_memcpy[2] = {
2068 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
2069 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
2070 {-1, libcall, false}}}};
2071 static stringop_algs core_memset[2] = {
2072 {libcall, {{6, loop_1_byte, true},
2073 {24, loop, true},
2074 {8192, rep_prefix_4_byte, true},
2075 {-1, libcall, false}}},
2076 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
2077 {-1, libcall, false}}}};
2078
2079 static const
2080 struct processor_costs core_cost = {
2081 COSTS_N_INSNS (1), /* cost of an add instruction */
2082 /* On all chips taken into consideration, lea is 2 cycles or more. With
2083 this cost, however, our current implementation of synth_mult results in
2084 the use of unnecessary temporary registers, causing regressions on several
2085 SPECfp benchmarks. */
2086 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2087 COSTS_N_INSNS (1), /* variable shift costs */
2088 COSTS_N_INSNS (1), /* constant shift costs */
2089 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2090 COSTS_N_INSNS (4), /* HI */
2091 COSTS_N_INSNS (3), /* SI */
2092 COSTS_N_INSNS (4), /* DI */
2093 COSTS_N_INSNS (2)}, /* other */
2094 0, /* cost of multiply per each bit set */
2095 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2096 COSTS_N_INSNS (26), /* HI */
2097 COSTS_N_INSNS (42), /* SI */
2098 COSTS_N_INSNS (74), /* DI */
2099 COSTS_N_INSNS (74)}, /* other */
2100 COSTS_N_INSNS (1), /* cost of movsx */
2101 COSTS_N_INSNS (1), /* cost of movzx */
2102 8, /* "large" insn */
2103 17, /* MOVE_RATIO */
2104 4, /* cost for loading QImode using movzbl */
2105 {4, 4, 4}, /* cost of loading integer registers
2106 in QImode, HImode and SImode.
2107 Relative to reg-reg move (2). */
2108 {4, 4, 4}, /* cost of storing integer registers */
2109 4, /* cost of reg,reg fld/fst */
2110 {12, 12, 12}, /* cost of loading fp registers
2111 in SFmode, DFmode and XFmode */
2112 {6, 6, 8}, /* cost of storing fp registers
2113 in SFmode, DFmode and XFmode */
2114 2, /* cost of moving MMX register */
2115 {8, 8}, /* cost of loading MMX registers
2116 in SImode and DImode */
2117 {8, 8}, /* cost of storing MMX registers
2118 in SImode and DImode */
2119 2, /* cost of moving SSE register */
2120 {8, 8, 8}, /* cost of loading SSE registers
2121 in SImode, DImode and TImode */
2122 {8, 8, 8}, /* cost of storing SSE registers
2123 in SImode, DImode and TImode */
2124 5, /* MMX or SSE register to integer */
2125 64, /* size of l1 cache. */
2126 512, /* size of l2 cache. */
2127 64, /* size of prefetch block */
2128 6, /* number of parallel prefetches */
2129 /* FIXME: perhaps a more appropriate value is 5. */
2130 3, /* Branch cost */
2131 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2132 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2133 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2134 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2135 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2136 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2137 core_memcpy,
2138 core_memset,
2139 1, /* scalar_stmt_cost. */
2140 1, /* scalar load_cost. */
2141 1, /* scalar_store_cost. */
2142 1, /* vec_stmt_cost. */
2143 1, /* vec_to_scalar_cost. */
2144 1, /* scalar_to_vec_cost. */
2145 1, /* vec_align_load_cost. */
2146 2, /* vec_unalign_load_cost. */
2147 1, /* vec_store_cost. */
2148 3, /* cond_taken_branch_cost. */
2149 1, /* cond_not_taken_branch_cost. */
2150 };
2151
2152
2153 /* Set by -mtune. */
2154 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2155
2156 /* Set by -mtune or -Os. */
2157 const struct processor_costs *ix86_cost = &pentium_cost;
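/* Minimal sketch (an assumption, not a copy of the real hooks): the RTX cost
   code later in this file consults ix86_cost when tuning for speed and
   ix86_size_cost when optimizing for size, roughly along these lines.  The
   helper name below is hypothetical.  */
#if 0
static int
example_add_cost (bool speed)
{
  const struct processor_costs *c = speed ? ix86_cost : &ix86_size_cost;
  return c->add;	/* Cost of an add instruction for the active tuning.  */
}
#endif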
2158
2159 /* Processor feature/optimization bitmasks. */
2160 #define m_386 (1<<PROCESSOR_I386)
2161 #define m_486 (1<<PROCESSOR_I486)
2162 #define m_PENT (1<<PROCESSOR_PENTIUM)
2163 #define m_LAKEMONT (1<<PROCESSOR_LAKEMONT)
2164 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2165 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2166 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2167 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2168 #define m_CORE2 (1<<PROCESSOR_CORE2)
2169 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2170 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2171 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2172 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2173 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2174 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2175 #define m_KNL (1<<PROCESSOR_KNL)
2176 #define m_SKYLAKE_AVX512 (1<<PROCESSOR_SKYLAKE_AVX512)
2177 #define m_INTEL (1<<PROCESSOR_INTEL)
2178
2179 #define m_GEODE (1<<PROCESSOR_GEODE)
2180 #define m_K6 (1<<PROCESSOR_K6)
2181 #define m_K6_GEODE (m_K6 | m_GEODE)
2182 #define m_K8 (1<<PROCESSOR_K8)
2183 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2184 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2185 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2186 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2187 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2188 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2189 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2190 #define m_ZNVER1 (1<<PROCESSOR_ZNVER1)
2191 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2192 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2193 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2194 #define m_BTVER (m_BTVER1 | m_BTVER2)
2195 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \
2196 | m_ZNVER1)
2197
2198 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2199
2200 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2201 #undef DEF_TUNE
2202 #define DEF_TUNE(tune, name, selector) name,
2203 #include "x86-tune.def"
2204 #undef DEF_TUNE
2205 };
2206
2207 /* Feature tests against the various tunings. */
2208 unsigned char ix86_tune_features[X86_TUNE_LAST];
2209
2210 /* Feature tests against the various tunings used to create ix86_tune_features
2211 based on the processor mask. */
2212 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2213 #undef DEF_TUNE
2214 #define DEF_TUNE(tune, name, selector) selector,
2215 #include "x86-tune.def"
2216 #undef DEF_TUNE
2217 };
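/* For illustration only: an x86-tune.def entry has the shape
   DEF_TUNE (X86_TUNE_<NAME>, "<name>", <selector mask>), so with the
   DEF_TUNE definition above each entry contributes its selector mask to
   this array (and its name string to ix86_tune_feature_names).  The entry
   below is hypothetical; it shows the assumed shape of an x86-tune.def
   line, not a quote of one.  */
#if 0
DEF_TUNE (X86_TUNE_EXAMPLE, "example", m_PENT | m_LAKEMONT | m_GENERIC)
/* ...which would expand here to:  m_PENT | m_LAKEMONT | m_GENERIC,  */
#endif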
2218
2219 /* Feature tests against the various architecture variations. */
2220 unsigned char ix86_arch_features[X86_ARCH_LAST];
2221
2222 /* Feature tests against the various architecture variations, used to create
2223 ix86_arch_features based on the processor mask. */
2224 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2225 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2226 ~(m_386 | m_486 | m_PENT | m_LAKEMONT | m_K6),
2227
2228 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2229 ~m_386,
2230
2231 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2232 ~(m_386 | m_486),
2233
2234 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2235 ~m_386,
2236
2237 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2238 ~m_386,
2239 };
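/* Illustrative sketch (an assumption about the option-override code later in
   this file): ix86_arch_features is derived by testing each selector above
   against the single bit for the selected -march value, so for example with
   -march=lakemont the CMOV feature ends up false because m_LAKEMONT is
   excluded from the first selector.  The helper name is hypothetical.  */
#if 0
static void
example_init_arch_features (void)
{
  unsigned int ix86_arch_mask = 1u << ix86_arch;
  for (unsigned int i = 0; i < X86_ARCH_LAST; i++)
    ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
}
#endif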
2240
2241 /* If the average insn count for a single function invocation is
2242 lower than this constant, emit fast (but longer) prologue and
2243 epilogue code. */
2244 #define FAST_PROLOGUE_INSN_COUNT 20
2245
2246 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2247 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2248 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2249 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2250
2251 /* Array of the smallest class containing reg number REGNO, indexed by
2252 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2253
2254 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2255 {
2256 /* ax, dx, cx, bx */
2257 AREG, DREG, CREG, BREG,
2258 /* si, di, bp, sp */
2259 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2260 /* FP registers */
2261 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2262 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2263 /* arg pointer */
2264 NON_Q_REGS,
2265 /* flags, fpsr, fpcr, frame */
2266 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2267 /* SSE registers */
2268 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2269 SSE_REGS, SSE_REGS,
2270 /* MMX registers */
2271 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2272 MMX_REGS, MMX_REGS,
2273 /* REX registers */
2274 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2275 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2276 /* SSE REX registers */
2277 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2278 SSE_REGS, SSE_REGS,
2279 /* AVX-512 SSE registers */
2280 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2281 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2282 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2283 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2284 /* Mask registers. */
2285 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2286 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2287 /* MPX bound registers */
2288 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2289 };
2290
2291 /* The "default" register map used in 32bit mode. */
2292
2293 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2294 {
2295 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2296 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2297 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2298 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2299 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2300 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2301 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2302 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2303 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2304 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2305 101, 102, 103, 104, /* bound registers */
2306 };
2307
2308 /* The "default" register map used in 64bit mode. */
2309
2310 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2311 {
2312 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2313 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2314 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2315 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2316 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2317 8,9,10,11,12,13,14,15, /* extended integer registers */
2318 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2319 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2320 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2321 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2322 126, 127, 128, 129, /* bound registers */
2323 };
2324
2325 /* Define the register numbers to be used in Dwarf debugging information.
2326 The SVR4 reference port C compiler uses the following register numbers
2327 in its Dwarf output code:
2328 0 for %eax (gcc regno = 0)
2329 1 for %ecx (gcc regno = 2)
2330 2 for %edx (gcc regno = 1)
2331 3 for %ebx (gcc regno = 3)
2332 4 for %esp (gcc regno = 7)
2333 5 for %ebp (gcc regno = 6)
2334 6 for %esi (gcc regno = 4)
2335 7 for %edi (gcc regno = 5)
2336 The following three DWARF register numbers are never generated by
2337 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2338 believes these numbers have these meanings.
2339 8 for %eip (no gcc equivalent)
2340 9 for %eflags (gcc regno = 17)
2341 10 for %trapno (no gcc equivalent)
2342 It is not at all clear how we should number the FP stack registers
2343 for the x86 architecture. If the version of SDB on x86/svr4 were
2344 a bit less brain dead with respect to floating-point then we would
2345 have a precedent to follow with respect to DWARF register numbers
2346 for x86 FP registers, but the SDB on x86/svr4 is so completely
2347 broken with respect to FP registers that it is hardly worth thinking
2348 of it as something to strive for compatibility with.
2349 The version of x86/svr4 SDB I have at the moment does (partially)
2350 seem to believe that DWARF register number 11 is associated with
2351 the x86 register %st(0), but that's about all. Higher DWARF
2352 register numbers don't seem to be associated with anything in
2353 particular, and even for DWARF regno 11, SDB only seems to under-
2354 stand that it should say that a variable lives in %st(0) (when
2355 asked via an `=' command) if we said it was in DWARF regno 11,
2356 but SDB still prints garbage when asked for the value of the
2357 variable in question (via a `/' command).
2358 (Also note that the labels SDB prints for various FP stack regs
2359 when doing an `x' command are all wrong.)
2360 Note that these problems generally don't affect the native SVR4
2361 C compiler because it doesn't allow the use of -O with -g and
2362 because when it is *not* optimizing, it allocates a memory
2363 location for each floating-point variable, and the memory
2364 location is what gets described in the DWARF AT_location
2365 attribute for the variable in question.
2366 Regardless of the severe mental illness of the x86/svr4 SDB, we
2367 do something sensible here and we use the following DWARF
2368 register numbers. Note that these are all stack-top-relative
2369 numbers.
2370 11 for %st(0) (gcc regno = 8)
2371 12 for %st(1) (gcc regno = 9)
2372 13 for %st(2) (gcc regno = 10)
2373 14 for %st(3) (gcc regno = 11)
2374 15 for %st(4) (gcc regno = 12)
2375 16 for %st(5) (gcc regno = 13)
2376 17 for %st(6) (gcc regno = 14)
2377 18 for %st(7) (gcc regno = 15)
2378 */
2379 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2380 {
2381 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2382 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2383 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2384 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2385 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2386 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2387 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2388 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2389 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2390 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2391 101, 102, 103, 104, /* bound registers */
2392 };
2393
2394 /* Define parameter passing and return registers. */
2395
2396 static int const x86_64_int_parameter_registers[6] =
2397 {
2398 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2399 };
2400
2401 static int const x86_64_ms_abi_int_parameter_registers[4] =
2402 {
2403 CX_REG, DX_REG, R8_REG, R9_REG
2404 };
2405
2406 static int const x86_64_int_return_registers[4] =
2407 {
2408 AX_REG, DX_REG, DI_REG, SI_REG
2409 };
2410
2411 /* Additional registers that are clobbered by SYSV calls. */
2412
2413 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2414 {
2415 SI_REG, DI_REG,
2416 XMM6_REG, XMM7_REG,
2417 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2418 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2419 };
2420
2421 /* Define the structure for the machine field in struct function. */
2422
2423 struct GTY(()) stack_local_entry {
2424 unsigned short mode;
2425 unsigned short n;
2426 rtx rtl;
2427 struct stack_local_entry *next;
2428 };
2429
2430 /* Structure describing stack frame layout.
2431 Stack grows downward:
2432
2433 [arguments]
2434 <- ARG_POINTER
2435 saved pc
2436
2437 saved static chain if ix86_static_chain_on_stack
2438
2439 saved frame pointer if frame_pointer_needed
2440 <- HARD_FRAME_POINTER
2441 [saved regs]
2442 <- regs_save_offset
2443 [padding0]
2444
2445 [saved SSE regs]
2446 <- sse_regs_save_offset
2447 [padding1] |
2448 | <- FRAME_POINTER
2449 [va_arg registers] |
2450 |
2451 [frame] |
2452 |
2453 [padding2] | = to_allocate
2454 <- STACK_POINTER
2455 */
2456 struct ix86_frame
2457 {
2458 int nsseregs;
2459 int nregs;
2460 int va_arg_size;
2461 int red_zone_size;
2462 int outgoing_arguments_size;
2463
2464 /* The offsets relative to ARG_POINTER. */
2465 HOST_WIDE_INT frame_pointer_offset;
2466 HOST_WIDE_INT hard_frame_pointer_offset;
2467 HOST_WIDE_INT stack_pointer_offset;
2468 HOST_WIDE_INT hfp_save_offset;
2469 HOST_WIDE_INT reg_save_offset;
2470 HOST_WIDE_INT sse_reg_save_offset;
2471
2472 /* When save_regs_using_mov is set, emit prologue using
2473 move instead of push instructions. */
2474 bool save_regs_using_mov;
2475 };
2476
2477 /* Which cpu are we scheduling for. */
2478 enum attr_cpu ix86_schedule;
2479
2480 /* Which cpu are we optimizing for. */
2481 enum processor_type ix86_tune;
2482
2483 /* Which instruction set architecture to use. */
2484 enum processor_type ix86_arch;
2485
2486 /* True if processor has SSE prefetch instruction. */
2487 unsigned char x86_prefetch_sse;
2488
2489 /* -mstackrealign option */
2490 static const char ix86_force_align_arg_pointer_string[]
2491 = "force_align_arg_pointer";
2492
2493 static rtx (*ix86_gen_leave) (void);
2494 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2495 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2496 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2497 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2498 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2499 static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
2500 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2501 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2502 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2503 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2504 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2505 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2506
2507 /* Preferred alignment for stack boundary in bits. */
2508 unsigned int ix86_preferred_stack_boundary;
2509
2510 /* Alignment for incoming stack boundary in bits specified at
2511 command line. */
2512 static unsigned int ix86_user_incoming_stack_boundary;
2513
2514 /* Default alignment for incoming stack boundary in bits. */
2515 static unsigned int ix86_default_incoming_stack_boundary;
2516
2517 /* Alignment for incoming stack boundary in bits. */
2518 unsigned int ix86_incoming_stack_boundary;
2519
2520 /* Calling abi specific va_list type nodes. */
2521 static GTY(()) tree sysv_va_list_type_node;
2522 static GTY(()) tree ms_va_list_type_node;
2523
2524 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2525 char internal_label_prefix[16];
2526 int internal_label_prefix_len;
2527
2528 /* Fence to use after loop using movnt. */
2529 tree x86_mfence;
2530
2531 /* Register class used for passing a given 64-bit part of the argument.
2532 These represent classes as documented by the PS ABI, with the exception of
2533 the SSESF and SSEDF classes, which are basically the SSE class; GCC just
2534 uses an SF or DFmode move instead of DImode to avoid reformatting penalties.
2535 
2536 Similarly, we play games with INTEGERSI_CLASS to use cheaper SImode moves
2537 whenever possible (the upper half contains only padding). */
2538 enum x86_64_reg_class
2539 {
2540 X86_64_NO_CLASS,
2541 X86_64_INTEGER_CLASS,
2542 X86_64_INTEGERSI_CLASS,
2543 X86_64_SSE_CLASS,
2544 X86_64_SSESF_CLASS,
2545 X86_64_SSEDF_CLASS,
2546 X86_64_SSEUP_CLASS,
2547 X86_64_X87_CLASS,
2548 X86_64_X87UP_CLASS,
2549 X86_64_COMPLEX_X87_CLASS,
2550 X86_64_MEMORY_CLASS
2551 };
2552
2553 #define MAX_CLASSES 8
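/* Worked example (an assumption based on the psABI classification rules
   implemented later in this file, shown for illustration only): a 16-byte
   aggregate such as the hypothetical one below is split into two eightbytes.
   The first, containing only the double, would classify as
   X86_64_SSEDF_CLASS and the second, containing the int plus padding, as
   X86_64_INTEGERSI_CLASS, so the argument is passed in one SSE register and
   one integer register.  */
#if 0
struct example_arg
{
  double d;	/* First eightbyte: SSE class, DFmode move suffices.  */
  int i;	/* Second eightbyte: integer class, SImode move suffices.  */
};
#endif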
2554
2555 /* Table of constants used by fldpi, fldln2, etc. */
2556 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2557 static bool ext_80387_constants_init = 0;
2558
2559 \f
2560 static struct machine_function * ix86_init_machine_status (void);
2561 static rtx ix86_function_value (const_tree, const_tree, bool);
2562 static bool ix86_function_value_regno_p (const unsigned int);
2563 static unsigned int ix86_function_arg_boundary (machine_mode,
2564 const_tree);
2565 static rtx ix86_static_chain (const_tree, bool);
2566 static int ix86_function_regparm (const_tree, const_tree);
2567 static void ix86_compute_frame_layout (struct ix86_frame *);
2568 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2569 rtx, rtx, int);
2570 static void ix86_add_new_builtins (HOST_WIDE_INT);
2571 static tree ix86_canonical_va_list_type (tree);
2572 static void predict_jump (int);
2573 static unsigned int split_stack_prologue_scratch_regno (void);
2574 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2575
2576 enum ix86_function_specific_strings
2577 {
2578 IX86_FUNCTION_SPECIFIC_ARCH,
2579 IX86_FUNCTION_SPECIFIC_TUNE,
2580 IX86_FUNCTION_SPECIFIC_MAX
2581 };
2582
2583 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2584 const char *, enum fpmath_unit, bool);
2585 static void ix86_function_specific_save (struct cl_target_option *,
2586 struct gcc_options *opts);
2587 static void ix86_function_specific_restore (struct gcc_options *opts,
2588 struct cl_target_option *);
2589 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2590 static void ix86_function_specific_print (FILE *, int,
2591 struct cl_target_option *);
2592 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2593 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2594 struct gcc_options *,
2595 struct gcc_options *,
2596 struct gcc_options *);
2597 static bool ix86_can_inline_p (tree, tree);
2598 static void ix86_set_current_function (tree);
2599 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2600
2601 static enum calling_abi ix86_function_abi (const_tree);
2602
2603 \f
2604 #ifndef SUBTARGET32_DEFAULT_CPU
2605 #define SUBTARGET32_DEFAULT_CPU "i386"
2606 #endif
2607
2608 /* Whether -mtune= or -march= was specified. */
2609 static int ix86_tune_defaulted;
2610 static int ix86_arch_specified;
2611
2612 /* Vectorization library interface and handlers. */
2613 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2614
2615 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2616 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2617
2618 /* Processor target table, indexed by processor number */
2619 struct ptt
2620 {
2621 const char *const name; /* processor name */
2622 const struct processor_costs *cost; /* Processor costs */
2623 const int align_loop; /* Default alignments. */
2624 const int align_loop_max_skip;
2625 const int align_jump;
2626 const int align_jump_max_skip;
2627 const int align_func;
2628 };
2629
2630 /* This table must be in sync with enum processor_type in i386.h. */
2631 static const struct ptt processor_target_table[PROCESSOR_max] =
2632 {
2633 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2634 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2635 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2636 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2637 {"lakemont", &lakemont_cost, 16, 7, 16, 7, 16},
2638 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2639 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2640 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2641 {"core2", &core_cost, 16, 10, 16, 10, 16},
2642 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2643 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2644 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2645 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2646 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2647 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2648 {"skylake-avx512", &core_cost, 16, 10, 16, 10, 16},
2649 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2650 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2651 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2652 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2653 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2654 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2655 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2656 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2657 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2658 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2659 {"znver1", &znver1_cost, 16, 10, 16, 7, 11},
2660 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2661 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2662 };
2663 \f
2664 static unsigned int
2665 rest_of_handle_insert_vzeroupper (void)
2666 {
2667 int i;
2668
2669 /* vzeroupper instructions are inserted immediately after reload to
2670 account for possible spills from 256-bit registers. The pass
2671 reuses the mode switching infrastructure by re-running the mode
2672 insertion pass, so disable entities that have already been processed. */
2673 for (i = 0; i < MAX_386_ENTITIES; i++)
2674 ix86_optimize_mode_switching[i] = 0;
2675
2676 ix86_optimize_mode_switching[AVX_U128] = 1;
2677
2678 /* Call optimize_mode_switching. */
2679 g->get_passes ()->execute_pass_mode_switching ();
2680 return 0;
2681 }
2682
2683 /* Return true if INSN uses or defines a hard register.
2684 Hard register uses in a memory address are ignored.
2685 Clobbers and flags definitions are ignored. */
2686
2687 static bool
2688 has_non_address_hard_reg (rtx_insn *insn)
2689 {
2690 df_ref ref;
2691 FOR_EACH_INSN_DEF (ref, insn)
2692 if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
2693 && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
2694 && DF_REF_REGNO (ref) != FLAGS_REG)
2695 return true;
2696
2697 FOR_EACH_INSN_USE (ref, insn)
2698 if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
2699 return true;
2700
2701 return false;
2702 }
2703
2704 /* Check whether comparison INSN may be transformed
2705 into a vector comparison. Currently we transform
2706 only zero checks that look like:
2707
2708 (set (reg:CCZ 17 flags)
2709 (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
2710 (subreg:SI (reg:DI x) 0))
2711 (const_int 0 [0]))) */
2712
2713 static bool
2714 convertible_comparison_p (rtx_insn *insn)
2715 {
2716 if (!TARGET_SSE4_1)
2717 return false;
2718
2719 rtx def_set = single_set (insn);
2720
2721 gcc_assert (def_set);
2722
2723 rtx src = SET_SRC (def_set);
2724 rtx dst = SET_DEST (def_set);
2725
2726 gcc_assert (GET_CODE (src) == COMPARE);
2727
2728 if (GET_CODE (dst) != REG
2729 || REGNO (dst) != FLAGS_REG
2730 || GET_MODE (dst) != CCZmode)
2731 return false;
2732
2733 rtx op1 = XEXP (src, 0);
2734 rtx op2 = XEXP (src, 1);
2735
2736 if (op2 != CONST0_RTX (GET_MODE (op2)))
2737 return false;
2738
2739 if (GET_CODE (op1) != IOR)
2740 return false;
2741
2742 op2 = XEXP (op1, 1);
2743 op1 = XEXP (op1, 0);
2744
2745 if (!SUBREG_P (op1)
2746 || !SUBREG_P (op2)
2747 || GET_MODE (op1) != SImode
2748 || GET_MODE (op2) != SImode
2749 || ((SUBREG_BYTE (op1) != 0
2750 || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
2751 && (SUBREG_BYTE (op2) != 0
2752 || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
2753 return false;
2754
2755 op1 = SUBREG_REG (op1);
2756 op2 = SUBREG_REG (op2);
2757
2758 if (op1 != op2
2759 || !REG_P (op1)
2760 || GET_MODE (op1) != DImode)
2761 return false;
2762
2763 return true;
2764 }
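/* Illustrative source-level counterpart of the pattern handled by
   convertible_comparison_p above (an assumption, for 32-bit code where a
   DImode value lives in a pair of SImode registers): a 64-bit zero test is
   expanded as an IOR of the two halves compared against zero, which is the
   RTL shape documented above the function.  The names are hypothetical.  */
#if 0
long long example_x;

int
example_is_zero (void)
{
  return example_x == 0;
}
#endif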
2765
2766 /* Return true if INSN may be converted into a vector
2767 instruction. */
2768
2769 static bool
2770 scalar_to_vector_candidate_p (rtx_insn *insn)
2771 {
2772 rtx def_set = single_set (insn);
2773
2774 if (!def_set)
2775 return false;
2776
2777 if (has_non_address_hard_reg (insn))
2778 return false;
2779
2780 rtx src = SET_SRC (def_set);
2781 rtx dst = SET_DEST (def_set);
2782
2783 if (GET_CODE (src) == COMPARE)
2784 return convertible_comparison_p (insn);
2785
2786 /* We are interested in DImode promotion only. */
2787 if (GET_MODE (src) != DImode
2788 || GET_MODE (dst) != DImode)
2789 return false;
2790
2791 if (!REG_P (dst) && !MEM_P (dst))
2792 return false;
2793
2794 switch (GET_CODE (src))
2795 {
2796 case PLUS:
2797 case MINUS:
2798 case IOR:
2799 case XOR:
2800 case AND:
2801 break;
2802
2803 case REG:
2804 return true;
2805
2806 case MEM:
2807 return REG_P (dst);
2808
2809 default:
2810 return false;
2811 }
2812
2813 if (!REG_P (XEXP (src, 0)) && !MEM_P (XEXP (src, 0)))
2814 return false;
2815
2816 if (!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
2817 return false;
2818
2819 if (GET_MODE (XEXP (src, 0)) != DImode
2820 || GET_MODE (XEXP (src, 1)) != DImode)
2821 return false;
2822
2823 return true;
2824 }
2825
2826 /* For a given bitmap of insn UIDs, scan all instructions and
2827 remove an insn from CANDIDATES if it has both convertible
2828 and non-convertible definitions.
2829 
2830 All insns in the bitmap are conversion candidates according to
2831 scalar_to_vector_candidate_p. Currently this implies all insns
2832 are single_set. */
2833
2834 static void
2835 remove_non_convertible_regs (bitmap candidates)
2836 {
2837 bitmap_iterator bi;
2838 unsigned id;
2839 bitmap regs = BITMAP_ALLOC (NULL);
2840
2841 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
2842 {
2843 rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
2844 rtx reg = SET_DEST (def_set);
2845
2846 if (!REG_P (reg)
2847 || bitmap_bit_p (regs, REGNO (reg))
2848 || HARD_REGISTER_P (reg))
2849 continue;
2850
2851 for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg));
2852 def;
2853 def = DF_REF_NEXT_REG (def))
2854 {
2855 if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
2856 {
2857 if (dump_file)
2858 fprintf (dump_file,
2859 "r%d has non convertible definition in insn %d\n",
2860 REGNO (reg), DF_REF_INSN_UID (def));
2861
2862 bitmap_set_bit (regs, REGNO (reg));
2863 break;
2864 }
2865 }
2866 }
2867
2868 EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
2869 {
2870 for (df_ref def = DF_REG_DEF_CHAIN (id);
2871 def;
2872 def = DF_REF_NEXT_REG (def))
2873 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
2874 {
2875 if (dump_file)
2876 fprintf (dump_file, "Removing insn %d from candidates list\n",
2877 DF_REF_INSN_UID (def));
2878
2879 bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
2880 }
2881 }
2882
2883 BITMAP_FREE (regs);
2884 }
2885
2886 class scalar_chain
2887 {
2888 public:
2889 scalar_chain ();
2890 ~scalar_chain ();
2891
2892 static unsigned max_id;
2893
2894 /* ID of a chain. */
2895 unsigned int chain_id;
2896 /* A queue of instructions to be included into a chain. */
2897 bitmap queue;
2898 /* Instructions included into a chain. */
2899 bitmap insns;
2900 /* All registers defined by a chain. */
2901 bitmap defs;
2902 /* Registers used in both vector and scalar modes. */
2903 bitmap defs_conv;
2904
2905 void build (bitmap candidates, unsigned insn_uid);
2906 int compute_convert_gain ();
2907 int convert ();
2908
2909 private:
2910 void add_insn (bitmap candidates, unsigned insn_uid);
2911 void add_to_queue (unsigned insn_uid);
2912 void mark_dual_mode_def (df_ref def);
2913 void analyze_register_chain (bitmap candidates, df_ref ref);
2914 rtx replace_with_subreg (rtx x, rtx reg, rtx subreg);
2915 void emit_conversion_insns (rtx insns, rtx_insn *pos);
2916 void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg);
2917 void convert_insn (rtx_insn *insn);
2918 void convert_op (rtx *op, rtx_insn *insn);
2919 void convert_reg (unsigned regno);
2920 void make_vector_copies (unsigned regno);
2921 };
2922
2923 unsigned scalar_chain::max_id = 0;
2924
2925 /* Initialize new chain. */
2926
2927 scalar_chain::scalar_chain ()
2928 {
2929 chain_id = ++max_id;
2930
2931 if (dump_file)
2932 fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);
2933
2934 bitmap_obstack_initialize (NULL);
2935 insns = BITMAP_ALLOC (NULL);
2936 defs = BITMAP_ALLOC (NULL);
2937 defs_conv = BITMAP_ALLOC (NULL);
2938 queue = NULL;
2939 }
2940
2941 /* Free chain's data. */
2942
2943 scalar_chain::~scalar_chain ()
2944 {
2945 BITMAP_FREE (insns);
2946 BITMAP_FREE (defs);
2947 BITMAP_FREE (defs_conv);
2948 bitmap_obstack_release (NULL);
2949 }
2950
2951 /* Add an instruction into the chain's queue. */
2952
2953 void
2954 scalar_chain::add_to_queue (unsigned insn_uid)
2955 {
2956 if (bitmap_bit_p (insns, insn_uid)
2957 || bitmap_bit_p (queue, insn_uid))
2958 return;
2959
2960 if (dump_file)
2961 fprintf (dump_file, " Adding insn %d into chain's #%d queue\n",
2962 insn_uid, chain_id);
2963 bitmap_set_bit (queue, insn_uid);
2964 }
2965
2966 /* Mark register defined by DEF as requiring conversion. */
2967
2968 void
2969 scalar_chain::mark_dual_mode_def (df_ref def)
2970 {
2971 gcc_assert (DF_REF_REG_DEF_P (def));
2972
2973 if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def)))
2974 return;
2975
2976 if (dump_file)
2977 fprintf (dump_file,
2978 " Mark r%d def in insn %d as requiring both modes in chain #%d\n",
2979 DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
2980
2981 bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
2982 }
2983
2984 /* Check REF's chain to add new insns into a queue
2985 and find registers requiring conversion. */
2986
2987 void
2988 scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
2989 {
2990 df_link *chain;
2991
2992 gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
2993 || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
2994 add_to_queue (DF_REF_INSN_UID (ref));
2995
2996 for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
2997 {
2998 unsigned uid = DF_REF_INSN_UID (chain->ref);
2999
3000 if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
3001 continue;
3002
3003 if (!DF_REF_REG_MEM_P (chain->ref))
3004 {
3005 if (bitmap_bit_p (insns, uid))
3006 continue;
3007
3008 if (bitmap_bit_p (candidates, uid))
3009 {
3010 add_to_queue (uid);
3011 continue;
3012 }
3013 }
3014
3015 if (DF_REF_REG_DEF_P (chain->ref))
3016 {
3017 if (dump_file)
3018 fprintf (dump_file, " r%d def in insn %d isn't convertible\n",
3019 DF_REF_REGNO (chain->ref), uid);
3020 mark_dual_mode_def (chain->ref);
3021 }
3022 else
3023 {
3024 if (dump_file)
3025 fprintf (dump_file, " r%d use in insn %d isn't convertible\n",
3026 DF_REF_REGNO (chain->ref), uid);
3027 mark_dual_mode_def (ref);
3028 }
3029 }
3030 }
3031
3032 /* Add instruction into a chain. */
3033
3034 void
3035 scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
3036 {
3037 if (bitmap_bit_p (insns, insn_uid))
3038 return;
3039
3040 if (dump_file)
3041 fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, chain_id);
3042
3043 bitmap_set_bit (insns, insn_uid);
3044
3045 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
3046 rtx def_set = single_set (insn);
3047 if (def_set && REG_P (SET_DEST (def_set))
3048 && !HARD_REGISTER_P (SET_DEST (def_set)))
3049 bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));
3050
3051 df_ref ref;
3052 df_ref def;
3053 for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
3054 if (!HARD_REGISTER_P (DF_REF_REG (ref)))
3055 for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref));
3056 def;
3057 def = DF_REF_NEXT_REG (def))
3058 analyze_register_chain (candidates, def);
3059 for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
3060 if (!DF_REF_REG_MEM_P (ref))
3061 analyze_register_chain (candidates, ref);
3062 }
3063
3064 /* Build new chain starting from insn INSN_UID recursively
3065 adding all dependent uses and definitions. */
3066
3067 void
3068 scalar_chain::build (bitmap candidates, unsigned insn_uid)
3069 {
3070 queue = BITMAP_ALLOC (NULL);
3071 bitmap_set_bit (queue, insn_uid);
3072
3073 if (dump_file)
3074 fprintf (dump_file, "Building chain #%d...\n", chain_id);
3075
3076 while (!bitmap_empty_p (queue))
3077 {
3078 insn_uid = bitmap_first_set_bit (queue);
3079 bitmap_clear_bit (queue, insn_uid);
3080 bitmap_clear_bit (candidates, insn_uid);
3081 add_insn (candidates, insn_uid);
3082 }
3083
3084 if (dump_file)
3085 {
3086 fprintf (dump_file, "Collected chain #%d...\n", chain_id);
3087 fprintf (dump_file, " insns: ");
3088 dump_bitmap (dump_file, insns);
3089 if (!bitmap_empty_p (defs_conv))
3090 {
3091 bitmap_iterator bi;
3092 unsigned id;
3093 const char *comma = "";
3094 fprintf (dump_file, " defs to convert: ");
3095 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
3096 {
3097 fprintf (dump_file, "%sr%d", comma, id);
3098 comma = ", ";
3099 }
3100 fprintf (dump_file, "\n");
3101 }
3102 }
3103
3104 BITMAP_FREE (queue);
3105 }
3106
3107 /* Compute a gain for chain conversion. */
3108
3109 int
3110 scalar_chain::compute_convert_gain ()
3111 {
3112 bitmap_iterator bi;
3113 unsigned insn_uid;
3114 int gain = 0;
3115 int cost = 0;
3116
3117 if (dump_file)
3118 fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
3119
3120 EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
3121 {
3122 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
3123 rtx def_set = single_set (insn);
3124 rtx src = SET_SRC (def_set);
3125 rtx dst = SET_DEST (def_set);
3126
3127 if (REG_P (src) && REG_P (dst))
3128 gain += COSTS_N_INSNS (2) - ix86_cost->sse_move;
3129 else if (REG_P (src) && MEM_P (dst))
3130 gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
3131 else if (MEM_P (src) && REG_P (dst))
3132 gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1];
3133 else if (GET_CODE (src) == PLUS
3134 || GET_CODE (src) == MINUS
3135 || GET_CODE (src) == IOR
3136 || GET_CODE (src) == XOR
3137 || GET_CODE (src) == AND)
3138 gain += ix86_cost->add;
3139 else if (GET_CODE (src) == COMPARE)
3140 {
3141 /* Assume comparison cost is the same. */
3142 }
3143 else
3144 gcc_unreachable ();
3145 }
3146
3147 if (dump_file)
3148 fprintf (dump_file, " Instruction conversion gain: %d\n", gain);
3149
3150 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi)
3151 cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->mmxsse_to_integer;
3152
3153 if (dump_file)
3154 fprintf (dump_file, " Register conversion cost: %d\n", cost);
3155
3156 gain -= cost;
3157
3158 if (dump_file)
3159 fprintf (dump_file, " Total gain: %d\n", gain);
3160
3161 return gain;
3162 }
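
/* A rough worked example (the actual numbers depend on the cost table in
   effect): for a chain containing one DImode addition and one
   register-to-register move, the estimated gain is approximately
   ix86_cost->add + COSTS_N_INSNS (2) - ix86_cost->sse_move, and every
   register in defs_conv then subtracts
   DF_REG_DEF_COUNT (regno) * ix86_cost->mmxsse_to_integer from it.  */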
3163
3164 /* Replace REG in X with a V2DI subreg of NEW_REG. */
3165
3166 rtx
3167 scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
3168 {
3169 if (x == reg)
3170 return gen_rtx_SUBREG (V2DImode, new_reg, 0);
3171
3172 const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
3173 int i, j;
3174 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3175 {
3176 if (fmt[i] == 'e')
3177 XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg);
3178 else if (fmt[i] == 'E')
3179 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3180 XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j),
3181 reg, new_reg);
3182 }
3183
3184 return x;
3185 }
3186
3187 /* Replace REG in INSN with a V2DI subreg of NEW_REG. */
3188
3189 void
3190 scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx new_reg)
3191 {
3192 replace_with_subreg (single_set (insn), reg, new_reg);
3193 }
3194
3195 /* Insert generated conversion instruction sequence INSNS
3196 after instruction AFTER. A new BB may be required in case
3197 the instruction has an EH region attached. */
3198
3199 void
3200 scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
3201 {
3202 if (!control_flow_insn_p (after))
3203 {
3204 emit_insn_after (insns, after);
3205 return;
3206 }
3207
3208 basic_block bb = BLOCK_FOR_INSN (after);
3209 edge e = find_fallthru_edge (bb->succs);
3210 gcc_assert (e);
3211
3212 basic_block new_bb = split_edge (e);
3213 emit_insn_after (insns, BB_HEAD (new_bb));
3214 }
3215
3216 /* Make vector copies for all definitions of register REGNO
3217 and replace its uses in the chain. */
3218
3219 void
3220 scalar_chain::make_vector_copies (unsigned regno)
3221 {
3222 rtx reg = regno_reg_rtx[regno];
3223 rtx vreg = gen_reg_rtx (DImode);
3224 df_ref ref;
3225
3226 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3227 if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
3228 {
3229 rtx_insn *insn = DF_REF_INSN (ref);
3230
3231 start_sequence ();
3232 if (TARGET_SSE4_1)
3233 {
3234 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
3235 CONST0_RTX (V4SImode),
3236 gen_rtx_SUBREG (SImode, reg, 0)));
3237 emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
3238 gen_rtx_SUBREG (V4SImode, vreg, 0),
3239 gen_rtx_SUBREG (SImode, reg, 4),
3240 GEN_INT (2)));
3241 }
3242 else if (TARGET_INTER_UNIT_MOVES_TO_VEC)
3243 {
3244 rtx tmp = gen_reg_rtx (DImode);
3245 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
3246 CONST0_RTX (V4SImode),
3247 gen_rtx_SUBREG (SImode, reg, 0)));
3248 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
3249 CONST0_RTX (V4SImode),
3250 gen_rtx_SUBREG (SImode, reg, 4)));
3251 emit_insn (gen_vec_interleave_lowv4si
3252 (gen_rtx_SUBREG (V4SImode, vreg, 0),
3253 gen_rtx_SUBREG (V4SImode, vreg, 0),
3254 gen_rtx_SUBREG (V4SImode, tmp, 0)));
3255 }
3256 else
3257 {
3258 rtx tmp = assign_386_stack_local (DImode, SLOT_TEMP);
3259 emit_move_insn (adjust_address (tmp, SImode, 0),
3260 gen_rtx_SUBREG (SImode, reg, 0));
3261 emit_move_insn (adjust_address (tmp, SImode, 4),
3262 gen_rtx_SUBREG (SImode, reg, 4));
3263 emit_move_insn (vreg, tmp);
3264 }
3265 emit_conversion_insns (get_insns (), insn);
3266 end_sequence ();
3267
3268 if (dump_file)
3269 fprintf (dump_file,
3270 " Copied r%d to a vector register r%d for insn %d\n",
3271 regno, REGNO (vreg), DF_REF_INSN_UID (ref));
3272 }
3273
3274 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3275 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
3276 {
3277 replace_with_subreg_in_insn (DF_REF_INSN (ref), reg, vreg);
3278
3279 if (dump_file)
3280 fprintf (dump_file, " Replaced r%d with r%d in insn %d\n",
3281 regno, REGNO (vreg), DF_REF_INSN_UID (ref));
3282 }
3283 }
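
/* Illustrative only: with -msse4.1 the copy emitted above is expected to
   expand to something along the lines of

     movd   %eax, %xmm0        # low 32 bits of the DImode pseudo
     pinsrd $1, %edx, %xmm0    # high 32 bits

   while without inter-unit moves the value is instead bounced through the
   SLOT_TEMP stack slot.  The concrete registers are hypothetical.  */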
3284
3285 /* Convert all definitions of register REGNO
3286 and fix its uses. Scalar copies may be created
3287 in case the register is also used in a non-convertible insn. */
3288
3289 void
3290 scalar_chain::convert_reg (unsigned regno)
3291 {
3292 bool scalar_copy = bitmap_bit_p (defs_conv, regno);
3293 rtx reg = regno_reg_rtx[regno];
3294 rtx scopy = NULL_RTX;
3295 df_ref ref;
3296 bitmap conv;
3297
3298 conv = BITMAP_ALLOC (NULL);
3299 bitmap_copy (conv, insns);
3300
3301 if (scalar_copy)
3302 scopy = gen_reg_rtx (DImode);
3303
3304 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3305 {
3306 rtx_insn *insn = DF_REF_INSN (ref);
3307 rtx def_set = single_set (insn);
3308 rtx src = SET_SRC (def_set);
3309 rtx reg = DF_REF_REG (ref);
3310
3311 if (!MEM_P (src))
3312 {
3313 replace_with_subreg_in_insn (insn, reg, reg);
3314 bitmap_clear_bit (conv, INSN_UID (insn));
3315 }
3316
3317 if (scalar_copy)
3318 {
3319 rtx vcopy = gen_reg_rtx (V2DImode);
3320
3321 start_sequence ();
3322 if (TARGET_INTER_UNIT_MOVES_FROM_VEC)
3323 {
3324 emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0));
3325 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
3326 gen_rtx_SUBREG (SImode, vcopy, 0));
3327 emit_move_insn (vcopy,
3328 gen_rtx_LSHIFTRT (V2DImode, vcopy, GEN_INT (32)));
3329 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
3330 gen_rtx_SUBREG (SImode, vcopy, 0));
3331 }
3332 else
3333 {
3334 rtx tmp = assign_386_stack_local (DImode, SLOT_TEMP);
3335 emit_move_insn (tmp, reg);
3336 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
3337 adjust_address (tmp, SImode, 0));
3338 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
3339 adjust_address (tmp, SImode, 4));
3340 }
3341 emit_conversion_insns (get_insns (), insn);
3342 end_sequence ();
3343
3344 if (dump_file)
3345 fprintf (dump_file,
3346 " Copied r%d to a scalar register r%d for insn %d\n",
3347 regno, REGNO (scopy), INSN_UID (insn));
3348 }
3349 }
3350
3351 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3352 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
3353 {
3354 if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref)))
3355 {
3356 rtx def_set = single_set (DF_REF_INSN (ref));
3357 if (!MEM_P (SET_DEST (def_set))
3358 || !REG_P (SET_SRC (def_set)))
3359 replace_with_subreg_in_insn (DF_REF_INSN (ref), reg, reg);
3360 bitmap_clear_bit (conv, DF_REF_INSN_UID (ref));
3361 }
3362 }
3363 else if (NONDEBUG_INSN_P (DF_REF_INSN (ref)))
3364 {
3365 replace_rtx (DF_REF_INSN (ref), reg, scopy);
3366 df_insn_rescan (DF_REF_INSN (ref));
3367 }
3368
3369 BITMAP_FREE (conv);
3370 }
3371
3372 /* Convert operand OP in INSN. All register uses
3373 are converted during register conversion.
3374 Therefore we only need to handle memory operands here. */
3375
3376 void
3377 scalar_chain::convert_op (rtx *op, rtx_insn *insn)
3378 {
3379 *op = copy_rtx_if_shared (*op);
3380
3381 if (MEM_P (*op))
3382 {
3383 rtx tmp = gen_reg_rtx (DImode);
3384
3385 emit_insn_before (gen_move_insn (tmp, *op), insn);
3386 *op = gen_rtx_SUBREG (V2DImode, tmp, 0);
3387
3388 if (dump_file)
3389 fprintf (dump_file, " Preloading operand for insn %d into r%d\n",
3390 INSN_UID (insn), REGNO (tmp));
3391 }
3392 else
3393 {
3394 gcc_assert (SUBREG_P (*op));
3395 gcc_assert (GET_MODE (*op) == V2DImode);
3396 }
3397 }
3398
3399 /* Convert INSN to vector mode. */
3400
3401 void
3402 scalar_chain::convert_insn (rtx_insn *insn)
3403 {
3404 rtx def_set = single_set (insn);
3405 rtx src = SET_SRC (def_set);
3406 rtx dst = SET_DEST (def_set);
3407 rtx subreg;
3408
3409 if (MEM_P (dst) && !REG_P (src))
3410 {
3411 /* There are no scalar integer instructions and therefore
3412 temporary register usage is required. */
3413 rtx tmp = gen_reg_rtx (DImode);
3414 emit_conversion_insns (gen_move_insn (dst, tmp), insn);
3415 dst = gen_rtx_SUBREG (V2DImode, tmp, 0);
3416 }
3417
3418 switch (GET_CODE (src))
3419 {
3420 case PLUS:
3421 case MINUS:
3422 case IOR:
3423 case XOR:
3424 case AND:
3425 convert_op (&XEXP (src, 0), insn);
3426 convert_op (&XEXP (src, 1), insn);
3427 PUT_MODE (src, V2DImode);
3428 break;
3429
3430 case MEM:
3431 if (!REG_P (dst))
3432 convert_op (&src, insn);
3433 break;
3434
3435 case REG:
3436 break;
3437
3438 case SUBREG:
3439 gcc_assert (GET_MODE (src) == V2DImode);
3440 break;
3441
3442 case COMPARE:
3443 src = SUBREG_REG (XEXP (XEXP (src, 0), 0));
3444
3445 gcc_assert ((REG_P (src) && GET_MODE (src) == DImode)
3446 || (SUBREG_P (src) && GET_MODE (src) == V2DImode));
3447
3448 if (REG_P (src))
3449 subreg = gen_rtx_SUBREG (V2DImode, src, 0);
3450 else
3451 subreg = copy_rtx_if_shared (src);
3452 emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
3453 copy_rtx_if_shared (subreg),
3454 copy_rtx_if_shared (subreg)),
3455 insn);
3456 dst = gen_rtx_REG (CCmode, FLAGS_REG);
3457 src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src),
3458 copy_rtx_if_shared (src)),
3459 UNSPEC_PTEST);
3460 break;
3461
3462 default:
3463 gcc_unreachable ();
3464 }
3465
3466 SET_SRC (def_set) = src;
3467 SET_DEST (def_set) = dst;
3468
3469 /* Drop possible dead definitions. */
3470 PATTERN (insn) = def_set;
3471
3472 INSN_CODE (insn) = -1;
3473 recog_memoized (insn);
3474 df_insn_rescan (insn);
3475 }
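
/* Putting it together, a candidate such as

     (set (reg:DI 90) (xor:DI (reg:DI 91) (reg:DI 92)))

   ends up (after its registers were converted) as

     (set (subreg:V2DI (reg:DI 90) 0)
          (xor:V2DI (subreg:V2DI (reg:DI 91) 0)
                    (subreg:V2DI (reg:DI 92) 0)))

   which can then match an SSE pxor pattern.  The register numbers are
   illustrative.  */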
3476
3477 /* Convert whole chain creating required register
3478 conversions and copies. */
3479
3480 int
3481 scalar_chain::convert ()
3482 {
3483 bitmap_iterator bi;
3484 unsigned id;
3485 int converted_insns = 0;
3486
3487 if (!dbg_cnt (stv_conversion))
3488 return 0;
3489
3490 if (dump_file)
3491 fprintf (dump_file, "Converting chain #%d...\n", chain_id);
3492
3493 EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi)
3494 convert_reg (id);
3495
3496 EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi)
3497 make_vector_copies (id);
3498
3499 EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
3500 {
3501 convert_insn (DF_INSN_UID_GET (id)->insn);
3502 converted_insns++;
3503 }
3504
3505 return converted_insns;
3506 }
3507
3508 /* Main STV pass function. Find and convert scalar
3509 instructions into vector mode when profitable. */
3510
3511 static unsigned int
3512 convert_scalars_to_vector ()
3513 {
3514 basic_block bb;
3515 bitmap candidates;
3516 int converted_insns = 0;
3517
3518 bitmap_obstack_initialize (NULL);
3519 candidates = BITMAP_ALLOC (NULL);
3520
3521 calculate_dominance_info (CDI_DOMINATORS);
3522 df_set_flags (DF_DEFER_INSN_RESCAN);
3523 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
3524 df_md_add_problem ();
3525 df_analyze ();
3526
3527 /* Find all instructions we want to convert into vector mode. */
3528 if (dump_file)
3529 fprintf (dump_file, "Searching for mode conversion candidates...\n");
3530
3531 FOR_EACH_BB_FN (bb, cfun)
3532 {
3533 rtx_insn *insn;
3534 FOR_BB_INSNS (bb, insn)
3535 if (scalar_to_vector_candidate_p (insn))
3536 {
3537 if (dump_file)
3538 fprintf (dump_file, " insn %d is marked as a candidate\n",
3539 INSN_UID (insn));
3540
3541 bitmap_set_bit (candidates, INSN_UID (insn));
3542 }
3543 }
3544
3545 remove_non_convertible_regs (candidates);
3546
3547 if (bitmap_empty_p (candidates))
3548 if (dump_file)
3549 fprintf (dump_file, "There are no candidates for optimization.\n");
3550
3551 while (!bitmap_empty_p (candidates))
3552 {
3553 unsigned uid = bitmap_first_set_bit (candidates);
3554 scalar_chain chain;
3555
3556 /* Find the instruction chain we want to convert to vector mode.
3557 Check all uses and definitions to estimate all required
3558 conversions. */
3559 chain.build (candidates, uid);
3560
3561 if (chain.compute_convert_gain () > 0)
3562 converted_insns += chain.convert ();
3563 else
3564 if (dump_file)
3565 fprintf (dump_file, "Chain #%d conversion is not profitable\n",
3566 chain.chain_id);
3567 }
3568
3569 if (dump_file)
3570 fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
3571
3572 BITMAP_FREE (candidates);
3573 bitmap_obstack_release (NULL);
3574 df_process_deferred_rescans ();
3575
3576 /* Conversion means we may have 128-bit register spills/fills
3577 which require an aligned stack. */
3578 if (converted_insns)
3579 {
3580 if (crtl->stack_alignment_needed < 128)
3581 crtl->stack_alignment_needed = 128;
3582 if (crtl->stack_alignment_estimated < 128)
3583 crtl->stack_alignment_estimated = 128;
3584 }
3585
3586 return 0;
3587 }
3588
3589 namespace {
3590
3591 const pass_data pass_data_insert_vzeroupper =
3592 {
3593 RTL_PASS, /* type */
3594 "vzeroupper", /* name */
3595 OPTGROUP_NONE, /* optinfo_flags */
3596 TV_NONE, /* tv_id */
3597 0, /* properties_required */
3598 0, /* properties_provided */
3599 0, /* properties_destroyed */
3600 0, /* todo_flags_start */
3601 TODO_df_finish, /* todo_flags_finish */
3602 };
3603
3604 class pass_insert_vzeroupper : public rtl_opt_pass
3605 {
3606 public:
3607 pass_insert_vzeroupper(gcc::context *ctxt)
3608 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
3609 {}
3610
3611 /* opt_pass methods: */
3612 virtual bool gate (function *)
3613 {
3614 return TARGET_AVX && !TARGET_AVX512F
3615 && TARGET_VZEROUPPER && flag_expensive_optimizations
3616 && !optimize_size;
3617 }
3618
3619 virtual unsigned int execute (function *)
3620 {
3621 return rest_of_handle_insert_vzeroupper ();
3622 }
3623
3624 }; // class pass_insert_vzeroupper
3625
3626 const pass_data pass_data_stv =
3627 {
3628 RTL_PASS, /* type */
3629 "stv", /* name */
3630 OPTGROUP_NONE, /* optinfo_flags */
3631 TV_NONE, /* tv_id */
3632 0, /* properties_required */
3633 0, /* properties_provided */
3634 0, /* properties_destroyed */
3635 0, /* todo_flags_start */
3636 TODO_df_finish, /* todo_flags_finish */
3637 };
3638
3639 class pass_stv : public rtl_opt_pass
3640 {
3641 public:
3642 pass_stv (gcc::context *ctxt)
3643 : rtl_opt_pass (pass_data_stv, ctxt)
3644 {}
3645
3646 /* opt_pass methods: */
3647 virtual bool gate (function *)
3648 {
3649 return !TARGET_64BIT && TARGET_STV && TARGET_SSE2 && optimize > 1;
3650 }
3651
3652 virtual unsigned int execute (function *)
3653 {
3654 return convert_scalars_to_vector ();
3655 }
3656
3657 }; // class pass_stv
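
/* Illustrative command line: with the gate above, the STV pass runs for
   e.g. "gcc -m32 -msse2 -mstv -O2" but is skipped for 64-bit
   compilations, when SSE2 is unavailable, or below -O2.  */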
3658
3659 } // anon namespace
3660
3661 rtl_opt_pass *
3662 make_pass_insert_vzeroupper (gcc::context *ctxt)
3663 {
3664 return new pass_insert_vzeroupper (ctxt);
3665 }
3666
3667 rtl_opt_pass *
3668 make_pass_stv (gcc::context *ctxt)
3669 {
3670 return new pass_stv (ctxt);
3671 }
3672
3673 /* Return true if a red-zone is in use. */
3674
3675 static inline bool
3676 ix86_using_red_zone (void)
3677 {
3678 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
3679 }
3680 \f
3681 /* Return a string that documents the current -m options. The caller is
3682 responsible for freeing the string. */
3683
3684 static char *
3685 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
3686 const char *tune, enum fpmath_unit fpmath,
3687 bool add_nl_p)
3688 {
3689 struct ix86_target_opts
3690 {
3691 const char *option; /* option string */
3692 HOST_WIDE_INT mask; /* isa mask options */
3693 };
3694
3695 /* This table is ordered so that options like -msse4.2 that imply
3696 preceding options are matched first. */
3697 static struct ix86_target_opts isa_opts[] =
3698 {
3699 { "-mfma4", OPTION_MASK_ISA_FMA4 },
3700 { "-mfma", OPTION_MASK_ISA_FMA },
3701 { "-mxop", OPTION_MASK_ISA_XOP },
3702 { "-mlwp", OPTION_MASK_ISA_LWP },
3703 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
3704 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
3705 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
3706 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
3707 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
3708 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
3709 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
3710 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
3711 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
3712 { "-msse4a", OPTION_MASK_ISA_SSE4A },
3713 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
3714 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
3715 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
3716 { "-msse3", OPTION_MASK_ISA_SSE3 },
3717 { "-msse2", OPTION_MASK_ISA_SSE2 },
3718 { "-msse", OPTION_MASK_ISA_SSE },
3719 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
3720 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
3721 { "-mmmx", OPTION_MASK_ISA_MMX },
3722 { "-mabm", OPTION_MASK_ISA_ABM },
3723 { "-mbmi", OPTION_MASK_ISA_BMI },
3724 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
3725 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
3726 { "-mhle", OPTION_MASK_ISA_HLE },
3727 { "-mfxsr", OPTION_MASK_ISA_FXSR },
3728 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
3729 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
3730 { "-madx", OPTION_MASK_ISA_ADX },
3731 { "-mtbm", OPTION_MASK_ISA_TBM },
3732 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
3733 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
3734 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
3735 { "-maes", OPTION_MASK_ISA_AES },
3736 { "-msha", OPTION_MASK_ISA_SHA },
3737 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
3738 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
3739 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
3740 { "-mf16c", OPTION_MASK_ISA_F16C },
3741 { "-mrtm", OPTION_MASK_ISA_RTM },
3742 { "-mxsave", OPTION_MASK_ISA_XSAVE },
3743 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
3744 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
3745 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
3746 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
3747 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
3748 { "-mmpx", OPTION_MASK_ISA_MPX },
3749 { "-mclwb", OPTION_MASK_ISA_CLWB },
3750 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
3751 { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
3752 { "-mclzero", OPTION_MASK_ISA_CLZERO },
3753 };
3754
3755 /* Flag options. */
3756 static struct ix86_target_opts flag_opts[] =
3757 {
3758 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
3759 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
3760 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
3761 { "-m80387", MASK_80387 },
3762 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
3763 { "-malign-double", MASK_ALIGN_DOUBLE },
3764 { "-mcld", MASK_CLD },
3765 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
3766 { "-mieee-fp", MASK_IEEE_FP },
3767 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
3768 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
3769 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
3770 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
3771 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
3772 { "-mno-push-args", MASK_NO_PUSH_ARGS },
3773 { "-mno-red-zone", MASK_NO_RED_ZONE },
3774 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
3775 { "-mrecip", MASK_RECIP },
3776 { "-mrtd", MASK_RTD },
3777 { "-msseregparm", MASK_SSEREGPARM },
3778 { "-mstack-arg-probe", MASK_STACK_PROBE },
3779 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
3780 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
3781 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
3782 { "-mvzeroupper", MASK_VZEROUPPER },
3783 { "-mstv", MASK_STV},
3784 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
3785 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
3786 { "-mprefer-avx128", MASK_PREFER_AVX128},
3787 };
3788
3789 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
3790
3791 char isa_other[40];
3792 char target_other[40];
3793 unsigned num = 0;
3794 unsigned i, j;
3795 char *ret;
3796 char *ptr;
3797 size_t len;
3798 size_t line_len;
3799 size_t sep_len;
3800 const char *abi;
3801
3802 memset (opts, '\0', sizeof (opts));
3803
3804 /* Add -march= option. */
3805 if (arch)
3806 {
3807 opts[num][0] = "-march=";
3808 opts[num++][1] = arch;
3809 }
3810
3811 /* Add -mtune= option. */
3812 if (tune)
3813 {
3814 opts[num][0] = "-mtune=";
3815 opts[num++][1] = tune;
3816 }
3817
3818 /* Add -m32/-m64/-mx32. */
3819 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
3820 {
3821 if ((isa & OPTION_MASK_ABI_64) != 0)
3822 abi = "-m64";
3823 else
3824 abi = "-mx32";
3825 isa &= ~ (OPTION_MASK_ISA_64BIT
3826 | OPTION_MASK_ABI_64
3827 | OPTION_MASK_ABI_X32);
3828 }
3829 else
3830 abi = "-m32";
3831 opts[num++][0] = abi;
3832
3833 /* Pick out the options in isa options. */
3834 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
3835 {
3836 if ((isa & isa_opts[i].mask) != 0)
3837 {
3838 opts[num++][0] = isa_opts[i].option;
3839 isa &= ~ isa_opts[i].mask;
3840 }
3841 }
3842
3843 if (isa && add_nl_p)
3844 {
3845 opts[num++][0] = isa_other;
3846 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
3847 isa);
3848 }
3849
3850 /* Add flag options. */
3851 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
3852 {
3853 if ((flags & flag_opts[i].mask) != 0)
3854 {
3855 opts[num++][0] = flag_opts[i].option;
3856 flags &= ~ flag_opts[i].mask;
3857 }
3858 }
3859
3860 if (flags && add_nl_p)
3861 {
3862 opts[num++][0] = target_other;
3863 sprintf (target_other, "(other flags: %#x)", flags);
3864 }
3865
3866 /* Add -fpmath= option. */
3867 if (fpmath)
3868 {
3869 opts[num][0] = "-mfpmath=";
3870 switch ((int) fpmath)
3871 {
3872 case FPMATH_387:
3873 opts[num++][1] = "387";
3874 break;
3875
3876 case FPMATH_SSE:
3877 opts[num++][1] = "sse";
3878 break;
3879
3880 case FPMATH_387 | FPMATH_SSE:
3881 opts[num++][1] = "sse+387";
3882 break;
3883
3884 default:
3885 gcc_unreachable ();
3886 }
3887 }
3888
3889 /* Any options? */
3890 if (num == 0)
3891 return NULL;
3892
3893 gcc_assert (num < ARRAY_SIZE (opts));
3894
3895 /* Size the string. */
3896 len = 0;
3897 sep_len = (add_nl_p) ? 3 : 1;
3898 for (i = 0; i < num; i++)
3899 {
3900 len += sep_len;
3901 for (j = 0; j < 2; j++)
3902 if (opts[i][j])
3903 len += strlen (opts[i][j]);
3904 }
3905
3906 /* Build the string. */
3907 ret = ptr = (char *) xmalloc (len);
3908 line_len = 0;
3909
3910 for (i = 0; i < num; i++)
3911 {
3912 size_t len2[2];
3913
3914 for (j = 0; j < 2; j++)
3915 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3916
3917 if (i != 0)
3918 {
3919 *ptr++ = ' ';
3920 line_len++;
3921
3922 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3923 {
3924 *ptr++ = '\\';
3925 *ptr++ = '\n';
3926 line_len = 0;
3927 }
3928 }
3929
3930 for (j = 0; j < 2; j++)
3931 if (opts[i][j])
3932 {
3933 memcpy (ptr, opts[i][j], len2[j]);
3934 ptr += len2[j];
3935 line_len += len2[j];
3936 }
3937 }
3938
3939 *ptr = '\0';
3940 gcc_assert (ret + len >= ptr);
3941
3942 return ret;
3943 }
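
/* A hypothetical result, to give a feel for the output format: for a
   64-bit compilation with -march=x86-64 -msse4.2 the returned string
   could look roughly like

     "-march=x86-64 -mtune=generic -m64 -msse4.2 -msse4.1 ... -mfpmath=sse"

   with the exact set and order determined by the isa/flag masks and the
   tables above.  */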
3944
3945 /* Return true if profiling code should be emitted before the
3946 prologue, and false otherwise.
3947 Note: For x86 with "hotfix" it is sorried. */
3948 static bool
3949 ix86_profile_before_prologue (void)
3950 {
3951 return flag_fentry != 0;
3952 }
3953
3954 /* Function that is callable from the debugger to print the current
3955 options. */
3956 void ATTRIBUTE_UNUSED
3957 ix86_debug_options (void)
3958 {
3959 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
3960 ix86_arch_string, ix86_tune_string,
3961 ix86_fpmath, true);
3962
3963 if (opts)
3964 {
3965 fprintf (stderr, "%s\n\n", opts);
3966 free (opts);
3967 }
3968 else
3969 fputs ("<no options>\n\n", stderr);
3970
3971 return;
3972 }
3973
3974 static const char *stringop_alg_names[] = {
3975 #define DEF_ENUM
3976 #define DEF_ALG(alg, name) #name,
3977 #include "stringop.def"
3978 #undef DEF_ENUM
3979 #undef DEF_ALG
3980 };
3981
3982 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
3983 The string is of the following form (or comma separated list of it):
3984
3985 strategy_alg:max_size:[align|noalign]
3986
3987 where the full size range for the strategy is either [0, max_size] or
3988 [min_size, max_size], in which min_size is the max_size + 1 of the
3989 preceding range. The last size range must have max_size == -1.
3990
3991 Examples:
3992
3993 1.
3994 -mmemcpy-strategy=libcall:-1:noalign
3995
3996 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
3997
3998
3999 2.
4000 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
4001
4002 This is to tell the compiler to use the following strategy for memset
4003 1) when the expected size is between [1, 16], use rep_8byte strategy;
4004 2) when the size is between [17, 2048], use vector_loop;
4005 3) when the size is > 2048, use libcall. */
4006
4007 struct stringop_size_range
4008 {
4009 int max;
4010 stringop_alg alg;
4011 bool noalign;
4012 };
4013
4014 static void
4015 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
4016 {
4017 const struct stringop_algs *default_algs;
4018 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
4019 char *curr_range_str, *next_range_str;
4020 int i = 0, n = 0;
4021
4022 if (is_memset)
4023 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
4024 else
4025 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
4026
4027 curr_range_str = strategy_str;
4028
4029 do
4030 {
4031 int maxs;
4032 char alg_name[128];
4033 char align[16];
4034 next_range_str = strchr (curr_range_str, ',');
4035 if (next_range_str)
4036 *next_range_str++ = '\0';
4037
4038 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
4039 alg_name, &maxs, align))
4040 {
4041 error ("wrong arg %s to option %s", curr_range_str,
4042 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4043 return;
4044 }
4045
4046 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
4047 {
4048 error ("size ranges of option %s should be increasing",
4049 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4050 return;
4051 }
4052
4053 for (i = 0; i < last_alg; i++)
4054 if (!strcmp (alg_name, stringop_alg_names[i]))
4055 break;
4056
4057 if (i == last_alg)
4058 {
4059 error ("wrong stringop strategy name %s specified for option %s",
4060 alg_name,
4061 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4062 return;
4063 }
4064
4065 if ((stringop_alg) i == rep_prefix_8_byte
4066 && !TARGET_64BIT)
4067 {
4068 /* rep; movq isn't available in 32-bit code. */
4069 error ("stringop strategy name %s specified for option %s "
4070 "not supported for 32-bit code",
4071 alg_name,
4072 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4073 return;
4074 }
4075
4076 input_ranges[n].max = maxs;
4077 input_ranges[n].alg = (stringop_alg) i;
4078 if (!strcmp (align, "align"))
4079 input_ranges[n].noalign = false;
4080 else if (!strcmp (align, "noalign"))
4081 input_ranges[n].noalign = true;
4082 else
4083 {
4084 error ("unknown alignment %s specified for option %s",
4085 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4086 return;
4087 }
4088 n++;
4089 curr_range_str = next_range_str;
4090 }
4091 while (curr_range_str);
4092
4093 if (input_ranges[n - 1].max != -1)
4094 {
4095 error ("the max value for the last size range should be -1"
4096 " for option %s",
4097 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4098 return;
4099 }
4100
4101 if (n > MAX_STRINGOP_ALGS)
4102 {
4103 error ("too many size ranges specified in option %s",
4104 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4105 return;
4106 }
4107
4108 /* Now override the default algs array. */
4109 for (i = 0; i < n; i++)
4110 {
4111 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
4112 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
4113 = input_ranges[i].alg;
4114 *const_cast<int *>(&default_algs->size[i].noalign)
4115 = input_ranges[i].noalign;
4116 }
4117 }
4118
4119 \f
4120 /* Parse the -mtune-ctrl= option. When DUMP is true,
4121 print the features that are explicitly set. */
4122
4123 static void
4124 parse_mtune_ctrl_str (bool dump)
4125 {
4126 if (!ix86_tune_ctrl_string)
4127 return;
4128
4129 char *next_feature_string = NULL;
4130 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
4131 char *orig = curr_feature_string;
4132 int i;
4133 do
4134 {
4135 bool clear = false;
4136
4137 next_feature_string = strchr (curr_feature_string, ',');
4138 if (next_feature_string)
4139 *next_feature_string++ = '\0';
4140 if (*curr_feature_string == '^')
4141 {
4142 curr_feature_string++;
4143 clear = true;
4144 }
4145 for (i = 0; i < X86_TUNE_LAST; i++)
4146 {
4147 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
4148 {
4149 ix86_tune_features[i] = !clear;
4150 if (dump)
4151 fprintf (stderr, "Explicitly %s feature %s\n",
4152 clear ? "clear" : "set", ix86_tune_feature_names[i]);
4153 break;
4154 }
4155 }
4156 if (i == X86_TUNE_LAST)
4157 error ("Unknown parameter to option -mtune-ctrl: %s",
4158 clear ? curr_feature_string - 1 : curr_feature_string);
4159 curr_feature_string = next_feature_string;
4160 }
4161 while (curr_feature_string);
4162 free (orig);
4163 }
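
/* Example (feature names are hypothetical): a string such as

     -mtune-ctrl=foo,^bar

   sets tuning feature "foo" and, because of the leading '^', clears
   feature "bar"; an unrecognized name is reported via error ().  */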
4164
4165 /* Helper function to set ix86_tune_features. IX86_TUNE is the
4166 processor type. */
4167
4168 static void
4169 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
4170 {
4171 unsigned int ix86_tune_mask = 1u << ix86_tune;
4172 int i;
4173
4174 for (i = 0; i < X86_TUNE_LAST; ++i)
4175 {
4176 if (ix86_tune_no_default)
4177 ix86_tune_features[i] = 0;
4178 else
4179 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4180 }
4181
4182 if (dump)
4183 {
4184 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
4185 for (i = 0; i < X86_TUNE_LAST; i++)
4186 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
4187 ix86_tune_features[i] ? "on" : "off");
4188 }
4189
4190 parse_mtune_ctrl_str (dump);
4191 }
4192
4193
4194 /* Default align_* from the processor table. */
4195
4196 static void
4197 ix86_default_align (struct gcc_options *opts)
4198 {
4199 if (opts->x_align_loops == 0)
4200 {
4201 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
4202 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
4203 }
4204 if (opts->x_align_jumps == 0)
4205 {
4206 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
4207 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
4208 }
4209 if (opts->x_align_functions == 0)
4210 {
4211 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
4212 }
4213 }
4214
4215 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
4216
4217 static void
4218 ix86_override_options_after_change (void)
4219 {
4220 ix86_default_align (&global_options);
4221 }
4222
4223 /* Override various settings based on options. If MAIN_ARGS_P, the
4224 options are from the command line, otherwise they are from
4225 attributes. */
4226
4227 static void
4228 ix86_option_override_internal (bool main_args_p,
4229 struct gcc_options *opts,
4230 struct gcc_options *opts_set)
4231 {
4232 int i;
4233 unsigned int ix86_arch_mask;
4234 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
4235 const char *prefix;
4236 const char *suffix;
4237 const char *sw;
4238
4239 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
4240 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
4241 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
4242 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
4243 #define PTA_AES (HOST_WIDE_INT_1 << 4)
4244 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
4245 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
4246 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
4247 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
4248 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
4249 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
4250 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
4251 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
4252 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
4253 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
4254 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
4255 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
4256 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
4257 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
4258 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
4259 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
4260 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
4261 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
4262 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
4263 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
4264 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
4265 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
4266 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
4267 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
4268 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
4269 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
4270 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
4271 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
4272 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
4273 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
4274 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
4275 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
4276 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
4277 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
4278 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
4279 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
4280 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
4281 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
4282 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
4283 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
4284 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
4285 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
4286 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
4287 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
4288 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
4289 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
4290 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
4291 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
4292 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
4293 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
4294 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
4295 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
4296 #define PTA_MWAITX (HOST_WIDE_INT_1 << 57)
4297 #define PTA_CLZERO (HOST_WIDE_INT_1 << 58)
4298 #define PTA_NO_80387 (HOST_WIDE_INT_1 << 59)
4299
4300 #define PTA_CORE2 \
4301 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
4302 | PTA_CX16 | PTA_FXSR)
4303 #define PTA_NEHALEM \
4304 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
4305 #define PTA_WESTMERE \
4306 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
4307 #define PTA_SANDYBRIDGE \
4308 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
4309 #define PTA_IVYBRIDGE \
4310 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
4311 #define PTA_HASWELL \
4312 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
4313 | PTA_FMA | PTA_MOVBE | PTA_HLE)
4314 #define PTA_BROADWELL \
4315 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
4316 #define PTA_SKYLAKE \
4317 (PTA_BROADWELL | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES)
4318 #define PTA_SKYLAKE_AVX512 \
4319 (PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL \
4320 | PTA_AVX512BW | PTA_AVX512DQ)
4321 #define PTA_KNL \
4322 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
4323 #define PTA_BONNELL \
4324 (PTA_CORE2 | PTA_MOVBE)
4325 #define PTA_SILVERMONT \
4326 (PTA_WESTMERE | PTA_MOVBE)
4327
4328 /* If this reaches 64, we need to widen the struct pta flags field below. */
4329
4330 static struct pta
4331 {
4332 const char *const name; /* processor name or nickname. */
4333 const enum processor_type processor;
4334 const enum attr_cpu schedule;
4335 const unsigned HOST_WIDE_INT flags;
4336 }
4337 const processor_alias_table[] =
4338 {
4339 {"i386", PROCESSOR_I386, CPU_NONE, 0},
4340 {"i486", PROCESSOR_I486, CPU_NONE, 0},
4341 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
4342 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
4343 {"lakemont", PROCESSOR_LAKEMONT, CPU_PENTIUM, PTA_NO_80387},
4344 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
4345 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
4346 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4347 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4348 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4349 PTA_MMX | PTA_SSE | PTA_FXSR},
4350 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
4351 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
4352 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
4353 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4354 PTA_MMX | PTA_SSE | PTA_FXSR},
4355 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4356 PTA_MMX | PTA_SSE | PTA_FXSR},
4357 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4358 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
4359 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
4360 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
4361 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
4362 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
4363 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
4364 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
4365 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
4366 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4367 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
4368 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
4369 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
4370 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
4371 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
4372 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4373 PTA_SANDYBRIDGE},
4374 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4375 PTA_SANDYBRIDGE},
4376 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4377 PTA_IVYBRIDGE},
4378 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4379 PTA_IVYBRIDGE},
4380 {"haswell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
4381 {"core-avx2", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
4382 {"broadwell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_BROADWELL},
4383 {"skylake", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE},
4384 {"skylake-avx512", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE_AVX512},
4385 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
4386 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
4387 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
4388 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
4389 {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL},
4390 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
4391 {"geode", PROCESSOR_GEODE, CPU_GEODE,
4392 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
4393 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
4394 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4395 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4396 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
4397 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
4398 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
4399 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
4400 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
4401 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
4402 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
4403 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
4404 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
4405 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
4406 {"x86-64", PROCESSOR_K8, CPU_K8,
4407 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
4408 {"k8", PROCESSOR_K8, CPU_K8,
4409 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4410 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4411 {"k8-sse3", PROCESSOR_K8, CPU_K8,
4412 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4413 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4414 {"opteron", PROCESSOR_K8, CPU_K8,
4415 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4416 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4417 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
4418 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4419 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4420 {"athlon64", PROCESSOR_K8, CPU_K8,
4421 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4422 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4423 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
4424 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4425 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4426 {"athlon-fx", PROCESSOR_K8, CPU_K8,
4427 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4428 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4429 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
4430 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
4431 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
4432 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
4433 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
4434 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
4435 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
4436 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4437 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4438 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
4439 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
4440 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
4441 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4442 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4443 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
4444 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
4445 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
4446 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
4447 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4448 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4449 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
4450 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
4451 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
4452 | PTA_XSAVEOPT | PTA_FSGSBASE},
4453 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
4454 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4455 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4456 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
4457 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
4458 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
4459 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
4460 | PTA_MOVBE | PTA_MWAITX},
4461 {"znver1", PROCESSOR_ZNVER1, CPU_ZNVER1,
4462 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4463 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4464 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
4465 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
4466 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
4467 | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
4468 | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES
4469 | PTA_SHA | PTA_LZCNT | PTA_POPCNT},
4470 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
4471 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4472 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
4473 | PTA_FXSR | PTA_XSAVE},
4474 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
4475 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4476 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
4477 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
4478 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
4479 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
4480
4481 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
4482 PTA_64BIT
4483 | PTA_HLE /* flags are only used for -march switch. */ },
4484 };
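
/* For example, -march=lakemont selects the PROCESSOR_LAKEMONT entry above,
   which carries only PTA_NO_80387 (no x87 instructions) and is scheduled
   like a Pentium; the matching itself happens in the loop over
   processor_alias_table further below.  */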
4485
4486 /* -mrecip options. */
4487 static struct
4488 {
4489 const char *string; /* option name */
4490 unsigned int mask; /* mask bits to set */
4491 }
4492 const recip_options[] =
4493 {
4494 { "all", RECIP_MASK_ALL },
4495 { "none", RECIP_MASK_NONE },
4496 { "div", RECIP_MASK_DIV },
4497 { "sqrt", RECIP_MASK_SQRT },
4498 { "vec-div", RECIP_MASK_VEC_DIV },
4499 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
4500 };
4501
4502 int const pta_size = ARRAY_SIZE (processor_alias_table);
4503
4504 /* Set up prefix/suffix so the error messages refer to either the command
4505 line argument, or the attribute(target). */
4506 if (main_args_p)
4507 {
4508 prefix = "-m";
4509 suffix = "";
4510 sw = "switch";
4511 }
4512 else
4513 {
4514 prefix = "option(\"";
4515 suffix = "\")";
4516 sw = "attribute";
4517 }
4518
4519 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
4520 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
4521 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
4522 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
4523 #ifdef TARGET_BI_ARCH
4524 else
4525 {
4526 #if TARGET_BI_ARCH == 1
4527 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
4528 is on and OPTION_MASK_ABI_X32 is off. We turn off
4529 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
4530 -mx32. */
4531 if (TARGET_X32_P (opts->x_ix86_isa_flags))
4532 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
4533 #else
4534 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
4535 on and OPTION_MASK_ABI_64 is off. We turn off
4536 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
4537 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
4538 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
4539 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
4540 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
4541 #endif
4542 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4543 && TARGET_IAMCU_P (opts->x_target_flags))
4544 sorry ("Intel MCU psABI isn%'t supported in %s mode",
4545 TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit");
4546 }
4547 #endif
4548
4549 if (TARGET_X32_P (opts->x_ix86_isa_flags))
4550 {
4551 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
4552 OPTION_MASK_ABI_64 for TARGET_X32. */
4553 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
4554 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
4555 }
4556 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
4557 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
4558 | OPTION_MASK_ABI_X32
4559 | OPTION_MASK_ABI_64);
4560 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
4561 {
4562 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
4563 OPTION_MASK_ABI_X32 for TARGET_LP64. */
4564 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
4565 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
4566 }
4567
4568 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4569 SUBTARGET_OVERRIDE_OPTIONS;
4570 #endif
4571
4572 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4573 SUBSUBTARGET_OVERRIDE_OPTIONS;
4574 #endif
4575
4576 /* -fPIC is the default for x86_64. */
4577 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
4578 opts->x_flag_pic = 2;
4579
4580 /* Need to check -mtune=generic first. */
4581 if (opts->x_ix86_tune_string)
4582 {
4583 /* As special support for cross compilers we read -mtune=native
4584 as -mtune=generic. With native compilers we won't see the
4585 -mtune=native, as it was changed by the driver. */
4586 if (!strcmp (opts->x_ix86_tune_string, "native"))
4587 {
4588 opts->x_ix86_tune_string = "generic";
4589 }
4590 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
4591 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
4592 "%stune=k8%s or %stune=generic%s instead as appropriate",
4593 prefix, suffix, prefix, suffix, prefix, suffix);
4594 }
4595 else
4596 {
4597 if (opts->x_ix86_arch_string)
4598 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
4599 if (!opts->x_ix86_tune_string)
4600 {
4601 opts->x_ix86_tune_string
4602 = processor_target_table[TARGET_CPU_DEFAULT].name;
4603 ix86_tune_defaulted = 1;
4604 }
4605
4606 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
4607 or defaulted. We need to use a sensible tune option. */
4608 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
4609 {
4610 opts->x_ix86_tune_string = "generic";
4611 }
4612 }
4613
4614 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
4615 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
4616 {
4617 /* rep; movq isn't available in 32-bit code. */
4618 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
4619 opts->x_ix86_stringop_alg = no_stringop;
4620 }
4621
4622 if (!opts->x_ix86_arch_string)
4623 opts->x_ix86_arch_string
4624 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
4625 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
4626 else
4627 ix86_arch_specified = 1;
4628
4629 if (opts_set->x_ix86_pmode)
4630 {
4631 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
4632 && opts->x_ix86_pmode == PMODE_SI)
4633 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4634 && opts->x_ix86_pmode == PMODE_DI))
4635 error ("address mode %qs not supported in the %s bit mode",
4636 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
4637 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
4638 }
4639 else
4640 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
4641 ? PMODE_DI : PMODE_SI;
4642
4643 if (!opts_set->x_ix86_abi)
4644 opts->x_ix86_abi = DEFAULT_ABI;
4645
4646 /* For targets using the MS ABI enable ms-extensions, if not
4647 explicitly turned off. For non-MS ABI we turn off this
4648 option. */
4649 if (!opts_set->x_flag_ms_extensions)
4650 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
4651
4652 if (opts_set->x_ix86_cmodel)
4653 {
4654 switch (opts->x_ix86_cmodel)
4655 {
4656 case CM_SMALL:
4657 case CM_SMALL_PIC:
4658 if (opts->x_flag_pic)
4659 opts->x_ix86_cmodel = CM_SMALL_PIC;
4660 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4661 error ("code model %qs not supported in the %s bit mode",
4662 "small", "32");
4663 break;
4664
4665 case CM_MEDIUM:
4666 case CM_MEDIUM_PIC:
4667 if (opts->x_flag_pic)
4668 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
4669 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4670 error ("code model %qs not supported in the %s bit mode",
4671 "medium", "32");
4672 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
4673 error ("code model %qs not supported in x32 mode",
4674 "medium");
4675 break;
4676
4677 case CM_LARGE:
4678 case CM_LARGE_PIC:
4679 if (opts->x_flag_pic)
4680 opts->x_ix86_cmodel = CM_LARGE_PIC;
4681 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4682 error ("code model %qs not supported in the %s bit mode",
4683 "large", "32");
4684 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
4685 error ("code model %qs not supported in x32 mode",
4686 "large");
4687 break;
4688
4689 case CM_32:
4690 if (opts->x_flag_pic)
4691 error ("code model %s does not support PIC mode", "32");
4692 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4693 error ("code model %qs not supported in the %s bit mode",
4694 "32", "64");
4695 break;
4696
4697 case CM_KERNEL:
4698 if (opts->x_flag_pic)
4699 {
4700 error ("code model %s does not support PIC mode", "kernel");
4701 opts->x_ix86_cmodel = CM_32;
4702 }
4703 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4704 error ("code model %qs not supported in the %s bit mode",
4705 "kernel", "32");
4706 break;
4707
4708 default:
4709 gcc_unreachable ();
4710 }
4711 }
4712 else
4713 {
4714 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
4715 use of rip-relative addressing. This eliminates fixups that
4716 would otherwise be needed if this object is to be placed in a
4717 DLL, and is essentially just as efficient as direct addressing. */
4718 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4719 && (TARGET_RDOS || TARGET_PECOFF))
4720 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
4721 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4722 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
4723 else
4724 opts->x_ix86_cmodel = CM_32;
4725 }
4726 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
4727 {
4728 error ("-masm=intel not supported in this configuration");
4729 opts->x_ix86_asm_dialect = ASM_ATT;
4730 }
4731 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
4732 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
4733 sorry ("%i-bit mode not compiled in",
4734 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
4735
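  /* Find -march= in the processor alias table and enable every ISA
     extension implied by that architecture, unless the user enabled or
     disabled the extension explicitly on the command line.  */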
4736 for (i = 0; i < pta_size; i++)
4737 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
4738 {
4739 ix86_schedule = processor_alias_table[i].schedule;
4740 ix86_arch = processor_alias_table[i].processor;
4741 /* Default cpu tuning to the architecture. */
4742 ix86_tune = ix86_arch;
4743
4744 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4745 && !(processor_alias_table[i].flags & PTA_64BIT))
4746 error ("CPU you selected does not support x86-64 "
4747 "instruction set");
4748
4749 if (processor_alias_table[i].flags & PTA_MMX
4750 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
4751 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
4752 if (processor_alias_table[i].flags & PTA_3DNOW
4753 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
4754 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
4755 if (processor_alias_table[i].flags & PTA_3DNOW_A
4756 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
4757 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
4758 if (processor_alias_table[i].flags & PTA_SSE
4759 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
4760 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
4761 if (processor_alias_table[i].flags & PTA_SSE2
4762 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
4763 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
4764 if (processor_alias_table[i].flags & PTA_SSE3
4765 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
4766 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
4767 if (processor_alias_table[i].flags & PTA_SSSE3
4768 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
4769 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
4770 if (processor_alias_table[i].flags & PTA_SSE4_1
4771 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
4772 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
4773 if (processor_alias_table[i].flags & PTA_SSE4_2
4774 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
4775 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
4776 if (processor_alias_table[i].flags & PTA_AVX
4777 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
4778 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
4779 if (processor_alias_table[i].flags & PTA_AVX2
4780 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
4781 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
4782 if (processor_alias_table[i].flags & PTA_FMA
4783 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
4784 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
4785 if (processor_alias_table[i].flags & PTA_SSE4A
4786 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
4787 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
4788 if (processor_alias_table[i].flags & PTA_FMA4
4789 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
4790 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
4791 if (processor_alias_table[i].flags & PTA_XOP
4792 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
4793 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
4794 if (processor_alias_table[i].flags & PTA_LWP
4795 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
4796 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
4797 if (processor_alias_table[i].flags & PTA_ABM
4798 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
4799 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
4800 if (processor_alias_table[i].flags & PTA_BMI
4801 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
4802 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
4803 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
4804 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
4805 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
4806 if (processor_alias_table[i].flags & PTA_TBM
4807 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
4808 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
4809 if (processor_alias_table[i].flags & PTA_BMI2
4810 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
4811 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
4812 if (processor_alias_table[i].flags & PTA_CX16
4813 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
4814 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
4815 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
4816 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
4817 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
4818 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
4819 && (processor_alias_table[i].flags & PTA_NO_SAHF))
4820 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
4821 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
4822 if (processor_alias_table[i].flags & PTA_MOVBE
4823 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
4824 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
4825 if (processor_alias_table[i].flags & PTA_AES
 4826 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
 4827 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
 4828 if (processor_alias_table[i].flags & PTA_SHA
 4829 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
 4830 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
4831 if (processor_alias_table[i].flags & PTA_PCLMUL
4832 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
4833 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
4834 if (processor_alias_table[i].flags & PTA_FSGSBASE
4835 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
4836 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
4837 if (processor_alias_table[i].flags & PTA_RDRND
4838 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
4839 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
4840 if (processor_alias_table[i].flags & PTA_F16C
4841 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
4842 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
4843 if (processor_alias_table[i].flags & PTA_RTM
4844 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
4845 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
4846 if (processor_alias_table[i].flags & PTA_HLE
4847 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
4848 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
4849 if (processor_alias_table[i].flags & PTA_PRFCHW
4850 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
4851 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
4852 if (processor_alias_table[i].flags & PTA_RDSEED
4853 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
4854 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
4855 if (processor_alias_table[i].flags & PTA_ADX
4856 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
4857 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
4858 if (processor_alias_table[i].flags & PTA_FXSR
4859 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
4860 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
4861 if (processor_alias_table[i].flags & PTA_XSAVE
4862 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
4863 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
4864 if (processor_alias_table[i].flags & PTA_XSAVEOPT
4865 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
4866 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
4867 if (processor_alias_table[i].flags & PTA_AVX512F
4868 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
4869 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
4870 if (processor_alias_table[i].flags & PTA_AVX512ER
4871 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
4872 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
4873 if (processor_alias_table[i].flags & PTA_AVX512PF
4874 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
4875 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
4876 if (processor_alias_table[i].flags & PTA_AVX512CD
4877 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
4878 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
4879 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
4880 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
4881 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
4882 if (processor_alias_table[i].flags & PTA_PCOMMIT
4883 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
4884 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
4885 if (processor_alias_table[i].flags & PTA_CLWB
4886 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
4887 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
4888 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
4889 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
4890 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
4891 if (processor_alias_table[i].flags & PTA_CLZERO
4892 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLZERO))
4893 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLZERO;
4894 if (processor_alias_table[i].flags & PTA_XSAVEC
4895 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
4896 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
4897 if (processor_alias_table[i].flags & PTA_XSAVES
4898 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
4899 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
4900 if (processor_alias_table[i].flags & PTA_AVX512DQ
4901 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
4902 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
4903 if (processor_alias_table[i].flags & PTA_AVX512BW
4904 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
4905 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
4906 if (processor_alias_table[i].flags & PTA_AVX512VL
4907 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
4908 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
4909 if (processor_alias_table[i].flags & PTA_MPX
4910 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
4911 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
4912 if (processor_alias_table[i].flags & PTA_AVX512VBMI
4913 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
4914 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
4915 if (processor_alias_table[i].flags & PTA_AVX512IFMA
4916 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
4917 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
4918 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
4919 x86_prefetch_sse = true;
4920 if (processor_alias_table[i].flags & PTA_MWAITX
4921 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
4922 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;
4923
4924 if (!(opts_set->x_target_flags & MASK_80387))
4925 {
4926 if (processor_alias_table[i].flags & PTA_NO_80387)
4927 opts->x_target_flags &= ~MASK_80387;
4928 else
4929 opts->x_target_flags |= MASK_80387;
4930 }
4931 break;
4932 }
4933
4934 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
4935 error ("Intel MPX does not support x32");
4936
4940 if (!strcmp (opts->x_ix86_arch_string, "generic"))
4941 error ("generic CPU can be used only for %stune=%s %s",
4942 prefix, suffix, sw);
4943 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
4944 error ("intel CPU can be used only for %stune=%s %s",
4945 prefix, suffix, sw);
4946 else if (i == pta_size)
4947 error ("bad value (%s) for %sarch=%s %s",
4948 opts->x_ix86_arch_string, prefix, suffix, sw);
4949
4950 ix86_arch_mask = 1u << ix86_arch;
4951 for (i = 0; i < X86_ARCH_LAST; ++i)
4952 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4953
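  /* Find -mtune= in the processor alias table and record the scheduling
     model; fall back to x86-64 tuning when the defaulted tune choice
     lacks 64-bit support.  */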
4954 for (i = 0; i < pta_size; i++)
4955 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
4956 {
4957 ix86_schedule = processor_alias_table[i].schedule;
4958 ix86_tune = processor_alias_table[i].processor;
4959 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4960 {
4961 if (!(processor_alias_table[i].flags & PTA_64BIT))
4962 {
4963 if (ix86_tune_defaulted)
4964 {
4965 opts->x_ix86_tune_string = "x86-64";
4966 for (i = 0; i < pta_size; i++)
4967 if (! strcmp (opts->x_ix86_tune_string,
4968 processor_alias_table[i].name))
4969 break;
4970 ix86_schedule = processor_alias_table[i].schedule;
4971 ix86_tune = processor_alias_table[i].processor;
4972 }
4973 else
4974 error ("CPU you selected does not support x86-64 "
4975 "instruction set");
4976 }
4977 }
4978 /* Intel CPUs have always interpreted SSE prefetch instructions as
4979 NOPs; so, we can enable SSE prefetch instructions even when
4980 -mtune (rather than -march) points us to a processor that has them.
4981 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
4982 higher processors. */
4983 if (TARGET_CMOV
4984 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
4985 x86_prefetch_sse = true;
4986 break;
4987 }
4988
4989 if (ix86_tune_specified && i == pta_size)
4990 error ("bad value (%s) for %stune=%s %s",
4991 opts->x_ix86_tune_string, prefix, suffix, sw);
4992
4993 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
4994
4995 #ifndef USE_IX86_FRAME_POINTER
4996 #define USE_IX86_FRAME_POINTER 0
4997 #endif
4998
4999 #ifndef USE_X86_64_FRAME_POINTER
5000 #define USE_X86_64_FRAME_POINTER 0
5001 #endif
5002
5003 /* Set the default values for switches whose default depends on TARGET_64BIT
5004 in case they weren't overwritten by command line options. */
5005 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5006 {
5007 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
5008 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
5009 if (opts->x_flag_asynchronous_unwind_tables
5010 && !opts_set->x_flag_unwind_tables
5011 && TARGET_64BIT_MS_ABI)
5012 opts->x_flag_unwind_tables = 1;
5013 if (opts->x_flag_asynchronous_unwind_tables == 2)
5014 opts->x_flag_unwind_tables
5015 = opts->x_flag_asynchronous_unwind_tables = 1;
5016 if (opts->x_flag_pcc_struct_return == 2)
5017 opts->x_flag_pcc_struct_return = 0;
5018 }
5019 else
5020 {
5021 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
5022 opts->x_flag_omit_frame_pointer
5023 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
5024 if (opts->x_flag_asynchronous_unwind_tables == 2)
5025 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
5026 if (opts->x_flag_pcc_struct_return == 2)
5027 {
5028 /* Intel MCU psABI specifies that -freg-struct-return should
5029 be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1,
5030 we check -miamcu so that -freg-struct-return is always
5031 turned on if -miamcu is used. */
5032 if (TARGET_IAMCU_P (opts->x_target_flags))
5033 opts->x_flag_pcc_struct_return = 0;
5034 else
5035 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
5036 }
5037 }
5038
5039 ix86_tune_cost = processor_target_table[ix86_tune].cost;
 5040 /* TODO: ix86_cost should be chosen at instruction or function granularity,
 5041 so that size_cost can be used for cold code even when not optimizing for size. */
5042 if (opts->x_optimize_size)
5043 ix86_cost = &ix86_size_cost;
5044 else
5045 ix86_cost = ix86_tune_cost;
5046
5047 /* Arrange to set up i386_stack_locals for all functions. */
5048 init_machine_status = ix86_init_machine_status;
5049
5050 /* Validate -mregparm= value. */
5051 if (opts_set->x_ix86_regparm)
5052 {
5053 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5054 warning (0, "-mregparm is ignored in 64-bit mode");
5055 else if (TARGET_IAMCU_P (opts->x_target_flags))
5056 warning (0, "-mregparm is ignored for Intel MCU psABI");
5057 if (opts->x_ix86_regparm > REGPARM_MAX)
5058 {
5059 error ("-mregparm=%d is not between 0 and %d",
5060 opts->x_ix86_regparm, REGPARM_MAX);
5061 opts->x_ix86_regparm = 0;
5062 }
5063 }
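  /* Arguments are passed in registers for both the Intel MCU psABI and
     the 64-bit ABIs, so use the maximum regparm value there.  */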
5064 if (TARGET_IAMCU_P (opts->x_target_flags)
5065 || TARGET_64BIT_P (opts->x_ix86_isa_flags))
5066 opts->x_ix86_regparm = REGPARM_MAX;
5067
5068 /* Default align_* from the processor table. */
5069 ix86_default_align (opts);
5070
5071 /* Provide default for -mbranch-cost= value. */
5072 if (!opts_set->x_ix86_branch_cost)
5073 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
5074
5075 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5076 {
5077 opts->x_target_flags
5078 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
5079
5080 /* Enable by default the SSE and MMX builtins. Do allow the user to
5081 explicitly disable any of these. In particular, disabling SSE and
5082 MMX for kernel code is extremely useful. */
5083 if (!ix86_arch_specified)
5084 opts->x_ix86_isa_flags
5085 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
5086 | TARGET_SUBTARGET64_ISA_DEFAULT)
5087 & ~opts->x_ix86_isa_flags_explicit);
5088
5089 if (TARGET_RTD_P (opts->x_target_flags))
5090 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
5091 }
5092 else
5093 {
5094 opts->x_target_flags
5095 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
5096
5097 if (!ix86_arch_specified)
5098 opts->x_ix86_isa_flags
5099 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
5100
 5101 /* The i386 ABI does not specify a red zone.  It still makes sense to use one
 5102 when the programmer takes care to keep the stack from being destroyed. */
5103 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
5104 opts->x_target_flags |= MASK_NO_RED_ZONE;
5105 }
5106
5107 /* Keep nonleaf frame pointers. */
5108 if (opts->x_flag_omit_frame_pointer)
5109 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
5110 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
5111 opts->x_flag_omit_frame_pointer = 1;
5112
5113 /* If we're doing fast math, we don't care about comparison order
5114 wrt NaNs. This lets us use a shorter comparison sequence. */
5115 if (opts->x_flag_finite_math_only)
5116 opts->x_target_flags &= ~MASK_IEEE_FP;
5117
5118 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
5119 since the insns won't need emulation. */
5120 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
5121 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
5122
5123 /* Likewise, if the target doesn't have a 387, or we've specified
5124 software floating point, don't use 387 inline intrinsics. */
5125 if (!TARGET_80387_P (opts->x_target_flags))
5126 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
5127
5128 /* Turn on MMX builtins for -msse. */
5129 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
5130 opts->x_ix86_isa_flags
5131 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
5132
5133 /* Enable SSE prefetch. */
5134 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
5135 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
5136 x86_prefetch_sse = true;
5137
5138 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
5139 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
5140 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
5141 opts->x_ix86_isa_flags
5142 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
5143
5144 /* Enable popcnt instruction for -msse4.2 or -mabm. */
5145 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
5146 || TARGET_ABM_P (opts->x_ix86_isa_flags))
5147 opts->x_ix86_isa_flags
5148 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
5149
5150 /* Enable lzcnt instruction for -mabm. */
5151 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
5152 opts->x_ix86_isa_flags
5153 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
5154
5155 /* Validate -mpreferred-stack-boundary= value or default it to
5156 PREFERRED_STACK_BOUNDARY_DEFAULT. */
5157 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
5158 if (opts_set->x_ix86_preferred_stack_boundary_arg)
5159 {
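      /* The option value is log2 of the boundary in bytes: the minimum is
         4 bytes for 32-bit code, 8 bytes for 64-bit code (16 bytes once SSE
         registers may have to be spilled), and the maximum is 4096 bytes
         (16 bytes under SEH).  */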
5160 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
5161 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
5162 int max = (TARGET_SEH ? 4 : 12);
5163
5164 if (opts->x_ix86_preferred_stack_boundary_arg < min
5165 || opts->x_ix86_preferred_stack_boundary_arg > max)
5166 {
5167 if (min == max)
5168 error ("-mpreferred-stack-boundary is not supported "
5169 "for this target");
5170 else
5171 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
5172 opts->x_ix86_preferred_stack_boundary_arg, min, max);
5173 }
5174 else
5175 ix86_preferred_stack_boundary
5176 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
5177 }
5178
5179 /* Set the default value for -mstackrealign. */
5180 if (opts->x_ix86_force_align_arg_pointer == -1)
5181 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
5182
5183 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
5184
5185 /* Validate -mincoming-stack-boundary= value or default it to
5186 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
5187 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
5188 if (opts_set->x_ix86_incoming_stack_boundary_arg)
5189 {
5190 int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2;
5191
5192 if (opts->x_ix86_incoming_stack_boundary_arg < min
5193 || opts->x_ix86_incoming_stack_boundary_arg > 12)
5194 error ("-mincoming-stack-boundary=%d is not between %d and 12",
5195 opts->x_ix86_incoming_stack_boundary_arg, min);
5196 else
5197 {
5198 ix86_user_incoming_stack_boundary
5199 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
5200 ix86_incoming_stack_boundary
5201 = ix86_user_incoming_stack_boundary;
5202 }
5203 }
5204
5205 #ifndef NO_PROFILE_COUNTERS
5206 if (flag_nop_mcount)
5207 error ("-mnop-mcount is not compatible with this target");
5208 #endif
5209 if (flag_nop_mcount && flag_pic)
5210 error ("-mnop-mcount is not implemented for -fPIC");
5211
5212 /* Accept -msseregparm only if at least SSE support is enabled. */
5213 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
5214 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
5215 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
5216
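  /* If the user gave -mfpmath= explicitly, make sure the requested unit
     is actually enabled; otherwise fall back to the available one with
     a warning.  */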
5217 if (opts_set->x_ix86_fpmath)
5218 {
5219 if (opts->x_ix86_fpmath & FPMATH_SSE)
5220 {
5221 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
5222 {
5223 if (TARGET_80387_P (opts->x_target_flags))
5224 {
5225 warning (0, "SSE instruction set disabled, using 387 arithmetics");
5226 opts->x_ix86_fpmath = FPMATH_387;
5227 }
5228 }
5229 else if ((opts->x_ix86_fpmath & FPMATH_387)
5230 && !TARGET_80387_P (opts->x_target_flags))
5231 {
5232 warning (0, "387 instruction set disabled, using SSE arithmetics");
5233 opts->x_ix86_fpmath = FPMATH_SSE;
5234 }
5235 }
5236 }
 5237 /* For all chips supporting SSE2, -mfpmath=sse performs better than
 5238 -mfpmath=387.  The latter is nevertheless the default on many targets,
 5239 since the extra 80-bit precision of temporaries is considered part of
 5240 the ABI.  Override the default at least for -ffast-math.
 5241 TODO: -mfpmath=both seems to produce equally performing code with
 5242 slightly smaller binaries.  It is however not clear whether register
 5243 allocation is ready for this setting.
 5244 Also, -mfpmath=387 codegen is overall considerably more compact
 5245 (about 4-5%) than SSE codegen.  We may want to switch to 387 with
 5246 -ffast-math for size-optimized functions. */
5247 else if (fast_math_flags_set_p (&global_options)
5248 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
5249 opts->x_ix86_fpmath = FPMATH_SSE;
5250 else
5251 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
5252
5253 /* Use external vectorized library in vectorizing intrinsics. */
5254 if (opts_set->x_ix86_veclibabi_type)
5255 switch (opts->x_ix86_veclibabi_type)
5256 {
5257 case ix86_veclibabi_type_svml:
5258 ix86_veclib_handler = ix86_veclibabi_svml;
5259 break;
5260
5261 case ix86_veclibabi_type_acml:
5262 ix86_veclib_handler = ix86_veclibabi_acml;
5263 break;
5264
5265 default:
5266 gcc_unreachable ();
5267 }
5268
5269 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
5270 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
5271 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
5272
5273 /* If stack probes are required, the space used for large function
5274 arguments on the stack must also be probed, so enable
5275 -maccumulate-outgoing-args so this happens in the prologue. */
5276 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
5277 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
5278 {
5279 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
5280 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
5281 "for correctness", prefix, suffix);
5282 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
5283 }
5284
5285 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
5286 {
5287 char *p;
5288 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
5289 p = strchr (internal_label_prefix, 'X');
5290 internal_label_prefix_len = p - internal_label_prefix;
5291 *p = '\0';
5292 }
5293
 5294 /* When the scheduling description is not available, disable the scheduler pass
 5295 so it won't slow down the compilation and make x87 code slower. */
5296 if (!TARGET_SCHEDULE)
5297 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
5298
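  /* Seed the prefetch and cache size --param defaults from the tuning
     cost tables; values given explicitly on the command line are left
     alone.  */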
5299 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
5300 ix86_tune_cost->simultaneous_prefetches,
5301 opts->x_param_values,
5302 opts_set->x_param_values);
5303 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
5304 ix86_tune_cost->prefetch_block,
5305 opts->x_param_values,
5306 opts_set->x_param_values);
5307 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
5308 ix86_tune_cost->l1_cache_size,
5309 opts->x_param_values,
5310 opts_set->x_param_values);
5311 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
5312 ix86_tune_cost->l2_cache_size,
5313 opts->x_param_values,
5314 opts_set->x_param_values);
5315
 5316 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
5317 if (opts->x_flag_prefetch_loop_arrays < 0
5318 && HAVE_prefetch
5319 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
5320 && !opts->x_optimize_size
5321 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
5322 opts->x_flag_prefetch_loop_arrays = 1;
5323
5324 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
 5325 can be optimized to ap = __builtin_next_arg (0). */
5326 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
5327 targetm.expand_builtin_va_start = NULL;
5328
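  /* Pick the RTL generators for leave and the 64-bit TLS patterns that
     match the target word and pointer size.  */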
5329 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5330 {
5331 ix86_gen_leave = gen_leave_rex64;
5332 if (Pmode == DImode)
5333 {
5334 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
5335 ix86_gen_tls_local_dynamic_base_64
5336 = gen_tls_local_dynamic_base_64_di;
5337 }
5338 else
5339 {
5340 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
5341 ix86_gen_tls_local_dynamic_base_64
5342 = gen_tls_local_dynamic_base_64_si;
5343 }
5344 }
5345 else
5346 ix86_gen_leave = gen_leave;
5347
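  /* Select the pointer-mode dependent expanders used for stack
     manipulation, monitor and related patterns.  */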
5348 if (Pmode == DImode)
5349 {
5350 ix86_gen_add3 = gen_adddi3;
5351 ix86_gen_sub3 = gen_subdi3;
5352 ix86_gen_sub3_carry = gen_subdi3_carry;
5353 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
5354 ix86_gen_andsp = gen_anddi3;
5355 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
5356 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
5357 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
5358 ix86_gen_monitor = gen_sse3_monitor_di;
5359 ix86_gen_monitorx = gen_monitorx_di;
5360 }
5361 else
5362 {
5363 ix86_gen_add3 = gen_addsi3;
5364 ix86_gen_sub3 = gen_subsi3;
5365 ix86_gen_sub3_carry = gen_subsi3_carry;
5366 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
5367 ix86_gen_andsp = gen_andsi3;
5368 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
5369 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
5370 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
5371 ix86_gen_monitor = gen_sse3_monitor_si;
5372 ix86_gen_monitorx = gen_monitorx_si;
5373 }
5374
5375 #ifdef USE_IX86_CLD
5376 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
5377 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
5378 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
5379 #endif
5380
5381 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
5382 {
5383 if (opts->x_flag_fentry > 0)
5384 sorry ("-mfentry isn%'t supported for 32-bit in combination "
5385 "with -fpic");
5386 opts->x_flag_fentry = 0;
5387 }
5388 else if (TARGET_SEH)
5389 {
5390 if (opts->x_flag_fentry == 0)
5391 sorry ("-mno-fentry isn%'t compatible with SEH");
5392 opts->x_flag_fentry = 1;
5393 }
5394 else if (opts->x_flag_fentry < 0)
5395 {
5396 #if defined(PROFILE_BEFORE_PROLOGUE)
5397 opts->x_flag_fentry = 1;
5398 #else
5399 opts->x_flag_fentry = 0;
5400 #endif
5401 }
5402
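  /* Enable the vzeroupper insertion and STV passes by default, and split
     unaligned 256-bit AVX loads/stores on tunings where full-width
     unaligned accesses are not optimal, unless the user overrode these
     flags.  */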
5403 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
5404 opts->x_target_flags |= MASK_VZEROUPPER;
5405 if (!(opts_set->x_target_flags & MASK_STV))
5406 opts->x_target_flags |= MASK_STV;
5407 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
5408 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
5409 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
5410 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
5411 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
5412 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
5413 /* Enable 128-bit AVX instruction generation
5414 for the auto-vectorizer. */
5415 if (TARGET_AVX128_OPTIMAL
5416 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
5417 opts->x_target_flags |= MASK_PREFER_AVX128;
5418
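  /* Parse the comma-separated -mrecip= list; each entry names a
     reciprocal approximation to enable, or to disable when prefixed
     with '!'.  */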
5419 if (opts->x_ix86_recip_name)
5420 {
5421 char *p = ASTRDUP (opts->x_ix86_recip_name);
5422 char *q;
5423 unsigned int mask, i;
5424 bool invert;
5425
5426 while ((q = strtok (p, ",")) != NULL)
5427 {
5428 p = NULL;
5429 if (*q == '!')
5430 {
5431 invert = true;
5432 q++;
5433 }
5434 else
5435 invert = false;
5436
5437 if (!strcmp (q, "default"))
5438 mask = RECIP_MASK_ALL;
5439 else
5440 {
5441 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
5442 if (!strcmp (q, recip_options[i].string))
5443 {
5444 mask = recip_options[i].mask;
5445 break;
5446 }
5447
5448 if (i == ARRAY_SIZE (recip_options))
5449 {
5450 error ("unknown option for -mrecip=%s", q);
5451 invert = false;
5452 mask = RECIP_MASK_NONE;
5453 }
5454 }
5455
5456 opts->x_recip_mask_explicit |= mask;
5457 if (invert)
5458 opts->x_recip_mask &= ~mask;
5459 else
5460 opts->x_recip_mask |= mask;
5461 }
5462 }
5463
5464 if (TARGET_RECIP_P (opts->x_target_flags))
5465 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
5466 else if (opts_set->x_target_flags & MASK_RECIP)
5467 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
5468
5469 /* Default long double to 64-bit for 32-bit Bionic and to __float128
5470 for 64-bit Bionic. Also default long double to 64-bit for Intel
5471 MCU psABI. */
5472 if ((TARGET_HAS_BIONIC || TARGET_IAMCU)
5473 && !(opts_set->x_target_flags
5474 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
5475 opts->x_target_flags |= (TARGET_64BIT
5476 ? MASK_LONG_DOUBLE_128
5477 : MASK_LONG_DOUBLE_64);
5478
5479 /* Only one of them can be active. */
5480 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
5481 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
5482
 5483 /* Save the initial options in case the user specifies function-specific
 5484 options. */
5485 if (main_args_p)
5486 target_option_default_node = target_option_current_node
5487 = build_target_option_node (opts);
5488
5489 /* Handle stack protector */
5490 if (!opts_set->x_ix86_stack_protector_guard)
5491 opts->x_ix86_stack_protector_guard
5492 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
5493
5494 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
5495 if (opts->x_ix86_tune_memcpy_strategy)
5496 {
5497 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
5498 ix86_parse_stringop_strategy_string (str, false);
5499 free (str);
5500 }
5501
5502 if (opts->x_ix86_tune_memset_strategy)
5503 {
5504 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
5505 ix86_parse_stringop_strategy_string (str, true);
5506 free (str);
5507 }
5508 }
5509
5510 /* Implement the TARGET_OPTION_OVERRIDE hook. */
5511
5512 static void
5513 ix86_option_override (void)
5514 {
5515 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
5516 struct register_pass_info insert_vzeroupper_info
5517 = { pass_insert_vzeroupper, "reload",
5518 1, PASS_POS_INSERT_AFTER
5519 };
5520 opt_pass *pass_stv = make_pass_stv (g);
5521 struct register_pass_info stv_info
5522 = { pass_stv, "combine",
5523 1, PASS_POS_INSERT_AFTER
5524 };
5525
5526 ix86_option_override_internal (true, &global_options, &global_options_set);
5527
5528
5529 /* This needs to be done at start up. It's convenient to do it here. */
5530 register_pass (&insert_vzeroupper_info);
5531 register_pass (&stv_info);
5532 }
5533
5534 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
5535 static char *
5536 ix86_offload_options (void)
5537 {
5538 if (TARGET_LP64)
5539 return xstrdup ("-foffload-abi=lp64");
5540 return xstrdup ("-foffload-abi=ilp32");
5541 }
5542
5543 /* Update register usage after having seen the compiler flags. */
5544
5545 static void
5546 ix86_conditional_register_usage (void)
5547 {
5548 int i, c_mask;
5549
5550 /* For 32-bit targets, squash the REX registers. */
5551 if (! TARGET_64BIT)
5552 {
5553 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
5554 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5555 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
5556 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5557 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
5558 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5559 }
5560
5561 /* See the definition of CALL_USED_REGISTERS in i386.h. */
5562 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
5563
5564 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
5565
5566 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5567 {
5568 /* Set/reset conditionally defined registers from
5569 CALL_USED_REGISTERS initializer. */
5570 if (call_used_regs[i] > 1)
5571 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
5572
5573 /* Calculate registers of CLOBBERED_REGS register set
5574 as call used registers from GENERAL_REGS register set. */
5575 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
5576 && call_used_regs[i])
5577 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
5578 }
5579
5580 /* If MMX is disabled, squash the registers. */
5581 if (! TARGET_MMX)
5582 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5583 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
5584 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5585
5586 /* If SSE is disabled, squash the registers. */
5587 if (! TARGET_SSE)
5588 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5589 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
5590 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5591
5592 /* If the FPU is disabled, squash the registers. */
5593 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
5594 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5595 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
5596 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5597
5598 /* If AVX512F is disabled, squash the registers. */
5599 if (! TARGET_AVX512F)
5600 {
5601 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
5602 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5603
5604 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
5605 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5606 }
5607
5608 /* If MPX is disabled, squash the registers. */
5609 if (! TARGET_MPX)
5610 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
5611 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5612 }
5613
5614 \f
5615 /* Save the current options */
5616
5617 static void
5618 ix86_function_specific_save (struct cl_target_option *ptr,
5619 struct gcc_options *opts)
5620 {
5621 ptr->arch = ix86_arch;
5622 ptr->schedule = ix86_schedule;
5623 ptr->prefetch_sse = x86_prefetch_sse;
5624 ptr->tune = ix86_tune;
5625 ptr->branch_cost = ix86_branch_cost;
5626 ptr->tune_defaulted = ix86_tune_defaulted;
5627 ptr->arch_specified = ix86_arch_specified;
5628 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
5629 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
5630 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
5631 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
5632 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
5633 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
5634 ptr->x_ix86_abi = opts->x_ix86_abi;
5635 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
5636 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
5637 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
5638 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
5639 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
5640 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
5641 ptr->x_ix86_pmode = opts->x_ix86_pmode;
5642 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
5643 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
5644 ptr->x_ix86_regparm = opts->x_ix86_regparm;
5645 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
5646 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
5647 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
5648 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
5649 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
5650 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
5651 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
5652 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
5653 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
5654 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
5655
5656 /* The fields are char but the variables are not; make sure the
5657 values fit in the fields. */
5658 gcc_assert (ptr->arch == ix86_arch);
5659 gcc_assert (ptr->schedule == ix86_schedule);
5660 gcc_assert (ptr->tune == ix86_tune);
5661 gcc_assert (ptr->branch_cost == ix86_branch_cost);
5662 }
5663
5664 /* Restore the current options */
5665
5666 static void
5667 ix86_function_specific_restore (struct gcc_options *opts,
5668 struct cl_target_option *ptr)
5669 {
5670 enum processor_type old_tune = ix86_tune;
5671 enum processor_type old_arch = ix86_arch;
5672 unsigned int ix86_arch_mask;
5673 int i;
5674
5675 /* We don't change -fPIC. */
5676 opts->x_flag_pic = flag_pic;
5677
5678 ix86_arch = (enum processor_type) ptr->arch;
5679 ix86_schedule = (enum attr_cpu) ptr->schedule;
5680 ix86_tune = (enum processor_type) ptr->tune;
5681 x86_prefetch_sse = ptr->prefetch_sse;
5682 opts->x_ix86_branch_cost = ptr->branch_cost;
5683 ix86_tune_defaulted = ptr->tune_defaulted;
5684 ix86_arch_specified = ptr->arch_specified;
5685 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
5686 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
5687 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
5688 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
5689 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
5690 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
5691 opts->x_ix86_abi = ptr->x_ix86_abi;
5692 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
5693 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
5694 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
5695 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
5696 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
5697 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
5698 opts->x_ix86_pmode = ptr->x_ix86_pmode;
5699 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
5700 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
5701 opts->x_ix86_regparm = ptr->x_ix86_regparm;
5702 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
5703 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
5704 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
5705 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
5706 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
5707 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
5708 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
5709 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
5710 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
5711 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
5712 ix86_tune_cost = processor_target_table[ix86_tune].cost;
 5713 /* TODO: ix86_cost should be chosen at instruction or function granularity,
 5714 so that size_cost can be used for cold code even when not optimizing for size. */
5715 if (opts->x_optimize_size)
5716 ix86_cost = &ix86_size_cost;
5717 else
5718 ix86_cost = ix86_tune_cost;
5719
5720 /* Recreate the arch feature tests if the arch changed */
5721 if (old_arch != ix86_arch)
5722 {
5723 ix86_arch_mask = 1u << ix86_arch;
5724 for (i = 0; i < X86_ARCH_LAST; ++i)
5725 ix86_arch_features[i]
5726 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
5727 }
5728
5729 /* Recreate the tune optimization tests */
5730 if (old_tune != ix86_tune)
5731 set_ix86_tune_features (ix86_tune, false);
5732 }
5733
5734 /* Adjust target options after streaming them in. This is mainly about
5735 reconciling them with global options. */
5736
5737 static void
5738 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
5739 {
 5740 /* flag_pic is a global option, but ix86_cmodel is a target-saved option
 5741 partly computed from flag_pic.  If flag_pic is on, adjust x_ix86_cmodel
 5742 for PIC, or error out. */
5743 if (flag_pic)
5744 switch (ptr->x_ix86_cmodel)
5745 {
5746 case CM_SMALL:
5747 ptr->x_ix86_cmodel = CM_SMALL_PIC;
5748 break;
5749
5750 case CM_MEDIUM:
5751 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
5752 break;
5753
5754 case CM_LARGE:
5755 ptr->x_ix86_cmodel = CM_LARGE_PIC;
5756 break;
5757
5758 case CM_KERNEL:
5759 error ("code model %s does not support PIC mode", "kernel");
5760 break;
5761
5762 default:
5763 break;
5764 }
5765 else
5766 switch (ptr->x_ix86_cmodel)
5767 {
5768 case CM_SMALL_PIC:
5769 ptr->x_ix86_cmodel = CM_SMALL;
5770 break;
5771
5772 case CM_MEDIUM_PIC:
5773 ptr->x_ix86_cmodel = CM_MEDIUM;
5774 break;
5775
5776 case CM_LARGE_PIC:
5777 ptr->x_ix86_cmodel = CM_LARGE;
5778 break;
5779
5780 default:
5781 break;
5782 }
5783 }
5784
5785 /* Print the current options */
5786
5787 static void
5788 ix86_function_specific_print (FILE *file, int indent,
5789 struct cl_target_option *ptr)
5790 {
5791 char *target_string
5792 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
5793 NULL, NULL, ptr->x_ix86_fpmath, false);
5794
5795 gcc_assert (ptr->arch < PROCESSOR_max);
5796 fprintf (file, "%*sarch = %d (%s)\n",
5797 indent, "",
5798 ptr->arch, processor_target_table[ptr->arch].name);
5799
5800 gcc_assert (ptr->tune < PROCESSOR_max);
5801 fprintf (file, "%*stune = %d (%s)\n",
5802 indent, "",
5803 ptr->tune, processor_target_table[ptr->tune].name);
5804
5805 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
5806
5807 if (target_string)
5808 {
5809 fprintf (file, "%*s%s\n", indent, "", target_string);
5810 free (target_string);
5811 }
5812 }
5813
5814 \f
5815 /* Inner function to process the attribute((target(...))), take an argument and
5816 set the current options from the argument. If we have a list, recursively go
5817 over the list. */
5818
5819 static bool
5820 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
5821 struct gcc_options *opts,
5822 struct gcc_options *opts_set,
5823 struct gcc_options *enum_opts_set)
5824 {
5825 char *next_optstr;
5826 bool ret = true;
5827
5828 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
5829 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
5830 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
5831 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
5832 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
5833
5834 enum ix86_opt_type
5835 {
5836 ix86_opt_unknown,
5837 ix86_opt_yes,
5838 ix86_opt_no,
5839 ix86_opt_str,
5840 ix86_opt_enum,
5841 ix86_opt_isa
5842 };
5843
5844 static const struct
5845 {
5846 const char *string;
5847 size_t len;
5848 enum ix86_opt_type type;
5849 int opt;
5850 int mask;
5851 } attrs[] = {
5852 /* isa options */
5853 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
5854 IX86_ATTR_ISA ("abm", OPT_mabm),
5855 IX86_ATTR_ISA ("bmi", OPT_mbmi),
5856 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
5857 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
5858 IX86_ATTR_ISA ("tbm", OPT_mtbm),
5859 IX86_ATTR_ISA ("aes", OPT_maes),
5860 IX86_ATTR_ISA ("sha", OPT_msha),
5861 IX86_ATTR_ISA ("avx", OPT_mavx),
5862 IX86_ATTR_ISA ("avx2", OPT_mavx2),
5863 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
5864 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
5865 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
5866 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
5867 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
5868 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
5869 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
5870 IX86_ATTR_ISA ("mmx", OPT_mmmx),
5871 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
5872 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
5873 IX86_ATTR_ISA ("sse", OPT_msse),
5874 IX86_ATTR_ISA ("sse2", OPT_msse2),
5875 IX86_ATTR_ISA ("sse3", OPT_msse3),
5876 IX86_ATTR_ISA ("sse4", OPT_msse4),
5877 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
5878 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
5879 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
5880 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
5881 IX86_ATTR_ISA ("fma4", OPT_mfma4),
5882 IX86_ATTR_ISA ("fma", OPT_mfma),
5883 IX86_ATTR_ISA ("xop", OPT_mxop),
5884 IX86_ATTR_ISA ("lwp", OPT_mlwp),
5885 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
5886 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
5887 IX86_ATTR_ISA ("f16c", OPT_mf16c),
5888 IX86_ATTR_ISA ("rtm", OPT_mrtm),
5889 IX86_ATTR_ISA ("hle", OPT_mhle),
5890 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
5891 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
5892 IX86_ATTR_ISA ("adx", OPT_madx),
5893 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
5894 IX86_ATTR_ISA ("xsave", OPT_mxsave),
5895 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
5896 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
5897 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
5898 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
5899 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
5900 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
5901 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
5902 IX86_ATTR_ISA ("clwb", OPT_mclwb),
5903 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
5904 IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx),
5905
5906 /* enum options */
5907 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
5908
5909 /* string options */
5910 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
5911 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
5912
5913 /* flag options */
5914 IX86_ATTR_YES ("cld",
5915 OPT_mcld,
5916 MASK_CLD),
5917
5918 IX86_ATTR_NO ("fancy-math-387",
5919 OPT_mfancy_math_387,
5920 MASK_NO_FANCY_MATH_387),
5921
5922 IX86_ATTR_YES ("ieee-fp",
5923 OPT_mieee_fp,
5924 MASK_IEEE_FP),
5925
5926 IX86_ATTR_YES ("inline-all-stringops",
5927 OPT_minline_all_stringops,
5928 MASK_INLINE_ALL_STRINGOPS),
5929
5930 IX86_ATTR_YES ("inline-stringops-dynamically",
5931 OPT_minline_stringops_dynamically,
5932 MASK_INLINE_STRINGOPS_DYNAMICALLY),
5933
5934 IX86_ATTR_NO ("align-stringops",
5935 OPT_mno_align_stringops,
5936 MASK_NO_ALIGN_STRINGOPS),
5937
5938 IX86_ATTR_YES ("recip",
5939 OPT_mrecip,
5940 MASK_RECIP),
5941
5942 };
5943
5944 /* If this is a list, recurse to get the options. */
5945 if (TREE_CODE (args) == TREE_LIST)
5946 {
5947 bool ret = true;
5948
5949 for (; args; args = TREE_CHAIN (args))
5950 if (TREE_VALUE (args)
5951 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
5952 p_strings, opts, opts_set,
5953 enum_opts_set))
5954 ret = false;
5955
5956 return ret;
5957 }
5958
5959 else if (TREE_CODE (args) != STRING_CST)
5960 {
5961 error ("attribute %<target%> argument not a string");
5962 return false;
5963 }
5964
5965 /* Handle multiple arguments separated by commas. */
5966 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
5967
5968 while (next_optstr && *next_optstr != '\0')
5969 {
5970 char *p = next_optstr;
5971 char *orig_p = p;
5972 char *comma = strchr (next_optstr, ',');
5973 const char *opt_string;
5974 size_t len, opt_len;
5975 int opt;
5976 bool opt_set_p;
5977 char ch;
5978 unsigned i;
5979 enum ix86_opt_type type = ix86_opt_unknown;
5980 int mask = 0;
5981
5982 if (comma)
5983 {
5984 *comma = '\0';
5985 len = comma - next_optstr;
5986 next_optstr = comma + 1;
5987 }
5988 else
5989 {
5990 len = strlen (p);
5991 next_optstr = NULL;
5992 }
5993
5994 /* Recognize no-xxx. */
5995 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
5996 {
5997 opt_set_p = false;
5998 p += 3;
5999 len -= 3;
6000 }
6001 else
6002 opt_set_p = true;
6003
6004 /* Find the option. */
6005 ch = *p;
6006 opt = N_OPTS;
6007 for (i = 0; i < ARRAY_SIZE (attrs); i++)
6008 {
6009 type = attrs[i].type;
6010 opt_len = attrs[i].len;
6011 if (ch == attrs[i].string[0]
6012 && ((type != ix86_opt_str && type != ix86_opt_enum)
6013 ? len == opt_len
6014 : len > opt_len)
6015 && memcmp (p, attrs[i].string, opt_len) == 0)
6016 {
6017 opt = attrs[i].opt;
6018 mask = attrs[i].mask;
6019 opt_string = attrs[i].string;
6020 break;
6021 }
6022 }
6023
6024 /* Process the option. */
6025 if (opt == N_OPTS)
6026 {
6027 error ("attribute(target(\"%s\")) is unknown", orig_p);
6028 ret = false;
6029 }
6030
6031 else if (type == ix86_opt_isa)
6032 {
6033 struct cl_decoded_option decoded;
6034
6035 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
6036 ix86_handle_option (opts, opts_set,
6037 &decoded, input_location);
6038 }
6039
6040 else if (type == ix86_opt_yes || type == ix86_opt_no)
6041 {
6042 if (type == ix86_opt_no)
6043 opt_set_p = !opt_set_p;
6044
6045 if (opt_set_p)
6046 opts->x_target_flags |= mask;
6047 else
6048 opts->x_target_flags &= ~mask;
6049 }
6050
6051 else if (type == ix86_opt_str)
6052 {
6053 if (p_strings[opt])
6054 {
6055 error ("option(\"%s\") was already specified", opt_string);
6056 ret = false;
6057 }
6058 else
6059 p_strings[opt] = xstrdup (p + opt_len);
6060 }
6061
6062 else if (type == ix86_opt_enum)
6063 {
6064 bool arg_ok;
6065 int value;
6066
6067 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
6068 if (arg_ok)
6069 set_option (opts, enum_opts_set, opt, value,
6070 p + opt_len, DK_UNSPECIFIED, input_location,
6071 global_dc);
6072 else
6073 {
6074 error ("attribute(target(\"%s\")) is unknown", orig_p);
6075 ret = false;
6076 }
6077 }
6078
6079 else
6080 gcc_unreachable ();
6081 }
6082
6083 return ret;
6084 }
6085
6086 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
6087
6088 tree
6089 ix86_valid_target_attribute_tree (tree args,
6090 struct gcc_options *opts,
6091 struct gcc_options *opts_set)
6092 {
6093 const char *orig_arch_string = opts->x_ix86_arch_string;
6094 const char *orig_tune_string = opts->x_ix86_tune_string;
6095 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
6096 int orig_tune_defaulted = ix86_tune_defaulted;
6097 int orig_arch_specified = ix86_arch_specified;
6098 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
6099 tree t = NULL_TREE;
6100 int i;
6101 struct cl_target_option *def
6102 = TREE_TARGET_OPTION (target_option_default_node);
6103 struct gcc_options enum_opts_set;
6104
6105 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
6106
6107 /* Process each of the options on the chain. */
6108 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
6109 opts_set, &enum_opts_set))
6110 return error_mark_node;
6111
6112 /* If the changed options are different from the default, rerun
6113 ix86_option_override_internal, and then save the options away.
6114 The string options are attribute options, and will be undone
6115 when we copy the save structure. */
6116 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
6117 || opts->x_target_flags != def->x_target_flags
6118 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
6119 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
6120 || enum_opts_set.x_ix86_fpmath)
6121 {
6122 /* If we are using the default tune= or arch=, undo the string assigned,
6123 and use the default. */
6124 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
6125 {
6126 opts->x_ix86_arch_string
6127 = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
6128
6129 /* If arch= is set, clear all bits in x_ix86_isa_flags,
6130 except for ISA_64BIT, ABI_64, ABI_X32, and CODE16. */
6131 opts->x_ix86_isa_flags &= (OPTION_MASK_ISA_64BIT
6132 | OPTION_MASK_ABI_64
6133 | OPTION_MASK_ABI_X32
6134 | OPTION_MASK_CODE16);
6135
6136 }
6137 else if (!orig_arch_specified)
6138 opts->x_ix86_arch_string = NULL;
6139
6140 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
6141 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
6142 else if (orig_tune_defaulted)
6143 opts->x_ix86_tune_string = NULL;
6144
6145 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
6146 if (enum_opts_set.x_ix86_fpmath)
6147 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
6148 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
6149 && TARGET_SSE_P (opts->x_ix86_isa_flags))
6150 {
6151 if (TARGET_80387_P (opts->x_target_flags))
6152 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE
6153 | FPMATH_387);
6154 else
6155 opts->x_ix86_fpmath = (enum fpmath_unit) FPMATH_SSE;
6156 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
6157 }
6158
6159 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
6160 ix86_option_override_internal (false, opts, opts_set);
6161
6162 /* Add any builtin functions with the new isa if any. */
6163 ix86_add_new_builtins (opts->x_ix86_isa_flags);
6164
6165 /* Save the current options unless we are validating options for
6166 #pragma. */
6167 t = build_target_option_node (opts);
6168
6169 opts->x_ix86_arch_string = orig_arch_string;
6170 opts->x_ix86_tune_string = orig_tune_string;
6171 opts_set->x_ix86_fpmath = orig_fpmath_set;
6172
6173 /* Free up memory allocated to hold the strings */
6174 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
6175 free (option_strings[i]);
6176 }
6177
6178 return t;
6179 }
6180
6181 /* Hook to validate attribute((target("string"))). */
6182
6183 static bool
6184 ix86_valid_target_attribute_p (tree fndecl,
6185 tree ARG_UNUSED (name),
6186 tree args,
6187 int ARG_UNUSED (flags))
6188 {
6189 struct gcc_options func_options;
6190 tree new_target, new_optimize;
6191 bool ret = true;
6192
6193 /* attribute((target("default"))) does nothing, beyond
6194 affecting multi-versioning. */
6195 if (TREE_VALUE (args)
6196 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
6197 && TREE_CHAIN (args) == NULL_TREE
6198 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
6199 return true;
6200
6201 tree old_optimize = build_optimization_node (&global_options);
6202
6203 /* Get the optimization options of the current function. */
6204 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
6205
6206 if (!func_optimize)
6207 func_optimize = old_optimize;
6208
6209 /* Init func_options. */
6210 memset (&func_options, 0, sizeof (func_options));
6211 init_options_struct (&func_options, NULL);
6212 lang_hooks.init_options_struct (&func_options);
6213
6214 cl_optimization_restore (&func_options,
6215 TREE_OPTIMIZATION (func_optimize));
6216
6217 /* Initialize func_options to the default before its target options can
6218 be set. */
6219 cl_target_option_restore (&func_options,
6220 TREE_TARGET_OPTION (target_option_default_node));
6221
6222 new_target = ix86_valid_target_attribute_tree (args, &func_options,
6223 &global_options_set);
6224
6225 new_optimize = build_optimization_node (&func_options);
6226
6227 if (new_target == error_mark_node)
6228 ret = false;
6229
6230 else if (fndecl && new_target)
6231 {
6232 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
6233
6234 if (old_optimize != new_optimize)
6235 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
6236 }
6237
6238 return ret;
6239 }
6240
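/* Illustrative usage sketch (not part of GCC itself; the function names
   are made up): user code that exercises the hook above.  The option
   strings accepted broadly mirror the -m command-line flags, plus
   arch=, tune= and fpmath=.

     __attribute__((target ("sse4.2")))
     int checksum (const char *buf, int len);        // as if -msse4.2

     __attribute__((target ("arch=haswell,fpmath=sse")))
     void kernel (float *dst, const float *src, int n);

     __attribute__((target ("default")))
     int plain (void);   // no effect except for function multiversioning
*/
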
6241 \f
6242 /* Hook to determine if one function can safely inline another. */
6243
6244 static bool
6245 ix86_can_inline_p (tree caller, tree callee)
6246 {
6247 bool ret = false;
6248 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
6249 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
6250
6251 /* If callee has no option attributes, then it is ok to inline. */
6252 if (!callee_tree)
6253 ret = true;
6254
6255 /* If caller has no option attributes, but callee does then it is not ok to
6256 inline. */
6257 else if (!caller_tree)
6258 ret = false;
6259
6260 else
6261 {
6262 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
6263 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
6264
6265       /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4
6266          function can inline an SSE2 function but an SSE2 function can't inline
6267          an SSE4 function.  */
6268 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
6269 != callee_opts->x_ix86_isa_flags)
6270 ret = false;
6271
6272 /* See if we have the same non-isa options. */
6273 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
6274 ret = false;
6275
6276 /* See if arch, tune, etc. are the same. */
6277 else if (caller_opts->arch != callee_opts->arch)
6278 ret = false;
6279
6280 else if (caller_opts->tune != callee_opts->tune)
6281 ret = false;
6282
6283 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
6284 ret = false;
6285
6286 else if (caller_opts->branch_cost != callee_opts->branch_cost)
6287 ret = false;
6288
6289 else
6290 ret = true;
6291 }
6292
6293 return ret;
6294 }
6295
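/* Illustrative sketch (not part of GCC itself; function names are made
   up): given the subset rule above, the SSE4.2 caller below may inline
   the plain callee, while a plain caller may not inline an SSE4.2
   callee, since the callee's ISA flags would not be a subset of the
   caller's.

     static inline int plain_add (int a, int b) { return a + b; }

     __attribute__((target ("sse4.2")))
     int fast_sum (int a, int b)
     {
       return plain_add (a, b);   // OK: caller's ISA is a superset
     }
*/
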
6296 \f
6297 /* Remember the last target of ix86_set_current_function. */
6298 static GTY(()) tree ix86_previous_fndecl;
6299
6300 /* Set the target globals to the default (or current #pragma GCC target
6301    if active).  Invalidate the ix86_previous_fndecl cache.  */
6302
6303 void
6304 ix86_reset_previous_fndecl (void)
6305 {
6306 tree new_tree = target_option_current_node;
6307 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6308 if (TREE_TARGET_GLOBALS (new_tree))
6309 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
6310 else if (new_tree == target_option_default_node)
6311 restore_target_globals (&default_target_globals);
6312 else
6313 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
6314 ix86_previous_fndecl = NULL_TREE;
6315 }
6316
6317 /* Establish appropriate back-end context for processing the function
6318 FNDECL. The argument might be NULL to indicate processing at top
6319 level, outside of any function scope. */
6320 static void
6321 ix86_set_current_function (tree fndecl)
6322 {
6323 /* Only change the context if the function changes. This hook is called
6324 several times in the course of compiling a function, and we don't want to
6325 slow things down too much or call target_reinit when it isn't safe. */
6326 if (fndecl == ix86_previous_fndecl)
6327 return;
6328
6329 tree old_tree;
6330 if (ix86_previous_fndecl == NULL_TREE)
6331 old_tree = target_option_current_node;
6332 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
6333 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
6334 else
6335 old_tree = target_option_default_node;
6336
6337 if (fndecl == NULL_TREE)
6338 {
6339 if (old_tree != target_option_current_node)
6340 ix86_reset_previous_fndecl ();
6341 return;
6342 }
6343
6344 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
6345 if (new_tree == NULL_TREE)
6346 new_tree = target_option_default_node;
6347
6348 if (old_tree != new_tree)
6349 {
6350 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6351 if (TREE_TARGET_GLOBALS (new_tree))
6352 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
6353 else if (new_tree == target_option_default_node)
6354 restore_target_globals (&default_target_globals);
6355 else
6356 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
6357 }
6358 ix86_previous_fndecl = fndecl;
6359
6360   /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
6361      Avoid an expensive re-initialization of init_regs each time we switch
6362      function context.  */
6363 if (TARGET_64BIT
6364 && (call_used_regs[SI_REG]
6365 == (cfun->machine->call_abi == MS_ABI)))
6366 reinit_regs ();
6367 }
6368
6369 \f
6370 /* Return true if this goes in large data/bss. */
6371
6372 static bool
6373 ix86_in_large_data_p (tree exp)
6374 {
6375 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
6376 return false;
6377
6378 /* Functions are never large data. */
6379 if (TREE_CODE (exp) == FUNCTION_DECL)
6380 return false;
6381
6382 /* Automatic variables are never large data. */
6383 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
6384 return false;
6385
6386 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
6387 {
6388 const char *section = DECL_SECTION_NAME (exp);
6389 if (strcmp (section, ".ldata") == 0
6390 || strcmp (section, ".lbss") == 0)
6391 return true;
6392 return false;
6393 }
6394 else
6395 {
6396 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
6397
6398       /* If this is an incomplete type with size 0, then we can't put it
6399          in data because it might be too big when completed.  Also,
6400          int_size_in_bytes returns -1 if the size can vary or is larger than
6401          an integer, in which case it is also safer to assume that it goes in
6402          large data.  */
6403 if (size <= 0 || size > ix86_section_threshold)
6404 return true;
6405 }
6406
6407 return false;
6408 }
6409
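/* Illustrative sketch (not part of GCC itself): with -mcmodel=medium
   and the default -mlarge-data-threshold, an object such as

     static char big_buffer[1 << 20];   // 1 MiB, above the threshold

   satisfies the test above and is placed in .lbss (.ldata when
   initialized), while objects at or below the threshold stay in the
   ordinary .bss/.data sections.  */
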
6410 /* Switch to the appropriate section for output of DECL.
6411 DECL is either a `VAR_DECL' node or a constant of some sort.
6412 RELOC indicates whether forming the initial value of DECL requires
6413 link-time relocations. */
6414
6415 ATTRIBUTE_UNUSED static section *
6416 x86_64_elf_select_section (tree decl, int reloc,
6417 unsigned HOST_WIDE_INT align)
6418 {
6419 if (ix86_in_large_data_p (decl))
6420 {
6421 const char *sname = NULL;
6422 unsigned int flags = SECTION_WRITE;
6423 switch (categorize_decl_for_section (decl, reloc))
6424 {
6425 case SECCAT_DATA:
6426 sname = ".ldata";
6427 break;
6428 case SECCAT_DATA_REL:
6429 sname = ".ldata.rel";
6430 break;
6431 case SECCAT_DATA_REL_LOCAL:
6432 sname = ".ldata.rel.local";
6433 break;
6434 case SECCAT_DATA_REL_RO:
6435 sname = ".ldata.rel.ro";
6436 break;
6437 case SECCAT_DATA_REL_RO_LOCAL:
6438 sname = ".ldata.rel.ro.local";
6439 break;
6440 case SECCAT_BSS:
6441 sname = ".lbss";
6442 flags |= SECTION_BSS;
6443 break;
6444 case SECCAT_RODATA:
6445 case SECCAT_RODATA_MERGE_STR:
6446 case SECCAT_RODATA_MERGE_STR_INIT:
6447 case SECCAT_RODATA_MERGE_CONST:
6448 sname = ".lrodata";
6449 flags = 0;
6450 break;
6451 case SECCAT_SRODATA:
6452 case SECCAT_SDATA:
6453 case SECCAT_SBSS:
6454 gcc_unreachable ();
6455 case SECCAT_TEXT:
6456 case SECCAT_TDATA:
6457 case SECCAT_TBSS:
6458 	  /* We don't split these for the medium model.  Place them into
6459 	     default sections and hope for the best.  */
6460 break;
6461 }
6462 if (sname)
6463 {
6464 /* We might get called with string constants, but get_named_section
6465 doesn't like them as they are not DECLs. Also, we need to set
6466 flags in that case. */
6467 if (!DECL_P (decl))
6468 return get_section (sname, flags, NULL);
6469 return get_named_section (decl, sname, reloc);
6470 }
6471 }
6472 return default_elf_select_section (decl, reloc, align);
6473 }
6474
6475 /* Select a set of attributes for section NAME based on the properties
6476 of DECL and whether or not RELOC indicates that DECL's initializer
6477 might contain runtime relocations. */
6478
6479 static unsigned int ATTRIBUTE_UNUSED
6480 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
6481 {
6482 unsigned int flags = default_section_type_flags (decl, name, reloc);
6483
6484 if (decl == NULL_TREE
6485 && (strcmp (name, ".ldata.rel.ro") == 0
6486 || strcmp (name, ".ldata.rel.ro.local") == 0))
6487 flags |= SECTION_RELRO;
6488
6489 if (strcmp (name, ".lbss") == 0
6490       || strncmp (name, ".lbss.", sizeof (".lbss.") - 1) == 0
6491       || strncmp (name, ".gnu.linkonce.lb.", sizeof (".gnu.linkonce.lb.") - 1) == 0)
6492 flags |= SECTION_BSS;
6493
6494 return flags;
6495 }
6496
6497 /* Build up a unique section name, expressed as a
6498 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
6499 RELOC indicates whether the initial value of EXP requires
6500 link-time relocations. */
6501
6502 static void ATTRIBUTE_UNUSED
6503 x86_64_elf_unique_section (tree decl, int reloc)
6504 {
6505 if (ix86_in_large_data_p (decl))
6506 {
6507 const char *prefix = NULL;
6508 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
6509 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
6510
6511 switch (categorize_decl_for_section (decl, reloc))
6512 {
6513 case SECCAT_DATA:
6514 case SECCAT_DATA_REL:
6515 case SECCAT_DATA_REL_LOCAL:
6516 case SECCAT_DATA_REL_RO:
6517 case SECCAT_DATA_REL_RO_LOCAL:
6518 prefix = one_only ? ".ld" : ".ldata";
6519 break;
6520 case SECCAT_BSS:
6521 prefix = one_only ? ".lb" : ".lbss";
6522 break;
6523 case SECCAT_RODATA:
6524 case SECCAT_RODATA_MERGE_STR:
6525 case SECCAT_RODATA_MERGE_STR_INIT:
6526 case SECCAT_RODATA_MERGE_CONST:
6527 prefix = one_only ? ".lr" : ".lrodata";
6528 break;
6529 case SECCAT_SRODATA:
6530 case SECCAT_SDATA:
6531 case SECCAT_SBSS:
6532 gcc_unreachable ();
6533 case SECCAT_TEXT:
6534 case SECCAT_TDATA:
6535 case SECCAT_TBSS:
6536 	  /* We don't split these for the medium model.  Place them into
6537 	     default sections and hope for the best.  */
6538 break;
6539 }
6540 if (prefix)
6541 {
6542 const char *name, *linkonce;
6543 char *string;
6544
6545 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
6546 name = targetm.strip_name_encoding (name);
6547
6548 /* If we're using one_only, then there needs to be a .gnu.linkonce
6549 prefix to the section name. */
6550 linkonce = one_only ? ".gnu.linkonce" : "";
6551
6552 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
6553
6554 set_decl_section_name (decl, string);
6555 return;
6556 }
6557 }
6558 default_unique_section (decl, reloc);
6559 }
6560
6561 #ifdef COMMON_ASM_OP
6562 /* This says how to output assembler code to declare an
6563 uninitialized external linkage data object.
6564
6565    For the medium model on x86-64 we need to use the .largecomm directive
6566    for large objects.  */
6567 void
6568 x86_elf_aligned_common (FILE *file,
6569 const char *name, unsigned HOST_WIDE_INT size,
6570 int align)
6571 {
6572 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6573 && size > (unsigned int)ix86_section_threshold)
6574 fputs ("\t.largecomm\t", file);
6575 else
6576 fputs (COMMON_ASM_OP, file);
6577 assemble_name (file, name);
6578 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
6579 size, align / BITS_PER_UNIT);
6580 }
6581 #endif
6582
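/* Illustrative sketch (not part of GCC itself): for the medium code
   model the routine above emits something along the lines of

       .largecomm  big_buffer,1048576,32

   for a common symbol above the section threshold, whereas a small
   common symbol is still announced with the plain COMMON_ASM_OP
   (typically ".comm").  */
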
6583 /* Utility function for targets to use in implementing
6584 ASM_OUTPUT_ALIGNED_BSS. */
6585
6586 void
6587 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
6588 unsigned HOST_WIDE_INT size, int align)
6589 {
6590 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6591 && size > (unsigned int)ix86_section_threshold)
6592 switch_to_section (get_named_section (decl, ".lbss", 0));
6593 else
6594 switch_to_section (bss_section);
6595 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
6596 #ifdef ASM_DECLARE_OBJECT_NAME
6597 last_assemble_variable_decl = decl;
6598 ASM_DECLARE_OBJECT_NAME (file, name, decl);
6599 #else
6600   /* The standard thing is to just output a label for the object.  */
6601 ASM_OUTPUT_LABEL (file, name);
6602 #endif /* ASM_DECLARE_OBJECT_NAME */
6603 ASM_OUTPUT_SKIP (file, size ? size : 1);
6604 }
6605 \f
6606 /* Decide whether we must probe the stack before any space allocation
6607 on this target. It's essentially TARGET_STACK_PROBE except when
6608 -fstack-check causes the stack to be already probed differently. */
6609
6610 bool
6611 ix86_target_stack_probe (void)
6612 {
6613 /* Do not probe the stack twice if static stack checking is enabled. */
6614 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
6615 return false;
6616
6617 return TARGET_STACK_PROBE;
6618 }
6619 \f
6620 /* Decide whether we can make a sibling call to a function. DECL is the
6621 declaration of the function being targeted by the call and EXP is the
6622 CALL_EXPR representing the call. */
6623
6624 static bool
6625 ix86_function_ok_for_sibcall (tree decl, tree exp)
6626 {
6627 tree type, decl_or_type;
6628 rtx a, b;
6629
6630 /* If we are generating position-independent code, we cannot sibcall
6631 optimize direct calls to global functions, as the PLT requires
6632 %ebx be live. (Darwin does not have a PLT.) */
6633 if (!TARGET_MACHO
6634 && !TARGET_64BIT
6635 && flag_pic
6636 && flag_plt
6637 && decl && !targetm.binds_local_p (decl))
6638 return false;
6639
6640 /* If we need to align the outgoing stack, then sibcalling would
6641 unalign the stack, which may break the called function. */
6642 if (ix86_minimum_incoming_stack_boundary (true)
6643 < PREFERRED_STACK_BOUNDARY)
6644 return false;
6645
6646 if (decl)
6647 {
6648 decl_or_type = decl;
6649 type = TREE_TYPE (decl);
6650 }
6651 else
6652 {
6653 /* We're looking at the CALL_EXPR, we need the type of the function. */
6654 type = CALL_EXPR_FN (exp); /* pointer expression */
6655 type = TREE_TYPE (type); /* pointer type */
6656 type = TREE_TYPE (type); /* function type */
6657 decl_or_type = type;
6658 }
6659
6660   /* Check that the return value locations are the same.  For example,
6661      if we are returning floats on the 80387 register stack, we cannot
6662 make a sibcall from a function that doesn't return a float to a
6663 function that does or, conversely, from a function that does return
6664 a float to a function that doesn't; the necessary stack adjustment
6665 would not be executed. This is also the place we notice
6666 differences in the return value ABI. Note that it is ok for one
6667 of the functions to have void return type as long as the return
6668 value of the other is passed in a register. */
6669 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
6670 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6671 cfun->decl, false);
6672 if (STACK_REG_P (a) || STACK_REG_P (b))
6673 {
6674 if (!rtx_equal_p (a, b))
6675 return false;
6676 }
6677 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6678 ;
6679 else if (!rtx_equal_p (a, b))
6680 return false;
6681
6682 if (TARGET_64BIT)
6683 {
6684 /* The SYSV ABI has more call-clobbered registers;
6685 disallow sibcalls from MS to SYSV. */
6686 if (cfun->machine->call_abi == MS_ABI
6687 && ix86_function_type_abi (type) == SYSV_ABI)
6688 return false;
6689 }
6690 else
6691 {
6692 /* If this call is indirect, we'll need to be able to use a
6693 call-clobbered register for the address of the target function.
6694 	 Make sure that not all such registers are used for passing
6695 	 parameters.  Note that DLLIMPORT functions are indirect.  */
6696 if (!decl
6697 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
6698 {
6699 /* Check if regparm >= 3 since arg_reg_available is set to
6700 false if regparm == 0. If regparm is 1 or 2, there is
6701 always a call-clobbered register available.
6702
6703 ??? The symbol indirect call doesn't need a call-clobbered
6704 register. But we don't know if this is a symbol indirect
6705 call or not here. */
6706 if (ix86_function_regparm (type, NULL) >= 3
6707 && !cfun->machine->arg_reg_available)
6708 return false;
6709 }
6710 }
6711
6712 /* Otherwise okay. That also includes certain types of indirect calls. */
6713 return true;
6714 }
6715
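/* Illustrative sketch (not part of GCC itself; names are made up): a
   tail call that the test above typically accepts, because the return
   value locations match and no PLT or stack-alignment restriction
   applies to a local callee.

     static double square (double x) { return x * x; }

     double apply (double x)
     {
       return square (x);   // may become a sibcall: jmp instead of call
     }
*/
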
6716 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
6717 and "sseregparm" calling convention attributes;
6718 arguments as in struct attribute_spec.handler. */
6719
6720 static tree
6721 ix86_handle_cconv_attribute (tree *node, tree name,
6722 tree args,
6723 int,
6724 bool *no_add_attrs)
6725 {
6726 if (TREE_CODE (*node) != FUNCTION_TYPE
6727 && TREE_CODE (*node) != METHOD_TYPE
6728 && TREE_CODE (*node) != FIELD_DECL
6729 && TREE_CODE (*node) != TYPE_DECL)
6730 {
6731 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6732 name);
6733 *no_add_attrs = true;
6734 return NULL_TREE;
6735 }
6736
6737   /* Can combine regparm with all attributes but fastcall and thiscall.  */
6738 if (is_attribute_p ("regparm", name))
6739 {
6740 tree cst;
6741
6742 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6743 {
6744 error ("fastcall and regparm attributes are not compatible");
6745 }
6746
6747 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6748 {
6749 error ("regparam and thiscall attributes are not compatible");
6750 }
6751
6752 cst = TREE_VALUE (args);
6753 if (TREE_CODE (cst) != INTEGER_CST)
6754 {
6755 warning (OPT_Wattributes,
6756 "%qE attribute requires an integer constant argument",
6757 name);
6758 *no_add_attrs = true;
6759 }
6760 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
6761 {
6762 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
6763 name, REGPARM_MAX);
6764 *no_add_attrs = true;
6765 }
6766
6767 return NULL_TREE;
6768 }
6769
6770 if (TARGET_64BIT)
6771 {
6772 /* Do not warn when emulating the MS ABI. */
6773 if ((TREE_CODE (*node) != FUNCTION_TYPE
6774 && TREE_CODE (*node) != METHOD_TYPE)
6775 || ix86_function_type_abi (*node) != MS_ABI)
6776 warning (OPT_Wattributes, "%qE attribute ignored",
6777 name);
6778 *no_add_attrs = true;
6779 return NULL_TREE;
6780 }
6781
6782 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
6783 if (is_attribute_p ("fastcall", name))
6784 {
6785 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6786 {
6787 error ("fastcall and cdecl attributes are not compatible");
6788 }
6789 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6790 {
6791 error ("fastcall and stdcall attributes are not compatible");
6792 }
6793 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
6794 {
6795 error ("fastcall and regparm attributes are not compatible");
6796 }
6797 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6798 {
6799 error ("fastcall and thiscall attributes are not compatible");
6800 }
6801 }
6802
6803 /* Can combine stdcall with fastcall (redundant), regparm and
6804 sseregparm. */
6805 else if (is_attribute_p ("stdcall", name))
6806 {
6807 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6808 {
6809 error ("stdcall and cdecl attributes are not compatible");
6810 }
6811 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6812 {
6813 error ("stdcall and fastcall attributes are not compatible");
6814 }
6815 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6816 {
6817 error ("stdcall and thiscall attributes are not compatible");
6818 }
6819 }
6820
6821 /* Can combine cdecl with regparm and sseregparm. */
6822 else if (is_attribute_p ("cdecl", name))
6823 {
6824 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6825 {
6826 error ("stdcall and cdecl attributes are not compatible");
6827 }
6828 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6829 {
6830 error ("fastcall and cdecl attributes are not compatible");
6831 }
6832 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6833 {
6834 error ("cdecl and thiscall attributes are not compatible");
6835 }
6836 }
6837 else if (is_attribute_p ("thiscall", name))
6838 {
6839 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
6840 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
6841 name);
6842 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6843 {
6844 error ("stdcall and thiscall attributes are not compatible");
6845 }
6846 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6847 {
6848 error ("fastcall and thiscall attributes are not compatible");
6849 }
6850 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6851 {
6852 error ("cdecl and thiscall attributes are not compatible");
6853 }
6854 }
6855
6856 /* Can combine sseregparm with all attributes. */
6857
6858 return NULL_TREE;
6859 }
6860
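/* Illustrative sketch (not part of GCC itself) of the 32-bit calling
   convention attributes handled above:

     int __attribute__((regparm (3))) f3 (int a, int b, int c);  // eax/edx/ecx
     int __attribute__((fastcall))    ff (int a, int b);         // ecx/edx
     int __attribute__((stdcall))     fs (int a, int b);         // callee pops args

   Mixing incompatible ones is rejected with the errors above, e.g.
   __attribute__((fastcall, regparm (2))).  On 64-bit targets these
   attributes are ignored with a warning, except when the MS ABI is in
   use.  */
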
6861 /* The transactional memory builtins are implicitly regparm or fastcall
6862 depending on the ABI. Override the generic do-nothing attribute that
6863 these builtins were declared with, and replace it with one of the two
6864 attributes that we expect elsewhere. */
6865
6866 static tree
6867 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
6868 int flags, bool *no_add_attrs)
6869 {
6870 tree alt;
6871
6872 /* In no case do we want to add the placeholder attribute. */
6873 *no_add_attrs = true;
6874
6875 /* The 64-bit ABI is unchanged for transactional memory. */
6876 if (TARGET_64BIT)
6877 return NULL_TREE;
6878
6879   /* ??? Is there a better way to validate 32-bit Windows?  We have
6880      cfun->machine->call_abi, but that seems to be set only for 64-bit.  */
6881 if (CHECK_STACK_LIMIT > 0)
6882 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
6883 else
6884 {
6885 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
6886 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
6887 }
6888 decl_attributes (node, alt, flags);
6889
6890 return NULL_TREE;
6891 }
6892
6893 /* This function determines from TYPE the calling-convention. */
6894
6895 unsigned int
6896 ix86_get_callcvt (const_tree type)
6897 {
6898 unsigned int ret = 0;
6899 bool is_stdarg;
6900 tree attrs;
6901
6902 if (TARGET_64BIT)
6903 return IX86_CALLCVT_CDECL;
6904
6905 attrs = TYPE_ATTRIBUTES (type);
6906 if (attrs != NULL_TREE)
6907 {
6908 if (lookup_attribute ("cdecl", attrs))
6909 ret |= IX86_CALLCVT_CDECL;
6910 else if (lookup_attribute ("stdcall", attrs))
6911 ret |= IX86_CALLCVT_STDCALL;
6912 else if (lookup_attribute ("fastcall", attrs))
6913 ret |= IX86_CALLCVT_FASTCALL;
6914 else if (lookup_attribute ("thiscall", attrs))
6915 ret |= IX86_CALLCVT_THISCALL;
6916
6917       /* Regparm isn't allowed for thiscall and fastcall.  */
6918 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
6919 {
6920 if (lookup_attribute ("regparm", attrs))
6921 ret |= IX86_CALLCVT_REGPARM;
6922 if (lookup_attribute ("sseregparm", attrs))
6923 ret |= IX86_CALLCVT_SSEREGPARM;
6924 }
6925
6926 if (IX86_BASE_CALLCVT(ret) != 0)
6927 return ret;
6928 }
6929
6930 is_stdarg = stdarg_p (type);
6931 if (TARGET_RTD && !is_stdarg)
6932 return IX86_CALLCVT_STDCALL | ret;
6933
6934 if (ret != 0
6935 || is_stdarg
6936 || TREE_CODE (type) != METHOD_TYPE
6937 || ix86_function_type_abi (type) != MS_ABI)
6938 return IX86_CALLCVT_CDECL | ret;
6939
6940 return IX86_CALLCVT_THISCALL;
6941 }
6942
6943 /* Return 0 if the attributes for two types are incompatible, 1 if they
6944 are compatible, and 2 if they are nearly compatible (which causes a
6945 warning to be generated). */
6946
6947 static int
6948 ix86_comp_type_attributes (const_tree type1, const_tree type2)
6949 {
6950 unsigned int ccvt1, ccvt2;
6951
6952 if (TREE_CODE (type1) != FUNCTION_TYPE
6953 && TREE_CODE (type1) != METHOD_TYPE)
6954 return 1;
6955
6956 ccvt1 = ix86_get_callcvt (type1);
6957 ccvt2 = ix86_get_callcvt (type2);
6958 if (ccvt1 != ccvt2)
6959 return 0;
6960 if (ix86_function_regparm (type1, NULL)
6961 != ix86_function_regparm (type2, NULL))
6962 return 0;
6963
6964 return 1;
6965 }
6966 \f
6967 /* Return the regparm value for a function with the indicated TYPE and DECL.
6968 DECL may be NULL when calling function indirectly
6969 or considering a libcall. */
6970
6971 static int
6972 ix86_function_regparm (const_tree type, const_tree decl)
6973 {
6974 tree attr;
6975 int regparm;
6976 unsigned int ccvt;
6977
6978 if (TARGET_64BIT)
6979 return (ix86_function_type_abi (type) == SYSV_ABI
6980 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
6981 ccvt = ix86_get_callcvt (type);
6982 regparm = ix86_regparm;
6983
6984 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
6985 {
6986 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
6987 if (attr)
6988 {
6989 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
6990 return regparm;
6991 }
6992 }
6993 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6994 return 2;
6995 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6996 return 1;
6997
6998 /* Use register calling convention for local functions when possible. */
6999 if (decl
7000 && TREE_CODE (decl) == FUNCTION_DECL)
7001 {
7002 cgraph_node *target = cgraph_node::get (decl);
7003 if (target)
7004 target = target->function_symbol ();
7005
7006       /* Caller and callee must agree on the calling convention, so
7007          checking just the global `optimize' here would mean that with
7008          __attribute__((optimize (...))) the caller could use the regparm
7009          convention and the callee not, or vice versa.  Instead look at
7010          whether the callee is optimized or not.  */
7011 if (target && opt_for_fn (target->decl, optimize)
7012 && !(profile_flag && !flag_fentry))
7013 {
7014 cgraph_local_info *i = &target->local;
7015 if (i && i->local && i->can_change_signature)
7016 {
7017 int local_regparm, globals = 0, regno;
7018
7019 /* Make sure no regparm register is taken by a
7020 fixed register variable. */
7021 for (local_regparm = 0; local_regparm < REGPARM_MAX;
7022 local_regparm++)
7023 if (fixed_regs[local_regparm])
7024 break;
7025
7026 /* We don't want to use regparm(3) for nested functions as
7027 these use a static chain pointer in the third argument. */
7028 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
7029 local_regparm = 2;
7030
7031 /* Save a register for the split stack. */
7032 if (local_regparm == 3 && flag_split_stack)
7033 local_regparm = 2;
7034
7035 	      /* Each fixed register usage increases register pressure,
7036 	         so fewer registers should be used for argument passing.
7037 	         This functionality can be overridden by an explicit
7038 	         regparm value.  */
7039 for (regno = AX_REG; regno <= DI_REG; regno++)
7040 if (fixed_regs[regno])
7041 globals++;
7042
7043 local_regparm
7044 = globals < local_regparm ? local_regparm - globals : 0;
7045
7046 if (local_regparm > regparm)
7047 regparm = local_regparm;
7048 }
7049 }
7050 }
7051
7052 return regparm;
7053 }
7054
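/* Illustrative sketch (not part of GCC itself): the value computed
   above decides how many integer arguments travel in registers on
   32-bit targets.

     int __attribute__((regparm (3))) dot3 (int a, int b, int c);
       // a in %eax, b in %edx, c in %ecx; nothing is pushed for them

   For a local, non-exported function compiled with optimization, a
   similar register convention may be chosen automatically even without
   the attribute, as the code above shows.  */
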
7055 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
7056 DFmode (2) arguments in SSE registers for a function with the
7057 indicated TYPE and DECL. DECL may be NULL when calling function
7058 indirectly or considering a libcall. Return -1 if any FP parameter
7059 should be rejected by error. This is used in siutation we imply SSE
7060 calling convetion but the function is called from another function with
7061 SSE disabled. Otherwise return 0. */
7062
7063 static int
7064 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
7065 {
7066 gcc_assert (!TARGET_64BIT);
7067
7068 /* Use SSE registers to pass SFmode and DFmode arguments if requested
7069 by the sseregparm attribute. */
7070 if (TARGET_SSEREGPARM
7071 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
7072 {
7073 if (!TARGET_SSE)
7074 {
7075 if (warn)
7076 {
7077 if (decl)
7078 error ("calling %qD with attribute sseregparm without "
7079 "SSE/SSE2 enabled", decl);
7080 else
7081 error ("calling %qT with attribute sseregparm without "
7082 "SSE/SSE2 enabled", type);
7083 }
7084 return 0;
7085 }
7086
7087 return 2;
7088 }
7089
7090 if (!decl)
7091 return 0;
7092
7093 cgraph_node *target = cgraph_node::get (decl);
7094 if (target)
7095 target = target->function_symbol ();
7096
7097 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
7098 (and DFmode for SSE2) arguments in SSE registers. */
7099 if (target
7100 /* TARGET_SSE_MATH */
7101 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
7102 && opt_for_fn (target->decl, optimize)
7103 && !(profile_flag && !flag_fentry))
7104 {
7105 cgraph_local_info *i = &target->local;
7106 if (i && i->local && i->can_change_signature)
7107 {
7108 /* Refuse to produce wrong code when local function with SSE enabled
7109 is called from SSE disabled function.
7110 FIXME: We need a way to detect these cases cross-ltrans partition
7111 and avoid using SSE calling conventions on local functions called
7112 from function with SSE disabled. For now at least delay the
7113 warning until we know we are going to produce wrong code.
7114 See PR66047 */
7115 if (!TARGET_SSE && warn)
7116 return -1;
7117 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
7118 ->x_ix86_isa_flags) ? 2 : 1;
7119 }
7120 }
7121
7122 return 0;
7123 }
7124
7125 /* Return true if EAX is live at the start of the function. Used by
7126 ix86_expand_prologue to determine if we need special help before
7127 calling allocate_stack_worker. */
7128
7129 static bool
7130 ix86_eax_live_at_start_p (void)
7131 {
7132 /* Cheat. Don't bother working forward from ix86_function_regparm
7133 to the function type to whether an actual argument is located in
7134 eax. Instead just look at cfg info, which is still close enough
7135 to correct at this point. This gives false positives for broken
7136 functions that might use uninitialized data that happens to be
7137 allocated in eax, but who cares? */
7138 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
7139 }
7140
7141 static bool
7142 ix86_keep_aggregate_return_pointer (tree fntype)
7143 {
7144 tree attr;
7145
7146 if (!TARGET_64BIT)
7147 {
7148 attr = lookup_attribute ("callee_pop_aggregate_return",
7149 TYPE_ATTRIBUTES (fntype));
7150 if (attr)
7151 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
7152
7153 /* For 32-bit MS-ABI the default is to keep aggregate
7154 return pointer. */
7155 if (ix86_function_type_abi (fntype) == MS_ABI)
7156 return true;
7157 }
7158 return KEEP_AGGREGATE_RETURN_POINTER != 0;
7159 }
7160
7161 /* Value is the number of bytes of arguments automatically
7162 popped when returning from a subroutine call.
7163 FUNDECL is the declaration node of the function (as a tree),
7164 FUNTYPE is the data type of the function (as a tree),
7165 or for a library call it is an identifier node for the subroutine name.
7166 SIZE is the number of bytes of arguments passed on the stack.
7167
7168 On the 80386, the RTD insn may be used to pop them if the number
7169 of args is fixed, but if the number is variable then the caller
7170 must pop them all. RTD can't be used for library calls now
7171 because the library is compiled with the Unix compiler.
7172 Use of RTD is a selectable option, since it is incompatible with
7173 standard Unix calling sequences. If the option is not selected,
7174 the caller must always pop the args.
7175
7176 The attribute stdcall is equivalent to RTD on a per module basis. */
7177
7178 static int
7179 ix86_return_pops_args (tree fundecl, tree funtype, int size)
7180 {
7181 unsigned int ccvt;
7182
7183 /* None of the 64-bit ABIs pop arguments. */
7184 if (TARGET_64BIT)
7185 return 0;
7186
7187 ccvt = ix86_get_callcvt (funtype);
7188
7189 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
7190 | IX86_CALLCVT_THISCALL)) != 0
7191 && ! stdarg_p (funtype))
7192 return size;
7193
7194 /* Lose any fake structure return argument if it is passed on the stack. */
7195 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
7196 && !ix86_keep_aggregate_return_pointer (funtype))
7197 {
7198 int nregs = ix86_function_regparm (funtype, fundecl);
7199 if (nregs == 0)
7200 return GET_MODE_SIZE (Pmode);
7201 }
7202
7203 return 0;
7204 }
7205
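/* Illustrative sketch (not part of GCC itself): for a 32-bit stdcall
   function with two int arguments,

     void __attribute__((stdcall)) put_pixel (int x, int y);

   the hook above returns 8, so the callee ends with "ret $8" and the
   caller does not pop the arguments itself; a plain cdecl function
   returns 0 here and leaves the cleanup to the caller.  */
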
7206 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
7207
7208 static bool
7209 ix86_legitimate_combined_insn (rtx_insn *insn)
7210 {
7211 /* Check operand constraints in case hard registers were propagated
7212 into insn pattern. This check prevents combine pass from
7213 generating insn patterns with invalid hard register operands.
7214      These invalid insns can eventually confuse reload into erroring out
7215      with a spill failure.  See also PRs 46829 and 46843.  */
7216 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
7217 {
7218 int i;
7219
7220 extract_insn (insn);
7221 preprocess_constraints (insn);
7222
7223 int n_operands = recog_data.n_operands;
7224 int n_alternatives = recog_data.n_alternatives;
7225 for (i = 0; i < n_operands; i++)
7226 {
7227 rtx op = recog_data.operand[i];
7228 machine_mode mode = GET_MODE (op);
7229 const operand_alternative *op_alt;
7230 int offset = 0;
7231 bool win;
7232 int j;
7233
7234 /* For pre-AVX disallow unaligned loads/stores where the
7235 instructions don't support it. */
7236 if (!TARGET_AVX
7237 && VECTOR_MODE_P (GET_MODE (op))
7238 && misaligned_operand (op, GET_MODE (op)))
7239 {
7240 int min_align = get_attr_ssememalign (insn);
7241 if (min_align == 0)
7242 return false;
7243 }
7244
7245 /* A unary operator may be accepted by the predicate, but it
7246 is irrelevant for matching constraints. */
7247 if (UNARY_P (op))
7248 op = XEXP (op, 0);
7249
7250 if (SUBREG_P (op))
7251 {
7252 if (REG_P (SUBREG_REG (op))
7253 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
7254 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
7255 GET_MODE (SUBREG_REG (op)),
7256 SUBREG_BYTE (op),
7257 GET_MODE (op));
7258 op = SUBREG_REG (op);
7259 }
7260
7261 if (!(REG_P (op) && HARD_REGISTER_P (op)))
7262 continue;
7263
7264 op_alt = recog_op_alt;
7265
7266 /* Operand has no constraints, anything is OK. */
7267 win = !n_alternatives;
7268
7269 alternative_mask preferred = get_preferred_alternatives (insn);
7270 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
7271 {
7272 if (!TEST_BIT (preferred, j))
7273 continue;
7274 if (op_alt[i].anything_ok
7275 || (op_alt[i].matches != -1
7276 && operands_match_p
7277 (recog_data.operand[i],
7278 recog_data.operand[op_alt[i].matches]))
7279 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
7280 {
7281 win = true;
7282 break;
7283 }
7284 }
7285
7286 if (!win)
7287 return false;
7288 }
7289 }
7290
7291 return true;
7292 }
7293 \f
7294 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
7295
7296 static unsigned HOST_WIDE_INT
7297 ix86_asan_shadow_offset (void)
7298 {
7299 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
7300 : HOST_WIDE_INT_C (0x7fff8000))
7301 : (HOST_WIDE_INT_1 << 29);
7302 }
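
/* Illustrative note (not part of GCC itself): AddressSanitizer maps an
   application address to its shadow byte as

       shadow = (addr >> 3) + ix86_asan_shadow_offset ()

   so on x86-64 Linux (LP64) the shadow of address A lives at
   (A >> 3) + 0x7fff8000.  */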
7303 \f
7304 /* Argument support functions. */
7305
7306 /* Return true when register may be used to pass function parameters. */
7307 bool
7308 ix86_function_arg_regno_p (int regno)
7309 {
7310 int i;
7311 enum calling_abi call_abi;
7312 const int *parm_regs;
7313
7314 if (TARGET_MPX && BND_REGNO_P (regno))
7315 return true;
7316
7317 if (!TARGET_64BIT)
7318 {
7319 if (TARGET_MACHO)
7320 return (regno < REGPARM_MAX
7321 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
7322 else
7323 return (regno < REGPARM_MAX
7324 || (TARGET_MMX && MMX_REGNO_P (regno)
7325 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
7326 || (TARGET_SSE && SSE_REGNO_P (regno)
7327 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
7328 }
7329
7330 if (TARGET_SSE && SSE_REGNO_P (regno)
7331 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
7332 return true;
7333
7334 /* TODO: The function should depend on current function ABI but
7335 builtins.c would need updating then. Therefore we use the
7336 default ABI. */
7337 call_abi = ix86_cfun_abi ();
7338
7339 /* RAX is used as hidden argument to va_arg functions. */
7340 if (call_abi == SYSV_ABI && regno == AX_REG)
7341 return true;
7342
7343 if (call_abi == MS_ABI)
7344 parm_regs = x86_64_ms_abi_int_parameter_registers;
7345 else
7346 parm_regs = x86_64_int_parameter_registers;
7347
7348 for (i = 0; i < (call_abi == MS_ABI
7349 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
7350 if (regno == parm_regs[i])
7351 return true;
7352 return false;
7353 }
7354
7355 /* Return true if we do not know how to pass TYPE solely in registers.  */
7356
7357 static bool
7358 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
7359 {
7360 if (must_pass_in_stack_var_size_or_pad (mode, type))
7361 return true;
7362
7363 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
7364 The layout_type routine is crafty and tries to trick us into passing
7365 currently unsupported vector types on the stack by using TImode. */
7366 return (!TARGET_64BIT && mode == TImode
7367 && type && TREE_CODE (type) != VECTOR_TYPE);
7368 }
7369
7370 /* Return the size, in bytes, of the area reserved for arguments passed
7371    in registers for the function represented by FNDECL, depending on the
7372    ABI used.  */
7373 int
7374 ix86_reg_parm_stack_space (const_tree fndecl)
7375 {
7376 enum calling_abi call_abi = SYSV_ABI;
7377 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
7378 call_abi = ix86_function_abi (fndecl);
7379 else
7380 call_abi = ix86_function_type_abi (fndecl);
7381 if (TARGET_64BIT && call_abi == MS_ABI)
7382 return 32;
7383 return 0;
7384 }
7385
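/* Illustrative note (not part of GCC itself): the 32 bytes above are
   the "home" (shadow) area the 64-bit MS ABI makes the caller reserve
   for the four register arguments, roughly

       subq  $32, %rsp      # shadow space for rcx/rdx/r8/r9
       call  callee
       addq  $32, %rsp

   (in practice folded into the caller's frame); the SysV ABI reserves
   no such area, hence the 0.  */
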
7386 /* We add this as a workaround in order to use libc_has_function
7387 hook in i386.md. */
7388 bool
7389 ix86_libc_has_function (enum function_class fn_class)
7390 {
7391 return targetm.libc_has_function (fn_class);
7392 }
7393
7394 /* Returns value SYSV_ABI, MS_ABI dependent on fntype,
7395 specifying the call abi used. */
7396 enum calling_abi
7397 ix86_function_type_abi (const_tree fntype)
7398 {
7399 enum calling_abi abi = ix86_abi;
7400
7401 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
7402 return abi;
7403
7404 if (abi == SYSV_ABI
7405 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
7406 {
7407 if (TARGET_X32)
7408 error ("X32 does not support ms_abi attribute");
7409
7410 abi = MS_ABI;
7411 }
7412 else if (abi == MS_ABI
7413 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
7414 abi = SYSV_ABI;
7415
7416 return abi;
7417 }
7418
7419 static enum calling_abi
7420 ix86_function_abi (const_tree fndecl)
7421 {
7422 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
7423 }
7424
7425 /* Returns value SYSV_ABI, MS_ABI dependent on cfun,
7426 specifying the call abi used. */
7427 enum calling_abi
7428 ix86_cfun_abi (void)
7429 {
7430 return cfun ? cfun->machine->call_abi : ix86_abi;
7431 }
7432
7433 static bool
7434 ix86_function_ms_hook_prologue (const_tree fn)
7435 {
7436 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
7437 {
7438 if (decl_function_context (fn) != NULL_TREE)
7439 error_at (DECL_SOURCE_LOCATION (fn),
7440 "ms_hook_prologue is not compatible with nested function");
7441 else
7442 return true;
7443 }
7444 return false;
7445 }
7446
7447 /* Write the extra assembler code needed to declare a function properly. */
7448
7449 void
7450 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
7451 tree decl)
7452 {
7453 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
7454
7455 if (is_ms_hook)
7456 {
7457 int i, filler_count = (TARGET_64BIT ? 32 : 16);
7458 unsigned int filler_cc = 0xcccccccc;
7459
7460 for (i = 0; i < filler_count; i += 4)
7461 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
7462 }
7463
7464 #ifdef SUBTARGET_ASM_UNWIND_INIT
7465 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
7466 #endif
7467
7468 ASM_OUTPUT_LABEL (asm_out_file, fname);
7469
7470 /* Output magic byte marker, if hot-patch attribute is set. */
7471 if (is_ms_hook)
7472 {
7473 if (TARGET_64BIT)
7474 {
7475 /* leaq [%rsp + 0], %rsp */
7476 asm_fprintf (asm_out_file, ASM_BYTE
7477 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
7478 }
7479 else
7480 {
7481 /* movl.s %edi, %edi
7482 push %ebp
7483 movl.s %esp, %ebp */
7484 asm_fprintf (asm_out_file, ASM_BYTE
7485 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
7486 }
7487 }
7488 }
7489
7490 /* regclass.c */
7491 extern void init_regs (void);
7492
7493 /* Implementation of call abi switching target hook. Specific to FNDECL
7494 the specific call register sets are set. See also
7495 ix86_conditional_register_usage for more details. */
7496 void
7497 ix86_call_abi_override (const_tree fndecl)
7498 {
7499 cfun->machine->call_abi = ix86_function_abi (fndecl);
7500 }
7501
7502 /* Return true if a pseudo register should be created and used to hold
7503    the GOT address for PIC code.  */
7504 bool
7505 ix86_use_pseudo_pic_reg (void)
7506 {
7507 if ((TARGET_64BIT
7508 && (ix86_cmodel == CM_SMALL_PIC
7509 || TARGET_PECOFF))
7510 || !flag_pic)
7511 return false;
7512 return true;
7513 }
7514
7515 /* Initialize large model PIC register. */
7516
7517 static void
7518 ix86_init_large_pic_reg (unsigned int tmp_regno)
7519 {
7520 rtx_code_label *label;
7521 rtx tmp_reg;
7522
7523 gcc_assert (Pmode == DImode);
7524 label = gen_label_rtx ();
7525 emit_label (label);
7526 LABEL_PRESERVE_P (label) = 1;
7527 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
7528 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
7529 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
7530 label));
7531 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
7532 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
7533 pic_offset_table_rtx, tmp_reg));
7534 }
7535
7536 /* Create and initialize PIC register if required. */
7537 static void
7538 ix86_init_pic_reg (void)
7539 {
7540 edge entry_edge;
7541 rtx_insn *seq;
7542
7543 if (!ix86_use_pseudo_pic_reg ())
7544 return;
7545
7546 start_sequence ();
7547
7548 if (TARGET_64BIT)
7549 {
7550 if (ix86_cmodel == CM_LARGE_PIC)
7551 ix86_init_large_pic_reg (R11_REG);
7552 else
7553 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
7554 }
7555 else
7556 {
7557       /* If there is a future mcount call in the function, it is more profitable
7558 	 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
7559 rtx reg = crtl->profile
7560 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
7561 : pic_offset_table_rtx;
7562 rtx_insn *insn = emit_insn (gen_set_got (reg));
7563 RTX_FRAME_RELATED_P (insn) = 1;
7564 if (crtl->profile)
7565 emit_move_insn (pic_offset_table_rtx, reg);
7566 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
7567 }
7568
7569 seq = get_insns ();
7570 end_sequence ();
7571
7572 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7573 insert_insn_on_edge (seq, entry_edge);
7574 commit_one_edge_insertion (entry_edge);
7575 }
7576
7577 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7578 for a call to a function whose data type is FNTYPE.
7579 For a library call, FNTYPE is 0. */
7580
7581 void
7582 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
7583 tree fntype, /* tree ptr for function decl */
7584 rtx libname, /* SYMBOL_REF of library name or 0 */
7585 tree fndecl,
7586 int caller)
7587 {
7588 struct cgraph_local_info *i = NULL;
7589 struct cgraph_node *target = NULL;
7590
7591 memset (cum, 0, sizeof (*cum));
7592
7593 if (fndecl)
7594 {
7595 target = cgraph_node::get (fndecl);
7596 if (target)
7597 {
7598 target = target->function_symbol ();
7599 i = cgraph_node::local_info (target->decl);
7600 cum->call_abi = ix86_function_abi (target->decl);
7601 }
7602 else
7603 cum->call_abi = ix86_function_abi (fndecl);
7604 }
7605 else
7606 cum->call_abi = ix86_function_type_abi (fntype);
7607
7608 cum->caller = caller;
7609
7610 /* Set up the number of registers to use for passing arguments. */
7611 cum->nregs = ix86_regparm;
7612 if (TARGET_64BIT)
7613 {
7614 cum->nregs = (cum->call_abi == SYSV_ABI
7615 ? X86_64_REGPARM_MAX
7616 : X86_64_MS_REGPARM_MAX);
7617 }
7618 if (TARGET_SSE)
7619 {
7620 cum->sse_nregs = SSE_REGPARM_MAX;
7621 if (TARGET_64BIT)
7622 {
7623 cum->sse_nregs = (cum->call_abi == SYSV_ABI
7624 ? X86_64_SSE_REGPARM_MAX
7625 : X86_64_MS_SSE_REGPARM_MAX);
7626 }
7627 }
7628 if (TARGET_MMX)
7629 cum->mmx_nregs = MMX_REGPARM_MAX;
7630 cum->warn_avx512f = true;
7631 cum->warn_avx = true;
7632 cum->warn_sse = true;
7633 cum->warn_mmx = true;
7634
7635   /* Because the type might mismatch between caller and callee, we need to
7636      use the actual type of the function for local calls.
7637      FIXME: cgraph_analyze can be told to actually record if a function uses
7638      va_start, so for local functions maybe_vaarg can be made more aggressive,
7639      helping K&R code.
7640      FIXME: once the type system is fixed, we won't need this code anymore.  */
7641 if (i && i->local && i->can_change_signature)
7642 fntype = TREE_TYPE (target->decl);
7643 cum->stdarg = stdarg_p (fntype);
7644 cum->maybe_vaarg = (fntype
7645 ? (!prototype_p (fntype) || stdarg_p (fntype))
7646 : !libname);
7647
7648 cum->bnd_regno = FIRST_BND_REG;
7649 cum->bnds_in_bt = 0;
7650 cum->force_bnd_pass = 0;
7651 cum->decl = fndecl;
7652
7653 if (!TARGET_64BIT)
7654 {
7655 /* If there are variable arguments, then we won't pass anything
7656 in registers in 32-bit mode. */
7657 if (stdarg_p (fntype))
7658 {
7659 cum->nregs = 0;
7660 	  /* Since in 32-bit mode variable arguments are always passed on
7661 	     the stack, there is a scratch register available for an
7662 	     indirect sibcall.  */
7663 cfun->machine->arg_reg_available = true;
7664 cum->sse_nregs = 0;
7665 cum->mmx_nregs = 0;
7666 cum->warn_avx512f = false;
7667 cum->warn_avx = false;
7668 cum->warn_sse = false;
7669 cum->warn_mmx = false;
7670 return;
7671 }
7672
7673 /* Use ecx and edx registers if function has fastcall attribute,
7674 else look for regparm information. */
7675 if (fntype)
7676 {
7677 unsigned int ccvt = ix86_get_callcvt (fntype);
7678 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
7679 {
7680 cum->nregs = 1;
7681 cum->fastcall = 1; /* Same first register as in fastcall. */
7682 }
7683 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
7684 {
7685 cum->nregs = 2;
7686 cum->fastcall = 1;
7687 }
7688 else
7689 cum->nregs = ix86_function_regparm (fntype, fndecl);
7690 }
7691
7692 /* Set up the number of SSE registers used for passing SFmode
7693 and DFmode arguments. Warn for mismatching ABI. */
7694 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
7695 }
7696
7697 cfun->machine->arg_reg_available = (cum->nregs > 0);
7698 }
7699
7700 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
7701 But in the case of vector types, it is some vector mode.
7702
7703 When we have only some of our vector isa extensions enabled, then there
7704 are some modes for which vector_mode_supported_p is false. For these
7705 modes, the generic vector support in gcc will choose some non-vector mode
7706 in order to implement the type. By computing the natural mode, we'll
7707 select the proper ABI location for the operand and not depend on whatever
7708 the middle-end decides to do with these vector types.
7709
7710    The middle-end can't deal with vector types larger than 16 bytes.  In
7711    that case, we return the original mode and warn of the ABI change if
7712    CUM isn't NULL.
7713
7714    If IN_RETURN is true, warn of the ABI change if the vector mode isn't
7715    available for the function return value.  */
7716
7717 static machine_mode
7718 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
7719 bool in_return)
7720 {
7721 machine_mode mode = TYPE_MODE (type);
7722
7723 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
7724 {
7725 HOST_WIDE_INT size = int_size_in_bytes (type);
7726 if ((size == 8 || size == 16 || size == 32 || size == 64)
7727 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
7728 && TYPE_VECTOR_SUBPARTS (type) > 1)
7729 {
7730 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
7731
7732 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
7733 mode = MIN_MODE_VECTOR_FLOAT;
7734 else
7735 mode = MIN_MODE_VECTOR_INT;
7736
7737 /* Get the mode which has this inner mode and number of units. */
7738 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
7739 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
7740 && GET_MODE_INNER (mode) == innermode)
7741 {
7742 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
7743 {
7744 static bool warnedavx512f;
7745 static bool warnedavx512f_ret;
7746
7747 if (cum && cum->warn_avx512f && !warnedavx512f)
7748 {
7749 if (warning (OPT_Wpsabi, "AVX512F vector argument "
7750 "without AVX512F enabled changes the ABI"))
7751 warnedavx512f = true;
7752 }
7753 else if (in_return && !warnedavx512f_ret)
7754 {
7755 if (warning (OPT_Wpsabi, "AVX512F vector return "
7756 "without AVX512F enabled changes the ABI"))
7757 warnedavx512f_ret = true;
7758 }
7759
7760 return TYPE_MODE (type);
7761 }
7762 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
7763 {
7764 static bool warnedavx;
7765 static bool warnedavx_ret;
7766
7767 if (cum && cum->warn_avx && !warnedavx)
7768 {
7769 if (warning (OPT_Wpsabi, "AVX vector argument "
7770 "without AVX enabled changes the ABI"))
7771 warnedavx = true;
7772 }
7773 else if (in_return && !warnedavx_ret)
7774 {
7775 if (warning (OPT_Wpsabi, "AVX vector return "
7776 "without AVX enabled changes the ABI"))
7777 warnedavx_ret = true;
7778 }
7779
7780 return TYPE_MODE (type);
7781 }
7782 else if (((size == 8 && TARGET_64BIT) || size == 16)
7783 && !TARGET_SSE
7784 && !TARGET_IAMCU)
7785 {
7786 static bool warnedsse;
7787 static bool warnedsse_ret;
7788
7789 if (cum && cum->warn_sse && !warnedsse)
7790 {
7791 if (warning (OPT_Wpsabi, "SSE vector argument "
7792 "without SSE enabled changes the ABI"))
7793 warnedsse = true;
7794 }
7795 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
7796 {
7797 if (warning (OPT_Wpsabi, "SSE vector return "
7798 "without SSE enabled changes the ABI"))
7799 warnedsse_ret = true;
7800 }
7801 }
7802 else if ((size == 8 && !TARGET_64BIT)
7803 && !TARGET_MMX
7804 && !TARGET_IAMCU)
7805 {
7806 static bool warnedmmx;
7807 static bool warnedmmx_ret;
7808
7809 if (cum && cum->warn_mmx && !warnedmmx)
7810 {
7811 if (warning (OPT_Wpsabi, "MMX vector argument "
7812 "without MMX enabled changes the ABI"))
7813 warnedmmx = true;
7814 }
7815 else if (in_return && !warnedmmx_ret)
7816 {
7817 if (warning (OPT_Wpsabi, "MMX vector return "
7818 "without MMX enabled changes the ABI"))
7819 warnedmmx_ret = true;
7820 }
7821 }
7822 return mode;
7823 }
7824
7825 gcc_unreachable ();
7826 }
7827 }
7828
7829 return mode;
7830 }
7831
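/* Illustrative sketch (not part of GCC itself): for a generic vector
   type the natural mode is the matching vector mode, regardless of
   what the middle-end would fall back to when the ISA is disabled.

     typedef float v4sf __attribute__((vector_size (16)));
     v4sf vadd (v4sf a, v4sf b) { return a + b; }
       // natural mode V4SFmode: passed in %xmm registers when SSE is
       // enabled; without SSE a -Wpsabi warning about the ABI change
       // is emitted by the code above.
*/
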
7832 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
7833 this may not agree with the mode that the type system has chosen for the
7834 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
7835 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
7836
7837 static rtx
7838 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
7839 unsigned int regno)
7840 {
7841 rtx tmp;
7842
7843 if (orig_mode != BLKmode)
7844 tmp = gen_rtx_REG (orig_mode, regno);
7845 else
7846 {
7847 tmp = gen_rtx_REG (mode, regno);
7848 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
7849 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
7850 }
7851
7852 return tmp;
7853 }
7854
7855 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
7856 of this code is to classify each 8bytes of incoming argument by the register
7857 class and assign registers accordingly. */
7858
7859 /* Return the union class of CLASS1 and CLASS2.
7860 See the x86-64 PS ABI for details. */
7861
7862 static enum x86_64_reg_class
7863 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
7864 {
7865 /* Rule #1: If both classes are equal, this is the resulting class. */
7866 if (class1 == class2)
7867 return class1;
7868
7869 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
7870 the other class. */
7871 if (class1 == X86_64_NO_CLASS)
7872 return class2;
7873 if (class2 == X86_64_NO_CLASS)
7874 return class1;
7875
7876 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
7877 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
7878 return X86_64_MEMORY_CLASS;
7879
7880 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
7881 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
7882 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
7883 return X86_64_INTEGERSI_CLASS;
7884 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
7885 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
7886 return X86_64_INTEGER_CLASS;
7887
7888 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
7889 MEMORY is used. */
7890 if (class1 == X86_64_X87_CLASS
7891 || class1 == X86_64_X87UP_CLASS
7892 || class1 == X86_64_COMPLEX_X87_CLASS
7893 || class2 == X86_64_X87_CLASS
7894 || class2 == X86_64_X87UP_CLASS
7895 || class2 == X86_64_COMPLEX_X87_CLASS)
7896 return X86_64_MEMORY_CLASS;
7897
7898 /* Rule #6: Otherwise class SSE is used. */
7899 return X86_64_SSE_CLASS;
7900 }
7901
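/* Illustrative sketch (not part of GCC itself) of the merge rules
   above, as applied when classifying a small aggregate:

     struct s { int i; float f; };   // 8 bytes, a single eightbyte

   The int field classifies as an integer class and the float field as
   an SSE class; merging them yields an integer class (rule #4), so the
   whole struct is passed in a general-purpose register such as %rdi
   rather than in %xmm0.  */
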
7902 /* Classify the argument of type TYPE and mode MODE.
7903 CLASSES will be filled by the register class used to pass each word
7904 of the operand. The number of words is returned. In case the parameter
7905 should be passed in memory, 0 is returned. As a special case for zero
7906 sized containers, classes[0] will be NO_CLASS and 1 is returned.
7907
7908    BIT_OFFSET is used internally for handling records and specifies the
7909    offset in bits modulo 512 to avoid overflow cases.
7910
7911 See the x86-64 PS ABI for details.
7912 */
7913
7914 static int
7915 classify_argument (machine_mode mode, const_tree type,
7916 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
7917 {
7918 HOST_WIDE_INT bytes =
7919 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7920 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
7921
7922 /* Variable sized entities are always passed/returned in memory. */
7923 if (bytes < 0)
7924 return 0;
7925
7926 if (mode != VOIDmode
7927 && targetm.calls.must_pass_in_stack (mode, type))
7928 return 0;
7929
7930 if (type && AGGREGATE_TYPE_P (type))
7931 {
7932 int i;
7933 tree field;
7934 enum x86_64_reg_class subclasses[MAX_CLASSES];
7935
7936 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
7937 if (bytes > 64)
7938 return 0;
7939
7940 for (i = 0; i < words; i++)
7941 classes[i] = X86_64_NO_CLASS;
7942
7943       /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
7944 	 signal the memory class, so handle this as a special case.  */
7945 if (!words)
7946 {
7947 classes[0] = X86_64_NO_CLASS;
7948 return 1;
7949 }
7950
7951 /* Classify each field of record and merge classes. */
7952 switch (TREE_CODE (type))
7953 {
7954 case RECORD_TYPE:
7955 /* And now merge the fields of structure. */
7956 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7957 {
7958 if (TREE_CODE (field) == FIELD_DECL)
7959 {
7960 int num;
7961
7962 if (TREE_TYPE (field) == error_mark_node)
7963 continue;
7964
7965 /* Bitfields are always classified as integer. Handle them
7966 early, since later code would consider them to be
7967 misaligned integers. */
7968 if (DECL_BIT_FIELD (field))
7969 {
7970 for (i = (int_bit_position (field)
7971 + (bit_offset % 64)) / 8 / 8;
7972 i < ((int_bit_position (field) + (bit_offset % 64))
7973 + tree_to_shwi (DECL_SIZE (field))
7974 + 63) / 8 / 8; i++)
7975 classes[i] =
7976 merge_classes (X86_64_INTEGER_CLASS,
7977 classes[i]);
7978 }
7979 else
7980 {
7981 int pos;
7982
7983 type = TREE_TYPE (field);
7984
7985 /* Flexible array member is ignored. */
7986 if (TYPE_MODE (type) == BLKmode
7987 && TREE_CODE (type) == ARRAY_TYPE
7988 && TYPE_SIZE (type) == NULL_TREE
7989 && TYPE_DOMAIN (type) != NULL_TREE
7990 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
7991 == NULL_TREE))
7992 {
7993 static bool warned;
7994
7995 if (!warned && warn_psabi)
7996 {
7997 warned = true;
7998 inform (input_location,
7999 "the ABI of passing struct with"
8000 " a flexible array member has"
8001 " changed in GCC 4.4");
8002 }
8003 continue;
8004 }
8005 num = classify_argument (TYPE_MODE (type), type,
8006 subclasses,
8007 (int_bit_position (field)
8008 + bit_offset) % 512);
8009 if (!num)
8010 return 0;
8011 pos = (int_bit_position (field)
8012 + (bit_offset % 64)) / 8 / 8;
8013 for (i = 0; i < num && (i + pos) < words; i++)
8014 classes[i + pos] =
8015 merge_classes (subclasses[i], classes[i + pos]);
8016 }
8017 }
8018 }
8019 break;
8020
8021 case ARRAY_TYPE:
8022 /* Arrays are handled as small records. */
8023 {
8024 int num;
8025 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
8026 TREE_TYPE (type), subclasses, bit_offset);
8027 if (!num)
8028 return 0;
8029
8030 /* The partial classes are now full classes. */
8031 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
8032 subclasses[0] = X86_64_SSE_CLASS;
8033 if (subclasses[0] == X86_64_INTEGERSI_CLASS
8034 && !((bit_offset % 64) == 0 && bytes == 4))
8035 subclasses[0] = X86_64_INTEGER_CLASS;
8036
8037 for (i = 0; i < words; i++)
8038 classes[i] = subclasses[i % num];
8039
8040 break;
8041 }
8042 case UNION_TYPE:
8043 case QUAL_UNION_TYPE:
 8044 	  /* Unions are similar to RECORD_TYPE but the offset is
 8045 	     always 0.  */
8046 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8047 {
8048 if (TREE_CODE (field) == FIELD_DECL)
8049 {
8050 int num;
8051
8052 if (TREE_TYPE (field) == error_mark_node)
8053 continue;
8054
8055 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
8056 TREE_TYPE (field), subclasses,
8057 bit_offset);
8058 if (!num)
8059 return 0;
8060 for (i = 0; i < num && i < words; i++)
8061 classes[i] = merge_classes (subclasses[i], classes[i]);
8062 }
8063 }
8064 break;
8065
8066 default:
8067 gcc_unreachable ();
8068 }
8069
8070 if (words > 2)
8071 {
 8072 	  /* When the size is larger than 16 bytes, if the first eightbyte
 8073 	     isn't X86_64_SSE_CLASS or any of the remaining ones isn't
 8074 	     X86_64_SSEUP_CLASS, everything should be passed in
 8075 	     memory.  */
8076 if (classes[0] != X86_64_SSE_CLASS)
8077 return 0;
8078
8079 for (i = 1; i < words; i++)
8080 if (classes[i] != X86_64_SSEUP_CLASS)
8081 return 0;
8082 }
8083
8084 /* Final merger cleanup. */
8085 for (i = 0; i < words; i++)
8086 {
8087 /* If one class is MEMORY, everything should be passed in
8088 memory. */
8089 if (classes[i] == X86_64_MEMORY_CLASS)
8090 return 0;
8091
 8092 	  /* X86_64_SSEUP_CLASS should always be preceded by
 8093 	     X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
8094 if (classes[i] == X86_64_SSEUP_CLASS
8095 && classes[i - 1] != X86_64_SSE_CLASS
8096 && classes[i - 1] != X86_64_SSEUP_CLASS)
8097 {
8098 /* The first one should never be X86_64_SSEUP_CLASS. */
8099 gcc_assert (i != 0);
8100 classes[i] = X86_64_SSE_CLASS;
8101 }
8102
8103 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
8104 everything should be passed in memory. */
8105 if (classes[i] == X86_64_X87UP_CLASS
8106 && (classes[i - 1] != X86_64_X87_CLASS))
8107 {
8108 static bool warned;
8109
8110 /* The first one should never be X86_64_X87UP_CLASS. */
8111 gcc_assert (i != 0);
8112 if (!warned && warn_psabi)
8113 {
8114 warned = true;
8115 inform (input_location,
8116 "the ABI of passing union with long double"
8117 " has changed in GCC 4.4");
8118 }
8119 return 0;
8120 }
8121 }
8122 return words;
8123 }
8124
 8125   /* Compute the alignment needed.  We align all types to their natural
 8126      boundaries, with the exception of XFmode, which is aligned to 64 bits.  */
8127 if (mode != VOIDmode && mode != BLKmode)
8128 {
8129 int mode_alignment = GET_MODE_BITSIZE (mode);
8130
8131 if (mode == XFmode)
8132 mode_alignment = 128;
8133 else if (mode == XCmode)
8134 mode_alignment = 256;
8135 if (COMPLEX_MODE_P (mode))
8136 mode_alignment /= 2;
8137 /* Misaligned fields are always returned in memory. */
8138 if (bit_offset % mode_alignment)
8139 return 0;
8140 }
8141
 8142   /* For V1xx modes, just use the base mode.  */
8143 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
8144 && GET_MODE_UNIT_SIZE (mode) == bytes)
8145 mode = GET_MODE_INNER (mode);
8146
8147 /* Classification of atomic types. */
8148 switch (mode)
8149 {
8150 case SDmode:
8151 case DDmode:
8152 classes[0] = X86_64_SSE_CLASS;
8153 return 1;
8154 case TDmode:
8155 classes[0] = X86_64_SSE_CLASS;
8156 classes[1] = X86_64_SSEUP_CLASS;
8157 return 2;
8158 case DImode:
8159 case SImode:
8160 case HImode:
8161 case QImode:
8162 case CSImode:
8163 case CHImode:
8164 case CQImode:
8165 {
8166 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
8167
8168 /* Analyze last 128 bits only. */
8169 size = (size - 1) & 0x7f;
8170
8171 if (size < 32)
8172 {
8173 classes[0] = X86_64_INTEGERSI_CLASS;
8174 return 1;
8175 }
8176 else if (size < 64)
8177 {
8178 classes[0] = X86_64_INTEGER_CLASS;
8179 return 1;
8180 }
8181 else if (size < 64+32)
8182 {
8183 classes[0] = X86_64_INTEGER_CLASS;
8184 classes[1] = X86_64_INTEGERSI_CLASS;
8185 return 2;
8186 }
8187 else if (size < 64+64)
8188 {
8189 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
8190 return 2;
8191 }
8192 else
8193 gcc_unreachable ();
8194 }
8195 case CDImode:
8196 case TImode:
8197 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
8198 return 2;
8199 case COImode:
8200 case OImode:
8201 /* OImode shouldn't be used directly. */
8202 gcc_unreachable ();
8203 case CTImode:
8204 return 0;
8205 case SFmode:
8206 if (!(bit_offset % 64))
8207 classes[0] = X86_64_SSESF_CLASS;
8208 else
8209 classes[0] = X86_64_SSE_CLASS;
8210 return 1;
8211 case DFmode:
8212 classes[0] = X86_64_SSEDF_CLASS;
8213 return 1;
8214 case XFmode:
8215 classes[0] = X86_64_X87_CLASS;
8216 classes[1] = X86_64_X87UP_CLASS;
8217 return 2;
8218 case TFmode:
8219 classes[0] = X86_64_SSE_CLASS;
8220 classes[1] = X86_64_SSEUP_CLASS;
8221 return 2;
8222 case SCmode:
8223 classes[0] = X86_64_SSE_CLASS;
8224 if (!(bit_offset % 64))
8225 return 1;
8226 else
8227 {
8228 static bool warned;
8229
8230 if (!warned && warn_psabi)
8231 {
8232 warned = true;
8233 inform (input_location,
8234 "the ABI of passing structure with complex float"
8235 " member has changed in GCC 4.4");
8236 }
8237 classes[1] = X86_64_SSESF_CLASS;
8238 return 2;
8239 }
8240 case DCmode:
8241 classes[0] = X86_64_SSEDF_CLASS;
8242 classes[1] = X86_64_SSEDF_CLASS;
8243 return 2;
8244 case XCmode:
8245 classes[0] = X86_64_COMPLEX_X87_CLASS;
8246 return 1;
8247 case TCmode:
 8248       /* This mode is larger than 16 bytes.  */
8249 return 0;
8250 case V8SFmode:
8251 case V8SImode:
8252 case V32QImode:
8253 case V16HImode:
8254 case V4DFmode:
8255 case V4DImode:
8256 classes[0] = X86_64_SSE_CLASS;
8257 classes[1] = X86_64_SSEUP_CLASS;
8258 classes[2] = X86_64_SSEUP_CLASS;
8259 classes[3] = X86_64_SSEUP_CLASS;
8260 return 4;
8261 case V8DFmode:
8262 case V16SFmode:
8263 case V8DImode:
8264 case V16SImode:
8265 case V32HImode:
8266 case V64QImode:
8267 classes[0] = X86_64_SSE_CLASS;
8268 classes[1] = X86_64_SSEUP_CLASS;
8269 classes[2] = X86_64_SSEUP_CLASS;
8270 classes[3] = X86_64_SSEUP_CLASS;
8271 classes[4] = X86_64_SSEUP_CLASS;
8272 classes[5] = X86_64_SSEUP_CLASS;
8273 classes[6] = X86_64_SSEUP_CLASS;
8274 classes[7] = X86_64_SSEUP_CLASS;
8275 return 8;
8276 case V4SFmode:
8277 case V4SImode:
8278 case V16QImode:
8279 case V8HImode:
8280 case V2DFmode:
8281 case V2DImode:
8282 classes[0] = X86_64_SSE_CLASS;
8283 classes[1] = X86_64_SSEUP_CLASS;
8284 return 2;
8285 case V1TImode:
8286 case V1DImode:
8287 case V2SFmode:
8288 case V2SImode:
8289 case V4HImode:
8290 case V8QImode:
8291 classes[0] = X86_64_SSE_CLASS;
8292 return 1;
8293 case BLKmode:
8294 case VOIDmode:
8295 return 0;
8296 default:
8297 gcc_assert (VECTOR_MODE_P (mode));
8298
8299 if (bytes > 16)
8300 return 0;
8301
8302 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
8303
8304 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
8305 classes[0] = X86_64_INTEGERSI_CLASS;
8306 else
8307 classes[0] = X86_64_INTEGER_CLASS;
8308 classes[1] = X86_64_INTEGER_CLASS;
8309 return 1 + (bytes > 8);
8310 }
8311 }
8312
 8313 /* Examine the argument and set the number of registers required in each
 8314    class.  Return true iff the parameter should be passed in memory.  */
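/* For instance (illustrative example): a struct { double d; long l; }
   yields *sse_nregs == 1 and *int_nregs == 1 under the SysV x86-64 ABI.  */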
8315
8316 static bool
8317 examine_argument (machine_mode mode, const_tree type, int in_return,
8318 int *int_nregs, int *sse_nregs)
8319 {
8320 enum x86_64_reg_class regclass[MAX_CLASSES];
8321 int n = classify_argument (mode, type, regclass, 0);
8322
8323 *int_nregs = 0;
8324 *sse_nregs = 0;
8325
8326 if (!n)
8327 return true;
8328 for (n--; n >= 0; n--)
8329 switch (regclass[n])
8330 {
8331 case X86_64_INTEGER_CLASS:
8332 case X86_64_INTEGERSI_CLASS:
8333 (*int_nregs)++;
8334 break;
8335 case X86_64_SSE_CLASS:
8336 case X86_64_SSESF_CLASS:
8337 case X86_64_SSEDF_CLASS:
8338 (*sse_nregs)++;
8339 break;
8340 case X86_64_NO_CLASS:
8341 case X86_64_SSEUP_CLASS:
8342 break;
8343 case X86_64_X87_CLASS:
8344 case X86_64_X87UP_CLASS:
8345 case X86_64_COMPLEX_X87_CLASS:
8346 if (!in_return)
8347 return true;
8348 break;
8349 case X86_64_MEMORY_CLASS:
8350 gcc_unreachable ();
8351 }
8352
8353 return false;
8354 }
8355
8356 /* Construct container for the argument used by GCC interface. See
8357 FUNCTION_ARG for the detailed description. */
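/* As an illustrative example: for a 16-byte struct { double d; long l; }
   this returns a PARALLEL containing a DFmode piece in an SSE register at
   offset 0 and a DImode piece in an integer register at offset 8.  */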
8358
8359 static rtx
8360 construct_container (machine_mode mode, machine_mode orig_mode,
8361 const_tree type, int in_return, int nintregs, int nsseregs,
8362 const int *intreg, int sse_regno)
8363 {
8364 /* The following variables hold the static issued_error state. */
8365 static bool issued_sse_arg_error;
8366 static bool issued_sse_ret_error;
8367 static bool issued_x87_ret_error;
8368
8369 machine_mode tmpmode;
8370 int bytes =
8371 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
8372 enum x86_64_reg_class regclass[MAX_CLASSES];
8373 int n;
8374 int i;
8375 int nexps = 0;
8376 int needed_sseregs, needed_intregs;
8377 rtx exp[MAX_CLASSES];
8378 rtx ret;
8379
8380 n = classify_argument (mode, type, regclass, 0);
8381 if (!n)
8382 return NULL;
8383 if (examine_argument (mode, type, in_return, &needed_intregs,
8384 &needed_sseregs))
8385 return NULL;
8386 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
8387 return NULL;
8388
8389 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
8390 some less clueful developer tries to use floating-point anyway. */
8391 if (needed_sseregs && !TARGET_SSE)
8392 {
8393 if (in_return)
8394 {
8395 if (!issued_sse_ret_error)
8396 {
8397 error ("SSE register return with SSE disabled");
8398 issued_sse_ret_error = true;
8399 }
8400 }
8401 else if (!issued_sse_arg_error)
8402 {
8403 error ("SSE register argument with SSE disabled");
8404 issued_sse_arg_error = true;
8405 }
8406 return NULL;
8407 }
8408
8409 /* Likewise, error if the ABI requires us to return values in the
8410 x87 registers and the user specified -mno-80387. */
8411 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
8412 for (i = 0; i < n; i++)
8413 if (regclass[i] == X86_64_X87_CLASS
8414 || regclass[i] == X86_64_X87UP_CLASS
8415 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
8416 {
8417 if (!issued_x87_ret_error)
8418 {
8419 error ("x87 register return with x87 disabled");
8420 issued_x87_ret_error = true;
8421 }
8422 return NULL;
8423 }
8424
 8425   /* First construct simple cases.  Avoid SCmode, since we want to use
 8426      a single register to pass this type.  */
8427 if (n == 1 && mode != SCmode)
8428 switch (regclass[0])
8429 {
8430 case X86_64_INTEGER_CLASS:
8431 case X86_64_INTEGERSI_CLASS:
8432 return gen_rtx_REG (mode, intreg[0]);
8433 case X86_64_SSE_CLASS:
8434 case X86_64_SSESF_CLASS:
8435 case X86_64_SSEDF_CLASS:
8436 if (mode != BLKmode)
8437 return gen_reg_or_parallel (mode, orig_mode,
8438 SSE_REGNO (sse_regno));
8439 break;
8440 case X86_64_X87_CLASS:
8441 case X86_64_COMPLEX_X87_CLASS:
8442 return gen_rtx_REG (mode, FIRST_STACK_REG);
8443 case X86_64_NO_CLASS:
8444 /* Zero sized array, struct or class. */
8445 return NULL;
8446 default:
8447 gcc_unreachable ();
8448 }
8449 if (n == 2
8450 && regclass[0] == X86_64_SSE_CLASS
8451 && regclass[1] == X86_64_SSEUP_CLASS
8452 && mode != BLKmode)
8453 return gen_reg_or_parallel (mode, orig_mode,
8454 SSE_REGNO (sse_regno));
8455 if (n == 4
8456 && regclass[0] == X86_64_SSE_CLASS
8457 && regclass[1] == X86_64_SSEUP_CLASS
8458 && regclass[2] == X86_64_SSEUP_CLASS
8459 && regclass[3] == X86_64_SSEUP_CLASS
8460 && mode != BLKmode)
8461 return gen_reg_or_parallel (mode, orig_mode,
8462 SSE_REGNO (sse_regno));
8463 if (n == 8
8464 && regclass[0] == X86_64_SSE_CLASS
8465 && regclass[1] == X86_64_SSEUP_CLASS
8466 && regclass[2] == X86_64_SSEUP_CLASS
8467 && regclass[3] == X86_64_SSEUP_CLASS
8468 && regclass[4] == X86_64_SSEUP_CLASS
8469 && regclass[5] == X86_64_SSEUP_CLASS
8470 && regclass[6] == X86_64_SSEUP_CLASS
8471 && regclass[7] == X86_64_SSEUP_CLASS
8472 && mode != BLKmode)
8473 return gen_reg_or_parallel (mode, orig_mode,
8474 SSE_REGNO (sse_regno));
8475 if (n == 2
8476 && regclass[0] == X86_64_X87_CLASS
8477 && regclass[1] == X86_64_X87UP_CLASS)
8478 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
8479
8480 if (n == 2
8481 && regclass[0] == X86_64_INTEGER_CLASS
8482 && regclass[1] == X86_64_INTEGER_CLASS
8483 && (mode == CDImode || mode == TImode)
8484 && intreg[0] + 1 == intreg[1])
8485 return gen_rtx_REG (mode, intreg[0]);
8486
8487 /* Otherwise figure out the entries of the PARALLEL. */
8488 for (i = 0; i < n; i++)
8489 {
8490 int pos;
8491
8492 switch (regclass[i])
8493 {
8494 case X86_64_NO_CLASS:
8495 break;
8496 case X86_64_INTEGER_CLASS:
8497 case X86_64_INTEGERSI_CLASS:
8498 /* Merge TImodes on aligned occasions here too. */
8499 if (i * 8 + 8 > bytes)
8500 tmpmode
8501 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
8502 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
8503 tmpmode = SImode;
8504 else
8505 tmpmode = DImode;
 8506 	  /* We've requested a size (e.g. a 3-byte / 24-bit remainder) for
 8507 	     which no integer mode exists.  Use DImode.  */
8508 if (tmpmode == BLKmode)
8509 tmpmode = DImode;
8510 exp [nexps++]
8511 = gen_rtx_EXPR_LIST (VOIDmode,
8512 gen_rtx_REG (tmpmode, *intreg),
8513 GEN_INT (i*8));
8514 intreg++;
8515 break;
8516 case X86_64_SSESF_CLASS:
8517 exp [nexps++]
8518 = gen_rtx_EXPR_LIST (VOIDmode,
8519 gen_rtx_REG (SFmode,
8520 SSE_REGNO (sse_regno)),
8521 GEN_INT (i*8));
8522 sse_regno++;
8523 break;
8524 case X86_64_SSEDF_CLASS:
8525 exp [nexps++]
8526 = gen_rtx_EXPR_LIST (VOIDmode,
8527 gen_rtx_REG (DFmode,
8528 SSE_REGNO (sse_regno)),
8529 GEN_INT (i*8));
8530 sse_regno++;
8531 break;
8532 case X86_64_SSE_CLASS:
8533 pos = i;
8534 switch (n)
8535 {
8536 case 1:
8537 tmpmode = DImode;
8538 break;
8539 case 2:
8540 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
8541 {
8542 tmpmode = TImode;
8543 i++;
8544 }
8545 else
8546 tmpmode = DImode;
8547 break;
8548 case 4:
8549 gcc_assert (i == 0
8550 && regclass[1] == X86_64_SSEUP_CLASS
8551 && regclass[2] == X86_64_SSEUP_CLASS
8552 && regclass[3] == X86_64_SSEUP_CLASS);
8553 tmpmode = OImode;
8554 i += 3;
8555 break;
8556 case 8:
8557 gcc_assert (i == 0
8558 && regclass[1] == X86_64_SSEUP_CLASS
8559 && regclass[2] == X86_64_SSEUP_CLASS
8560 && regclass[3] == X86_64_SSEUP_CLASS
8561 && regclass[4] == X86_64_SSEUP_CLASS
8562 && regclass[5] == X86_64_SSEUP_CLASS
8563 && regclass[6] == X86_64_SSEUP_CLASS
8564 && regclass[7] == X86_64_SSEUP_CLASS);
8565 tmpmode = XImode;
8566 i += 7;
8567 break;
8568 default:
8569 gcc_unreachable ();
8570 }
8571 exp [nexps++]
8572 = gen_rtx_EXPR_LIST (VOIDmode,
8573 gen_rtx_REG (tmpmode,
8574 SSE_REGNO (sse_regno)),
8575 GEN_INT (pos*8));
8576 sse_regno++;
8577 break;
8578 default:
8579 gcc_unreachable ();
8580 }
8581 }
8582
8583 /* Empty aligned struct, union or class. */
8584 if (nexps == 0)
8585 return NULL;
8586
8587 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
8588 for (i = 0; i < nexps; i++)
8589 XVECEXP (ret, 0, i) = exp [i];
8590 return ret;
8591 }
8592
8593 /* Update the data in CUM to advance over an argument of mode MODE
8594 and data type TYPE. (TYPE is null for libcalls where that information
8595 may not be available.)
8596
 8597    Return the number of integer registers advanced over.  */
8598
8599 static int
8600 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8601 const_tree type, HOST_WIDE_INT bytes,
8602 HOST_WIDE_INT words)
8603 {
8604 int res = 0;
 8605   bool error_p = false;
8606
8607 if (TARGET_IAMCU)
8608 {
8609 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8610 bytes in registers. */
8611 if (!VECTOR_MODE_P (mode) && bytes <= 8)
8612 goto pass_in_reg;
8613 return res;
8614 }
8615
8616 switch (mode)
8617 {
8618 default:
8619 break;
8620
8621 case BLKmode:
8622 if (bytes < 0)
8623 break;
8624 /* FALLTHRU */
8625
8626 case DImode:
8627 case SImode:
8628 case HImode:
8629 case QImode:
8630 pass_in_reg:
8631 cum->words += words;
8632 cum->nregs -= words;
8633 cum->regno += words;
8634 if (cum->nregs >= 0)
8635 res = words;
8636 if (cum->nregs <= 0)
8637 {
8638 cum->nregs = 0;
8639 cfun->machine->arg_reg_available = false;
8640 cum->regno = 0;
8641 }
8642 break;
8643
8644 case OImode:
8645 /* OImode shouldn't be used directly. */
8646 gcc_unreachable ();
8647
8648 case DFmode:
8649 if (cum->float_in_sse == -1)
8650 error_p = 1;
8651 if (cum->float_in_sse < 2)
8652 break;
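       /* FALLTHRU */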
8653 case SFmode:
8654 if (cum->float_in_sse == -1)
8655 error_p = 1;
8656 if (cum->float_in_sse < 1)
8657 break;
8658 /* FALLTHRU */
8659
8660 case V8SFmode:
8661 case V8SImode:
8662 case V64QImode:
8663 case V32HImode:
8664 case V16SImode:
8665 case V8DImode:
8666 case V16SFmode:
8667 case V8DFmode:
8668 case V32QImode:
8669 case V16HImode:
8670 case V4DFmode:
8671 case V4DImode:
8672 case TImode:
8673 case V16QImode:
8674 case V8HImode:
8675 case V4SImode:
8676 case V2DImode:
8677 case V4SFmode:
8678 case V2DFmode:
8679 if (!type || !AGGREGATE_TYPE_P (type))
8680 {
8681 cum->sse_words += words;
8682 cum->sse_nregs -= 1;
8683 cum->sse_regno += 1;
8684 if (cum->sse_nregs <= 0)
8685 {
8686 cum->sse_nregs = 0;
8687 cum->sse_regno = 0;
8688 }
8689 }
8690 break;
8691
8692 case V8QImode:
8693 case V4HImode:
8694 case V2SImode:
8695 case V2SFmode:
8696 case V1TImode:
8697 case V1DImode:
8698 if (!type || !AGGREGATE_TYPE_P (type))
8699 {
8700 cum->mmx_words += words;
8701 cum->mmx_nregs -= 1;
8702 cum->mmx_regno += 1;
8703 if (cum->mmx_nregs <= 0)
8704 {
8705 cum->mmx_nregs = 0;
8706 cum->mmx_regno = 0;
8707 }
8708 }
8709 break;
8710 }
8711 if (error_p)
8712 {
8713 cum->float_in_sse = 0;
8714 error ("calling %qD with SSE calling convention without "
8715 "SSE/SSE2 enabled", cum->decl);
8716 sorry ("this is a GCC bug that can be worked around by adding "
8717 "attribute used to function called");
8718 }
8719
8720 return res;
8721 }
8722
8723 static int
8724 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
8725 const_tree type, HOST_WIDE_INT words, bool named)
8726 {
8727 int int_nregs, sse_nregs;
8728
8729 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
8730 if (!named && (VALID_AVX512F_REG_MODE (mode)
8731 || VALID_AVX256_REG_MODE (mode)))
8732 return 0;
8733
8734 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
8735 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
8736 {
8737 cum->nregs -= int_nregs;
8738 cum->sse_nregs -= sse_nregs;
8739 cum->regno += int_nregs;
8740 cum->sse_regno += sse_nregs;
8741 return int_nregs;
8742 }
8743 else
8744 {
8745 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
8746 cum->words = ROUND_UP (cum->words, align);
8747 cum->words += words;
8748 return 0;
8749 }
8750 }
8751
8752 static int
8753 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
8754 HOST_WIDE_INT words)
8755 {
 8756   /* Anything else should have been passed indirectly.  */
8757 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
8758
8759 cum->words += words;
8760 if (cum->nregs > 0)
8761 {
8762 cum->nregs -= 1;
8763 cum->regno += 1;
8764 return 1;
8765 }
8766 return 0;
8767 }
8768
8769 /* Update the data in CUM to advance over an argument of mode MODE and
8770 data type TYPE. (TYPE is null for libcalls where that information
8771 may not be available.) */
8772
8773 static void
8774 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
8775 const_tree type, bool named)
8776 {
8777 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8778 HOST_WIDE_INT bytes, words;
8779 int nregs;
8780
8781 if (mode == BLKmode)
8782 bytes = int_size_in_bytes (type);
8783 else
8784 bytes = GET_MODE_SIZE (mode);
8785 words = CEIL (bytes, UNITS_PER_WORD);
8786
8787 if (type)
8788 mode = type_natural_mode (type, NULL, false);
8789
8790 if ((type && POINTER_BOUNDS_TYPE_P (type))
8791 || POINTER_BOUNDS_MODE_P (mode))
8792 {
 8793       /* If we pass bounds in the Bounds Table, just update the remaining bounds count.  */
8794 if (cum->bnds_in_bt)
8795 {
8796 cum->bnds_in_bt--;
8797 return;
8798 }
8799
 8800       /* Update the remaining number of bounds to force.  */
8801 if (cum->force_bnd_pass)
8802 cum->force_bnd_pass--;
8803
8804 cum->bnd_regno++;
8805
8806 return;
8807 }
8808
8809 /* The first arg not going to Bounds Tables resets this counter. */
8810 cum->bnds_in_bt = 0;
 8811   /* For unnamed args we always pass bounds, to avoid a bounds mismatch when
 8812      the passed and received types do not match.  If bounds do not follow an
 8813      unnamed arg, still pretend the required number of bounds were passed.  */
8814 if (cum->force_bnd_pass)
8815 {
8816 cum->bnd_regno += cum->force_bnd_pass;
8817 cum->force_bnd_pass = 0;
8818 }
8819
8820 if (TARGET_64BIT)
8821 {
8822 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8823
8824 if (call_abi == MS_ABI)
8825 nregs = function_arg_advance_ms_64 (cum, bytes, words);
8826 else
8827 nregs = function_arg_advance_64 (cum, mode, type, words, named);
8828 }
8829 else
8830 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
8831
8832 /* For stdarg we expect bounds to be passed for each value passed
8833 in register. */
8834 if (cum->stdarg)
8835 cum->force_bnd_pass = nregs;
8836 /* For pointers passed in memory we expect bounds passed in Bounds
8837 Table. */
8838 if (!nregs)
8839 cum->bnds_in_bt = chkp_type_bounds_count (type);
8840 }
8841
8842 /* Define where to put the arguments to a function.
8843 Value is zero to push the argument on the stack,
8844 or a hard register in which to store the argument.
8845
8846 MODE is the argument's machine mode.
8847 TYPE is the data type of the argument (as a tree).
8848 This is null for libcalls where that information may
8849 not be available.
8850 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8851 the preceding args and about the function being called.
8852 NAMED is nonzero if this argument is a named parameter
8853 (otherwise it is an extra parameter matching an ellipsis). */
8854
8855 static rtx
8856 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8857 machine_mode orig_mode, const_tree type,
8858 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
8859 {
8860 bool error_p = false;
8861 /* Avoid the AL settings for the Unix64 ABI. */
8862 if (mode == VOIDmode)
8863 return constm1_rtx;
8864
8865 if (TARGET_IAMCU)
8866 {
8867 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8868 bytes in registers. */
8869 if (!VECTOR_MODE_P (mode) && bytes <= 8)
8870 goto pass_in_reg;
8871 return NULL_RTX;
8872 }
8873
8874 switch (mode)
8875 {
8876 default:
8877 break;
8878
8879 case BLKmode:
8880 if (bytes < 0)
8881 break;
8882 /* FALLTHRU */
8883 case DImode:
8884 case SImode:
8885 case HImode:
8886 case QImode:
8887 pass_in_reg:
8888 if (words <= cum->nregs)
8889 {
8890 int regno = cum->regno;
8891
 8892 	  /* Fastcall allocates the first two DWORD (SImode) or
 8893 	     smaller arguments to ECX and EDX if they aren't
 8894 	     aggregate types.  */
8895 if (cum->fastcall)
8896 {
8897 if (mode == BLKmode
8898 || mode == DImode
8899 || (type && AGGREGATE_TYPE_P (type)))
8900 break;
8901
8902 /* ECX not EAX is the first allocated register. */
8903 if (regno == AX_REG)
8904 regno = CX_REG;
8905 }
8906 return gen_rtx_REG (mode, regno);
8907 }
8908 break;
8909
8910 case DFmode:
8911 if (cum->float_in_sse == -1)
8912 error_p = 1;
8913 if (cum->float_in_sse < 2)
8914 break;
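       /* FALLTHRU */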
8915 case SFmode:
8916 if (cum->float_in_sse == -1)
8917 error_p = 1;
8918 if (cum->float_in_sse < 1)
8919 break;
8920 /* FALLTHRU */
8921 case TImode:
8922 /* In 32bit, we pass TImode in xmm registers. */
8923 case V16QImode:
8924 case V8HImode:
8925 case V4SImode:
8926 case V2DImode:
8927 case V4SFmode:
8928 case V2DFmode:
8929 if (!type || !AGGREGATE_TYPE_P (type))
8930 {
8931 if (cum->sse_nregs)
8932 return gen_reg_or_parallel (mode, orig_mode,
8933 cum->sse_regno + FIRST_SSE_REG);
8934 }
8935 break;
8936
8937 case OImode:
8938 case XImode:
8939 /* OImode and XImode shouldn't be used directly. */
8940 gcc_unreachable ();
8941
8942 case V64QImode:
8943 case V32HImode:
8944 case V16SImode:
8945 case V8DImode:
8946 case V16SFmode:
8947 case V8DFmode:
8948 case V8SFmode:
8949 case V8SImode:
8950 case V32QImode:
8951 case V16HImode:
8952 case V4DFmode:
8953 case V4DImode:
8954 if (!type || !AGGREGATE_TYPE_P (type))
8955 {
8956 if (cum->sse_nregs)
8957 return gen_reg_or_parallel (mode, orig_mode,
8958 cum->sse_regno + FIRST_SSE_REG);
8959 }
8960 break;
8961
8962 case V8QImode:
8963 case V4HImode:
8964 case V2SImode:
8965 case V2SFmode:
8966 case V1TImode:
8967 case V1DImode:
8968 if (!type || !AGGREGATE_TYPE_P (type))
8969 {
8970 if (cum->mmx_nregs)
8971 return gen_reg_or_parallel (mode, orig_mode,
8972 cum->mmx_regno + FIRST_MMX_REG);
8973 }
8974 break;
8975 }
8976 if (error_p)
8977 {
8978 cum->float_in_sse = 0;
8979 error ("calling %qD with SSE calling convention without "
8980 "SSE/SSE2 enabled", cum->decl);
8981 sorry ("this is a GCC bug that can be worked around by adding "
8982 "attribute used to function called");
8983 }
8984
8985 return NULL_RTX;
8986 }
8987
8988 static rtx
8989 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
8990 machine_mode orig_mode, const_tree type, bool named)
8991 {
8992 /* Handle a hidden AL argument containing number of registers
8993 for varargs x86-64 functions. */
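   /* Per the psABI, %al only needs to hold an upper bound on the number of
      vector registers actually used, so a variadic callee knows whether it
      must spill the XMM argument registers in its prologue.  */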
8994 if (mode == VOIDmode)
8995 return GEN_INT (cum->maybe_vaarg
8996 ? (cum->sse_nregs < 0
8997 ? X86_64_SSE_REGPARM_MAX
8998 : cum->sse_regno)
8999 : -1);
9000
9001 switch (mode)
9002 {
9003 default:
9004 break;
9005
9006 case V8SFmode:
9007 case V8SImode:
9008 case V32QImode:
9009 case V16HImode:
9010 case V4DFmode:
9011 case V4DImode:
9012 case V16SFmode:
9013 case V16SImode:
9014 case V64QImode:
9015 case V32HImode:
9016 case V8DFmode:
9017 case V8DImode:
9018 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
9019 if (!named)
9020 return NULL;
9021 break;
9022 }
9023
9024 return construct_container (mode, orig_mode, type, 0, cum->nregs,
9025 cum->sse_nregs,
9026 &x86_64_int_parameter_registers [cum->regno],
9027 cum->sse_regno);
9028 }
9029
9030 static rtx
9031 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
9032 machine_mode orig_mode, bool named,
9033 HOST_WIDE_INT bytes)
9034 {
9035 unsigned int regno;
9036
 9037   /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
 9038      We use the value -2 to specify that the current function call is MS_ABI.  */
9039 if (mode == VOIDmode)
9040 return GEN_INT (-2);
9041
9042 /* If we've run out of registers, it goes on the stack. */
9043 if (cum->nregs == 0)
9044 return NULL_RTX;
9045
9046 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
9047
9048 /* Only floating point modes are passed in anything but integer regs. */
9049 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
9050 {
9051 if (named)
9052 regno = cum->regno + FIRST_SSE_REG;
9053 else
9054 {
9055 rtx t1, t2;
9056
9057 /* Unnamed floating parameters are passed in both the
9058 SSE and integer registers. */
9059 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
9060 t2 = gen_rtx_REG (mode, regno);
9061 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
9062 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
9063 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
9064 }
9065 }
9066 /* Handle aggregated types passed in register. */
9067 if (orig_mode == BLKmode)
9068 {
9069 if (bytes > 0 && bytes <= 8)
9070 mode = (bytes > 4 ? DImode : SImode);
9071 if (mode == BLKmode)
9072 mode = DImode;
9073 }
9074
9075 return gen_reg_or_parallel (mode, orig_mode, regno);
9076 }
9077
9078 /* Return where to put the arguments to a function.
9079 Return zero to push the argument on the stack, or a hard register in which to store the argument.
9080
9081 MODE is the argument's machine mode. TYPE is the data type of the
9082 argument. It is null for libcalls where that information may not be
9083 available. CUM gives information about the preceding args and about
9084 the function being called. NAMED is nonzero if this argument is a
9085 named parameter (otherwise it is an extra parameter matching an
9086 ellipsis). */
9087
9088 static rtx
9089 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
9090 const_tree type, bool named)
9091 {
9092 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9093 machine_mode mode = omode;
9094 HOST_WIDE_INT bytes, words;
9095 rtx arg;
9096
9097 /* All pointer bounds arguments are handled separately here. */
9098 if ((type && POINTER_BOUNDS_TYPE_P (type))
9099 || POINTER_BOUNDS_MODE_P (mode))
9100 {
9101 /* Return NULL if bounds are forced to go in Bounds Table. */
9102 if (cum->bnds_in_bt)
9103 arg = NULL;
9104 /* Return the next available bound reg if any. */
9105 else if (cum->bnd_regno <= LAST_BND_REG)
9106 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
9107 /* Return the next special slot number otherwise. */
9108 else
9109 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
9110
9111 return arg;
9112 }
9113
9114 if (mode == BLKmode)
9115 bytes = int_size_in_bytes (type);
9116 else
9117 bytes = GET_MODE_SIZE (mode);
9118 words = CEIL (bytes, UNITS_PER_WORD);
9119
9120 /* To simplify the code below, represent vector types with a vector mode
9121 even if MMX/SSE are not active. */
9122 if (type && TREE_CODE (type) == VECTOR_TYPE)
9123 mode = type_natural_mode (type, cum, false);
9124
9125 if (TARGET_64BIT)
9126 {
9127 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
9128
9129 if (call_abi == MS_ABI)
9130 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
9131 else
9132 arg = function_arg_64 (cum, mode, omode, type, named);
9133 }
9134 else
9135 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
9136
9137 return arg;
9138 }
9139
9140 /* A C expression that indicates when an argument must be passed by
9141 reference. If nonzero for an argument, a copy of that argument is
9142 made in memory and a pointer to the argument is passed instead of
9143 the argument itself. The pointer is passed in whatever way is
9144 appropriate for passing a pointer to that type. */
9145
9146 static bool
9147 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
9148 const_tree type, bool)
9149 {
9150 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9151
9152 /* Bounds are never passed by reference. */
9153 if ((type && POINTER_BOUNDS_TYPE_P (type))
9154 || POINTER_BOUNDS_MODE_P (mode))
9155 return false;
9156
9157 if (TARGET_64BIT)
9158 {
9159 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
9160
9161 /* See Windows x64 Software Convention. */
9162 if (call_abi == MS_ABI)
9163 {
9164 HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
9165
9166 if (type)
9167 {
9168 /* Arrays are passed by reference. */
9169 if (TREE_CODE (type) == ARRAY_TYPE)
9170 return true;
9171
9172 if (RECORD_OR_UNION_TYPE_P (type))
9173 {
9174 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
9175 are passed by reference. */
9176 msize = int_size_in_bytes (type);
9177 }
9178 }
9179
9180 /* __m128 is passed by reference. */
9181 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
9182 }
9183 else if (type && int_size_in_bytes (type) == -1)
9184 return true;
9185 }
9186
9187 return false;
9188 }
9189
9190 /* Return true when TYPE should be 128bit aligned for 32bit argument
9191 passing ABI. XXX: This function is obsolete and is only used for
9192 checking psABI compatibility with previous versions of GCC. */
9193
9194 static bool
9195 ix86_compat_aligned_value_p (const_tree type)
9196 {
9197 machine_mode mode = TYPE_MODE (type);
9198 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
9199 || mode == TDmode
9200 || mode == TFmode
9201 || mode == TCmode)
9202 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
9203 return true;
9204 if (TYPE_ALIGN (type) < 128)
9205 return false;
9206
9207 if (AGGREGATE_TYPE_P (type))
9208 {
9209 /* Walk the aggregates recursively. */
9210 switch (TREE_CODE (type))
9211 {
9212 case RECORD_TYPE:
9213 case UNION_TYPE:
9214 case QUAL_UNION_TYPE:
9215 {
9216 tree field;
9217
9218 /* Walk all the structure fields. */
9219 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
9220 {
9221 if (TREE_CODE (field) == FIELD_DECL
9222 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
9223 return true;
9224 }
9225 break;
9226 }
9227
9228 case ARRAY_TYPE:
 9229 	  /* Just in case some language passes arrays by value.  */
9230 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
9231 return true;
9232 break;
9233
9234 default:
9235 gcc_unreachable ();
9236 }
9237 }
9238 return false;
9239 }
9240
9241 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
9242 XXX: This function is obsolete and is only used for checking psABI
9243 compatibility with previous versions of GCC. */
9244
9245 static unsigned int
9246 ix86_compat_function_arg_boundary (machine_mode mode,
9247 const_tree type, unsigned int align)
9248 {
9249 /* In 32bit, only _Decimal128 and __float128 are aligned to their
9250 natural boundaries. */
9251 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
9252 {
9253 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
9254 make an exception for SSE modes since these require 128bit
9255 alignment.
9256
9257 The handling here differs from field_alignment. ICC aligns MMX
9258 arguments to 4 byte boundaries, while structure fields are aligned
9259 to 8 byte boundaries. */
9260 if (!type)
9261 {
9262 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
9263 align = PARM_BOUNDARY;
9264 }
9265 else
9266 {
9267 if (!ix86_compat_aligned_value_p (type))
9268 align = PARM_BOUNDARY;
9269 }
9270 }
9271 if (align > BIGGEST_ALIGNMENT)
9272 align = BIGGEST_ALIGNMENT;
9273 return align;
9274 }
9275
9276 /* Return true when TYPE should be 128bit aligned for 32bit argument
9277 passing ABI. */
9278
9279 static bool
9280 ix86_contains_aligned_value_p (const_tree type)
9281 {
9282 machine_mode mode = TYPE_MODE (type);
9283
9284 if (mode == XFmode || mode == XCmode)
9285 return false;
9286
9287 if (TYPE_ALIGN (type) < 128)
9288 return false;
9289
9290 if (AGGREGATE_TYPE_P (type))
9291 {
9292 /* Walk the aggregates recursively. */
9293 switch (TREE_CODE (type))
9294 {
9295 case RECORD_TYPE:
9296 case UNION_TYPE:
9297 case QUAL_UNION_TYPE:
9298 {
9299 tree field;
9300
9301 /* Walk all the structure fields. */
9302 for (field = TYPE_FIELDS (type);
9303 field;
9304 field = DECL_CHAIN (field))
9305 {
9306 if (TREE_CODE (field) == FIELD_DECL
9307 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
9308 return true;
9309 }
9310 break;
9311 }
9312
9313 case ARRAY_TYPE:
 9314 	  /* Just in case some language passes arrays by value.  */
9315 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
9316 return true;
9317 break;
9318
9319 default:
9320 gcc_unreachable ();
9321 }
9322 }
9323 else
9324 return TYPE_ALIGN (type) >= 128;
9325
9326 return false;
9327 }
9328
9329 /* Gives the alignment boundary, in bits, of an argument with the
9330 specified mode and type. */
9331
9332 static unsigned int
9333 ix86_function_arg_boundary (machine_mode mode, const_tree type)
9334 {
9335 unsigned int align;
9336 if (type)
9337 {
 9338       /* The main variant type is used for the call, so convert TYPE to
 9339 	 its main variant.  */
9340 type = TYPE_MAIN_VARIANT (type);
9341 align = TYPE_ALIGN (type);
9342 }
9343 else
9344 align = GET_MODE_ALIGNMENT (mode);
9345 if (align < PARM_BOUNDARY)
9346 align = PARM_BOUNDARY;
9347 else
9348 {
9349 static bool warned;
9350 unsigned int saved_align = align;
9351
9352 if (!TARGET_64BIT)
9353 {
9354 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
9355 if (!type)
9356 {
9357 if (mode == XFmode || mode == XCmode)
9358 align = PARM_BOUNDARY;
9359 }
9360 else if (!ix86_contains_aligned_value_p (type))
9361 align = PARM_BOUNDARY;
9362
9363 if (align < 128)
9364 align = PARM_BOUNDARY;
9365 }
9366
9367 if (warn_psabi
9368 && !warned
9369 && align != ix86_compat_function_arg_boundary (mode, type,
9370 saved_align))
9371 {
9372 warned = true;
9373 inform (input_location,
9374 "The ABI for passing parameters with %d-byte"
9375 " alignment has changed in GCC 4.6",
9376 align / BITS_PER_UNIT);
9377 }
9378 }
9379
9380 return align;
9381 }
9382
9383 /* Return true if N is a possible register number of function value. */
9384
9385 static bool
9386 ix86_function_value_regno_p (const unsigned int regno)
9387 {
9388 switch (regno)
9389 {
9390 case AX_REG:
9391 return true;
9392 case DX_REG:
9393 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
9394 case DI_REG:
9395 case SI_REG:
9396 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
9397
9398 case BND0_REG:
9399 case BND1_REG:
9400 return chkp_function_instrumented_p (current_function_decl);
9401
9402 /* Complex values are returned in %st(0)/%st(1) pair. */
9403 case ST0_REG:
9404 case ST1_REG:
9405 /* TODO: The function should depend on current function ABI but
9406 builtins.c would need updating then. Therefore we use the
9407 default ABI. */
9408 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
9409 return false;
9410 return TARGET_FLOAT_RETURNS_IN_80387;
9411
9412 /* Complex values are returned in %xmm0/%xmm1 pair. */
9413 case XMM0_REG:
9414 case XMM1_REG:
9415 return TARGET_SSE;
9416
9417 case MM0_REG:
9418 if (TARGET_MACHO || TARGET_64BIT)
9419 return false;
9420 return TARGET_MMX;
9421 }
9422
9423 return false;
9424 }
9425
9426 /* Define how to find the value returned by a function.
9427 VALTYPE is the data type of the value (as a tree).
9428 If the precise function being called is known, FUNC is its FUNCTION_DECL;
9429 otherwise, FUNC is 0. */
9430
9431 static rtx
9432 function_value_32 (machine_mode orig_mode, machine_mode mode,
9433 const_tree fntype, const_tree fn)
9434 {
9435 unsigned int regno;
9436
9437 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
9438 we normally prevent this case when mmx is not available. However
9439 some ABIs may require the result to be returned like DImode. */
9440 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
9441 regno = FIRST_MMX_REG;
9442
9443 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
9444 we prevent this case when sse is not available. However some ABIs
9445 may require the result to be returned like integer TImode. */
9446 else if (mode == TImode
9447 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
9448 regno = FIRST_SSE_REG;
9449
9450 /* 32-byte vector modes in %ymm0. */
9451 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
9452 regno = FIRST_SSE_REG;
9453
9454 /* 64-byte vector modes in %zmm0. */
9455 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
9456 regno = FIRST_SSE_REG;
9457
9458 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
9459 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
9460 regno = FIRST_FLOAT_REG;
9461 else
9462 /* Most things go in %eax. */
9463 regno = AX_REG;
9464
9465 /* Override FP return register with %xmm0 for local functions when
9466 SSE math is enabled or for functions with sseregparm attribute. */
9467 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
9468 {
9469 int sse_level = ix86_function_sseregparm (fntype, fn, false);
9470 if (sse_level == -1)
9471 {
9472 error ("calling %qD with SSE caling convention without "
9473 "SSE/SSE2 enabled", fn);
9474 sorry ("this is a GCC bug that can be worked around by adding "
9475 "attribute used to function called");
9476 }
9477 else if ((sse_level >= 1 && mode == SFmode)
9478 || (sse_level == 2 && mode == DFmode))
9479 regno = FIRST_SSE_REG;
9480 }
9481
9482 /* OImode shouldn't be used directly. */
9483 gcc_assert (mode != OImode);
9484
9485 return gen_rtx_REG (orig_mode, regno);
9486 }
9487
9488 static rtx
9489 function_value_64 (machine_mode orig_mode, machine_mode mode,
9490 const_tree valtype)
9491 {
9492 rtx ret;
9493
9494 /* Handle libcalls, which don't provide a type node. */
9495 if (valtype == NULL)
9496 {
9497 unsigned int regno;
9498
9499 switch (mode)
9500 {
9501 case SFmode:
9502 case SCmode:
9503 case DFmode:
9504 case DCmode:
9505 case TFmode:
9506 case SDmode:
9507 case DDmode:
9508 case TDmode:
9509 regno = FIRST_SSE_REG;
9510 break;
9511 case XFmode:
9512 case XCmode:
9513 regno = FIRST_FLOAT_REG;
9514 break;
9515 case TCmode:
9516 return NULL;
9517 default:
9518 regno = AX_REG;
9519 }
9520
9521 return gen_rtx_REG (mode, regno);
9522 }
9523 else if (POINTER_TYPE_P (valtype))
9524 {
9525 /* Pointers are always returned in word_mode. */
9526 mode = word_mode;
9527 }
9528
9529 ret = construct_container (mode, orig_mode, valtype, 1,
9530 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
9531 x86_64_int_return_registers, 0);
9532
9533 /* For zero sized structures, construct_container returns NULL, but we
 9534      need to keep the rest of the compiler happy by returning a meaningful value.  */
9535 if (!ret)
9536 ret = gen_rtx_REG (orig_mode, AX_REG);
9537
9538 return ret;
9539 }
9540
9541 static rtx
9542 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
9543 const_tree valtype)
9544 {
9545 unsigned int regno = AX_REG;
9546
9547 if (TARGET_SSE)
9548 {
9549 switch (GET_MODE_SIZE (mode))
9550 {
9551 case 16:
9552 if (valtype != NULL_TREE
9553 && !VECTOR_INTEGER_TYPE_P (valtype)
9554 && !VECTOR_INTEGER_TYPE_P (valtype)
9555 && !INTEGRAL_TYPE_P (valtype)
9556 && !VECTOR_FLOAT_TYPE_P (valtype))
9557 break;
9558 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9559 && !COMPLEX_MODE_P (mode))
9560 regno = FIRST_SSE_REG;
9561 break;
9562 case 8:
9563 case 4:
9564 if (mode == SFmode || mode == DFmode)
9565 regno = FIRST_SSE_REG;
9566 break;
9567 default:
9568 break;
9569 }
9570 }
9571 return gen_rtx_REG (orig_mode, regno);
9572 }
9573
9574 static rtx
9575 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
9576 machine_mode orig_mode, machine_mode mode)
9577 {
9578 const_tree fn, fntype;
9579
9580 fn = NULL_TREE;
9581 if (fntype_or_decl && DECL_P (fntype_or_decl))
9582 fn = fntype_or_decl;
9583 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
9584
9585 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
9586 || POINTER_BOUNDS_MODE_P (mode))
9587 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
9588 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
9589 return function_value_ms_64 (orig_mode, mode, valtype);
9590 else if (TARGET_64BIT)
9591 return function_value_64 (orig_mode, mode, valtype);
9592 else
9593 return function_value_32 (orig_mode, mode, fntype, fn);
9594 }
9595
9596 static rtx
9597 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
9598 {
9599 machine_mode mode, orig_mode;
9600
9601 orig_mode = TYPE_MODE (valtype);
9602 mode = type_natural_mode (valtype, NULL, true);
9603 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
9604 }
9605
9606 /* Return an RTX representing a place where a function returns
 9607    or receives pointer bounds, or NULL if no bounds are returned.
9608
9609 VALTYPE is a data type of a value returned by the function.
9610
9611 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
9612 or FUNCTION_TYPE of the function.
9613
9614 If OUTGOING is false, return a place in which the caller will
9615 see the return value. Otherwise, return a place where a
9616 function returns a value. */
9617
9618 static rtx
9619 ix86_function_value_bounds (const_tree valtype,
9620 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
9621 bool outgoing ATTRIBUTE_UNUSED)
9622 {
9623 rtx res = NULL_RTX;
9624
9625 if (BOUNDED_TYPE_P (valtype))
9626 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
9627 else if (chkp_type_has_pointer (valtype))
9628 {
9629 bitmap slots;
9630 rtx bounds[2];
9631 bitmap_iterator bi;
9632 unsigned i, bnd_no = 0;
9633
9634 bitmap_obstack_initialize (NULL);
9635 slots = BITMAP_ALLOC (NULL);
9636 chkp_find_bound_slots (valtype, slots);
9637
9638 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
9639 {
9640 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
9641 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
9642 gcc_assert (bnd_no < 2);
9643 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
9644 }
9645
9646 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
9647
9648 BITMAP_FREE (slots);
9649 bitmap_obstack_release (NULL);
9650 }
9651 else
9652 res = NULL_RTX;
9653
9654 return res;
9655 }
9656
9657 /* Pointer function arguments and return values are promoted to
9658 word_mode. */
9659
9660 static machine_mode
9661 ix86_promote_function_mode (const_tree type, machine_mode mode,
9662 int *punsignedp, const_tree fntype,
9663 int for_return)
9664 {
9665 if (type != NULL_TREE && POINTER_TYPE_P (type))
9666 {
9667 *punsignedp = POINTERS_EXTEND_UNSIGNED;
9668 return word_mode;
9669 }
9670 return default_promote_function_mode (type, mode, punsignedp, fntype,
9671 for_return);
9672 }
9673
9674 /* Return true if a structure, union or array with MODE containing FIELD
9675 should be accessed using BLKmode. */
9676
9677 static bool
9678 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
9679 {
9680 /* Union with XFmode must be in BLKmode. */
9681 return (mode == XFmode
9682 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
9683 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
9684 }
9685
9686 rtx
9687 ix86_libcall_value (machine_mode mode)
9688 {
9689 return ix86_function_value_1 (NULL, NULL, mode, mode);
9690 }
9691
9692 /* Return true iff type is returned in memory. */
9693
9694 static bool
9695 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9696 {
9697 #ifdef SUBTARGET_RETURN_IN_MEMORY
9698 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
9699 #else
9700 const machine_mode mode = type_natural_mode (type, NULL, true);
9701 HOST_WIDE_INT size;
9702
9703 if (POINTER_BOUNDS_TYPE_P (type))
9704 return false;
9705
9706 if (TARGET_64BIT)
9707 {
9708 if (ix86_function_type_abi (fntype) == MS_ABI)
9709 {
9710 size = int_size_in_bytes (type);
9711
9712 /* __m128 is returned in xmm0. */
9713 if ((!type || VECTOR_INTEGER_TYPE_P (type)
9714 || INTEGRAL_TYPE_P (type)
9715 || VECTOR_FLOAT_TYPE_P (type))
9716 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9717 && !COMPLEX_MODE_P (mode)
9718 && (GET_MODE_SIZE (mode) == 16 || size == 16))
9719 return false;
9720
 9721 	  /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes.  */
9722 return size != 1 && size != 2 && size != 4 && size != 8;
9723 }
9724 else
9725 {
9726 int needed_intregs, needed_sseregs;
9727
9728 return examine_argument (mode, type, 1,
9729 &needed_intregs, &needed_sseregs);
9730 }
9731 }
9732 else
9733 {
9734 size = int_size_in_bytes (type);
9735
9736 /* Intel MCU psABI returns scalars and aggregates no larger than 8
9737 bytes in registers. */
9738 if (TARGET_IAMCU)
9739 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
9740
9741 if (mode == BLKmode)
9742 return true;
9743
9744 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
9745 return false;
9746
9747 if (VECTOR_MODE_P (mode) || mode == TImode)
9748 {
9749 /* User-created vectors small enough to fit in EAX. */
9750 if (size < 8)
9751 return false;
9752
 9753 	  /* Unless the ABI prescribes otherwise,
9754 MMX/3dNow values are returned in MM0 if available. */
9755
9756 if (size == 8)
9757 return TARGET_VECT8_RETURNS || !TARGET_MMX;
9758
9759 /* SSE values are returned in XMM0 if available. */
9760 if (size == 16)
9761 return !TARGET_SSE;
9762
9763 /* AVX values are returned in YMM0 if available. */
9764 if (size == 32)
9765 return !TARGET_AVX;
9766
9767 /* AVX512F values are returned in ZMM0 if available. */
9768 if (size == 64)
9769 return !TARGET_AVX512F;
9770 }
9771
9772 if (mode == XFmode)
9773 return false;
9774
9775 if (size > 12)
9776 return true;
9777
9778 /* OImode shouldn't be used directly. */
9779 gcc_assert (mode != OImode);
9780
9781 return false;
9782 }
9783 #endif
9784 }
9785
9786 \f
9787 /* Create the va_list data type. */
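/* For reference, the record built below corresponds to the SysV AMD64
   va_list layout mandated by the psABI:

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];  */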
9788
9789 static tree
9790 ix86_build_builtin_va_list_64 (void)
9791 {
9792 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
9793
9794 record = lang_hooks.types.make_type (RECORD_TYPE);
9795 type_decl = build_decl (BUILTINS_LOCATION,
9796 TYPE_DECL, get_identifier ("__va_list_tag"), record);
9797
9798 f_gpr = build_decl (BUILTINS_LOCATION,
9799 FIELD_DECL, get_identifier ("gp_offset"),
9800 unsigned_type_node);
9801 f_fpr = build_decl (BUILTINS_LOCATION,
9802 FIELD_DECL, get_identifier ("fp_offset"),
9803 unsigned_type_node);
9804 f_ovf = build_decl (BUILTINS_LOCATION,
9805 FIELD_DECL, get_identifier ("overflow_arg_area"),
9806 ptr_type_node);
9807 f_sav = build_decl (BUILTINS_LOCATION,
9808 FIELD_DECL, get_identifier ("reg_save_area"),
9809 ptr_type_node);
9810
9811 va_list_gpr_counter_field = f_gpr;
9812 va_list_fpr_counter_field = f_fpr;
9813
9814 DECL_FIELD_CONTEXT (f_gpr) = record;
9815 DECL_FIELD_CONTEXT (f_fpr) = record;
9816 DECL_FIELD_CONTEXT (f_ovf) = record;
9817 DECL_FIELD_CONTEXT (f_sav) = record;
9818
9819 TYPE_STUB_DECL (record) = type_decl;
9820 TYPE_NAME (record) = type_decl;
9821 TYPE_FIELDS (record) = f_gpr;
9822 DECL_CHAIN (f_gpr) = f_fpr;
9823 DECL_CHAIN (f_fpr) = f_ovf;
9824 DECL_CHAIN (f_ovf) = f_sav;
9825
9826 layout_type (record);
9827
9828 /* The correct type is an array type of one element. */
9829 return build_array_type (record, build_index_type (size_zero_node));
9830 }
9831
9832 /* Setup the builtin va_list data type and for 64-bit the additional
9833 calling convention specific va_list data types. */
9834
9835 static tree
9836 ix86_build_builtin_va_list (void)
9837 {
9838 if (TARGET_64BIT)
9839 {
9840 /* Initialize ABI specific va_list builtin types. */
9841 tree sysv_va_list, ms_va_list;
9842
9843 sysv_va_list = ix86_build_builtin_va_list_64 ();
9844 sysv_va_list_type_node = build_variant_type_copy (sysv_va_list);
9845
9846 /* For MS_ABI we use plain pointer to argument area. */
9847 ms_va_list = build_pointer_type (char_type_node);
9848 ms_va_list_type_node = build_variant_type_copy (ms_va_list);
9849
9850 return (ix86_abi == MS_ABI) ? ms_va_list : sysv_va_list;
9851 }
9852 else
9853 {
9854 /* For i386 we use plain pointer to argument area. */
9855 return build_pointer_type (char_type_node);
9856 }
9857 }
9858
9859 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
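/* Layout of the varargs register save area set up below (per the psABI):
   the first X86_64_REGPARM_MAX * 8 bytes hold the integer argument
   registers, followed by one 16-byte slot per SSE argument register.  */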
9860
9861 static void
9862 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
9863 {
9864 rtx save_area, mem;
9865 alias_set_type set;
9866 int i, max;
9867
9868 /* GPR size of varargs save area. */
9869 if (cfun->va_list_gpr_size)
9870 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
9871 else
9872 ix86_varargs_gpr_size = 0;
9873
9874 /* FPR size of varargs save area. We don't need it if we don't pass
9875 anything in SSE registers. */
9876 if (TARGET_SSE && cfun->va_list_fpr_size)
9877 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
9878 else
9879 ix86_varargs_fpr_size = 0;
9880
9881 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
9882 return;
9883
9884 save_area = frame_pointer_rtx;
9885 set = get_varargs_alias_set ();
9886
9887 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
9888 if (max > X86_64_REGPARM_MAX)
9889 max = X86_64_REGPARM_MAX;
9890
9891 for (i = cum->regno; i < max; i++)
9892 {
9893 mem = gen_rtx_MEM (word_mode,
9894 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
9895 MEM_NOTRAP_P (mem) = 1;
9896 set_mem_alias_set (mem, set);
9897 emit_move_insn (mem,
9898 gen_rtx_REG (word_mode,
9899 x86_64_int_parameter_registers[i]));
9900 }
9901
9902 if (ix86_varargs_fpr_size)
9903 {
9904 machine_mode smode;
9905 rtx_code_label *label;
9906 rtx test;
9907
 9908       /* Now emit code to save SSE registers.  The AX parameter contains the number
9909 of SSE parameter registers used to call this function, though all we
9910 actually check here is the zero/non-zero status. */
9911
9912 label = gen_label_rtx ();
9913 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
9914 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
9915 label));
9916
9917 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
9918 we used movdqa (i.e. TImode) instead? Perhaps even better would
9919 be if we could determine the real mode of the data, via a hook
9920 into pass_stdarg. Ignore all that for now. */
9921 smode = V4SFmode;
9922 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
9923 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
9924
9925 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
9926 if (max > X86_64_SSE_REGPARM_MAX)
9927 max = X86_64_SSE_REGPARM_MAX;
9928
9929 for (i = cum->sse_regno; i < max; ++i)
9930 {
9931 mem = plus_constant (Pmode, save_area,
9932 i * 16 + ix86_varargs_gpr_size);
9933 mem = gen_rtx_MEM (smode, mem);
9934 MEM_NOTRAP_P (mem) = 1;
9935 set_mem_alias_set (mem, set);
9936 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
9937
9938 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
9939 }
9940
9941 emit_label (label);
9942 }
9943 }
9944
9945 static void
9946 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
9947 {
9948 alias_set_type set = get_varargs_alias_set ();
9949 int i;
9950
 9951   /* Reset to zero, as there might have been a SYSV va_arg used
 9952      before.  */
9953 ix86_varargs_gpr_size = 0;
9954 ix86_varargs_fpr_size = 0;
9955
9956 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
9957 {
9958 rtx reg, mem;
9959
9960 mem = gen_rtx_MEM (Pmode,
9961 plus_constant (Pmode, virtual_incoming_args_rtx,
9962 i * UNITS_PER_WORD));
9963 MEM_NOTRAP_P (mem) = 1;
9964 set_mem_alias_set (mem, set);
9965
9966 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
9967 emit_move_insn (mem, reg);
9968 }
9969 }
9970
9971 static void
9972 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
9973 tree type, int *, int no_rtl)
9974 {
9975 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9976 CUMULATIVE_ARGS next_cum;
9977 tree fntype;
9978
9979 /* This argument doesn't appear to be used anymore. Which is good,
9980 because the old code here didn't suppress rtl generation. */
9981 gcc_assert (!no_rtl);
9982
9983 if (!TARGET_64BIT)
9984 return;
9985
9986 fntype = TREE_TYPE (current_function_decl);
9987
9988 /* For varargs, we do not want to skip the dummy va_dcl argument.
9989 For stdargs, we do want to skip the last named argument. */
9990 next_cum = *cum;
9991 if (stdarg_p (fntype))
9992 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
9993 true);
9994
9995 if (cum->call_abi == MS_ABI)
9996 setup_incoming_varargs_ms_64 (&next_cum);
9997 else
9998 setup_incoming_varargs_64 (&next_cum);
9999 }
10000
10001 static void
10002 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
10003 enum machine_mode mode,
10004 tree type,
10005 int *pretend_size ATTRIBUTE_UNUSED,
10006 int no_rtl)
10007 {
10008 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10009 CUMULATIVE_ARGS next_cum;
10010 tree fntype;
10011 rtx save_area;
10012 int bnd_reg, i, max;
10013
10014 gcc_assert (!no_rtl);
10015
10016 /* Do nothing if we use plain pointer to argument area. */
10017 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
10018 return;
10019
10020 fntype = TREE_TYPE (current_function_decl);
10021
10022 /* For varargs, we do not want to skip the dummy va_dcl argument.
10023 For stdargs, we do want to skip the last named argument. */
10024 next_cum = *cum;
10025 if (stdarg_p (fntype))
10026 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
10027 true);
10028 save_area = frame_pointer_rtx;
10029
10030 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
10031 if (max > X86_64_REGPARM_MAX)
10032 max = X86_64_REGPARM_MAX;
10033
10034 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
10035 if (chkp_function_instrumented_p (current_function_decl))
10036 for (i = cum->regno; i < max; i++)
10037 {
10038 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
10039 rtx ptr = gen_rtx_REG (Pmode,
10040 x86_64_int_parameter_registers[i]);
10041 rtx bounds;
10042
10043 if (bnd_reg <= LAST_BND_REG)
10044 bounds = gen_rtx_REG (BNDmode, bnd_reg);
10045 else
10046 {
10047 rtx ldx_addr =
10048 plus_constant (Pmode, arg_pointer_rtx,
10049 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
10050 bounds = gen_reg_rtx (BNDmode);
10051 emit_insn (BNDmode == BND64mode
10052 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
10053 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
10054 }
10055
10056 emit_insn (BNDmode == BND64mode
10057 ? gen_bnd64_stx (addr, ptr, bounds)
10058 : gen_bnd32_stx (addr, ptr, bounds));
10059
10060 bnd_reg++;
10061 }
10062 }
10063
10064
10065 /* Check if TYPE is a va_list of the char * kind. */
10066
10067 static bool
10068 is_va_list_char_pointer (tree type)
10069 {
10070 tree canonic;
10071
10072 /* For 32-bit it is always true. */
10073 if (!TARGET_64BIT)
10074 return true;
10075 canonic = ix86_canonical_va_list_type (type);
10076 return (canonic == ms_va_list_type_node
10077 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
10078 }
10079
10080 /* Implement va_start. */
10081
10082 static void
10083 ix86_va_start (tree valist, rtx nextarg)
10084 {
10085 HOST_WIDE_INT words, n_gpr, n_fpr;
10086 tree f_gpr, f_fpr, f_ovf, f_sav;
10087 tree gpr, fpr, ovf, sav, t;
10088 tree type;
10089 rtx ovf_rtx;
10090
10091 if (flag_split_stack
10092 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10093 {
10094 unsigned int scratch_regno;
10095
10096 /* When we are splitting the stack, we can't refer to the stack
10097 arguments using internal_arg_pointer, because they may be on
10098 the old stack. The split stack prologue will arrange to
10099 leave a pointer to the old stack arguments in a scratch
10100 register, which we here copy to a pseudo-register. The split
10101 stack prologue can't set the pseudo-register directly because
10102 it (the prologue) runs before any registers have been saved. */
10103
10104 scratch_regno = split_stack_prologue_scratch_regno ();
10105 if (scratch_regno != INVALID_REGNUM)
10106 {
10107 rtx reg;
10108 rtx_insn *seq;
10109
10110 reg = gen_reg_rtx (Pmode);
10111 cfun->machine->split_stack_varargs_pointer = reg;
10112
10113 start_sequence ();
10114 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
10115 seq = get_insns ();
10116 end_sequence ();
10117
10118 push_topmost_sequence ();
10119 emit_insn_after (seq, entry_of_function ());
10120 pop_topmost_sequence ();
10121 }
10122 }
10123
10124 /* Only 64-bit targets need something special. */
10125 if (is_va_list_char_pointer (TREE_TYPE (valist)))
10126 {
10127 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10128 std_expand_builtin_va_start (valist, nextarg);
10129 else
10130 {
10131 rtx va_r, next;
10132
10133 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
10134 next = expand_binop (ptr_mode, add_optab,
10135 cfun->machine->split_stack_varargs_pointer,
10136 crtl->args.arg_offset_rtx,
10137 NULL_RTX, 0, OPTAB_LIB_WIDEN);
10138 convert_move (va_r, next, 0);
10139
10140 /* Store zero bounds for va_list. */
10141 if (chkp_function_instrumented_p (current_function_decl))
10142 chkp_expand_bounds_reset_for_mem (valist,
10143 make_tree (TREE_TYPE (valist),
10144 next));
10145
10146 }
10147 return;
10148 }
10149
10150 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
10151 f_fpr = DECL_CHAIN (f_gpr);
10152 f_ovf = DECL_CHAIN (f_fpr);
10153 f_sav = DECL_CHAIN (f_ovf);
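  /* These four fields correspond to the SysV AMD64 va_list record, which is
     roughly (an illustrative sketch of the ABI layout, not a declaration
     used by this file):

	 typedef struct {
	   unsigned int gp_offset;
	   unsigned int fp_offset;
	   void *overflow_arg_area;
	   void *reg_save_area;
	 } __va_list_tag;  */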
10154
10155 valist = build_simple_mem_ref (valist);
10156 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
10157 /* The following should be folded into the MEM_REF offset. */
10158 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
10159 f_gpr, NULL_TREE);
10160 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
10161 f_fpr, NULL_TREE);
10162 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
10163 f_ovf, NULL_TREE);
10164 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
10165 f_sav, NULL_TREE);
10166
10167 /* Count number of gp and fp argument registers used. */
10168 words = crtl->args.info.words;
10169 n_gpr = crtl->args.info.regno;
10170 n_fpr = crtl->args.info.sse_regno;
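  /* A note on the offsets below: the 64-bit register save area laid out by
     setup_incoming_varargs_64 holds the integer argument registers in
     8-byte slots at offsets 0..47, followed by the SSE registers in 16-byte
     slots starting at offset 8 * X86_64_REGPARM_MAX, which is why GPR is
     initialized to n_gpr * 8 and FPR to 8 * X86_64_REGPARM_MAX + n_fpr * 16
     below.  */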
10171
10172 if (cfun->va_list_gpr_size)
10173 {
10174 type = TREE_TYPE (gpr);
10175 t = build2 (MODIFY_EXPR, type,
10176 gpr, build_int_cst (type, n_gpr * 8));
10177 TREE_SIDE_EFFECTS (t) = 1;
10178 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10179 }
10180
10181 if (TARGET_SSE && cfun->va_list_fpr_size)
10182 {
10183 type = TREE_TYPE (fpr);
10184 t = build2 (MODIFY_EXPR, type, fpr,
10185 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
10186 TREE_SIDE_EFFECTS (t) = 1;
10187 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10188 }
10189
10190 /* Find the overflow area. */
10191 type = TREE_TYPE (ovf);
10192 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10193 ovf_rtx = crtl->args.internal_arg_pointer;
10194 else
10195 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
10196 t = make_tree (type, ovf_rtx);
10197 if (words != 0)
10198 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
10199
10200 /* Store zero bounds for overflow area pointer. */
10201 if (chkp_function_instrumented_p (current_function_decl))
10202 chkp_expand_bounds_reset_for_mem (ovf, t);
10203
10204 t = build2 (MODIFY_EXPR, type, ovf, t);
10205 TREE_SIDE_EFFECTS (t) = 1;
10206 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10207
10208 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
10209 {
10210 /* Find the register save area.
10211 The function prologue saves it right above the stack frame. */
10212 type = TREE_TYPE (sav);
10213 t = make_tree (type, frame_pointer_rtx);
10214 if (!ix86_varargs_gpr_size)
10215 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
10216
10217 /* Store zero bounds for save area pointer. */
10218 if (chkp_function_instrumented_p (current_function_decl))
10219 chkp_expand_bounds_reset_for_mem (sav, t);
10220
10221 t = build2 (MODIFY_EXPR, type, sav, t);
10222 TREE_SIDE_EFFECTS (t) = 1;
10223 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10224 }
10225 }
10226
10227 /* Implement va_arg. */
10228
10229 static tree
10230 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
10231 gimple_seq *post_p)
10232 {
10233 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
10234 tree f_gpr, f_fpr, f_ovf, f_sav;
10235 tree gpr, fpr, ovf, sav, t;
10236 int size, rsize;
10237 tree lab_false, lab_over = NULL_TREE;
10238 tree addr, t2;
10239 rtx container;
10240 int indirect_p = 0;
10241 tree ptrtype;
10242 machine_mode nat_mode;
10243 unsigned int arg_boundary;
10244
10245 /* Only 64-bit targets need something special. */
10246 if (is_va_list_char_pointer (TREE_TYPE (valist)))
10247 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
10248
10249 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
10250 f_fpr = DECL_CHAIN (f_gpr);
10251 f_ovf = DECL_CHAIN (f_fpr);
10252 f_sav = DECL_CHAIN (f_ovf);
10253
10254 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
10255 valist, f_gpr, NULL_TREE);
10256
10257 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
10258 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
10259 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
10260
10261 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
10262 if (indirect_p)
10263 type = build_pointer_type (type);
10264 size = int_size_in_bytes (type);
10265 rsize = CEIL (size, UNITS_PER_WORD);
10266
10267 nat_mode = type_natural_mode (type, NULL, false);
10268 switch (nat_mode)
10269 {
10270 case V8SFmode:
10271 case V8SImode:
10272 case V32QImode:
10273 case V16HImode:
10274 case V4DFmode:
10275 case V4DImode:
10276 case V16SFmode:
10277 case V16SImode:
10278 case V64QImode:
10279 case V32HImode:
10280 case V8DFmode:
10281 case V8DImode:
10282 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
10283 if (!TARGET_64BIT_MS_ABI)
10284 {
10285 container = NULL;
10286 break;
10287 }
10288
10289 default:
10290 container = construct_container (nat_mode, TYPE_MODE (type),
10291 type, 0, X86_64_REGPARM_MAX,
10292 X86_64_SSE_REGPARM_MAX, intreg,
10293 0);
10294 break;
10295 }
10296
10297 /* Pull the value out of the saved registers. */
10298
10299 addr = create_tmp_var (ptr_type_node, "addr");
10300
10301 if (container)
10302 {
10303 int needed_intregs, needed_sseregs;
10304 bool need_temp;
10305 tree int_addr, sse_addr;
10306
10307 lab_false = create_artificial_label (UNKNOWN_LOCATION);
10308 lab_over = create_artificial_label (UNKNOWN_LOCATION);
10309
10310 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
10311
10312 need_temp = (!REG_P (container)
10313 && ((needed_intregs && TYPE_ALIGN (type) > 64)
10314 || TYPE_ALIGN (type) > 128));
10315
10316 /* In case we are passing a structure, verify that it is a consecutive
10317 block in the register save area. If not, we need to do moves. */
10318 if (!need_temp && !REG_P (container))
10319 {
10320 /* Verify that all registers are strictly consecutive. */
10321 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
10322 {
10323 int i;
10324
10325 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
10326 {
10327 rtx slot = XVECEXP (container, 0, i);
10328 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
10329 || INTVAL (XEXP (slot, 1)) != i * 16)
10330 need_temp = true;
10331 }
10332 }
10333 else
10334 {
10335 int i;
10336
10337 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
10338 {
10339 rtx slot = XVECEXP (container, 0, i);
10340 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
10341 || INTVAL (XEXP (slot, 1)) != i * 8)
10342 need_temp = true;
10343 }
10344 }
10345 }
10346 if (!need_temp)
10347 {
10348 int_addr = addr;
10349 sse_addr = addr;
10350 }
10351 else
10352 {
10353 int_addr = create_tmp_var (ptr_type_node, "int_addr");
10354 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
10355 }
10356
10357 /* First ensure that we fit completely in registers. */
10358 if (needed_intregs)
10359 {
10360 t = build_int_cst (TREE_TYPE (gpr),
10361 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
10362 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
10363 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
10364 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
10365 gimplify_and_add (t, pre_p);
10366 }
10367 if (needed_sseregs)
10368 {
10369 t = build_int_cst (TREE_TYPE (fpr),
10370 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
10371 + X86_64_REGPARM_MAX * 8);
10372 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
10373 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
10374 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
10375 gimplify_and_add (t, pre_p);
10376 }
10377
10378 /* Compute index to start of area used for integer regs. */
10379 if (needed_intregs)
10380 {
10381 /* int_addr = gpr + sav; */
10382 t = fold_build_pointer_plus (sav, gpr);
10383 gimplify_assign (int_addr, t, pre_p);
10384 }
10385 if (needed_sseregs)
10386 {
10387 /* sse_addr = fpr + sav; */
10388 t = fold_build_pointer_plus (sav, fpr);
10389 gimplify_assign (sse_addr, t, pre_p);
10390 }
10391 if (need_temp)
10392 {
10393 int i, prev_size = 0;
10394 tree temp = create_tmp_var (type, "va_arg_tmp");
10395
10396 /* addr = &temp; */
10397 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
10398 gimplify_assign (addr, t, pre_p);
10399
10400 for (i = 0; i < XVECLEN (container, 0); i++)
10401 {
10402 rtx slot = XVECEXP (container, 0, i);
10403 rtx reg = XEXP (slot, 0);
10404 machine_mode mode = GET_MODE (reg);
10405 tree piece_type;
10406 tree addr_type;
10407 tree daddr_type;
10408 tree src_addr, src;
10409 int src_offset;
10410 tree dest_addr, dest;
10411 int cur_size = GET_MODE_SIZE (mode);
10412
10413 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
10414 prev_size = INTVAL (XEXP (slot, 1));
10415 if (prev_size + cur_size > size)
10416 {
10417 cur_size = size - prev_size;
10418 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
10419 if (mode == BLKmode)
10420 mode = QImode;
10421 }
10422 piece_type = lang_hooks.types.type_for_mode (mode, 1);
10423 if (mode == GET_MODE (reg))
10424 addr_type = build_pointer_type (piece_type);
10425 else
10426 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
10427 true);
10428 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
10429 true);
10430
10431 if (SSE_REGNO_P (REGNO (reg)))
10432 {
10433 src_addr = sse_addr;
10434 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
10435 }
10436 else
10437 {
10438 src_addr = int_addr;
10439 src_offset = REGNO (reg) * 8;
10440 }
10441 src_addr = fold_convert (addr_type, src_addr);
10442 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
10443
10444 dest_addr = fold_convert (daddr_type, addr);
10445 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
10446 if (cur_size == GET_MODE_SIZE (mode))
10447 {
10448 src = build_va_arg_indirect_ref (src_addr);
10449 dest = build_va_arg_indirect_ref (dest_addr);
10450
10451 gimplify_assign (dest, src, pre_p);
10452 }
10453 else
10454 {
10455 tree copy
10456 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
10457 3, dest_addr, src_addr,
10458 size_int (cur_size));
10459 gimplify_and_add (copy, pre_p);
10460 }
10461 prev_size += cur_size;
10462 }
10463 }
10464
10465 if (needed_intregs)
10466 {
10467 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
10468 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
10469 gimplify_assign (gpr, t, pre_p);
10470 }
10471
10472 if (needed_sseregs)
10473 {
10474 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
10475 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
10476 gimplify_assign (unshare_expr (fpr), t, pre_p);
10477 }
10478
10479 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
10480
10481 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
10482 }
10483
10484 /* ... otherwise out of the overflow area. */
10485
10486 /* When the caller aligns a parameter on the stack, an alignment
10487 beyond MAX_SUPPORTED_STACK_ALIGNMENT is capped at
10488 MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here with the
10489 caller. */
10490 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
10491 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
10492 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
10493
10494 /* Care for on-stack alignment if needed. */
10495 if (arg_boundary <= 64 || size == 0)
10496 t = ovf;
10497 else
10498 {
10499 HOST_WIDE_INT align = arg_boundary / 8;
10500 t = fold_build_pointer_plus_hwi (ovf, align - 1);
10501 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
10502 build_int_cst (TREE_TYPE (t), -align));
10503 }
10504
10505 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
10506 gimplify_assign (addr, t, pre_p);
10507
10508 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
10509 gimplify_assign (unshare_expr (ovf), t, pre_p);
10510
10511 if (container)
10512 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
10513
10514 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
10515 addr = fold_convert (ptrtype, addr);
10516
10517 if (indirect_p)
10518 addr = build_va_arg_indirect_ref (addr);
10519 return build_va_arg_indirect_ref (addr);
10520 }
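/* As a rough sketch (assuming the SysV va_list field names), the sequence
   generated above for a plain `int' argument on x86-64 is equivalent to:

     if (gp_offset >= 6 * 8)
       goto lab_false;
     addr = reg_save_area + gp_offset;
     gp_offset += 8;
     goto lab_over;
   lab_false:
     addr = overflow_arg_area;
     overflow_arg_area += 8;
   lab_over:
     result = *(int *) addr;  */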
10521 \f
10522 /* Return true if OPNUM's MEM should be matched
10523 in movabs* patterns. */
10524
10525 bool
10526 ix86_check_movabs (rtx insn, int opnum)
10527 {
10528 rtx set, mem;
10529
10530 set = PATTERN (insn);
10531 if (GET_CODE (set) == PARALLEL)
10532 set = XVECEXP (set, 0, 0);
10533 gcc_assert (GET_CODE (set) == SET);
10534 mem = XEXP (set, opnum);
10535 while (SUBREG_P (mem))
10536 mem = SUBREG_REG (mem);
10537 gcc_assert (MEM_P (mem));
10538 return volatile_ok || !MEM_VOLATILE_P (mem);
10539 }
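/* The movabs* patterns match moves between the accumulator and memory at an
   absolute 64-bit address, e.g. (hypothetical address)

	movabs	0x1122334455667788, %rax

   The check above rejects volatile memory unless volatile_ok is set.  */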
10540 \f
10541 /* Initialize the table of extra 80387 mathematical constants. */
10542
10543 static void
10544 init_ext_80387_constants (void)
10545 {
10546 static const char * cst[5] =
10547 {
10548 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
10549 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
10550 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
10551 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
10552 "3.1415926535897932385128089594061862044", /* 4: fldpi */
10553 };
10554 int i;
10555
10556 for (i = 0; i < 5; i++)
10557 {
10558 real_from_string (&ext_80387_constants_table[i], cst[i]);
10559 /* Ensure each constant is rounded to XFmode precision. */
10560 real_convert (&ext_80387_constants_table[i],
10561 XFmode, &ext_80387_constants_table[i]);
10562 }
10563
10564 ext_80387_constants_init = 1;
10565 }
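/* The strings above are, in order, log10(2), ln(2), log2(e), log2(10) and
   pi, i.e. the values loaded by the x87 fldlg2, fldln2, fldl2e, fldl2t and
   fldpi instructions named in the comments.  */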
10566
10567 /* Return non-zero if the constant is something that
10568 can be loaded with a special instruction. */
10569
10570 int
10571 standard_80387_constant_p (rtx x)
10572 {
10573 machine_mode mode = GET_MODE (x);
10574
10575 const REAL_VALUE_TYPE *r;
10576
10577 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
10578 return -1;
10579
10580 if (x == CONST0_RTX (mode))
10581 return 1;
10582 if (x == CONST1_RTX (mode))
10583 return 2;
10584
10585 r = CONST_DOUBLE_REAL_VALUE (x);
10586
10587 /* For XFmode constants, try to find a special 80387 instruction when
10588 optimizing for size or on those CPUs that benefit from them. */
10589 if (mode == XFmode
10590 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
10591 {
10592 int i;
10593
10594 if (! ext_80387_constants_init)
10595 init_ext_80387_constants ();
10596
10597 for (i = 0; i < 5; i++)
10598 if (real_identical (r, &ext_80387_constants_table[i]))
10599 return i + 3;
10600 }
10601
10602 /* Load of the constant -0.0 or -1.0 will be split as
10603 fldz;fchs or fld1;fchs sequence. */
10604 if (real_isnegzero (r))
10605 return 8;
10606 if (real_identical (r, &dconstm1))
10607 return 9;
10608
10609 return 0;
10610 }
10611
10612 /* Return the opcode of the special instruction to be used to load
10613 the constant X. */
10614
10615 const char *
10616 standard_80387_constant_opcode (rtx x)
10617 {
10618 switch (standard_80387_constant_p (x))
10619 {
10620 case 1:
10621 return "fldz";
10622 case 2:
10623 return "fld1";
10624 case 3:
10625 return "fldlg2";
10626 case 4:
10627 return "fldln2";
10628 case 5:
10629 return "fldl2e";
10630 case 6:
10631 return "fldl2t";
10632 case 7:
10633 return "fldpi";
10634 case 8:
10635 case 9:
10636 return "#";
10637 default:
10638 gcc_unreachable ();
10639 }
10640 }
10641
10642 /* Return the CONST_DOUBLE representing the 80387 constant that is
10643 loaded by the specified special instruction. The argument IDX
10644 matches the return value from standard_80387_constant_p. */
10645
10646 rtx
10647 standard_80387_constant_rtx (int idx)
10648 {
10649 int i;
10650
10651 if (! ext_80387_constants_init)
10652 init_ext_80387_constants ();
10653
10654 switch (idx)
10655 {
10656 case 3:
10657 case 4:
10658 case 5:
10659 case 6:
10660 case 7:
10661 i = idx - 3;
10662 break;
10663
10664 default:
10665 gcc_unreachable ();
10666 }
10667
10668 return const_double_from_real_value (ext_80387_constants_table[i],
10669 XFmode);
10670 }
10671
10672 /* Return 1 if X is all 0s and 2 if X is all 1s
10673 in a supported SSE/AVX vector mode. */
10674
10675 int
10676 standard_sse_constant_p (rtx x)
10677 {
10678 machine_mode mode;
10679
10680 if (!TARGET_SSE)
10681 return 0;
10682
10683 mode = GET_MODE (x);
10684
10685 if (x == const0_rtx || x == CONST0_RTX (mode))
10686 return 1;
10687 if (vector_all_ones_operand (x, mode))
10688 switch (mode)
10689 {
10690 case V16QImode:
10691 case V8HImode:
10692 case V4SImode:
10693 case V2DImode:
10694 if (TARGET_SSE2)
10695 return 2;
10696 case V32QImode:
10697 case V16HImode:
10698 case V8SImode:
10699 case V4DImode:
10700 if (TARGET_AVX2)
10701 return 2;
10702 case V64QImode:
10703 case V32HImode:
10704 case V16SImode:
10705 case V8DImode:
10706 if (TARGET_AVX512F)
10707 return 2;
10708 default:
10709 break;
10710 }
10711
10712 return 0;
10713 }
10714
10715 /* Return the opcode of the special instruction to be used to load
10716 the constant X. */
10717
10718 const char *
10719 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
10720 {
10721 switch (standard_sse_constant_p (x))
10722 {
10723 case 1:
10724 switch (get_attr_mode (insn))
10725 {
10726 case MODE_XI:
10727 return "vpxord\t%g0, %g0, %g0";
10728 case MODE_V16SF:
10729 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
10730 : "vpxord\t%g0, %g0, %g0";
10731 case MODE_V8DF:
10732 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
10733 : "vpxorq\t%g0, %g0, %g0";
10734 case MODE_TI:
10735 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
10736 : "%vpxor\t%0, %d0";
10737 case MODE_V2DF:
10738 return "%vxorpd\t%0, %d0";
10739 case MODE_V4SF:
10740 return "%vxorps\t%0, %d0";
10741
10742 case MODE_OI:
10743 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
10744 : "vpxor\t%x0, %x0, %x0";
10745 case MODE_V4DF:
10746 return "vxorpd\t%x0, %x0, %x0";
10747 case MODE_V8SF:
10748 return "vxorps\t%x0, %x0, %x0";
10749
10750 default:
10751 break;
10752 }
10753
10754 case 2:
10755 if (TARGET_AVX512VL
10756 || get_attr_mode (insn) == MODE_XI
10757 || get_attr_mode (insn) == MODE_V8DF
10758 || get_attr_mode (insn) == MODE_V16SF)
10759 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
10760 if (TARGET_AVX)
10761 return "vpcmpeqd\t%0, %0, %0";
10762 else
10763 return "pcmpeqd\t%0, %0";
10764
10765 default:
10766 break;
10767 }
10768 gcc_unreachable ();
10769 }
10770
10771 /* Return true if OP contains a symbol reference. */
10772
10773 bool
10774 symbolic_reference_mentioned_p (rtx op)
10775 {
10776 const char *fmt;
10777 int i;
10778
10779 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
10780 return true;
10781
10782 fmt = GET_RTX_FORMAT (GET_CODE (op));
10783 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
10784 {
10785 if (fmt[i] == 'E')
10786 {
10787 int j;
10788
10789 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
10790 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
10791 return true;
10792 }
10793
10794 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
10795 return true;
10796 }
10797
10798 return false;
10799 }
10800
10801 /* Return true if it is appropriate to emit `ret' instructions in the
10802 body of a function. Do this only if the epilogue is simple, needing a
10803 couple of insns. Prior to reloading, we can't tell how many registers
10804 must be saved, so return false then. Return false if there is no frame
10805 marker to de-allocate. */
10806
10807 bool
10808 ix86_can_use_return_insn_p (void)
10809 {
10810 struct ix86_frame frame;
10811
10812 if (! reload_completed || frame_pointer_needed)
10813 return 0;
10814
10815 /* Don't allow more than 32k pop, since that's all we can do
10816 with one instruction. */
10817 if (crtl->args.pops_args && crtl->args.size >= 32768)
10818 return 0;
10819
10820 ix86_compute_frame_layout (&frame);
10821 return (frame.stack_pointer_offset == UNITS_PER_WORD
10822 && (frame.nregs + frame.nsseregs) == 0);
10823 }
10824 \f
10825 /* Value should be nonzero if functions must have frame pointers.
10826 Zero means the frame pointer need not be set up (and parms may
10827 be accessed via the stack pointer) in functions that seem suitable. */
10828
10829 static bool
10830 ix86_frame_pointer_required (void)
10831 {
10832 /* If we accessed previous frames, then the generated code expects
10833 to be able to access the saved ebp value in our frame. */
10834 if (cfun->machine->accesses_prev_frame)
10835 return true;
10836
10837 /* Several x86 OSes need a frame pointer for other reasons,
10838 usually pertaining to setjmp. */
10839 if (SUBTARGET_FRAME_POINTER_REQUIRED)
10840 return true;
10841
10842 /* For older 32-bit runtimes, setjmp requires a valid frame pointer. */
10843 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
10844 return true;
10845
10846 /* Under Win64 SEH, very large frames need a frame pointer, as the
10847 maximum stack allocation is 4GB. */
10848 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
10849 return true;
10850
10851 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
10852 turns off the frame pointer by default. Turn it back on now if
10853 we do not have a leaf function. */
10854 if (TARGET_OMIT_LEAF_FRAME_POINTER
10855 && (!crtl->is_leaf
10856 || ix86_current_function_calls_tls_descriptor))
10857 return true;
10858
10859 if (crtl->profile && !flag_fentry)
10860 return true;
10861
10862 return false;
10863 }
10864
10865 /* Record that the current function accesses previous call frames. */
10866
10867 void
10868 ix86_setup_frame_addresses (void)
10869 {
10870 cfun->machine->accesses_prev_frame = 1;
10871 }
10872 \f
10873 #ifndef USE_HIDDEN_LINKONCE
10874 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
10875 # define USE_HIDDEN_LINKONCE 1
10876 # else
10877 # define USE_HIDDEN_LINKONCE 0
10878 # endif
10879 #endif
10880
10881 static int pic_labels_used;
10882
10883 /* Fills in the label name that should be used for a pc thunk for
10884 the given register. */
10885
10886 static void
10887 get_pc_thunk_name (char name[32], unsigned int regno)
10888 {
10889 gcc_assert (!TARGET_64BIT);
10890
10891 if (USE_HIDDEN_LINKONCE)
10892 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
10893 else
10894 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
10895 }
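/* For example, with USE_HIDDEN_LINKONCE the thunk for %ebx is named
   "__x86.get_pc_thunk.bx".  */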
10896
10897
10898 /* This function generates the -fpic pc thunks: each one loads its
10899 register with the return address of the caller and then returns. */
10900
10901 static void
10902 ix86_code_end (void)
10903 {
10904 rtx xops[2];
10905 int regno;
10906
10907 for (regno = AX_REG; regno <= SP_REG; regno++)
10908 {
10909 char name[32];
10910 tree decl;
10911
10912 if (!(pic_labels_used & (1 << regno)))
10913 continue;
10914
10915 get_pc_thunk_name (name, regno);
10916
10917 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
10918 get_identifier (name),
10919 build_function_type_list (void_type_node, NULL_TREE));
10920 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
10921 NULL_TREE, void_type_node);
10922 TREE_PUBLIC (decl) = 1;
10923 TREE_STATIC (decl) = 1;
10924 DECL_IGNORED_P (decl) = 1;
10925
10926 #if TARGET_MACHO
10927 if (TARGET_MACHO)
10928 {
10929 switch_to_section (darwin_sections[text_coal_section]);
10930 fputs ("\t.weak_definition\t", asm_out_file);
10931 assemble_name (asm_out_file, name);
10932 fputs ("\n\t.private_extern\t", asm_out_file);
10933 assemble_name (asm_out_file, name);
10934 putc ('\n', asm_out_file);
10935 ASM_OUTPUT_LABEL (asm_out_file, name);
10936 DECL_WEAK (decl) = 1;
10937 }
10938 else
10939 #endif
10940 if (USE_HIDDEN_LINKONCE)
10941 {
10942 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
10943
10944 targetm.asm_out.unique_section (decl, 0);
10945 switch_to_section (get_named_section (decl, NULL, 0));
10946
10947 targetm.asm_out.globalize_label (asm_out_file, name);
10948 fputs ("\t.hidden\t", asm_out_file);
10949 assemble_name (asm_out_file, name);
10950 putc ('\n', asm_out_file);
10951 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
10952 }
10953 else
10954 {
10955 switch_to_section (text_section);
10956 ASM_OUTPUT_LABEL (asm_out_file, name);
10957 }
10958
10959 DECL_INITIAL (decl) = make_node (BLOCK);
10960 current_function_decl = decl;
10961 init_function_start (decl);
10962 first_function_block_is_cold = false;
10963 /* Make sure unwind info is emitted for the thunk if needed. */
10964 final_start_function (emit_barrier (), asm_out_file, 1);
10965
10966 /* Pad stack IP move with 4 instructions (two NOPs count
10967 as one instruction). */
10968 if (TARGET_PAD_SHORT_FUNCTION)
10969 {
10970 int i = 8;
10971
10972 while (i--)
10973 fputs ("\tnop\n", asm_out_file);
10974 }
10975
10976 xops[0] = gen_rtx_REG (Pmode, regno);
10977 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
10978 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
10979 output_asm_insn ("%!ret", NULL);
10980 final_end_function ();
10981 init_insn_lengths ();
10982 free_after_compilation (cfun);
10983 set_cfun (NULL);
10984 current_function_decl = NULL;
10985 }
10986
10987 if (flag_split_stack)
10988 file_end_indicate_split_stack ();
10989 }
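/* For reference, a thunk emitted above for %ebx looks roughly like this
   (AT&T syntax, ignoring the optional NOP padding):

	__x86.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret
*/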
10990
10991 /* Emit code for the SET_GOT patterns. */
10992
10993 const char *
10994 output_set_got (rtx dest, rtx label)
10995 {
10996 rtx xops[3];
10997
10998 xops[0] = dest;
10999
11000 if (TARGET_VXWORKS_RTP && flag_pic)
11001 {
11002 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
11003 xops[2] = gen_rtx_MEM (Pmode,
11004 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
11005 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
11006
11007 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
11008 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
11009 an unadorned address. */
11010 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
11011 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
11012 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
11013 return "";
11014 }
11015
11016 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
11017
11018 if (!flag_pic)
11019 {
11020 if (TARGET_MACHO)
11021 /* We don't need a pic base, we're not producing pic. */
11022 gcc_unreachable ();
11023
11024 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
11025 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
11026 targetm.asm_out.internal_label (asm_out_file, "L",
11027 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
11028 }
11029 else
11030 {
11031 char name[32];
11032 get_pc_thunk_name (name, REGNO (dest));
11033 pic_labels_used |= 1 << REGNO (dest);
11034
11035 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
11036 xops[2] = gen_rtx_MEM (QImode, xops[2]);
11037 output_asm_insn ("%!call\t%X2", xops);
11038
11039 #if TARGET_MACHO
11040 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
11041 This is what will be referenced by the Mach-O PIC subsystem. */
11042 if (machopic_should_output_picbase_label () || !label)
11043 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
11044
11045 /* When we are restoring the pic base at the site of a nonlocal label,
11046 and we decided to emit the pic base above, we will still output a
11047 local label used for calculating the correction offset (even though
11048 the offset will be 0 in that case). */
11049 if (label)
11050 targetm.asm_out.internal_label (asm_out_file, "L",
11051 CODE_LABEL_NUMBER (label));
11052 #endif
11053 }
11054
11055 if (!TARGET_MACHO)
11056 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
11057
11058 return "";
11059 }
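/* A typical -fpic sequence produced here for %ebx is therefore roughly:

	call	__x86.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   with the thunk itself emitted by ix86_code_end above.  */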
11060
11061 /* Generate a "push" pattern for input ARG. */
11062
11063 static rtx
11064 gen_push (rtx arg)
11065 {
11066 struct machine_function *m = cfun->machine;
11067
11068 if (m->fs.cfa_reg == stack_pointer_rtx)
11069 m->fs.cfa_offset += UNITS_PER_WORD;
11070 m->fs.sp_offset += UNITS_PER_WORD;
11071
11072 if (REG_P (arg) && GET_MODE (arg) != word_mode)
11073 arg = gen_rtx_REG (word_mode, REGNO (arg));
11074
11075 return gen_rtx_SET (gen_rtx_MEM (word_mode,
11076 gen_rtx_PRE_DEC (Pmode,
11077 stack_pointer_rtx)),
11078 arg);
11079 }
11080
11081 /* Generate a "pop" pattern for input ARG. */
11082
11083 static rtx
11084 gen_pop (rtx arg)
11085 {
11086 if (REG_P (arg) && GET_MODE (arg) != word_mode)
11087 arg = gen_rtx_REG (word_mode, REGNO (arg));
11088
11089 return gen_rtx_SET (arg,
11090 gen_rtx_MEM (word_mode,
11091 gen_rtx_POST_INC (Pmode,
11092 stack_pointer_rtx)));
11093 }
11094
11095 /* Return the regno of an unused call-clobbered register if one is
11096 available for the entire function, otherwise INVALID_REGNUM. */
11097
11098 static unsigned int
11099 ix86_select_alt_pic_regnum (void)
11100 {
11101 if (ix86_use_pseudo_pic_reg ())
11102 return INVALID_REGNUM;
11103
11104 if (crtl->is_leaf
11105 && !crtl->profile
11106 && !ix86_current_function_calls_tls_descriptor)
11107 {
11108 int i, drap;
11109 /* Can't use the same register for both PIC and DRAP. */
11110 if (crtl->drap_reg)
11111 drap = REGNO (crtl->drap_reg);
11112 else
11113 drap = -1;
11114 for (i = 2; i >= 0; --i)
11115 if (i != drap && !df_regs_ever_live_p (i))
11116 return i;
11117 }
11118
11119 return INVALID_REGNUM;
11120 }
11121
11122 /* Return TRUE if we need to save REGNO. */
11123
11124 static bool
11125 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
11126 {
11127 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
11128 && pic_offset_table_rtx)
11129 {
11130 if (ix86_use_pseudo_pic_reg ())
11131 {
11132 /* REAL_PIC_OFFSET_TABLE_REGNUM is used by the call to
11133 _mcount in the prologue. */
11134 if (!TARGET_64BIT && flag_pic && crtl->profile)
11135 return true;
11136 }
11137 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11138 || crtl->profile
11139 || crtl->calls_eh_return
11140 || crtl->uses_const_pool
11141 || cfun->has_nonlocal_label)
11142 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
11143 }
11144
11145 if (crtl->calls_eh_return && maybe_eh_return)
11146 {
11147 unsigned i;
11148 for (i = 0; ; i++)
11149 {
11150 unsigned test = EH_RETURN_DATA_REGNO (i);
11151 if (test == INVALID_REGNUM)
11152 break;
11153 if (test == regno)
11154 return true;
11155 }
11156 }
11157
11158 if (crtl->drap_reg
11159 && regno == REGNO (crtl->drap_reg)
11160 && !cfun->machine->no_drap_save_restore)
11161 return true;
11162
11163 return (df_regs_ever_live_p (regno)
11164 && !call_used_regs[regno]
11165 && !fixed_regs[regno]
11166 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
11167 }
11168
11169 /* Return number of saved general purpose registers. */
11170
11171 static int
11172 ix86_nsaved_regs (void)
11173 {
11174 int nregs = 0;
11175 int regno;
11176
11177 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11178 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
11179 nregs ++;
11180 return nregs;
11181 }
11182
11183 /* Return number of saved SSE registers. */
11184
11185 static int
11186 ix86_nsaved_sseregs (void)
11187 {
11188 int nregs = 0;
11189 int regno;
11190
11191 if (!TARGET_64BIT_MS_ABI)
11192 return 0;
11193 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11194 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
11195 nregs ++;
11196 return nregs;
11197 }
11198
11199 /* Given FROM and TO register numbers, say whether this elimination is
11200 allowed. If stack alignment is needed, we can only replace argument
11201 pointer with hard frame pointer, or replace frame pointer with stack
11202 pointer. Otherwise, frame pointer elimination is automatically
11203 handled and all other eliminations are valid. */
11204
11205 static bool
11206 ix86_can_eliminate (const int from, const int to)
11207 {
11208 if (stack_realign_fp)
11209 return ((from == ARG_POINTER_REGNUM
11210 && to == HARD_FRAME_POINTER_REGNUM)
11211 || (from == FRAME_POINTER_REGNUM
11212 && to == STACK_POINTER_REGNUM));
11213 else
11214 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
11215 }
11216
11217 /* Return the offset between two registers, one to be eliminated, and the other
11218 its replacement, at the start of a routine. */
11219
11220 HOST_WIDE_INT
11221 ix86_initial_elimination_offset (int from, int to)
11222 {
11223 struct ix86_frame frame;
11224 ix86_compute_frame_layout (&frame);
11225
11226 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
11227 return frame.hard_frame_pointer_offset;
11228 else if (from == FRAME_POINTER_REGNUM
11229 && to == HARD_FRAME_POINTER_REGNUM)
11230 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
11231 else
11232 {
11233 gcc_assert (to == STACK_POINTER_REGNUM);
11234
11235 if (from == ARG_POINTER_REGNUM)
11236 return frame.stack_pointer_offset;
11237
11238 gcc_assert (from == FRAME_POINTER_REGNUM);
11239 return frame.stack_pointer_offset - frame.frame_pointer_offset;
11240 }
11241 }
11242
11243 /* In a dynamically-aligned function, we can't know the offset from
11244 stack pointer to frame pointer, so we must ensure that setjmp
11245 eliminates fp against the hard fp (%ebp) rather than trying to
11246 index from %esp up to the top of the frame across a gap that is
11247 of unknown (at compile-time) size. */
11248 static rtx
11249 ix86_builtin_setjmp_frame_value (void)
11250 {
11251 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
11252 }
11253
11254 /* When using -fsplit-stack, the allocation routines set a field in
11255 the TCB to the bottom of the stack plus this much space, measured
11256 in bytes. */
11257
11258 #define SPLIT_STACK_AVAILABLE 256
11259
11260 /* Fill structure ix86_frame describing the frame of the current function. */
11261
11262 static void
11263 ix86_compute_frame_layout (struct ix86_frame *frame)
11264 {
11265 unsigned HOST_WIDE_INT stack_alignment_needed;
11266 HOST_WIDE_INT offset;
11267 unsigned HOST_WIDE_INT preferred_alignment;
11268 HOST_WIDE_INT size = get_frame_size ();
11269 HOST_WIDE_INT to_allocate;
11270
11271 frame->nregs = ix86_nsaved_regs ();
11272 frame->nsseregs = ix86_nsaved_sseregs ();
11273
11274 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
11275 except for function prologues and leaf functions. */
11276 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
11277 && (!crtl->is_leaf || cfun->calls_alloca != 0
11278 || ix86_current_function_calls_tls_descriptor))
11279 {
11280 crtl->preferred_stack_boundary = 128;
11281 crtl->stack_alignment_needed = 128;
11282 }
11283 /* preferred_stack_boundary is never updated for calls
11284 expanded from a TLS descriptor. Update it here. We don't update it
11285 during the expand stage because, according to the comments before
11286 ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
11287 away. */
11288 else if (ix86_current_function_calls_tls_descriptor
11289 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
11290 {
11291 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
11292 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
11293 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
11294 }
11295
11296 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
11297 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
11298
11299 gcc_assert (!size || stack_alignment_needed);
11300 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
11301 gcc_assert (preferred_alignment <= stack_alignment_needed);
11302
11303 /* For SEH we have to limit the amount of code movement into the prologue.
11304 At present we do this via a BLOCKAGE, at which point there's very little
11305 scheduling that can be done, which means that there's very little point
11306 in doing anything except PUSHs. */
11307 if (TARGET_SEH)
11308 cfun->machine->use_fast_prologue_epilogue = false;
11309
11310 /* During reload iterations the number of saved registers can change.
11311 Recompute the value as needed. Do not recompute when the number of
11312 registers didn't change, as reload makes multiple calls to this function
11313 and does not expect the decision to change within a single iteration. */
11314 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
11315 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
11316 {
11317 int count = frame->nregs;
11318 struct cgraph_node *node = cgraph_node::get (current_function_decl);
11319
11320 cfun->machine->use_fast_prologue_epilogue_nregs = count;
11321
11322 /* The fast prologue uses move instead of push to save registers. This
11323 is significantly longer, but also executes faster as modern hardware
11324 can execute the moves in parallel, but can't do that for push/pop.
11325
11326 Be careful about choosing which prologue to emit: when the function
11327 takes many instructions to execute, we may use the slow version, as
11328 well as when the function is known to be outside a hot spot (this is
11329 known with feedback only). Weight the size of the function by the
11330 number of registers to save, since it is cheap to use one or two push
11331 instructions but very slow to use many of them. */
11332 if (count)
11333 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
11334 if (node->frequency < NODE_FREQUENCY_NORMAL
11335 || (flag_branch_probabilities
11336 && node->frequency < NODE_FREQUENCY_HOT))
11337 cfun->machine->use_fast_prologue_epilogue = false;
11338 else
11339 cfun->machine->use_fast_prologue_epilogue
11340 = !expensive_function_p (count);
11341 }
11342
11343 frame->save_regs_using_mov
11344 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
11345 /* If static stack checking is enabled and done with probes,
11346 the registers need to be saved before allocating the frame. */
11347 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
11348
11349 /* Skip return address. */
11350 offset = UNITS_PER_WORD;
11351
11352 /* Skip pushed static chain. */
11353 if (ix86_static_chain_on_stack)
11354 offset += UNITS_PER_WORD;
11355
11356 /* Skip saved base pointer. */
11357 if (frame_pointer_needed)
11358 offset += UNITS_PER_WORD;
11359 frame->hfp_save_offset = offset;
11360
11361 /* The traditional frame pointer location is at the top of the frame. */
11362 frame->hard_frame_pointer_offset = offset;
11363
11364 /* Register save area */
11365 offset += frame->nregs * UNITS_PER_WORD;
11366 frame->reg_save_offset = offset;
11367
11368 /* On SEH target, registers are pushed just before the frame pointer
11369 location. */
11370 if (TARGET_SEH)
11371 frame->hard_frame_pointer_offset = offset;
11372
11373 /* Align and set SSE register save area. */
11374 if (frame->nsseregs)
11375 {
11376 /* The only ABI that has saved SSE registers (Win64) also has a
11377 16-byte aligned default stack, and thus we don't need to be
11378 within the re-aligned local stack frame to save them. In case
11379 incoming stack boundary is aligned to less than 16 bytes,
11380 unaligned move of SSE register will be emitted, so there is
11381 no point to round up the SSE register save area outside the
11382 re-aligned local stack frame to 16 bytes. */
11383 if (ix86_incoming_stack_boundary >= 128)
11384 offset = ROUND_UP (offset, 16);
11385 offset += frame->nsseregs * 16;
11386 }
11387 frame->sse_reg_save_offset = offset;
11388
11389 /* The re-aligned stack starts here. Values before this point are not
11390 directly comparable with values below this point. In order to make
11391 sure that no value happens to be the same before and after, force
11392 the alignment computation below to add a non-zero value. */
11393 if (stack_realign_fp)
11394 offset = ROUND_UP (offset, stack_alignment_needed);
11395
11396 /* Va-arg area */
11397 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
11398 offset += frame->va_arg_size;
11399
11400 /* Align start of frame for local function. */
11401 if (stack_realign_fp
11402 || offset != frame->sse_reg_save_offset
11403 || size != 0
11404 || !crtl->is_leaf
11405 || cfun->calls_alloca
11406 || ix86_current_function_calls_tls_descriptor)
11407 offset = ROUND_UP (offset, stack_alignment_needed);
11408
11409 /* Frame pointer points here. */
11410 frame->frame_pointer_offset = offset;
11411
11412 offset += size;
11413
11414 /* Add the outgoing arguments area. It can be skipped if we eliminated
11415 all the function calls as dead code.
11416 Skipping is however impossible when the function calls alloca: the
11417 alloca expander assumes that the last crtl->outgoing_args_size bytes
11418 of the stack frame are unused. */
11419 if (ACCUMULATE_OUTGOING_ARGS
11420 && (!crtl->is_leaf || cfun->calls_alloca
11421 || ix86_current_function_calls_tls_descriptor))
11422 {
11423 offset += crtl->outgoing_args_size;
11424 frame->outgoing_arguments_size = crtl->outgoing_args_size;
11425 }
11426 else
11427 frame->outgoing_arguments_size = 0;
11428
11429 /* Align stack boundary. Only needed if we're calling another function
11430 or using alloca. */
11431 if (!crtl->is_leaf || cfun->calls_alloca
11432 || ix86_current_function_calls_tls_descriptor)
11433 offset = ROUND_UP (offset, preferred_alignment);
11434
11435 /* We've reached end of stack frame. */
11436 frame->stack_pointer_offset = offset;
11437
11438 /* Size prologue needs to allocate. */
11439 to_allocate = offset - frame->sse_reg_save_offset;
11440
11441 if ((!to_allocate && frame->nregs <= 1)
11442 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
11443 frame->save_regs_using_mov = false;
11444
11445 if (ix86_using_red_zone ()
11446 && crtl->sp_is_unchanging
11447 && crtl->is_leaf
11448 && !ix86_current_function_calls_tls_descriptor)
11449 {
11450 frame->red_zone_size = to_allocate;
11451 if (frame->save_regs_using_mov)
11452 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
11453 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
11454 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
11455 }
11456 else
11457 frame->red_zone_size = 0;
11458 frame->stack_pointer_offset -= frame->red_zone_size;
11459
11460 /* The SEH frame pointer location is near the bottom of the frame.
11461 This is enforced by the fact that the difference between the
11462 stack pointer and the frame pointer is limited to 240 bytes in
11463 the unwind data structure. */
11464 if (TARGET_SEH)
11465 {
11466 HOST_WIDE_INT diff;
11467
11468 /* If we can leave the frame pointer where it is, do so. Also, returns
11469 the establisher frame for __builtin_frame_address (0). */
11470 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
11471 if (diff <= SEH_MAX_FRAME_SIZE
11472 && (diff > 240 || (diff & 15) != 0)
11473 && !crtl->accesses_prior_frames)
11474 {
11475 /* Ideally we'd determine what portion of the local stack frame
11476 (within the constraint of the lowest 240) is most heavily used.
11477 But without that complication, simply bias the frame pointer
11478 by 128 bytes so as to maximize the amount of the local stack
11479 frame that is addressable with 8-bit offsets. */
11480 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
11481 }
11482 }
11483 }
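/* To summarize, the layout computed above, going from the incoming CFA
   towards lower addresses (offsets are measured from the CFA), is roughly:

	return address
	[pushed static chain]
	[saved frame pointer]		<- hard_frame_pointer_offset (non-SEH)
	general register save area	<- reg_save_offset
	SSE register save area (Win64)	<- sse_reg_save_offset
	va_arg register save area
	local variables			<- frame_pointer_offset
	outgoing argument area
					<- stack_pointer_offset
   with the red zone, when usable, subtracted from stack_pointer_offset.  */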
11484
11485 /* This is semi-inlined memory_address_length, but simplified
11486 since we know that we're always dealing with reg+offset, and
11487 to avoid having to create and discard all that rtl. */
11488
11489 static inline int
11490 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
11491 {
11492 int len = 4;
11493
11494 if (offset == 0)
11495 {
11496 /* EBP and R13 cannot be encoded without an offset. */
11497 len = (regno == BP_REG || regno == R13_REG);
11498 }
11499 else if (IN_RANGE (offset, -128, 127))
11500 len = 1;
11501
11502 /* ESP and R12 must be encoded with a SIB byte. */
11503 if (regno == SP_REG || regno == R12_REG)
11504 len++;
11505
11506 return len;
11507 }
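/* For example, by this measure 8(%rbp) costs 1 byte (disp8), 8(%rsp)
   costs 2 (disp8 plus the mandatory SIB byte), and 1024(%rax) costs 4
   (disp32).  */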
11508
11509 /* Return an RTX that points to CFA_OFFSET within the stack frame.
11510 The valid base registers are taken from CFUN->MACHINE->FS. */
11511
11512 static rtx
11513 choose_baseaddr (HOST_WIDE_INT cfa_offset)
11514 {
11515 const struct machine_function *m = cfun->machine;
11516 rtx base_reg = NULL;
11517 HOST_WIDE_INT base_offset = 0;
11518
11519 if (m->use_fast_prologue_epilogue)
11520 {
11521 /* Choose the base register most likely to allow the most scheduling
11522 opportunities. Generally FP is valid throughout the function,
11523 while DRAP must be reloaded within the epilogue. But choose either
11524 over the SP, whose addressing has an increased encoding size. */
11525
11526 if (m->fs.fp_valid)
11527 {
11528 base_reg = hard_frame_pointer_rtx;
11529 base_offset = m->fs.fp_offset - cfa_offset;
11530 }
11531 else if (m->fs.drap_valid)
11532 {
11533 base_reg = crtl->drap_reg;
11534 base_offset = 0 - cfa_offset;
11535 }
11536 else if (m->fs.sp_valid)
11537 {
11538 base_reg = stack_pointer_rtx;
11539 base_offset = m->fs.sp_offset - cfa_offset;
11540 }
11541 }
11542 else
11543 {
11544 HOST_WIDE_INT toffset;
11545 int len = 16, tlen;
11546
11547 /* Choose the base register with the smallest address encoding.
11548 With a tie, choose FP > DRAP > SP. */
11549 if (m->fs.sp_valid)
11550 {
11551 base_reg = stack_pointer_rtx;
11552 base_offset = m->fs.sp_offset - cfa_offset;
11553 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
11554 }
11555 if (m->fs.drap_valid)
11556 {
11557 toffset = 0 - cfa_offset;
11558 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
11559 if (tlen <= len)
11560 {
11561 base_reg = crtl->drap_reg;
11562 base_offset = toffset;
11563 len = tlen;
11564 }
11565 }
11566 if (m->fs.fp_valid)
11567 {
11568 toffset = m->fs.fp_offset - cfa_offset;
11569 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
11570 if (tlen <= len)
11571 {
11572 base_reg = hard_frame_pointer_rtx;
11573 base_offset = toffset;
11574 len = tlen;
11575 }
11576 }
11577 }
11578 gcc_assert (base_reg != NULL);
11579
11580 return plus_constant (Pmode, base_reg, base_offset);
11581 }
11582
11583 /* Emit code to save registers in the prologue. */
11584
11585 static void
11586 ix86_emit_save_regs (void)
11587 {
11588 unsigned int regno;
11589 rtx_insn *insn;
11590
11591 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
11592 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
11593 {
11594 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
11595 RTX_FRAME_RELATED_P (insn) = 1;
11596 }
11597 }
11598
11599 /* Emit a single register save at CFA - CFA_OFFSET. */
11600
11601 static void
11602 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
11603 HOST_WIDE_INT cfa_offset)
11604 {
11605 struct machine_function *m = cfun->machine;
11606 rtx reg = gen_rtx_REG (mode, regno);
11607 rtx unspec = NULL_RTX;
11608 rtx mem, addr, base, insn;
11609 unsigned int align;
11610
11611 addr = choose_baseaddr (cfa_offset);
11612 mem = gen_frame_mem (mode, addr);
11613
11614 /* The location is aligned up to INCOMING_STACK_BOUNDARY. */
11615 align = MIN (GET_MODE_ALIGNMENT (mode), INCOMING_STACK_BOUNDARY);
11616 set_mem_align (mem, align);
11617
11618 /* SSE saves are not within re-aligned local stack frame.
11619 In case INCOMING_STACK_BOUNDARY is misaligned, we have
11620 to emit unaligned store. */
11621 if (mode == V4SFmode && align < 128)
11622 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (1, reg), UNSPEC_STOREU);
11623
11624 insn = emit_insn (gen_rtx_SET (mem, unspec ? unspec : reg));
11625 RTX_FRAME_RELATED_P (insn) = 1;
11626
11627 base = addr;
11628 if (GET_CODE (base) == PLUS)
11629 base = XEXP (base, 0);
11630 gcc_checking_assert (REG_P (base));
11631
11632 /* When saving registers into a re-aligned local stack frame, avoid
11633 any tricky guessing by dwarf2out. */
11634 if (m->fs.realigned)
11635 {
11636 gcc_checking_assert (stack_realign_drap);
11637
11638 if (regno == REGNO (crtl->drap_reg))
11639 {
11640 /* A bit of a hack. We force the DRAP register to be saved in
11641 the re-aligned stack frame, which provides us with a copy
11642 of the CFA that will last past the prologue. Install it. */
11643 gcc_checking_assert (cfun->machine->fs.fp_valid);
11644 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
11645 cfun->machine->fs.fp_offset - cfa_offset);
11646 mem = gen_rtx_MEM (mode, addr);
11647 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
11648 }
11649 else
11650 {
11651 /* The frame pointer is a stable reference within the
11652 aligned frame. Use it. */
11653 gcc_checking_assert (cfun->machine->fs.fp_valid);
11654 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
11655 cfun->machine->fs.fp_offset - cfa_offset);
11656 mem = gen_rtx_MEM (mode, addr);
11657 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
11658 }
11659 }
11660
11661 /* The memory may not be relative to the current CFA register,
11662 which means that we may need to generate a new pattern for
11663 use by the unwind info. */
11664 else if (base != m->fs.cfa_reg)
11665 {
11666 addr = plus_constant (Pmode, m->fs.cfa_reg,
11667 m->fs.cfa_offset - cfa_offset);
11668 mem = gen_rtx_MEM (mode, addr);
11669 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
11670 }
11671 else if (unspec)
11672 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
11673 }
11674
11675 /* Emit code to save registers using MOV insns.
11676 First register is stored at CFA - CFA_OFFSET. */
11677 static void
11678 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
11679 {
11680 unsigned int regno;
11681
11682 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11683 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
11684 {
11685 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
11686 cfa_offset -= UNITS_PER_WORD;
11687 }
11688 }
11689
11690 /* Emit code to save SSE registers using MOV insns.
11691 First register is stored at CFA - CFA_OFFSET. */
11692 static void
11693 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
11694 {
11695 unsigned int regno;
11696
11697 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11698 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
11699 {
11700 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
11701 cfa_offset -= GET_MODE_SIZE (V4SFmode);
11702 }
11703 }
11704
11705 static GTY(()) rtx queued_cfa_restores;
11706
11707 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
11708 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
11709 Don't add the note if the previously saved value will be left untouched
11710 within the stack red zone until return, as unwinders can find the same
11711 value in the register and on the stack. */
11712
11713 static void
11714 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
11715 {
11716 if (!crtl->shrink_wrapped
11717 && cfa_offset <= cfun->machine->fs.red_zone_offset)
11718 return;
11719
11720 if (insn)
11721 {
11722 add_reg_note (insn, REG_CFA_RESTORE, reg);
11723 RTX_FRAME_RELATED_P (insn) = 1;
11724 }
11725 else
11726 queued_cfa_restores
11727 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
11728 }
11729
11730 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
11731
11732 static void
11733 ix86_add_queued_cfa_restore_notes (rtx insn)
11734 {
11735 rtx last;
11736 if (!queued_cfa_restores)
11737 return;
11738 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
11739 ;
11740 XEXP (last, 1) = REG_NOTES (insn);
11741 REG_NOTES (insn) = queued_cfa_restores;
11742 queued_cfa_restores = NULL_RTX;
11743 RTX_FRAME_RELATED_P (insn) = 1;
11744 }
11745
11746 /* Expand prologue or epilogue stack adjustment.
11747 The pattern exists to put a dependency on all ebp-based memory accesses.
11748 STYLE should be negative if instructions should be marked as frame related,
11749 zero if %r11 register is live and cannot be freely used and positive
11750 otherwise. */
11751
11752 static void
11753 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
11754 int style, bool set_cfa)
11755 {
11756 struct machine_function *m = cfun->machine;
11757 rtx insn;
11758 bool add_frame_related_expr = false;
11759
11760 if (Pmode == SImode)
11761 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
11762 else if (x86_64_immediate_operand (offset, DImode))
11763 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
11764 else
11765 {
11766 rtx tmp;
11767 /* r11 is used by indirect sibcall return as well, set before the
11768 epilogue and used after the epilogue. */
11769 if (style)
11770 tmp = gen_rtx_REG (DImode, R11_REG);
11771 else
11772 {
11773 gcc_assert (src != hard_frame_pointer_rtx
11774 && dest != hard_frame_pointer_rtx);
11775 tmp = hard_frame_pointer_rtx;
11776 }
11777 insn = emit_insn (gen_rtx_SET (tmp, offset));
11778 if (style < 0)
11779 add_frame_related_expr = true;
11780
11781 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
11782 }
11783
11784 insn = emit_insn (insn);
11785 if (style >= 0)
11786 ix86_add_queued_cfa_restore_notes (insn);
11787
11788 if (set_cfa)
11789 {
11790 rtx r;
11791
11792 gcc_assert (m->fs.cfa_reg == src);
11793 m->fs.cfa_offset += INTVAL (offset);
11794 m->fs.cfa_reg = dest;
11795
11796 r = gen_rtx_PLUS (Pmode, src, offset);
11797 r = gen_rtx_SET (dest, r);
11798 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
11799 RTX_FRAME_RELATED_P (insn) = 1;
11800 }
11801 else if (style < 0)
11802 {
11803 RTX_FRAME_RELATED_P (insn) = 1;
11804 if (add_frame_related_expr)
11805 {
11806 rtx r = gen_rtx_PLUS (Pmode, src, offset);
11807 r = gen_rtx_SET (dest, r);
11808 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
11809 }
11810 }
11811
11812 if (dest == stack_pointer_rtx)
11813 {
11814 HOST_WIDE_INT ooffset = m->fs.sp_offset;
11815 bool valid = m->fs.sp_valid;
11816
11817 if (src == hard_frame_pointer_rtx)
11818 {
11819 valid = m->fs.fp_valid;
11820 ooffset = m->fs.fp_offset;
11821 }
11822 else if (src == crtl->drap_reg)
11823 {
11824 valid = m->fs.drap_valid;
11825 ooffset = 0;
11826 }
11827 else
11828 {
11829 /* Else there are two possibilities: SP itself, which we set
11830 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
11831 taken care of by hand along the eh_return path. */
11832 gcc_checking_assert (src == stack_pointer_rtx
11833 || offset == const0_rtx);
11834 }
11835
11836 m->fs.sp_offset = ooffset - INTVAL (offset);
11837 m->fs.sp_valid = valid;
11838 }
11839 }
11840
11841 /* Find an available register to be used as dynamic realign argument
11842 pointer register. Such a register will be written in the prologue and
11843 used at the beginning of the function body, so it must not be
11844 1. parameter passing register.
11845 2. GOT pointer.
11846 We reuse static-chain register if it is available. Otherwise, we
11847 use DI for i386 and R13 for x86-64. We chose R13 since it has
11848 shorter encoding.
11849
11850 Return: the regno of chosen register. */
11851
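/* For example: a normal 64-bit function gets %r10, while one with a
   static chain or an emitted tail call gets %r13; a plain 32-bit cdecl
   function gets %ecx, and a 32-bit fastcall or regparm(3) function
   falls back to %edi.  */
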
11852 static unsigned int
11853 find_drap_reg (void)
11854 {
11855 tree decl = cfun->decl;
11856
11857 if (TARGET_64BIT)
11858 {
11859 /* Use R13 for nested functions or functions that need a static chain.
11860 Since a function with a tail call may use any caller-saved
11861 register in the epilogue, DRAP must not use a caller-saved
11862 register in that case. */
11863 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
11864 return R13_REG;
11865
11866 return R10_REG;
11867 }
11868 else
11869 {
11870 /* Use DI for nested functions or functions that need a static chain.
11871 Since a function with a tail call may use any caller-saved
11872 register in the epilogue, DRAP must not use a caller-saved
11873 register in that case. */
11874 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
11875 return DI_REG;
11876
11877 /* Reuse static chain register if it isn't used for parameter
11878 passing. */
11879 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
11880 {
11881 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
11882 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
11883 return CX_REG;
11884 }
11885 return DI_REG;
11886 }
11887 }
11888
11889 /* Handle a "force_align_arg_pointer" attribute. */
11890
11891 static tree
11892 ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name,
11893 tree, int, bool *no_add_attrs)
11894 {
11895 if (TREE_CODE (*node) != FUNCTION_TYPE
11896 && TREE_CODE (*node) != METHOD_TYPE
11897 && TREE_CODE (*node) != FIELD_DECL
11898 && TREE_CODE (*node) != TYPE_DECL)
11899 {
11900 warning (OPT_Wattributes, "%qE attribute only applies to functions",
11901 name);
11902 *no_add_attrs = true;
11903 }
11904
11905 return NULL_TREE;
11906 }
11907
11908 /* Return minimum incoming stack alignment. */
11909
11910 static unsigned int
11911 ix86_minimum_incoming_stack_boundary (bool sibcall)
11912 {
11913 unsigned int incoming_stack_boundary;
11914
11915 /* Prefer the one specified at command line. */
11916 if (ix86_user_incoming_stack_boundary)
11917 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
11918 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
11919 if -mstackrealign is used, this isn't a sibcall check, and the
11920 estimated stack alignment is 128 bits. */
11921 else if (!sibcall
11922 && ix86_force_align_arg_pointer
11923 && crtl->stack_alignment_estimated == 128)
11924 incoming_stack_boundary = MIN_STACK_BOUNDARY;
11925 else
11926 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
11927
11928 /* Incoming stack alignment can be changed on individual functions
11929 via force_align_arg_pointer attribute. We use the smallest
11930 incoming stack boundary. */
11931 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
11932 && lookup_attribute (ix86_force_align_arg_pointer_string,
11933 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
11934 incoming_stack_boundary = MIN_STACK_BOUNDARY;
11935
11936 /* The incoming stack frame has to be aligned at least at
11937 parm_stack_boundary. */
11938 if (incoming_stack_boundary < crtl->parm_stack_boundary)
11939 incoming_stack_boundary = crtl->parm_stack_boundary;
11940
11941 /* The stack at the entry of main is aligned by the runtime. We use the
11942 smallest incoming stack boundary. */
11943 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
11944 && DECL_NAME (current_function_decl)
11945 && MAIN_NAME_P (DECL_NAME (current_function_decl))
11946 && DECL_FILE_SCOPE_P (current_function_decl))
11947 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
11948
11949 return incoming_stack_boundary;
11950 }
11951
11952 /* Update incoming stack boundary and estimated stack alignment. */
11953
11954 static void
11955 ix86_update_stack_boundary (void)
11956 {
11957 ix86_incoming_stack_boundary
11958 = ix86_minimum_incoming_stack_boundary (false);
11959
11960 /* x86_64 varargs needs 16-byte stack alignment for the register save
11961 area. */
11962 if (TARGET_64BIT
11963 && cfun->stdarg
11964 && crtl->stack_alignment_estimated < 128)
11965 crtl->stack_alignment_estimated = 128;
11966 }
11967
11968 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
11969 needed or an rtx for DRAP otherwise. */
11970
11971 static rtx
11972 ix86_get_drap_rtx (void)
11973 {
11974 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
11975 crtl->need_drap = true;
11976
11977 if (stack_realign_drap)
11978 {
11979 /* Assign DRAP to vDRAP and return vDRAP. */
11980 unsigned int regno = find_drap_reg ();
11981 rtx drap_vreg;
11982 rtx arg_ptr;
11983 rtx_insn *seq, *insn;
11984
11985 arg_ptr = gen_rtx_REG (Pmode, regno);
11986 crtl->drap_reg = arg_ptr;
11987
11988 start_sequence ();
11989 drap_vreg = copy_to_reg (arg_ptr);
11990 seq = get_insns ();
11991 end_sequence ();
11992
11993 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
11994 if (!optimize)
11995 {
11996 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
11997 RTX_FRAME_RELATED_P (insn) = 1;
11998 }
11999 return drap_vreg;
12000 }
12001 else
12002 return NULL;
12003 }
12004
12005 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
12006
12007 static rtx
12008 ix86_internal_arg_pointer (void)
12009 {
12010 return virtual_incoming_args_rtx;
12011 }
12012
12013 struct scratch_reg {
12014 rtx reg;
12015 bool saved;
12016 };
12017
12018 /* Return a short-lived scratch register for use on function entry.
12019 In 32-bit mode, it is valid only after the registers are saved
12020 in the prologue. This register must be released by means of
12021 release_scratch_register_on_entry once it is dead. */
12022
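/* For example (illustrative only): in 32-bit code a plain cdecl
   function with no DRAP can normally use %eax, since no argument is
   passed in it, whereas a regparm(3) function must fall back to a
   callee-saved register such as %ebx, or push/pop a register around
   its use if none is known to be saved.  */
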
12023 static void
12024 get_scratch_register_on_entry (struct scratch_reg *sr)
12025 {
12026 int regno;
12027
12028 sr->saved = false;
12029
12030 if (TARGET_64BIT)
12031 {
12032 /* We always use R11 in 64-bit mode. */
12033 regno = R11_REG;
12034 }
12035 else
12036 {
12037 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
12038 bool fastcall_p
12039 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12040 bool thiscall_p
12041 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12042 bool static_chain_p = DECL_STATIC_CHAIN (decl);
12043 int regparm = ix86_function_regparm (fntype, decl);
12044 int drap_regno
12045 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
12046
12047 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
12048 for the static chain register. */
12049 if ((regparm < 1 || (fastcall_p && !static_chain_p))
12050 && drap_regno != AX_REG)
12051 regno = AX_REG;
12052 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
12053 for the static chain register. */
12054 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
12055 regno = AX_REG;
12056 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
12057 regno = DX_REG;
12058 /* ecx is the static chain register. */
12059 else if (regparm < 3 && !fastcall_p && !thiscall_p
12060 && !static_chain_p
12061 && drap_regno != CX_REG)
12062 regno = CX_REG;
12063 else if (ix86_save_reg (BX_REG, true))
12064 regno = BX_REG;
12065 /* esi is the static chain register. */
12066 else if (!(regparm == 3 && static_chain_p)
12067 && ix86_save_reg (SI_REG, true))
12068 regno = SI_REG;
12069 else if (ix86_save_reg (DI_REG, true))
12070 regno = DI_REG;
12071 else
12072 {
12073 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
12074 sr->saved = true;
12075 }
12076 }
12077
12078 sr->reg = gen_rtx_REG (Pmode, regno);
12079 if (sr->saved)
12080 {
12081 rtx_insn *insn = emit_insn (gen_push (sr->reg));
12082 RTX_FRAME_RELATED_P (insn) = 1;
12083 }
12084 }
12085
12086 /* Release a scratch register obtained from the preceding function. */
12087
12088 static void
12089 release_scratch_register_on_entry (struct scratch_reg *sr)
12090 {
12091 if (sr->saved)
12092 {
12093 struct machine_function *m = cfun->machine;
12094 rtx x, insn = emit_insn (gen_pop (sr->reg));
12095
12096 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
12097 RTX_FRAME_RELATED_P (insn) = 1;
12098 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
12099 x = gen_rtx_SET (stack_pointer_rtx, x);
12100 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
12101 m->fs.sp_offset -= UNITS_PER_WORD;
12102 }
12103 }
12104
12105 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
12106
12107 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
12108
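/* Worked example (assuming PROBE_INTERVAL == 4096 and
   UNITS_PER_WORD == 8, i.e. a dope of 32 bytes): for SIZE == 10000
   the constant case below emits, roughly,
	sub	$8224, %rsp	; 2*4096 + 32, probe (%rsp)
	sub	$4096, %rsp	; probe (%rsp)
	sub	$1808, %rsp	; 10000 + 4096 - 12288, probe (%rsp)
	add	$4128, %rsp	; undo the extra 4096 + 32
   for a net decrement of exactly SIZE bytes, with consecutive probes
   at most PROBE_INTERVAL bytes apart.  */
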
12109 static void
12110 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
12111 {
12112 /* We skip the probe for the first interval + a small dope of 4 words and
12113 probe that many bytes past the specified size to maintain a protection
12114 area at the bottom of the stack. */
12115 const int dope = 4 * UNITS_PER_WORD;
12116 rtx size_rtx = GEN_INT (size), last;
12117
12118 /* See if we have a constant small number of probes to generate. If so,
12119 that's the easy case. The run-time loop is made up of 11 insns in the
12120 generic case while the compile-time loop is made up of 3+2*(n-1) insns
12121 for n # of intervals. */
12122 if (size <= 5 * PROBE_INTERVAL)
12123 {
12124 HOST_WIDE_INT i, adjust;
12125 bool first_probe = true;
12126
12127 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
12128 values of N from 1 until it exceeds SIZE. If only one probe is
12129 needed, this will not generate any code. Then adjust and probe
12130 to PROBE_INTERVAL + SIZE. */
12131 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
12132 {
12133 if (first_probe)
12134 {
12135 adjust = 2 * PROBE_INTERVAL + dope;
12136 first_probe = false;
12137 }
12138 else
12139 adjust = PROBE_INTERVAL;
12140
12141 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12142 plus_constant (Pmode, stack_pointer_rtx,
12143 -adjust)));
12144 emit_stack_probe (stack_pointer_rtx);
12145 }
12146
12147 if (first_probe)
12148 adjust = size + PROBE_INTERVAL + dope;
12149 else
12150 adjust = size + PROBE_INTERVAL - i;
12151
12152 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12153 plus_constant (Pmode, stack_pointer_rtx,
12154 -adjust)));
12155 emit_stack_probe (stack_pointer_rtx);
12156
12157 /* Adjust back to account for the additional first interval. */
12158 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12159 plus_constant (Pmode, stack_pointer_rtx,
12160 PROBE_INTERVAL + dope)));
12161 }
12162
12163 /* Otherwise, do the same as above, but in a loop. Note that we must be
12164 extra careful with variables wrapping around because we might be at
12165 the very top (or the very bottom) of the address space and we have
12166 to be able to handle this case properly; in particular, we use an
12167 equality test for the loop condition. */
12168 else
12169 {
12170 HOST_WIDE_INT rounded_size;
12171 struct scratch_reg sr;
12172
12173 get_scratch_register_on_entry (&sr);
12174
12175
12176 /* Step 1: round SIZE to the previous multiple of the interval. */
12177
12178 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
12179
12180
12181 /* Step 2: compute initial and final value of the loop counter. */
12182
12183 /* SP = SP_0 + PROBE_INTERVAL. */
12184 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12185 plus_constant (Pmode, stack_pointer_rtx,
12186 - (PROBE_INTERVAL + dope))));
12187
12188 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
12189 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
12190 emit_insn (gen_rtx_SET (sr.reg,
12191 gen_rtx_PLUS (Pmode, sr.reg,
12192 stack_pointer_rtx)));
12193
12194
12195 /* Step 3: the loop
12196
12197 while (SP != LAST_ADDR)
12198 {
12199 SP = SP + PROBE_INTERVAL
12200 probe at SP
12201 }
12202
12203 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
12204 values of N from 1 until it is equal to ROUNDED_SIZE. */
12205
12206 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
12207
12208
12209 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
12210 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
12211
12212 if (size != rounded_size)
12213 {
12214 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12215 plus_constant (Pmode, stack_pointer_rtx,
12216 rounded_size - size)));
12217 emit_stack_probe (stack_pointer_rtx);
12218 }
12219
12220 /* Adjust back to account for the additional first interval. */
12221 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12222 plus_constant (Pmode, stack_pointer_rtx,
12223 PROBE_INTERVAL + dope)));
12224
12225 release_scratch_register_on_entry (&sr);
12226 }
12227
12228 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
12229
12230 /* Even if the stack pointer isn't the CFA register, we need to correctly
12231 describe the adjustments made to it, in particular differentiate the
12232 frame-related ones from the frame-unrelated ones. */
12233 if (size > 0)
12234 {
12235 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
12236 XVECEXP (expr, 0, 0)
12237 = gen_rtx_SET (stack_pointer_rtx,
12238 plus_constant (Pmode, stack_pointer_rtx, -size));
12239 XVECEXP (expr, 0, 1)
12240 = gen_rtx_SET (stack_pointer_rtx,
12241 plus_constant (Pmode, stack_pointer_rtx,
12242 PROBE_INTERVAL + dope + size));
12243 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
12244 RTX_FRAME_RELATED_P (last) = 1;
12245
12246 cfun->machine->fs.sp_offset += size;
12247 }
12248
12249 /* Make sure nothing is scheduled before we are done. */
12250 emit_insn (gen_blockage ());
12251 }
12252
12253 /* Adjust the stack pointer up to REG while probing it. */
12254
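/* Schematically, on x86-64 with PROBE_INTERVAL == 4096 and %r11 as the
   scratch register, the emitted loop looks like (label names are
   illustrative):
	.LPSRL0:
		cmpq	%r11, %rsp
		je	.LPSRE0
		subq	$4096, %rsp
		orq	$0, (%rsp)
		jmp	.LPSRL0
	.LPSRE0:  */
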
12255 const char *
12256 output_adjust_stack_and_probe (rtx reg)
12257 {
12258 static int labelno = 0;
12259 char loop_lab[32], end_lab[32];
12260 rtx xops[2];
12261
12262 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
12263 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
12264
12265 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
12266
12267 /* Jump to END_LAB if SP == LAST_ADDR. */
12268 xops[0] = stack_pointer_rtx;
12269 xops[1] = reg;
12270 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
12271 fputs ("\tje\t", asm_out_file);
12272 assemble_name_raw (asm_out_file, end_lab);
12273 fputc ('\n', asm_out_file);
12274
12275 /* SP = SP + PROBE_INTERVAL. */
12276 xops[1] = GEN_INT (PROBE_INTERVAL);
12277 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
12278
12279 /* Probe at SP. */
12280 xops[1] = const0_rtx;
12281 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
12282
12283 fprintf (asm_out_file, "\tjmp\t");
12284 assemble_name_raw (asm_out_file, loop_lab);
12285 fputc ('\n', asm_out_file);
12286
12287 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
12288
12289 return "";
12290 }
12291
12292 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
12293 inclusive. These are offsets from the current stack pointer. */
12294
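/* For example (assuming PROBE_INTERVAL == 4096): FIRST == F and
   SIZE == 3 * 4096 take the constant case below, emitting probes at
   sp - (F + 4096), sp - (F + 8192) and sp - (F + 12288) without
   moving the stack pointer.  */
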
12295 static void
12296 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
12297 {
12298 /* See if we have a constant small number of probes to generate. If so,
12299 that's the easy case. The run-time loop is made up of 7 insns in the
12300 generic case while the compile-time loop is made up of n insns for n #
12301 of intervals. */
12302 if (size <= 7 * PROBE_INTERVAL)
12303 {
12304 HOST_WIDE_INT i;
12305
12306 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
12307 it exceeds SIZE. If only one probe is needed, this will not
12308 generate any code. Then probe at FIRST + SIZE. */
12309 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
12310 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
12311 -(first + i)));
12312
12313 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
12314 -(first + size)));
12315 }
12316
12317 /* Otherwise, do the same as above, but in a loop. Note that we must be
12318 extra careful with variables wrapping around because we might be at
12319 the very top (or the very bottom) of the address space and we have
12320 to be able to handle this case properly; in particular, we use an
12321 equality test for the loop condition. */
12322 else
12323 {
12324 HOST_WIDE_INT rounded_size, last;
12325 struct scratch_reg sr;
12326
12327 get_scratch_register_on_entry (&sr);
12328
12329
12330 /* Step 1: round SIZE to the previous multiple of the interval. */
12331
12332 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
12333
12334
12335 /* Step 2: compute initial and final value of the loop counter. */
12336
12337 /* TEST_OFFSET = FIRST. */
12338 emit_move_insn (sr.reg, GEN_INT (-first));
12339
12340 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
12341 last = first + rounded_size;
12342
12343
12344 /* Step 3: the loop
12345
12346 while (TEST_ADDR != LAST_ADDR)
12347 {
12348 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
12349 probe at TEST_ADDR
12350 }
12351
12352 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
12353 until it is equal to ROUNDED_SIZE. */
12354
12355 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
12356
12357
12358 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
12359 that SIZE is equal to ROUNDED_SIZE. */
12360
12361 if (size != rounded_size)
12362 emit_stack_probe (plus_constant (Pmode,
12363 gen_rtx_PLUS (Pmode,
12364 stack_pointer_rtx,
12365 sr.reg),
12366 rounded_size - size));
12367
12368 release_scratch_register_on_entry (&sr);
12369 }
12370
12371 /* Make sure nothing is scheduled before we are done. */
12372 emit_insn (gen_blockage ());
12373 }
12374
12375 /* Probe a range of stack addresses from REG to END, inclusive. These are
12376 offsets from the current stack pointer. */
12377
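/* Schematically, the emitted loop is (operands shown as placeholders
   in AT&T order, label names illustrative):
	.LPSRL1:
		cmp	LAST_ADDR, TEST_ADDR
		je	.LPSRE1
		sub	$PROBE_INTERVAL, TEST_ADDR
		or	$0, (%esp, TEST_ADDR)
		jmp	.LPSRL1
	.LPSRE1:  */
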
12378 const char *
12379 output_probe_stack_range (rtx reg, rtx end)
12380 {
12381 static int labelno = 0;
12382 char loop_lab[32], end_lab[32];
12383 rtx xops[3];
12384
12385 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
12386 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
12387
12388 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
12389
12390 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
12391 xops[0] = reg;
12392 xops[1] = end;
12393 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
12394 fputs ("\tje\t", asm_out_file);
12395 assemble_name_raw (asm_out_file, end_lab);
12396 fputc ('\n', asm_out_file);
12397
12398 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
12399 xops[1] = GEN_INT (PROBE_INTERVAL);
12400 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
12401
12402 /* Probe at TEST_ADDR. */
12403 xops[0] = stack_pointer_rtx;
12404 xops[1] = reg;
12405 xops[2] = const0_rtx;
12406 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
12407
12408 fprintf (asm_out_file, "\tjmp\t");
12409 assemble_name_raw (asm_out_file, loop_lab);
12410 fputc ('\n', asm_out_file);
12411
12412 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
12413
12414 return "";
12415 }
12416
12417 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
12418 to be generated in correct form. */
12419 static void
12420 ix86_finalize_stack_realign_flags (void)
12421 {
12422 /* Check if stack realign is really needed after reload, and
12423 store the result in cfun. */
12424 unsigned int incoming_stack_boundary
12425 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
12426 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
12427 unsigned int stack_realign = (incoming_stack_boundary
12428 < (crtl->is_leaf
12429 ? crtl->max_used_stack_slot_alignment
12430 : crtl->stack_alignment_needed));
12431
12432 if (crtl->stack_realign_finalized)
12433 {
12434 /* After stack_realign_needed is finalized, we can no longer
12435 change it. */
12436 gcc_assert (crtl->stack_realign_needed == stack_realign);
12437 return;
12438 }
12439
12440 /* If the only reason for frame_pointer_needed is that we conservatively
12441 assumed stack realignment might be needed, but in the end nothing that
12442 needed the stack alignment had been spilled, clear frame_pointer_needed
12443 and say we don't need stack realignment. */
12444 if (stack_realign
12445 && frame_pointer_needed
12446 && crtl->is_leaf
12447 && flag_omit_frame_pointer
12448 && crtl->sp_is_unchanging
12449 && !ix86_current_function_calls_tls_descriptor
12450 && !crtl->accesses_prior_frames
12451 && !cfun->calls_alloca
12452 && !crtl->calls_eh_return
12453 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
12454 && !ix86_frame_pointer_required ()
12455 && get_frame_size () == 0
12456 && ix86_nsaved_sseregs () == 0
12457 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
12458 {
12459 HARD_REG_SET set_up_by_prologue, prologue_used;
12460 basic_block bb;
12461
12462 CLEAR_HARD_REG_SET (prologue_used);
12463 CLEAR_HARD_REG_SET (set_up_by_prologue);
12464 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
12465 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
12466 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
12467 HARD_FRAME_POINTER_REGNUM);
12468 FOR_EACH_BB_FN (bb, cfun)
12469 {
12470 rtx_insn *insn;
12471 FOR_BB_INSNS (bb, insn)
12472 if (NONDEBUG_INSN_P (insn)
12473 && requires_stack_frame_p (insn, prologue_used,
12474 set_up_by_prologue))
12475 {
12476 crtl->stack_realign_needed = stack_realign;
12477 crtl->stack_realign_finalized = true;
12478 return;
12479 }
12480 }
12481
12482 /* If drap has been set, but it actually isn't live at the start
12483 of the function, there is no reason to set it up. */
12484 if (crtl->drap_reg)
12485 {
12486 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
12487 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
12488 {
12489 crtl->drap_reg = NULL_RTX;
12490 crtl->need_drap = false;
12491 }
12492 }
12493 else
12494 cfun->machine->no_drap_save_restore = true;
12495
12496 frame_pointer_needed = false;
12497 stack_realign = false;
12498 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
12499 crtl->stack_alignment_needed = incoming_stack_boundary;
12500 crtl->stack_alignment_estimated = incoming_stack_boundary;
12501 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
12502 crtl->preferred_stack_boundary = incoming_stack_boundary;
12503 df_finish_pass (true);
12504 df_scan_alloc (NULL);
12505 df_scan_blocks ();
12506 df_compute_regs_ever_live (true);
12507 df_analyze ();
12508 }
12509
12510 crtl->stack_realign_needed = stack_realign;
12511 crtl->stack_realign_finalized = true;
12512 }
12513
12514 /* Delete SET_GOT right after entry block if it is allocated to reg. */
12515
12516 static void
12517 ix86_elim_entry_set_got (rtx reg)
12518 {
12519 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
12520 rtx_insn *c_insn = BB_HEAD (bb);
12521 if (!NONDEBUG_INSN_P (c_insn))
12522 c_insn = next_nonnote_nondebug_insn (c_insn);
12523 if (c_insn && NONJUMP_INSN_P (c_insn))
12524 {
12525 rtx pat = PATTERN (c_insn);
12526 if (GET_CODE (pat) == PARALLEL)
12527 {
12528 rtx vec = XVECEXP (pat, 0, 0);
12529 if (GET_CODE (vec) == SET
12530 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
12531 && REGNO (XEXP (vec, 0)) == REGNO (reg))
12532 delete_insn (c_insn);
12533 }
12534 }
12535 }
12536
12537 /* Expand the prologue into a bunch of separate insns. */
12538
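/* Schematically, a typical 64-bit function with a frame pointer and
   no stack realignment ends up with a prologue along the lines of
	push	%rbp
	mov	%rsp, %rbp
	push	<callee-saved GPRs>
	sub	$FRAMESIZE, %rsp
	<SSE register saves via moves, if any>
   with DRAP setup, stack probing and SEH bookkeeping inserted where
   required; the details vary with the frame layout computed below.  */
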
12539 void
12540 ix86_expand_prologue (void)
12541 {
12542 struct machine_function *m = cfun->machine;
12543 rtx insn, t;
12544 struct ix86_frame frame;
12545 HOST_WIDE_INT allocate;
12546 bool int_registers_saved;
12547 bool sse_registers_saved;
12548 rtx static_chain = NULL_RTX;
12549
12550 ix86_finalize_stack_realign_flags ();
12551
12552 /* DRAP should not coexist with stack_realign_fp */
12553 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
12554
12555 memset (&m->fs, 0, sizeof (m->fs));
12556
12557 /* Initialize CFA state for before the prologue. */
12558 m->fs.cfa_reg = stack_pointer_rtx;
12559 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
12560
12561 /* Track SP offset to the CFA. We continue tracking this after we've
12562 swapped the CFA register away from SP. In the case of re-alignment
12563 this is fudged; we're interested in offsets within the local frame. */
12564 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
12565 m->fs.sp_valid = true;
12566
12567 ix86_compute_frame_layout (&frame);
12568
12569 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
12570 {
12571 /* We should have already generated an error for any use of
12572 ms_hook on a nested function. */
12573 gcc_checking_assert (!ix86_static_chain_on_stack);
12574
12575 /* Check if profiling is active and we shall use profiling before
12576 prologue variant. If so sorry. */
12577 if (crtl->profile && flag_fentry != 0)
12578 sorry ("ms_hook_prologue attribute isn%'t compatible "
12579 "with -mfentry for 32-bit");
12580
12581 /* In ix86_asm_output_function_label we emitted:
12582 8b ff movl.s %edi,%edi
12583 55 push %ebp
12584 8b ec movl.s %esp,%ebp
12585
12586 This matches the hookable function prologue in Win32 API
12587 functions in Microsoft Windows XP Service Pack 2 and newer.
12588 Wine uses this to enable Windows apps to hook the Win32 API
12589 functions provided by Wine.
12590
12591 What that means is that we've already set up the frame pointer. */
12592
12593 if (frame_pointer_needed
12594 && !(crtl->drap_reg && crtl->stack_realign_needed))
12595 {
12596 rtx push, mov;
12597
12598 /* We've decided to use the frame pointer already set up.
12599 Describe this to the unwinder by pretending that both
12600 push and mov insns happen right here.
12601
12602 Putting the unwind info here at the end of the ms_hook
12603 is done so that we can make absolutely certain we get
12604 the required byte sequence at the start of the function,
12605 rather than relying on an assembler that can produce
12606 the exact encoding required.
12607
12608 However it does mean (in the unpatched case) that we have
12609 a 1 insn window where the asynchronous unwind info is
12610 incorrect. However, if we placed the unwind info at
12611 its correct location we would have incorrect unwind info
12612 in the patched case. Which is probably all moot since
12613 I don't expect Wine generates dwarf2 unwind info for the
12614 system libraries that use this feature. */
12615
12616 insn = emit_insn (gen_blockage ());
12617
12618 push = gen_push (hard_frame_pointer_rtx);
12619 mov = gen_rtx_SET (hard_frame_pointer_rtx,
12620 stack_pointer_rtx);
12621 RTX_FRAME_RELATED_P (push) = 1;
12622 RTX_FRAME_RELATED_P (mov) = 1;
12623
12624 RTX_FRAME_RELATED_P (insn) = 1;
12625 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
12626 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
12627
12628 /* Note that gen_push incremented m->fs.cfa_offset, even
12629 though we didn't emit the push insn here. */
12630 m->fs.cfa_reg = hard_frame_pointer_rtx;
12631 m->fs.fp_offset = m->fs.cfa_offset;
12632 m->fs.fp_valid = true;
12633 }
12634 else
12635 {
12636 /* The frame pointer is not needed so pop %ebp again.
12637 This leaves us with a pristine state. */
12638 emit_insn (gen_pop (hard_frame_pointer_rtx));
12639 }
12640 }
12641
12642 /* The first insn of a function that accepts its static chain on the
12643 stack is to push the register that would be filled in by a direct
12644 call. This insn will be skipped by the trampoline. */
12645 else if (ix86_static_chain_on_stack)
12646 {
12647 static_chain = ix86_static_chain (cfun->decl, false);
12648 insn = emit_insn (gen_push (static_chain));
12649 emit_insn (gen_blockage ());
12650
12651 /* We don't want to interpret this push insn as a register save,
12652 only as a stack adjustment. The real copy of the register as
12653 a save will be done later, if needed. */
12654 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
12655 t = gen_rtx_SET (stack_pointer_rtx, t);
12656 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
12657 RTX_FRAME_RELATED_P (insn) = 1;
12658 }
12659
12660 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
12661 DRAP is needed and stack realignment is really needed after reload. */
12662 if (stack_realign_drap)
12663 {
12664 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
12665
12666 /* Only need to push the parameter pointer reg if it is callee-saved. */
12667 if (!call_used_regs[REGNO (crtl->drap_reg)])
12668 {
12669 /* Push arg pointer reg */
12670 insn = emit_insn (gen_push (crtl->drap_reg));
12671 RTX_FRAME_RELATED_P (insn) = 1;
12672 }
12673
12674 /* Grab the argument pointer. */
12675 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
12676 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
12677 RTX_FRAME_RELATED_P (insn) = 1;
12678 m->fs.cfa_reg = crtl->drap_reg;
12679 m->fs.cfa_offset = 0;
12680
12681 /* Align the stack. */
12682 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
12683 stack_pointer_rtx,
12684 GEN_INT (-align_bytes)));
12685 RTX_FRAME_RELATED_P (insn) = 1;
12686
12687 /* Replicate the return address on the stack so that return
12688 address can be reached via (argp - 1) slot. This is needed
12689 to implement macro RETURN_ADDR_RTX and intrinsic function
12690 expand_builtin_return_addr etc. */
12691 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
12692 t = gen_frame_mem (word_mode, t);
12693 insn = emit_insn (gen_push (t));
12694 RTX_FRAME_RELATED_P (insn) = 1;
12695
12696 /* For the purposes of frame and register save area addressing,
12697 we've started over with a new frame. */
12698 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
12699 m->fs.realigned = true;
12700
12701 if (static_chain)
12702 {
12703 /* Replicate static chain on the stack so that static chain
12704 can be reached via (argp - 2) slot. This is needed for
12705 nested function with stack realignment. */
12706 insn = emit_insn (gen_push (static_chain));
12707 RTX_FRAME_RELATED_P (insn) = 1;
12708 }
12709 }
12710
12711 int_registers_saved = (frame.nregs == 0);
12712 sse_registers_saved = (frame.nsseregs == 0);
12713
12714 if (frame_pointer_needed && !m->fs.fp_valid)
12715 {
12716 /* Note: AT&T enter does NOT have reversed args. Enter is probably
12717 slower on all targets. Also sdb doesn't like it. */
12718 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
12719 RTX_FRAME_RELATED_P (insn) = 1;
12720
12721 /* Push registers now, before setting the frame pointer
12722 on SEH target. */
12723 if (!int_registers_saved
12724 && TARGET_SEH
12725 && !frame.save_regs_using_mov)
12726 {
12727 ix86_emit_save_regs ();
12728 int_registers_saved = true;
12729 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
12730 }
12731
12732 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
12733 {
12734 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
12735 RTX_FRAME_RELATED_P (insn) = 1;
12736
12737 if (m->fs.cfa_reg == stack_pointer_rtx)
12738 m->fs.cfa_reg = hard_frame_pointer_rtx;
12739 m->fs.fp_offset = m->fs.sp_offset;
12740 m->fs.fp_valid = true;
12741 }
12742 }
12743
12744 if (!int_registers_saved)
12745 {
12746 /* If saving registers via PUSH, do so now. */
12747 if (!frame.save_regs_using_mov)
12748 {
12749 ix86_emit_save_regs ();
12750 int_registers_saved = true;
12751 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
12752 }
12753
12754 /* When using red zone we may start register saving before allocating
12755 the stack frame, saving one cycle of the prologue. However, avoid
12756 doing this if we have to probe the stack; at least on x86_64 the
12757 stack probe can turn into a call that clobbers a red zone location. */
12758 else if (ix86_using_red_zone ()
12759 && (! TARGET_STACK_PROBE
12760 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
12761 {
12762 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
12763 int_registers_saved = true;
12764 }
12765 }
12766
12767 if (stack_realign_fp)
12768 {
12769 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
12770 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
12771
12772 /* The computation of the size of the re-aligned stack frame means
12773 that we must allocate the size of the register save area before
12774 performing the actual alignment. Otherwise we cannot guarantee
12775 that there's enough storage above the realignment point. */
12776 if (m->fs.sp_offset != frame.sse_reg_save_offset)
12777 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12778 GEN_INT (m->fs.sp_offset
12779 - frame.sse_reg_save_offset),
12780 -1, false);
12781
12782 /* Align the stack. */
12783 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
12784 stack_pointer_rtx,
12785 GEN_INT (-align_bytes)));
12786
12787 /* For the purposes of register save area addressing, the stack
12788 pointer is no longer valid. As for the value of sp_offset,
12789 see ix86_compute_frame_layout, which we need to match in order
12790 to pass verification of stack_pointer_offset at the end. */
12791 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
12792 m->fs.sp_valid = false;
12793 }
12794
12795 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
12796
12797 if (flag_stack_usage_info)
12798 {
12799 /* We start to count from ARG_POINTER. */
12800 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
12801
12802 /* If it was realigned, take into account the fake frame. */
12803 if (stack_realign_drap)
12804 {
12805 if (ix86_static_chain_on_stack)
12806 stack_size += UNITS_PER_WORD;
12807
12808 if (!call_used_regs[REGNO (crtl->drap_reg)])
12809 stack_size += UNITS_PER_WORD;
12810
12811 /* This over-estimates by 1 minimal-stack-alignment-unit but
12812 mitigates that by counting in the new return address slot. */
12813 current_function_dynamic_stack_size
12814 += crtl->stack_alignment_needed / BITS_PER_UNIT;
12815 }
12816
12817 current_function_static_stack_size = stack_size;
12818 }
12819
12820 /* On SEH target with very large frame size, allocate an area to save
12821 SSE registers (as the very large allocation won't be described). */
12822 if (TARGET_SEH
12823 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
12824 && !sse_registers_saved)
12825 {
12826 HOST_WIDE_INT sse_size =
12827 frame.sse_reg_save_offset - frame.reg_save_offset;
12828
12829 gcc_assert (int_registers_saved);
12830
12831 /* No need to do stack checking as the area will be immediately
12832 written. */
12833 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12834 GEN_INT (-sse_size), -1,
12835 m->fs.cfa_reg == stack_pointer_rtx);
12836 allocate -= sse_size;
12837 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
12838 sse_registers_saved = true;
12839 }
12840
12841 /* The stack has already been decremented by the instruction calling us
12842 so probe if the size is non-negative to preserve the protection area. */
12843 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
12844 {
12845 /* We expect the registers to be saved when probes are used. */
12846 gcc_assert (int_registers_saved);
12847
12848 if (STACK_CHECK_MOVING_SP)
12849 {
12850 if (!(crtl->is_leaf && !cfun->calls_alloca
12851 && allocate <= PROBE_INTERVAL))
12852 {
12853 ix86_adjust_stack_and_probe (allocate);
12854 allocate = 0;
12855 }
12856 }
12857 else
12858 {
12859 HOST_WIDE_INT size = allocate;
12860
12861 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
12862 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
12863
12864 if (TARGET_STACK_PROBE)
12865 {
12866 if (crtl->is_leaf && !cfun->calls_alloca)
12867 {
12868 if (size > PROBE_INTERVAL)
12869 ix86_emit_probe_stack_range (0, size);
12870 }
12871 else
12872 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
12873 }
12874 else
12875 {
12876 if (crtl->is_leaf && !cfun->calls_alloca)
12877 {
12878 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
12879 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
12880 size - STACK_CHECK_PROTECT);
12881 }
12882 else
12883 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
12884 }
12885 }
12886 }
12887
12888 if (allocate == 0)
12889 ;
12890 else if (!ix86_target_stack_probe ()
12891 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
12892 {
12893 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12894 GEN_INT (-allocate), -1,
12895 m->fs.cfa_reg == stack_pointer_rtx);
12896 }
12897 else
12898 {
12899 rtx eax = gen_rtx_REG (Pmode, AX_REG);
12900 rtx r10 = NULL;
12901 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
12902 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
12903 bool eax_live = ix86_eax_live_at_start_p ();
12904 bool r10_live = false;
12905
12906 if (TARGET_64BIT)
12907 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
12908
12909 if (eax_live)
12910 {
12911 insn = emit_insn (gen_push (eax));
12912 allocate -= UNITS_PER_WORD;
12913 /* Note that SEH directives need to continue tracking the stack
12914 pointer even after the frame pointer has been set up. */
12915 if (sp_is_cfa_reg || TARGET_SEH)
12916 {
12917 if (sp_is_cfa_reg)
12918 m->fs.cfa_offset += UNITS_PER_WORD;
12919 RTX_FRAME_RELATED_P (insn) = 1;
12920 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
12921 gen_rtx_SET (stack_pointer_rtx,
12922 plus_constant (Pmode, stack_pointer_rtx,
12923 -UNITS_PER_WORD)));
12924 }
12925 }
12926
12927 if (r10_live)
12928 {
12929 r10 = gen_rtx_REG (Pmode, R10_REG);
12930 insn = emit_insn (gen_push (r10));
12931 allocate -= UNITS_PER_WORD;
12932 if (sp_is_cfa_reg || TARGET_SEH)
12933 {
12934 if (sp_is_cfa_reg)
12935 m->fs.cfa_offset += UNITS_PER_WORD;
12936 RTX_FRAME_RELATED_P (insn) = 1;
12937 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
12938 gen_rtx_SET (stack_pointer_rtx,
12939 plus_constant (Pmode, stack_pointer_rtx,
12940 -UNITS_PER_WORD)));
12941 }
12942 }
12943
12944 emit_move_insn (eax, GEN_INT (allocate));
12945 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
12946
12947 /* Use the fact that AX still contains ALLOCATE. */
12948 adjust_stack_insn = (Pmode == DImode
12949 ? gen_pro_epilogue_adjust_stack_di_sub
12950 : gen_pro_epilogue_adjust_stack_si_sub);
12951
12952 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
12953 stack_pointer_rtx, eax));
12954
12955 if (sp_is_cfa_reg || TARGET_SEH)
12956 {
12957 if (sp_is_cfa_reg)
12958 m->fs.cfa_offset += allocate;
12959 RTX_FRAME_RELATED_P (insn) = 1;
12960 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
12961 gen_rtx_SET (stack_pointer_rtx,
12962 plus_constant (Pmode, stack_pointer_rtx,
12963 -allocate)));
12964 }
12965 m->fs.sp_offset += allocate;
12966
12967 /* Use stack_pointer_rtx for relative addressing so that code
12968 works for realigned stack, too. */
12969 if (r10_live && eax_live)
12970 {
12971 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
12972 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12973 gen_frame_mem (word_mode, t));
12974 t = plus_constant (Pmode, t, UNITS_PER_WORD);
12975 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
12976 gen_frame_mem (word_mode, t));
12977 }
12978 else if (eax_live || r10_live)
12979 {
12980 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
12981 emit_move_insn (gen_rtx_REG (word_mode,
12982 (eax_live ? AX_REG : R10_REG)),
12983 gen_frame_mem (word_mode, t));
12984 }
12985 }
12986 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
12987
12988 /* If we haven't already set up the frame pointer, do so now. */
12989 if (frame_pointer_needed && !m->fs.fp_valid)
12990 {
12991 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
12992 GEN_INT (frame.stack_pointer_offset
12993 - frame.hard_frame_pointer_offset));
12994 insn = emit_insn (insn);
12995 RTX_FRAME_RELATED_P (insn) = 1;
12996 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
12997
12998 if (m->fs.cfa_reg == stack_pointer_rtx)
12999 m->fs.cfa_reg = hard_frame_pointer_rtx;
13000 m->fs.fp_offset = frame.hard_frame_pointer_offset;
13001 m->fs.fp_valid = true;
13002 }
13003
13004 if (!int_registers_saved)
13005 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
13006 if (!sse_registers_saved)
13007 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13008
13009 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
13010 in PROLOGUE. */
13011 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
13012 {
13013 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
13014 insn = emit_insn (gen_set_got (pic));
13015 RTX_FRAME_RELATED_P (insn) = 1;
13016 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
13017 emit_insn (gen_prologue_use (pic));
13018 /* Delete the already emitted SET_GOT, if it exists and is allocated to
13019 REAL_PIC_OFFSET_TABLE_REGNUM. */
13020 ix86_elim_entry_set_got (pic);
13021 }
13022
13023 if (crtl->drap_reg && !crtl->stack_realign_needed)
13024 {
13025 /* vDRAP is set up, but after reload it turns out stack realignment
13026 isn't necessary; emit prologue code here to set up DRAP
13027 without the stack realignment adjustment. */
13028 t = choose_baseaddr (0);
13029 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
13030 }
13031
13032 /* Prevent instructions from being scheduled into register save push
13033 sequence when access to the redzone area is done through frame pointer.
13034 The offset between the frame pointer and the stack pointer is calculated
13035 relative to the value of the stack pointer at the end of the function
13036 prologue, and moving instructions that access redzone area via frame
13037 pointer inside push sequence violates this assumption. */
13038 if (frame_pointer_needed && frame.red_zone_size)
13039 emit_insn (gen_memory_blockage ());
13040
13041 /* Emit cld instruction if stringops are used in the function. */
13042 if (TARGET_CLD && ix86_current_function_needs_cld)
13043 emit_insn (gen_cld ());
13044
13045 /* SEH requires that the prologue end within 256 bytes of the start of
13046 the function. Prevent instruction schedules that would extend that.
13047 Further, prevent alloca modifications to the stack pointer from being
13048 combined with prologue modifications. */
13049 if (TARGET_SEH)
13050 emit_insn (gen_prologue_use (stack_pointer_rtx));
13051 }
13052
13053 /* Emit code to restore REG using a POP insn. */
13054
13055 static void
13056 ix86_emit_restore_reg_using_pop (rtx reg)
13057 {
13058 struct machine_function *m = cfun->machine;
13059 rtx_insn *insn = emit_insn (gen_pop (reg));
13060
13061 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
13062 m->fs.sp_offset -= UNITS_PER_WORD;
13063
13064 if (m->fs.cfa_reg == crtl->drap_reg
13065 && REGNO (reg) == REGNO (crtl->drap_reg))
13066 {
13067 /* Previously we'd represented the CFA as an expression
13068 like *(%ebp - 8). We've just popped that value from
13069 the stack, which means we need to reset the CFA to
13070 the drap register. This will remain until we restore
13071 the stack pointer. */
13072 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
13073 RTX_FRAME_RELATED_P (insn) = 1;
13074
13075 /* This means that the DRAP register is valid for addressing too. */
13076 m->fs.drap_valid = true;
13077 return;
13078 }
13079
13080 if (m->fs.cfa_reg == stack_pointer_rtx)
13081 {
13082 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
13083 x = gen_rtx_SET (stack_pointer_rtx, x);
13084 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
13085 RTX_FRAME_RELATED_P (insn) = 1;
13086
13087 m->fs.cfa_offset -= UNITS_PER_WORD;
13088 }
13089
13090 /* When the frame pointer is the CFA, and we pop it, we are
13091 swapping back to the stack pointer as the CFA. This happens
13092 for stack frames that don't allocate other data, so we assume
13093 the stack pointer is now pointing at the return address, i.e.
13094 the function entry state, which makes the offset be 1 word. */
13095 if (reg == hard_frame_pointer_rtx)
13096 {
13097 m->fs.fp_valid = false;
13098 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
13099 {
13100 m->fs.cfa_reg = stack_pointer_rtx;
13101 m->fs.cfa_offset -= UNITS_PER_WORD;
13102
13103 add_reg_note (insn, REG_CFA_DEF_CFA,
13104 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13105 GEN_INT (m->fs.cfa_offset)));
13106 RTX_FRAME_RELATED_P (insn) = 1;
13107 }
13108 }
13109 }
13110
13111 /* Emit code to restore saved registers using POP insns. */
13112
13113 static void
13114 ix86_emit_restore_regs_using_pop (void)
13115 {
13116 unsigned int regno;
13117
13118 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13119 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false))
13120 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
13121 }
13122
13123 /* Emit code and notes for the LEAVE instruction. */
13124
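/* leave is equivalent to
	mov	%ebp, %esp	(%rbp/%rsp in 64-bit mode)
	pop	%ebp
   so it both deallocates everything below the saved frame pointer and
   restores the frame pointer itself; the frame state updates below
   mirror that.  */
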
13125 static void
13126 ix86_emit_leave (void)
13127 {
13128 struct machine_function *m = cfun->machine;
13129 rtx_insn *insn = emit_insn (ix86_gen_leave ());
13130
13131 ix86_add_queued_cfa_restore_notes (insn);
13132
13133 gcc_assert (m->fs.fp_valid);
13134 m->fs.sp_valid = true;
13135 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
13136 m->fs.fp_valid = false;
13137
13138 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
13139 {
13140 m->fs.cfa_reg = stack_pointer_rtx;
13141 m->fs.cfa_offset = m->fs.sp_offset;
13142
13143 add_reg_note (insn, REG_CFA_DEF_CFA,
13144 plus_constant (Pmode, stack_pointer_rtx,
13145 m->fs.sp_offset));
13146 RTX_FRAME_RELATED_P (insn) = 1;
13147 }
13148 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
13149 m->fs.fp_offset);
13150 }
13151
13152 /* Emit code to restore saved registers using MOV insns.
13153 First register is restored from CFA - CFA_OFFSET. */
13154 static void
13155 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
13156 bool maybe_eh_return)
13157 {
13158 struct machine_function *m = cfun->machine;
13159 unsigned int regno;
13160
13161 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13162 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
13163 {
13164 rtx reg = gen_rtx_REG (word_mode, regno);
13165 rtx mem;
13166 rtx_insn *insn;
13167
13168 mem = choose_baseaddr (cfa_offset);
13169 mem = gen_frame_mem (word_mode, mem);
13170 insn = emit_move_insn (reg, mem);
13171
13172 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
13173 {
13174 /* Previously we'd represented the CFA as an expression
13175 like *(%ebp - 8). We've just reloaded that value from
13176 the stack, which means we need to reset the CFA to
13177 the drap register. This will remain until we restore
13178 the stack pointer. */
13179 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
13180 RTX_FRAME_RELATED_P (insn) = 1;
13181
13182 /* This means that the DRAP register is valid for addressing. */
13183 m->fs.drap_valid = true;
13184 }
13185 else
13186 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
13187
13188 cfa_offset -= UNITS_PER_WORD;
13189 }
13190 }
13191
13192 /* Emit code to restore saved SSE registers using MOV insns.
13193 First register is restored from CFA - CFA_OFFSET. */
13194 static void
13195 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
13196 bool maybe_eh_return)
13197 {
13198 unsigned int regno;
13199
13200 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13201 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
13202 {
13203 rtx reg = gen_rtx_REG (V4SFmode, regno);
13204 rtx mem;
13205 unsigned int align;
13206
13207 mem = choose_baseaddr (cfa_offset);
13208 mem = gen_rtx_MEM (V4SFmode, mem);
13209
13210 /* The location is aligned up to INCOMING_STACK_BOUNDARY. */
13211 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), INCOMING_STACK_BOUNDARY);
13212 set_mem_align (mem, align);
13213
13214 /* SSE saves are not within the re-aligned local stack frame.
13215 If INCOMING_STACK_BOUNDARY is less than 128 bits, we have
13216 to emit an unaligned load. */
13217 if (align < 128)
13218 {
13219 rtx unspec = gen_rtx_UNSPEC (V4SFmode, gen_rtvec (1, mem),
13220 UNSPEC_LOADU);
13221 emit_insn (gen_rtx_SET (reg, unspec));
13222 }
13223 else
13224 emit_insn (gen_rtx_SET (reg, mem));
13225
13226 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
13227
13228 cfa_offset -= GET_MODE_SIZE (V4SFmode);
13229 }
13230 }
13231
13232 /* Restore function stack, frame, and registers. */
13233
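/* Schematically, the common 64-bit epilogue produced here is roughly
	<SSE register restores via moves, if any>
	add	$LOCALS, %rsp	(or register restores via moves)
	pop	<callee-saved GPRs>
	pop	%rbp		(or leave)
	ret			(or ret $N when pops_args is used)
   with the eh_return, DRAP and SEH cases handled specially below.  */
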
13234 void
13235 ix86_expand_epilogue (int style)
13236 {
13237 struct machine_function *m = cfun->machine;
13238 struct machine_frame_state frame_state_save = m->fs;
13239 struct ix86_frame frame;
13240 bool restore_regs_via_mov;
13241 bool using_drap;
13242
13243 ix86_finalize_stack_realign_flags ();
13244 ix86_compute_frame_layout (&frame);
13245
13246 m->fs.sp_valid = (!frame_pointer_needed
13247 || (crtl->sp_is_unchanging
13248 && !stack_realign_fp));
13249 gcc_assert (!m->fs.sp_valid
13250 || m->fs.sp_offset == frame.stack_pointer_offset);
13251
13252 /* The FP must be valid if the frame pointer is present. */
13253 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
13254 gcc_assert (!m->fs.fp_valid
13255 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
13256
13257 /* We must have *some* valid pointer to the stack frame. */
13258 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
13259
13260 /* The DRAP is never valid at this point. */
13261 gcc_assert (!m->fs.drap_valid);
13262
13263 /* See the comment about red zone and frame
13264 pointer usage in ix86_expand_prologue. */
13265 if (frame_pointer_needed && frame.red_zone_size)
13266 emit_insn (gen_memory_blockage ());
13267
13268 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
13269 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
13270
13271 /* Determine the CFA offset of the end of the red-zone. */
13272 m->fs.red_zone_offset = 0;
13273 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
13274 {
13275 /* The red-zone begins below the return address. */
13276 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
13277
13278 /* When the register save area is in the aligned portion of
13279 the stack, determine the maximum runtime displacement that
13280 matches up with the aligned frame. */
13281 if (stack_realign_drap)
13282 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
13283 + UNITS_PER_WORD);
13284 }
13285
13286 /* Special care must be taken for the normal return case of a function
13287 using eh_return: the eax and edx registers are marked as saved, but
13288 not restored along this path. Adjust the save location to match. */
13289 if (crtl->calls_eh_return && style != 2)
13290 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
13291
13292 /* EH_RETURN requires the use of moves to function properly. */
13293 if (crtl->calls_eh_return)
13294 restore_regs_via_mov = true;
13295 /* SEH requires the use of pops to identify the epilogue. */
13296 else if (TARGET_SEH)
13297 restore_regs_via_mov = false;
13298 /* If we're only restoring one register and sp is not valid then
13299 use a move instruction to restore the register, since it's
13300 less work than reloading sp and popping the register. */
13301 else if (!m->fs.sp_valid && frame.nregs <= 1)
13302 restore_regs_via_mov = true;
13303 else if (TARGET_EPILOGUE_USING_MOVE
13304 && cfun->machine->use_fast_prologue_epilogue
13305 && (frame.nregs > 1
13306 || m->fs.sp_offset != frame.reg_save_offset))
13307 restore_regs_via_mov = true;
13308 else if (frame_pointer_needed
13309 && !frame.nregs
13310 && m->fs.sp_offset != frame.reg_save_offset)
13311 restore_regs_via_mov = true;
13312 else if (frame_pointer_needed
13313 && TARGET_USE_LEAVE
13314 && cfun->machine->use_fast_prologue_epilogue
13315 && frame.nregs == 1)
13316 restore_regs_via_mov = true;
13317 else
13318 restore_regs_via_mov = false;
13319
13320 if (restore_regs_via_mov || frame.nsseregs)
13321 {
13322 /* Ensure that the entire register save area is addressable via
13323 the stack pointer, if we will restore via sp. */
13324 if (TARGET_64BIT
13325 && m->fs.sp_offset > 0x7fffffff
13326 && !(m->fs.fp_valid || m->fs.drap_valid)
13327 && (frame.nsseregs + frame.nregs) != 0)
13328 {
13329 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13330 GEN_INT (m->fs.sp_offset
13331 - frame.sse_reg_save_offset),
13332 style,
13333 m->fs.cfa_reg == stack_pointer_rtx);
13334 }
13335 }
13336
13337 /* If there are any SSE registers to restore, then we have to do it
13338 via moves, since there's obviously no pop for SSE regs. */
13339 if (frame.nsseregs)
13340 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
13341 style == 2);
13342
13343 if (restore_regs_via_mov)
13344 {
13345 rtx t;
13346
13347 if (frame.nregs)
13348 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
13349
13350 /* eh_return epilogues need %ecx added to the stack pointer. */
13351 if (style == 2)
13352 {
13353 rtx sa = EH_RETURN_STACKADJ_RTX;
13354 rtx_insn *insn;
13355
13356 /* Stack align doesn't work with eh_return. */
13357 gcc_assert (!stack_realign_drap);
13358 /* Neither do regparm nested functions. */
13359 gcc_assert (!ix86_static_chain_on_stack);
13360
13361 if (frame_pointer_needed)
13362 {
13363 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
13364 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
13365 emit_insn (gen_rtx_SET (sa, t));
13366
13367 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
13368 insn = emit_move_insn (hard_frame_pointer_rtx, t);
13369
13370 /* Note that we use SA as a temporary CFA, as the return
13371 address is at the proper place relative to it. We
13372 pretend this happens at the FP restore insn because
13373 prior to this insn the FP would be stored at the wrong
13374 offset relative to SA, and after this insn we have no
13375 other reasonable register to use for the CFA. We don't
13376 bother resetting the CFA to the SP for the duration of
13377 the return insn. */
13378 add_reg_note (insn, REG_CFA_DEF_CFA,
13379 plus_constant (Pmode, sa, UNITS_PER_WORD));
13380 ix86_add_queued_cfa_restore_notes (insn);
13381 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
13382 RTX_FRAME_RELATED_P (insn) = 1;
13383
13384 m->fs.cfa_reg = sa;
13385 m->fs.cfa_offset = UNITS_PER_WORD;
13386 m->fs.fp_valid = false;
13387
13388 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
13389 const0_rtx, style, false);
13390 }
13391 else
13392 {
13393 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
13394 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
13395 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
13396 ix86_add_queued_cfa_restore_notes (insn);
13397
13398 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
13399 if (m->fs.cfa_offset != UNITS_PER_WORD)
13400 {
13401 m->fs.cfa_offset = UNITS_PER_WORD;
13402 add_reg_note (insn, REG_CFA_DEF_CFA,
13403 plus_constant (Pmode, stack_pointer_rtx,
13404 UNITS_PER_WORD));
13405 RTX_FRAME_RELATED_P (insn) = 1;
13406 }
13407 }
13408 m->fs.sp_offset = UNITS_PER_WORD;
13409 m->fs.sp_valid = true;
13410 }
13411 }
13412 else
13413 {
13414 /* SEH requires that the function end with (1) a stack adjustment
13415 if necessary, (2) a sequence of pops, and (3) a return or
13416 jump instruction. Prevent insns from the function body from
13417 being scheduled into this sequence. */
13418 if (TARGET_SEH)
13419 {
13420 /* Prevent a catch region from being adjacent to the standard
13421 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
13422 several other flags that would be interesting to test are
13423 yet set up. */
13424 if (flag_non_call_exceptions)
13425 emit_insn (gen_nops (const1_rtx));
13426 else
13427 emit_insn (gen_blockage ());
13428 }
13429
13430 /* First step is to deallocate the stack frame so that we can
13431 pop the registers. Also do it on SEH targets for a very large
13432 frame, as the emitted instructions aren't allowed by the ABI in
13433 epilogues. */
13434 if (!m->fs.sp_valid
13435 || (TARGET_SEH
13436 && (m->fs.sp_offset - frame.reg_save_offset
13437 >= SEH_MAX_FRAME_SIZE)))
13438 {
13439 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
13440 GEN_INT (m->fs.fp_offset
13441 - frame.reg_save_offset),
13442 style, false);
13443 }
13444 else if (m->fs.sp_offset != frame.reg_save_offset)
13445 {
13446 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13447 GEN_INT (m->fs.sp_offset
13448 - frame.reg_save_offset),
13449 style,
13450 m->fs.cfa_reg == stack_pointer_rtx);
13451 }
13452
13453 ix86_emit_restore_regs_using_pop ();
13454 }
13455
13456 /* If we used a frame pointer and haven't already got rid of it,
13457 then do so now. */
13458 if (m->fs.fp_valid)
13459 {
13460 /* If the stack pointer is valid and pointing at the frame
13461 pointer store address, then we only need a pop. */
13462 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
13463 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
13464 /* The leave instruction results in shorter dependency chains on
13465 CPUs that are able to grok it fast. */
13466 else if (TARGET_USE_LEAVE
13467 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
13468 || !cfun->machine->use_fast_prologue_epilogue)
13469 ix86_emit_leave ();
13470 else
13471 {
13472 pro_epilogue_adjust_stack (stack_pointer_rtx,
13473 hard_frame_pointer_rtx,
13474 const0_rtx, style, !using_drap);
13475 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
13476 }
13477 }
13478
13479 if (using_drap)
13480 {
13481 int param_ptr_offset = UNITS_PER_WORD;
13482 rtx_insn *insn;
13483
13484 gcc_assert (stack_realign_drap);
13485
13486 if (ix86_static_chain_on_stack)
13487 param_ptr_offset += UNITS_PER_WORD;
13488 if (!call_used_regs[REGNO (crtl->drap_reg)])
13489 param_ptr_offset += UNITS_PER_WORD;
13490
13491 insn = emit_insn (gen_rtx_SET
13492 (stack_pointer_rtx,
13493 gen_rtx_PLUS (Pmode,
13494 crtl->drap_reg,
13495 GEN_INT (-param_ptr_offset))));
13496 m->fs.cfa_reg = stack_pointer_rtx;
13497 m->fs.cfa_offset = param_ptr_offset;
13498 m->fs.sp_offset = param_ptr_offset;
13499 m->fs.realigned = false;
13500
13501 add_reg_note (insn, REG_CFA_DEF_CFA,
13502 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13503 GEN_INT (param_ptr_offset)));
13504 RTX_FRAME_RELATED_P (insn) = 1;
13505
13506 if (!call_used_regs[REGNO (crtl->drap_reg)])
13507 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
13508 }
13509
13510 /* At this point the stack pointer must be valid, and we must have
13511 restored all of the registers. We may not have deallocated the
13512 entire stack frame. We've delayed this until now because it may
13513 be possible to merge the local stack deallocation with the
13514 deallocation forced by ix86_static_chain_on_stack. */
13515 gcc_assert (m->fs.sp_valid);
13516 gcc_assert (!m->fs.fp_valid);
13517 gcc_assert (!m->fs.realigned);
13518 if (m->fs.sp_offset != UNITS_PER_WORD)
13519 {
13520 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13521 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
13522 style, true);
13523 }
13524 else
13525 ix86_add_queued_cfa_restore_notes (get_last_insn ());
13526
13527 /* Sibcall epilogues don't want a return instruction. */
13528 if (style == 0)
13529 {
13530 m->fs = frame_state_save;
13531 return;
13532 }
13533
13534 if (crtl->args.pops_args && crtl->args.size)
13535 {
13536 rtx popc = GEN_INT (crtl->args.pops_args);
13537
13538 /* i386 can only pop 64K bytes.  If asked to pop more, pop the return
13539 address, do an explicit add, and jump indirectly to the caller. */
13540
13541 if (crtl->args.pops_args >= 65536)
13542 {
13543 rtx ecx = gen_rtx_REG (SImode, CX_REG);
13544 rtx_insn *insn;
13545
13546 /* There is no "pascal" calling convention in any 64bit ABI. */
13547 gcc_assert (!TARGET_64BIT);
13548
13549 insn = emit_insn (gen_pop (ecx));
13550 m->fs.cfa_offset -= UNITS_PER_WORD;
13551 m->fs.sp_offset -= UNITS_PER_WORD;
13552
13553 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
13554 x = gen_rtx_SET (stack_pointer_rtx, x);
13555 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
13556 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
13557 RTX_FRAME_RELATED_P (insn) = 1;
13558
13559 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13560 popc, -1, true);
13561 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
13562 }
13563 else
13564 emit_jump_insn (gen_simple_return_pop_internal (popc));
13565 }
13566 else
13567 emit_jump_insn (gen_simple_return_internal ());
13568
13569 /* Restore the state back to the state from the prologue,
13570 so that it's correct for the next epilogue. */
13571 m->fs = frame_state_save;
13572 }
13573
13574 /* Reset from the function's potential modifications. */
13575
13576 static void
13577 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
13578 {
13579 if (pic_offset_table_rtx
13580 && !ix86_use_pseudo_pic_reg ())
13581 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
13582 #if TARGET_MACHO
13583 /* Mach-O doesn't support labels at the end of objects, so if
13584 it looks like we might want one, insert a NOP. */
13585 {
13586 rtx_insn *insn = get_last_insn ();
13587 rtx_insn *deleted_debug_label = NULL;
13588 while (insn
13589 && NOTE_P (insn)
13590 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
13591 {
13592 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
13593 notes only; instead set their CODE_LABEL_NUMBER to -1,
13594 otherwise there would be code generation differences
13595 between -g and -g0. */
13596 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
13597 deleted_debug_label = insn;
13598 insn = PREV_INSN (insn);
13599 }
13600 if (insn
13601 && (LABEL_P (insn)
13602 || (NOTE_P (insn)
13603 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
13604 fputs ("\tnop\n", file);
13605 else if (deleted_debug_label)
13606 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
13607 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
13608 CODE_LABEL_NUMBER (insn) = -1;
13609 }
13610 #endif
13611
13612 }
13613
13614 /* Return a scratch register to use in the split stack prologue. The
13615 split stack prologue is used for -fsplit-stack.  It consists of the first
13616 instructions in the function, emitted even before the regular prologue.
13617 The scratch register can be any caller-saved register which is not
13618 used for parameters or for the static chain. */
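/* Illustrative example: in 64-bit code this always yields %r11; for
   plain 32-bit code with no register parameters, no fastcall/thiscall
   attribute and no static chain it yields %ecx.  */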
13619
13620 static unsigned int
13621 split_stack_prologue_scratch_regno (void)
13622 {
13623 if (TARGET_64BIT)
13624 return R11_REG;
13625 else
13626 {
13627 bool is_fastcall, is_thiscall;
13628 int regparm;
13629
13630 is_fastcall = (lookup_attribute ("fastcall",
13631 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
13632 != NULL);
13633 is_thiscall = (lookup_attribute ("thiscall",
13634 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
13635 != NULL);
13636 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
13637
13638 if (is_fastcall)
13639 {
13640 if (DECL_STATIC_CHAIN (cfun->decl))
13641 {
13642 sorry ("-fsplit-stack does not support fastcall with "
13643 "nested function");
13644 return INVALID_REGNUM;
13645 }
13646 return AX_REG;
13647 }
13648 else if (is_thiscall)
13649 {
13650 if (!DECL_STATIC_CHAIN (cfun->decl))
13651 return DX_REG;
13652 return AX_REG;
13653 }
13654 else if (regparm < 3)
13655 {
13656 if (!DECL_STATIC_CHAIN (cfun->decl))
13657 return CX_REG;
13658 else
13659 {
13660 if (regparm >= 2)
13661 {
13662 sorry ("-fsplit-stack does not support 2 register "
13663 "parameters for a nested function");
13664 return INVALID_REGNUM;
13665 }
13666 return DX_REG;
13667 }
13668 }
13669 else
13670 {
13671 /* FIXME: We could make this work by pushing a register
13672 around the addition and comparison. */
13673 sorry ("-fsplit-stack does not support 3 register parameters");
13674 return INVALID_REGNUM;
13675 }
13676 }
13677 }
13678
13679 /* A SYMBOL_REF for the function which allocates new stack space for
13680 -fsplit-stack. */
13681
13682 static GTY(()) rtx split_stack_fn;
13683
13684 /* A SYMBOL_REF for the __morestack function to use when using the
13685 large model. */
13686
13687 static GTY(()) rtx split_stack_fn_large;
13688
13689 /* Handle -fsplit-stack. These are the first instructions in the
13690 function, even before the regular prologue. */
13691
13692 void
13693 ix86_expand_split_stack_prologue (void)
13694 {
13695 struct ix86_frame frame;
13696 HOST_WIDE_INT allocate;
13697 unsigned HOST_WIDE_INT args_size;
13698 rtx_code_label *label;
13699 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
13700 rtx scratch_reg = NULL_RTX;
13701 rtx_code_label *varargs_label = NULL;
13702 rtx fn;
13703
13704 gcc_assert (flag_split_stack && reload_completed);
13705
13706 ix86_finalize_stack_realign_flags ();
13707 ix86_compute_frame_layout (&frame);
13708 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
13709
13710 /* This is the label we will branch to if we have enough stack
13711 space. We expect the basic block reordering pass to reverse this
13712 branch if optimizing, so that we branch in the unlikely case. */
13713 label = gen_label_rtx ();
13714
13715 /* We need to compare the stack pointer minus the frame size with
13716 the stack boundary in the TCB. The stack boundary always gives
13717 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
13718 can compare directly. Otherwise we need to do an addition. */
13719
13720 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13721 UNSPEC_STACK_CHECK);
13722 limit = gen_rtx_CONST (Pmode, limit);
13723 limit = gen_rtx_MEM (Pmode, limit);
13724 if (allocate < SPLIT_STACK_AVAILABLE)
13725 current = stack_pointer_rtx;
13726 else
13727 {
13728 unsigned int scratch_regno;
13729 rtx offset;
13730
13731 /* We need a scratch register to hold the stack pointer minus
13732 the required frame size. Since this is the very start of the
13733 function, the scratch register can be any caller-saved
13734 register which is not used for parameters. */
13735 offset = GEN_INT (- allocate);
13736 scratch_regno = split_stack_prologue_scratch_regno ();
13737 if (scratch_regno == INVALID_REGNUM)
13738 return;
13739 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
13740 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
13741 {
13742 /* We don't use ix86_gen_add3 in this case because it will
13743 want to split to lea, but when not optimizing the insn
13744 will not be split after this point. */
13745 emit_insn (gen_rtx_SET (scratch_reg,
13746 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13747 offset)));
13748 }
13749 else
13750 {
13751 emit_move_insn (scratch_reg, offset);
13752 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
13753 stack_pointer_rtx));
13754 }
13755 current = scratch_reg;
13756 }
13757
13758 ix86_expand_branch (GEU, current, limit, label);
13759 jump_insn = get_last_insn ();
13760 JUMP_LABEL (jump_insn) = label;
13761
13762 /* Mark the jump as very likely to be taken. */
13763 add_int_reg_note (jump_insn, REG_BR_PROB,
13764 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
13765
13766 if (split_stack_fn == NULL_RTX)
13767 {
13768 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
13769 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
13770 }
13771 fn = split_stack_fn;
13772
13773 /* Get more stack space. We pass in the desired stack space and the
13774 size of the arguments to copy to the new stack. In 32-bit mode
13775 we push the parameters; __morestack will return on a new stack
13776 anyhow. In 64-bit mode we pass the parameters in r10 and
13777 r11. */
13778 allocate_rtx = GEN_INT (allocate);
13779 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
13780 call_fusage = NULL_RTX;
13781 if (TARGET_64BIT)
13782 {
13783 rtx reg10, reg11;
13784
13785 reg10 = gen_rtx_REG (Pmode, R10_REG);
13786 reg11 = gen_rtx_REG (Pmode, R11_REG);
13787
13788 /* If this function uses a static chain, it will be in %r10.
13789 Preserve it across the call to __morestack. */
13790 if (DECL_STATIC_CHAIN (cfun->decl))
13791 {
13792 rtx rax;
13793
13794 rax = gen_rtx_REG (word_mode, AX_REG);
13795 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
13796 use_reg (&call_fusage, rax);
13797 }
13798
13799 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
13800 && !TARGET_PECOFF)
13801 {
13802 HOST_WIDE_INT argval;
13803
13804 gcc_assert (Pmode == DImode);
13805 /* When using the large model we need to load the address
13806 into a register, and we've run out of registers. So we
13807 switch to a different calling convention, and we call a
13808 different function: __morestack_large_model.  We pass the
13809 argument size in the upper 32 bits of r10 and pass the
13810 frame size in the lower 32 bits. */
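/* Illustrative values: with args_size == 0x20 and allocate == 0x1000,
   the argval computed below is 0x0000002000001000, i.e. the argument
   size ends up in bits 32..63 of %r10 and the frame size in bits
   0..31.  */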
13811 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
13812 gcc_assert ((args_size & 0xffffffff) == args_size);
13813
13814 if (split_stack_fn_large == NULL_RTX)
13815 {
13816 split_stack_fn_large =
13817 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
13818 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
13819 }
13820 if (ix86_cmodel == CM_LARGE_PIC)
13821 {
13822 rtx_code_label *label;
13823 rtx x;
13824
13825 label = gen_label_rtx ();
13826 emit_label (label);
13827 LABEL_PRESERVE_P (label) = 1;
13828 emit_insn (gen_set_rip_rex64 (reg10, label));
13829 emit_insn (gen_set_got_offset_rex64 (reg11, label));
13830 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
13831 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
13832 UNSPEC_GOT);
13833 x = gen_rtx_CONST (Pmode, x);
13834 emit_move_insn (reg11, x);
13835 x = gen_rtx_PLUS (Pmode, reg10, reg11);
13836 x = gen_const_mem (Pmode, x);
13837 emit_move_insn (reg11, x);
13838 }
13839 else
13840 emit_move_insn (reg11, split_stack_fn_large);
13841
13842 fn = reg11;
13843
13844 argval = ((args_size << 16) << 16) + allocate;
13845 emit_move_insn (reg10, GEN_INT (argval));
13846 }
13847 else
13848 {
13849 emit_move_insn (reg10, allocate_rtx);
13850 emit_move_insn (reg11, GEN_INT (args_size));
13851 use_reg (&call_fusage, reg11);
13852 }
13853
13854 use_reg (&call_fusage, reg10);
13855 }
13856 else
13857 {
13858 emit_insn (gen_push (GEN_INT (args_size)));
13859 emit_insn (gen_push (allocate_rtx));
13860 }
13861 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
13862 GEN_INT (UNITS_PER_WORD), constm1_rtx,
13863 NULL_RTX, false);
13864 add_function_usage_to (call_insn, call_fusage);
13865
13866 /* In order to make call/return prediction work right, we now need
13867 to execute a return instruction. See
13868 libgcc/config/i386/morestack.S for the details on how this works.
13869
13870 For flow purposes gcc must not see this as a return
13871 instruction--we need control flow to continue at the subsequent
13872 label. Therefore, we use an unspec. */
13873 gcc_assert (crtl->args.pops_args < 65536);
13874 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
13875
13876 /* If we are in 64-bit mode and this function uses a static chain,
13877 we saved %r10 in %rax before calling __morestack. */
13878 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
13879 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
13880 gen_rtx_REG (word_mode, AX_REG));
13881
13882 /* If this function calls va_start, we need to store a pointer to
13883 the arguments on the old stack, because they may not have been
13884 all copied to the new stack. At this point the old stack can be
13885 found at the frame pointer value used by __morestack, because
13886 __morestack has set that up before calling back to us. Here we
13887 store that pointer in a scratch register, and in
13888 ix86_expand_prologue we store the scratch register in a stack
13889 slot. */
13890 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
13891 {
13892 unsigned int scratch_regno;
13893 rtx frame_reg;
13894 int words;
13895
13896 scratch_regno = split_stack_prologue_scratch_regno ();
13897 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
13898 frame_reg = gen_rtx_REG (Pmode, BP_REG);
13899
13900 /* 64-bit:
13901 fp -> old fp value
13902 return address within this function
13903 return address of caller of this function
13904 stack arguments
13905 So we add three words to get to the stack arguments.
13906
13907 32-bit:
13908 fp -> old fp value
13909 return address within this function
13910 first argument to __morestack
13911 second argument to __morestack
13912 return address of caller of this function
13913 stack arguments
13914 So we add five words to get to the stack arguments.
13915 */
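/* Illustrative arithmetic: with UNITS_PER_WORD of 8 in 64-bit mode the
   offset added below is 3 * 8 = 24 bytes; with UNITS_PER_WORD of 4 in
   32-bit mode it is 5 * 4 = 20 bytes.  */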
13916 words = TARGET_64BIT ? 3 : 5;
13917 emit_insn (gen_rtx_SET (scratch_reg,
13918 gen_rtx_PLUS (Pmode, frame_reg,
13919 GEN_INT (words * UNITS_PER_WORD))));
13920
13921 varargs_label = gen_label_rtx ();
13922 emit_jump_insn (gen_jump (varargs_label));
13923 JUMP_LABEL (get_last_insn ()) = varargs_label;
13924
13925 emit_barrier ();
13926 }
13927
13928 emit_label (label);
13929 LABEL_NUSES (label) = 1;
13930
13931 /* If this function calls va_start, we now have to set the scratch
13932 register for the case where we do not call __morestack. In this
13933 case we need to set it based on the stack pointer. */
13934 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
13935 {
13936 emit_insn (gen_rtx_SET (scratch_reg,
13937 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13938 GEN_INT (UNITS_PER_WORD))));
13939
13940 emit_label (varargs_label);
13941 LABEL_NUSES (varargs_label) = 1;
13942 }
13943 }
13944
13945 /* We may have to tell the dataflow pass that the split stack prologue
13946 is initializing a scratch register. */
13947
13948 static void
13949 ix86_live_on_entry (bitmap regs)
13950 {
13951 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
13952 {
13953 gcc_assert (flag_split_stack);
13954 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
13955 }
13956 }
13957 \f
13958 /* Extract the parts of an RTL expression that is a valid memory address
13959 for an instruction. Return 0 if the structure of the address is
13960 grossly off. Return -1 if the address contains ASHIFT, so it is not
13961 strictly valid, but is still used for computing the length of the lea instruction. */
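/* Illustrative example: decomposing the canonical address
   (plus:SI (plus:SI (mult:SI (reg:SI bx) (const_int 4)) (reg:SI ax))
   (const_int 16)) fills OUT with base = ax, index = bx, scale = 4,
   disp = (const_int 16), seg = SEG_DEFAULT, and the function
   returns 1.  The register names are only for illustration.  */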
13962
13963 int
13964 ix86_decompose_address (rtx addr, struct ix86_address *out)
13965 {
13966 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
13967 rtx base_reg, index_reg;
13968 HOST_WIDE_INT scale = 1;
13969 rtx scale_rtx = NULL_RTX;
13970 rtx tmp;
13971 int retval = 1;
13972 enum ix86_address_seg seg = SEG_DEFAULT;
13973
13974 /* Allow zero-extended SImode addresses;
13975 they will be emitted with the addr32 prefix. */
13976 if (TARGET_64BIT && GET_MODE (addr) == DImode)
13977 {
13978 if (GET_CODE (addr) == ZERO_EXTEND
13979 && GET_MODE (XEXP (addr, 0)) == SImode)
13980 {
13981 addr = XEXP (addr, 0);
13982 if (CONST_INT_P (addr))
13983 return 0;
13984 }
13985 else if (GET_CODE (addr) == AND
13986 && const_32bit_mask (XEXP (addr, 1), DImode))
13987 {
13988 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
13989 if (addr == NULL_RTX)
13990 return 0;
13991
13992 if (CONST_INT_P (addr))
13993 return 0;
13994 }
13995 }
13996
13997 /* Allow SImode subregs of DImode addresses;
13998 they will be emitted with the addr32 prefix. */
13999 if (TARGET_64BIT && GET_MODE (addr) == SImode)
14000 {
14001 if (SUBREG_P (addr)
14002 && GET_MODE (SUBREG_REG (addr)) == DImode)
14003 {
14004 addr = SUBREG_REG (addr);
14005 if (CONST_INT_P (addr))
14006 return 0;
14007 }
14008 }
14009
14010 if (REG_P (addr))
14011 base = addr;
14012 else if (SUBREG_P (addr))
14013 {
14014 if (REG_P (SUBREG_REG (addr)))
14015 base = addr;
14016 else
14017 return 0;
14018 }
14019 else if (GET_CODE (addr) == PLUS)
14020 {
14021 rtx addends[4], op;
14022 int n = 0, i;
14023
14024 op = addr;
14025 do
14026 {
14027 if (n >= 4)
14028 return 0;
14029 addends[n++] = XEXP (op, 1);
14030 op = XEXP (op, 0);
14031 }
14032 while (GET_CODE (op) == PLUS);
14033 if (n >= 4)
14034 return 0;
14035 addends[n] = op;
14036
14037 for (i = n; i >= 0; --i)
14038 {
14039 op = addends[i];
14040 switch (GET_CODE (op))
14041 {
14042 case MULT:
14043 if (index)
14044 return 0;
14045 index = XEXP (op, 0);
14046 scale_rtx = XEXP (op, 1);
14047 break;
14048
14049 case ASHIFT:
14050 if (index)
14051 return 0;
14052 index = XEXP (op, 0);
14053 tmp = XEXP (op, 1);
14054 if (!CONST_INT_P (tmp))
14055 return 0;
14056 scale = INTVAL (tmp);
14057 if ((unsigned HOST_WIDE_INT) scale > 3)
14058 return 0;
14059 scale = 1 << scale;
14060 break;
14061
14062 case ZERO_EXTEND:
14063 op = XEXP (op, 0);
14064 if (GET_CODE (op) != UNSPEC)
14065 return 0;
14066 /* FALLTHRU */
14067
14068 case UNSPEC:
14069 if (XINT (op, 1) == UNSPEC_TP
14070 && TARGET_TLS_DIRECT_SEG_REFS
14071 && seg == SEG_DEFAULT)
14072 seg = DEFAULT_TLS_SEG_REG;
14073 else
14074 return 0;
14075 break;
14076
14077 case SUBREG:
14078 if (!REG_P (SUBREG_REG (op)))
14079 return 0;
14080 /* FALLTHRU */
14081
14082 case REG:
14083 if (!base)
14084 base = op;
14085 else if (!index)
14086 index = op;
14087 else
14088 return 0;
14089 break;
14090
14091 case CONST:
14092 case CONST_INT:
14093 case SYMBOL_REF:
14094 case LABEL_REF:
14095 if (disp)
14096 return 0;
14097 disp = op;
14098 break;
14099
14100 default:
14101 return 0;
14102 }
14103 }
14104 }
14105 else if (GET_CODE (addr) == MULT)
14106 {
14107 index = XEXP (addr, 0); /* index*scale */
14108 scale_rtx = XEXP (addr, 1);
14109 }
14110 else if (GET_CODE (addr) == ASHIFT)
14111 {
14112 /* We're called for lea too, which implements ashift on occasion. */
14113 index = XEXP (addr, 0);
14114 tmp = XEXP (addr, 1);
14115 if (!CONST_INT_P (tmp))
14116 return 0;
14117 scale = INTVAL (tmp);
14118 if ((unsigned HOST_WIDE_INT) scale > 3)
14119 return 0;
14120 scale = 1 << scale;
14121 retval = -1;
14122 }
14123 else
14124 disp = addr; /* displacement */
14125
14126 if (index)
14127 {
14128 if (REG_P (index))
14129 ;
14130 else if (SUBREG_P (index)
14131 && REG_P (SUBREG_REG (index)))
14132 ;
14133 else
14134 return 0;
14135 }
14136
14137 /* Extract the integral value of scale. */
14138 if (scale_rtx)
14139 {
14140 if (!CONST_INT_P (scale_rtx))
14141 return 0;
14142 scale = INTVAL (scale_rtx);
14143 }
14144
14145 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
14146 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
14147
14148 /* Avoid useless 0 displacement. */
14149 if (disp == const0_rtx && (base || index))
14150 disp = NULL_RTX;
14151
14152 /* Allow arg pointer and stack pointer as index if there is no scaling. */
14153 if (base_reg && index_reg && scale == 1
14154 && (index_reg == arg_pointer_rtx
14155 || index_reg == frame_pointer_rtx
14156 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
14157 {
14158 std::swap (base, index);
14159 std::swap (base_reg, index_reg);
14160 }
14161
14162 /* Special case: %ebp cannot be encoded as a base without a displacement.
14163 Similarly %r13. */
14164 if (!disp
14165 && base_reg
14166 && (base_reg == hard_frame_pointer_rtx
14167 || base_reg == frame_pointer_rtx
14168 || base_reg == arg_pointer_rtx
14169 || (REG_P (base_reg)
14170 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
14171 || REGNO (base_reg) == R13_REG))))
14172 disp = const0_rtx;
14173
14174 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
14175 Avoid this by transforming to [%esi+0].
14176 Reload calls address legitimization without cfun defined, so we need
14177 to test cfun for being non-NULL. */
14178 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
14179 && base_reg && !index_reg && !disp
14180 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
14181 disp = const0_rtx;
14182
14183 /* Special case: encode reg+reg instead of reg*2. */
14184 if (!base && index && scale == 2)
14185 base = index, base_reg = index_reg, scale = 1;
14186
14187 /* Special case: scaling cannot be encoded without base or displacement. */
14188 if (!base && !disp && index && scale != 1)
14189 disp = const0_rtx;
14190
14191 out->base = base;
14192 out->index = index;
14193 out->disp = disp;
14194 out->scale = scale;
14195 out->seg = seg;
14196
14197 return retval;
14198 }
14199 \f
14200 /* Return cost of the memory address x.
14201 For i386, it is better to use a complex address than to let gcc copy
14202 the address into a reg and make a new pseudo.  But not if the address
14203 requires two regs - that would mean more pseudos with longer
14204 lifetimes. */
14205 static int
14206 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
14207 {
14208 struct ix86_address parts;
14209 int cost = 1;
14210 int ok = ix86_decompose_address (x, &parts);
14211
14212 gcc_assert (ok);
14213
14214 if (parts.base && SUBREG_P (parts.base))
14215 parts.base = SUBREG_REG (parts.base);
14216 if (parts.index && SUBREG_P (parts.index))
14217 parts.index = SUBREG_REG (parts.index);
14218
14219 /* Attempt to minimize the number of registers in the address by increasing
14220 the address cost for each register used.  We don't increase the address cost
14221 for "pic_offset_table_rtx".  When a memory operand using "pic_offset_table_rtx"
14222 is not invariant itself, it most likely means that the base or index is not
14223 invariant.  Therefore only "pic_offset_table_rtx" could be hoisted out,
14224 which is not profitable for x86. */
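/* Illustrative example: under the rules below, an address whose base
   and index are both pseudo registers (and neither is the PIC
   register) is charged 1 + 1 + 1 = 3, while an address using a single
   hard-register base keeps the initial cost of 1 (ignoring the K6
   penalty further down).  */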
14225 if (parts.base
14226 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
14227 && (current_pass->type == GIMPLE_PASS
14228 || !pic_offset_table_rtx
14229 || !REG_P (parts.base)
14230 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
14231 cost++;
14232
14233 if (parts.index
14234 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
14235 && (current_pass->type == GIMPLE_PASS
14236 || !pic_offset_table_rtx
14237 || !REG_P (parts.index)
14238 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
14239 cost++;
14240
14241 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
14242 since its predecode logic can't detect the length of instructions
14243 and it degenerates to vector decoding.  Increase the cost of such
14244 addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
14245 to split such addresses or even refuse such addresses at all.
14246
14247 The following addressing modes are affected:
14248 [base+scale*index]
14249 [scale*index+disp]
14250 [base+index]
14251
14252 The first and last case may be avoidable by explicitly coding the zero in
14253 the memory address, but I don't have an AMD-K6 machine handy to check this
14254 theory. */
14255
14256 if (TARGET_K6
14257 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
14258 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
14259 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
14260 cost += 10;
14261
14262 return cost;
14263 }
14264 \f
14265 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
14266 this is used to form addresses to local data when -fPIC is in
14267 use. */
14268
14269 static bool
14270 darwin_local_data_pic (rtx disp)
14271 {
14272 return (GET_CODE (disp) == UNSPEC
14273 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
14274 }
14275
14276 /* Determine if a given RTX is a valid constant. We already know this
14277 satisfies CONSTANT_P. */
14278
14279 static bool
14280 ix86_legitimate_constant_p (machine_mode, rtx x)
14281 {
14282 /* Pointer bounds constants are not valid. */
14283 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
14284 return false;
14285
14286 switch (GET_CODE (x))
14287 {
14288 case CONST:
14289 x = XEXP (x, 0);
14290
14291 if (GET_CODE (x) == PLUS)
14292 {
14293 if (!CONST_INT_P (XEXP (x, 1)))
14294 return false;
14295 x = XEXP (x, 0);
14296 }
14297
14298 if (TARGET_MACHO && darwin_local_data_pic (x))
14299 return true;
14300
14301 /* Only some unspecs are valid as "constants". */
14302 if (GET_CODE (x) == UNSPEC)
14303 switch (XINT (x, 1))
14304 {
14305 case UNSPEC_GOT:
14306 case UNSPEC_GOTOFF:
14307 case UNSPEC_PLTOFF:
14308 return TARGET_64BIT;
14309 case UNSPEC_TPOFF:
14310 case UNSPEC_NTPOFF:
14311 x = XVECEXP (x, 0, 0);
14312 return (GET_CODE (x) == SYMBOL_REF
14313 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
14314 case UNSPEC_DTPOFF:
14315 x = XVECEXP (x, 0, 0);
14316 return (GET_CODE (x) == SYMBOL_REF
14317 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
14318 default:
14319 return false;
14320 }
14321
14322 /* We must have drilled down to a symbol. */
14323 if (GET_CODE (x) == LABEL_REF)
14324 return true;
14325 if (GET_CODE (x) != SYMBOL_REF)
14326 return false;
14327 /* FALLTHRU */
14328
14329 case SYMBOL_REF:
14330 /* TLS symbols are never valid. */
14331 if (SYMBOL_REF_TLS_MODEL (x))
14332 return false;
14333
14334 /* DLLIMPORT symbols are never valid. */
14335 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14336 && SYMBOL_REF_DLLIMPORT_P (x))
14337 return false;
14338
14339 #if TARGET_MACHO
14340 /* mdynamic-no-pic */
14341 if (MACHO_DYNAMIC_NO_PIC_P)
14342 return machopic_symbol_defined_p (x);
14343 #endif
14344 break;
14345
14346 case CONST_WIDE_INT:
14347 if (!TARGET_64BIT && !standard_sse_constant_p (x))
14348 return false;
14349 break;
14350
14351 case CONST_VECTOR:
14352 if (!standard_sse_constant_p (x))
14353 return false;
14354
14355 default:
14356 break;
14357 }
14358
14359 /* Otherwise we handle everything else in the move patterns. */
14360 return true;
14361 }
14362
14363 /* Determine if it's legal to put X into the constant pool. This
14364 is not possible for the address of thread-local symbols, which
14365 is checked above. */
14366
14367 static bool
14368 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
14369 {
14370 /* We can always put integral constants and vectors in memory. */
14371 switch (GET_CODE (x))
14372 {
14373 case CONST_INT:
14374 case CONST_WIDE_INT:
14375 case CONST_DOUBLE:
14376 case CONST_VECTOR:
14377 return false;
14378
14379 default:
14380 break;
14381 }
14382 return !ix86_legitimate_constant_p (mode, x);
14383 }
14384
14385 /* Nonzero if the symbol is marked as dllimport, or as a stub variable,
14386 otherwise zero. */
14387
14388 static bool
14389 is_imported_p (rtx x)
14390 {
14391 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
14392 || GET_CODE (x) != SYMBOL_REF)
14393 return false;
14394
14395 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
14396 }
14397
14398
14399 /* Nonzero if the constant value X is a legitimate general operand
14400 when generating PIC code. It is given that flag_pic is on and
14401 that X satisfies CONSTANT_P. */
14402
14403 bool
14404 legitimate_pic_operand_p (rtx x)
14405 {
14406 rtx inner;
14407
14408 switch (GET_CODE (x))
14409 {
14410 case CONST:
14411 inner = XEXP (x, 0);
14412 if (GET_CODE (inner) == PLUS
14413 && CONST_INT_P (XEXP (inner, 1)))
14414 inner = XEXP (inner, 0);
14415
14416 /* Only some unspecs are valid as "constants". */
14417 if (GET_CODE (inner) == UNSPEC)
14418 switch (XINT (inner, 1))
14419 {
14420 case UNSPEC_GOT:
14421 case UNSPEC_GOTOFF:
14422 case UNSPEC_PLTOFF:
14423 return TARGET_64BIT;
14424 case UNSPEC_TPOFF:
14425 x = XVECEXP (inner, 0, 0);
14426 return (GET_CODE (x) == SYMBOL_REF
14427 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
14428 case UNSPEC_MACHOPIC_OFFSET:
14429 return legitimate_pic_address_disp_p (x);
14430 default:
14431 return false;
14432 }
14433 /* FALLTHRU */
14434
14435 case SYMBOL_REF:
14436 case LABEL_REF:
14437 return legitimate_pic_address_disp_p (x);
14438
14439 default:
14440 return true;
14441 }
14442 }
14443
14444 /* Determine if a given CONST RTX is a valid memory displacement
14445 in PIC mode. */
14446
14447 bool
14448 legitimate_pic_address_disp_p (rtx disp)
14449 {
14450 bool saw_plus;
14451
14452 /* In 64bit mode we can allow direct addresses of symbols and labels
14453 when they are not dynamic symbols. */
14454 if (TARGET_64BIT)
14455 {
14456 rtx op0 = disp, op1;
14457
14458 switch (GET_CODE (disp))
14459 {
14460 case LABEL_REF:
14461 return true;
14462
14463 case CONST:
14464 if (GET_CODE (XEXP (disp, 0)) != PLUS)
14465 break;
14466 op0 = XEXP (XEXP (disp, 0), 0);
14467 op1 = XEXP (XEXP (disp, 0), 1);
14468 if (!CONST_INT_P (op1)
14469 || INTVAL (op1) >= 16*1024*1024
14470 || INTVAL (op1) < -16*1024*1024)
14471 break;
14472 if (GET_CODE (op0) == LABEL_REF)
14473 return true;
14474 if (GET_CODE (op0) == CONST
14475 && GET_CODE (XEXP (op0, 0)) == UNSPEC
14476 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
14477 return true;
14478 if (GET_CODE (op0) == UNSPEC
14479 && XINT (op0, 1) == UNSPEC_PCREL)
14480 return true;
14481 if (GET_CODE (op0) != SYMBOL_REF)
14482 break;
14483 /* FALLTHRU */
14484
14485 case SYMBOL_REF:
14486 /* TLS references should always be enclosed in UNSPEC.
14487 The dllimported symbol always needs to be resolved. */
14488 if (SYMBOL_REF_TLS_MODEL (op0)
14489 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
14490 return false;
14491
14492 if (TARGET_PECOFF)
14493 {
14494 if (is_imported_p (op0))
14495 return true;
14496
14497 if (SYMBOL_REF_FAR_ADDR_P (op0)
14498 || !SYMBOL_REF_LOCAL_P (op0))
14499 break;
14500
14501 /* Function symbols need to be resolved only for
14502 the large model.
14503 For the small model we don't need to resolve anything
14504 here. */
14505 if ((ix86_cmodel != CM_LARGE_PIC
14506 && SYMBOL_REF_FUNCTION_P (op0))
14507 || ix86_cmodel == CM_SMALL_PIC)
14508 return true;
14509 /* Non-external symbols don't need to be resolved for
14510 the large and medium models. */
14511 if ((ix86_cmodel == CM_LARGE_PIC
14512 || ix86_cmodel == CM_MEDIUM_PIC)
14513 && !SYMBOL_REF_EXTERNAL_P (op0))
14514 return true;
14515 }
14516 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
14517 && (SYMBOL_REF_LOCAL_P (op0)
14518 || (HAVE_LD_PIE_COPYRELOC
14519 && flag_pie
14520 && !SYMBOL_REF_WEAK (op0)
14521 && !SYMBOL_REF_FUNCTION_P (op0)))
14522 && ix86_cmodel != CM_LARGE_PIC)
14523 return true;
14524 break;
14525
14526 default:
14527 break;
14528 }
14529 }
14530 if (GET_CODE (disp) != CONST)
14531 return false;
14532 disp = XEXP (disp, 0);
14533
14534 if (TARGET_64BIT)
14535 {
14536 /* It is unsafe to allow PLUS expressions; they would exceed the limit on
14537 the allowed distance of GOT references.  We should not need these anyway. */
14538 if (GET_CODE (disp) != UNSPEC
14539 || (XINT (disp, 1) != UNSPEC_GOTPCREL
14540 && XINT (disp, 1) != UNSPEC_GOTOFF
14541 && XINT (disp, 1) != UNSPEC_PCREL
14542 && XINT (disp, 1) != UNSPEC_PLTOFF))
14543 return false;
14544
14545 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
14546 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
14547 return false;
14548 return true;
14549 }
14550
14551 saw_plus = false;
14552 if (GET_CODE (disp) == PLUS)
14553 {
14554 if (!CONST_INT_P (XEXP (disp, 1)))
14555 return false;
14556 disp = XEXP (disp, 0);
14557 saw_plus = true;
14558 }
14559
14560 if (TARGET_MACHO && darwin_local_data_pic (disp))
14561 return true;
14562
14563 if (GET_CODE (disp) != UNSPEC)
14564 return false;
14565
14566 switch (XINT (disp, 1))
14567 {
14568 case UNSPEC_GOT:
14569 if (saw_plus)
14570 return false;
14571 /* We need to check for both symbols and labels because VxWorks loads
14572 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
14573 details. */
14574 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
14575 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
14576 case UNSPEC_GOTOFF:
14577 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
14578 While the ABI also specifies a 32bit relocation, we don't produce it in
14579 the small PIC model at all. */
14580 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
14581 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
14582 && !TARGET_64BIT)
14583 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
14584 return false;
14585 case UNSPEC_GOTTPOFF:
14586 case UNSPEC_GOTNTPOFF:
14587 case UNSPEC_INDNTPOFF:
14588 if (saw_plus)
14589 return false;
14590 disp = XVECEXP (disp, 0, 0);
14591 return (GET_CODE (disp) == SYMBOL_REF
14592 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
14593 case UNSPEC_NTPOFF:
14594 disp = XVECEXP (disp, 0, 0);
14595 return (GET_CODE (disp) == SYMBOL_REF
14596 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
14597 case UNSPEC_DTPOFF:
14598 disp = XVECEXP (disp, 0, 0);
14599 return (GET_CODE (disp) == SYMBOL_REF
14600 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
14601 }
14602
14603 return false;
14604 }
14605
14606 /* Determine if OP is a suitable RTX for an address register.
14607 Return the naked register if a register or a register subreg is
14608 found, otherwise return NULL_RTX. */
14609
14610 static rtx
14611 ix86_validate_address_register (rtx op)
14612 {
14613 machine_mode mode = GET_MODE (op);
14614
14615 /* Only SImode or DImode registers can form the address. */
14616 if (mode != SImode && mode != DImode)
14617 return NULL_RTX;
14618
14619 if (REG_P (op))
14620 return op;
14621 else if (SUBREG_P (op))
14622 {
14623 rtx reg = SUBREG_REG (op);
14624
14625 if (!REG_P (reg))
14626 return NULL_RTX;
14627
14628 mode = GET_MODE (reg);
14629
14630 /* Don't allow SUBREGs that span more than a word. It can
14631 lead to spill failures when the register is one word out
14632 of a two word structure. */
14633 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
14634 return NULL_RTX;
14635
14636 /* Allow only SUBREGs of non-eliminable hard registers. */
14637 if (register_no_elim_operand (reg, mode))
14638 return reg;
14639 }
14640
14641 /* Op is not a register. */
14642 return NULL_RTX;
14643 }
14644
14645 /* Recognizes RTL expressions that are valid memory addresses for an
14646 instruction. The MODE argument is the machine mode for the MEM
14647 expression that wants to use this address.
14648
14649 It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
14650 convert common non-canonical forms to canonical form so that they will
14651 be recognized. */
14652
14653 static bool
14654 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
14655 {
14656 struct ix86_address parts;
14657 rtx base, index, disp;
14658 HOST_WIDE_INT scale;
14659 enum ix86_address_seg seg;
14660
14661 if (ix86_decompose_address (addr, &parts) <= 0)
14662 /* Decomposition failed. */
14663 return false;
14664
14665 base = parts.base;
14666 index = parts.index;
14667 disp = parts.disp;
14668 scale = parts.scale;
14669 seg = parts.seg;
14670
14671 /* Validate base register. */
14672 if (base)
14673 {
14674 rtx reg = ix86_validate_address_register (base);
14675
14676 if (reg == NULL_RTX)
14677 return false;
14678
14679 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
14680 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
14681 /* Base is not valid. */
14682 return false;
14683 }
14684
14685 /* Validate index register. */
14686 if (index)
14687 {
14688 rtx reg = ix86_validate_address_register (index);
14689
14690 if (reg == NULL_RTX)
14691 return false;
14692
14693 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
14694 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
14695 /* Index is not valid. */
14696 return false;
14697 }
14698
14699 /* Index and base should have the same mode. */
14700 if (base && index
14701 && GET_MODE (base) != GET_MODE (index))
14702 return false;
14703
14704 /* Address override works only on the (%reg) part of %fs:(%reg). */
14705 if (seg != SEG_DEFAULT
14706 && ((base && GET_MODE (base) != word_mode)
14707 || (index && GET_MODE (index) != word_mode)))
14708 return false;
14709
14710 /* Validate scale factor. */
14711 if (scale != 1)
14712 {
14713 if (!index)
14714 /* Scale without index. */
14715 return false;
14716
14717 if (scale != 2 && scale != 4 && scale != 8)
14718 /* Scale is not a valid multiplier. */
14719 return false;
14720 }
14721
14722 /* Validate displacement. */
14723 if (disp)
14724 {
14725 if (GET_CODE (disp) == CONST
14726 && GET_CODE (XEXP (disp, 0)) == UNSPEC
14727 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
14728 switch (XINT (XEXP (disp, 0), 1))
14729 {
14730 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
14731 used.  While the ABI also specifies 32bit relocations, we don't produce
14732 them at all and use IP-relative addressing instead. */
14733 case UNSPEC_GOT:
14734 case UNSPEC_GOTOFF:
14735 gcc_assert (flag_pic);
14736 if (!TARGET_64BIT)
14737 goto is_legitimate_pic;
14738
14739 /* 64bit address unspec. */
14740 return false;
14741
14742 case UNSPEC_GOTPCREL:
14743 case UNSPEC_PCREL:
14744 gcc_assert (flag_pic);
14745 goto is_legitimate_pic;
14746
14747 case UNSPEC_GOTTPOFF:
14748 case UNSPEC_GOTNTPOFF:
14749 case UNSPEC_INDNTPOFF:
14750 case UNSPEC_NTPOFF:
14751 case UNSPEC_DTPOFF:
14752 break;
14753
14754 case UNSPEC_STACK_CHECK:
14755 gcc_assert (flag_split_stack);
14756 break;
14757
14758 default:
14759 /* Invalid address unspec. */
14760 return false;
14761 }
14762
14763 else if (SYMBOLIC_CONST (disp)
14764 && (flag_pic
14765 || (TARGET_MACHO
14766 #if TARGET_MACHO
14767 && MACHOPIC_INDIRECT
14768 && !machopic_operand_p (disp)
14769 #endif
14770 )))
14771 {
14772
14773 is_legitimate_pic:
14774 if (TARGET_64BIT && (index || base))
14775 {
14776 /* foo@dtpoff(%rX) is ok. */
14777 if (GET_CODE (disp) != CONST
14778 || GET_CODE (XEXP (disp, 0)) != PLUS
14779 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
14780 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
14781 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
14782 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
14783 /* Non-constant pic memory reference. */
14784 return false;
14785 }
14786 else if ((!TARGET_MACHO || flag_pic)
14787 && ! legitimate_pic_address_disp_p (disp))
14788 /* Displacement is an invalid pic construct. */
14789 return false;
14790 #if TARGET_MACHO
14791 else if (MACHO_DYNAMIC_NO_PIC_P
14792 && !ix86_legitimate_constant_p (Pmode, disp))
14793 /* displacement must be referenced via non_lazy_pointer */
14794 return false;
14795 #endif
14796
14797 /* This code used to verify that a symbolic pic displacement
14798 includes the pic_offset_table_rtx register.
14799
14800 While this is a good idea, unfortunately these constructs may
14801 be created by "adds using lea" optimization for incorrect
14802 code like:
14803
14804 int a;
14805 int foo(int i)
14806 {
14807 return *(&a+i);
14808 }
14809
14810 This code is nonsensical, but results in addressing the
14811 GOT table with a pic_offset_table_rtx base.  We can't
14812 just refuse it easily, since it gets matched by the
14813 "addsi3" pattern, which later gets split to lea in the
14814 case the output register differs from the input.  While this
14815 could be handled by a separate addsi pattern for this case
14816 that never results in lea, disabling this test seems to be
14817 the easier and correct fix for the crash. */
14818 }
14819 else if (GET_CODE (disp) != LABEL_REF
14820 && !CONST_INT_P (disp)
14821 && (GET_CODE (disp) != CONST
14822 || !ix86_legitimate_constant_p (Pmode, disp))
14823 && (GET_CODE (disp) != SYMBOL_REF
14824 || !ix86_legitimate_constant_p (Pmode, disp)))
14825 /* Displacement is not constant. */
14826 return false;
14827 else if (TARGET_64BIT
14828 && !x86_64_immediate_operand (disp, VOIDmode))
14829 /* Displacement is out of range. */
14830 return false;
14831 /* In x32 mode, constant addresses are sign extended to 64bit, so
14832 we have to prevent addresses from 0x80000000 to 0xffffffff. */
14833 else if (TARGET_X32 && !(index || base)
14834 && CONST_INT_P (disp)
14835 && val_signbit_known_set_p (SImode, INTVAL (disp)))
14836 return false;
14837 }
14838
14839 /* Everything looks valid. */
14840 return true;
14841 }
14842
14843 /* Determine if a given RTX is a valid constant address. */
14844
14845 bool
14846 constant_address_p (rtx x)
14847 {
14848 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
14849 }
14850 \f
14851 /* Return a unique alias set for the GOT. */
14852
14853 static alias_set_type
14854 ix86_GOT_alias_set (void)
14855 {
14856 static alias_set_type set = -1;
14857 if (set == -1)
14858 set = new_alias_set ();
14859 return set;
14860 }
14861
14862 /* Return a legitimate reference for ORIG (an address) using the
14863 register REG. If REG is 0, a new pseudo is generated.
14864
14865 There are two types of references that must be handled:
14866
14867 1. Global data references must load the address from the GOT, via
14868 the PIC reg. An insn is emitted to do this load, and the reg is
14869 returned.
14870
14871 2. Static data references, constant pool addresses, and code labels
14872 compute the address as an offset from the GOT, whose base is in
14873 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
14874 differentiate them from global data objects. The returned
14875 address is the PIC reg + an unspec constant.
14876
14877 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
14878 reg also appears in the address. */
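/* Illustrative example for 32-bit code: a reference to a local symbol
   "sym" comes back roughly as (plus pic_offset_table_rtx
   (const (unspec [sym] UNSPEC_GOTOFF))), while a global symbol is
   loaded through the GOT as (mem (plus pic_offset_table_rtx
   (const (unspec [sym] UNSPEC_GOT)))), with the result copied into
   REG when one is supplied.  */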
14879
14880 static rtx
14881 legitimize_pic_address (rtx orig, rtx reg)
14882 {
14883 rtx addr = orig;
14884 rtx new_rtx = orig;
14885
14886 #if TARGET_MACHO
14887 if (TARGET_MACHO && !TARGET_64BIT)
14888 {
14889 if (reg == 0)
14890 reg = gen_reg_rtx (Pmode);
14891 /* Use the generic Mach-O PIC machinery. */
14892 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
14893 }
14894 #endif
14895
14896 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14897 {
14898 rtx tmp = legitimize_pe_coff_symbol (addr, true);
14899 if (tmp)
14900 return tmp;
14901 }
14902
14903 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
14904 new_rtx = addr;
14905 else if (TARGET_64BIT && !TARGET_PECOFF
14906 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
14907 {
14908 rtx tmpreg;
14909 /* This symbol may be referenced via a displacement from the PIC
14910 base address (@GOTOFF). */
14911
14912 if (GET_CODE (addr) == CONST)
14913 addr = XEXP (addr, 0);
14914 if (GET_CODE (addr) == PLUS)
14915 {
14916 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
14917 UNSPEC_GOTOFF);
14918 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
14919 }
14920 else
14921 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
14922 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
14923 if (!reg)
14924 tmpreg = gen_reg_rtx (Pmode);
14925 else
14926 tmpreg = reg;
14927 emit_move_insn (tmpreg, new_rtx);
14928
14929 if (reg != 0)
14930 {
14931 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
14932 tmpreg, 1, OPTAB_DIRECT);
14933 new_rtx = reg;
14934 }
14935 else
14936 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
14937 }
14938 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
14939 {
14940 /* This symbol may be referenced via a displacement from the PIC
14941 base address (@GOTOFF). */
14942
14943 if (GET_CODE (addr) == CONST)
14944 addr = XEXP (addr, 0);
14945 if (GET_CODE (addr) == PLUS)
14946 {
14947 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
14948 UNSPEC_GOTOFF);
14949 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
14950 }
14951 else
14952 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
14953 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
14954 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
14955
14956 if (reg != 0)
14957 {
14958 emit_move_insn (reg, new_rtx);
14959 new_rtx = reg;
14960 }
14961 }
14962 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
14963 /* We can't use @GOTOFF for text labels on VxWorks;
14964 see gotoff_operand. */
14965 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
14966 {
14967 rtx tmp = legitimize_pe_coff_symbol (addr, true);
14968 if (tmp)
14969 return tmp;
14970
14971 /* For x64 PE-COFF there is no GOT table.  So we use the address
14972 directly. */
14973 if (TARGET_64BIT && TARGET_PECOFF)
14974 {
14975 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
14976 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
14977
14978 if (reg == 0)
14979 reg = gen_reg_rtx (Pmode);
14980 emit_move_insn (reg, new_rtx);
14981 new_rtx = reg;
14982 }
14983 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
14984 {
14985 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
14986 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
14987 new_rtx = gen_const_mem (Pmode, new_rtx);
14988 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
14989
14990 if (reg == 0)
14991 reg = gen_reg_rtx (Pmode);
14992 /* Use gen_movsi directly, otherwise the address is loaded
14993 into a register for CSE.  We don't want to CSE these addresses;
14994 instead we CSE addresses from the GOT table, so skip this. */
14995 emit_insn (gen_movsi (reg, new_rtx));
14996 new_rtx = reg;
14997 }
14998 else
14999 {
15000 /* This symbol must be referenced via a load from the
15001 Global Offset Table (@GOT). */
15002
15003 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
15004 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15005 if (TARGET_64BIT)
15006 new_rtx = force_reg (Pmode, new_rtx);
15007 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15008 new_rtx = gen_const_mem (Pmode, new_rtx);
15009 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
15010
15011 if (reg == 0)
15012 reg = gen_reg_rtx (Pmode);
15013 emit_move_insn (reg, new_rtx);
15014 new_rtx = reg;
15015 }
15016 }
15017 else
15018 {
15019 if (CONST_INT_P (addr)
15020 && !x86_64_immediate_operand (addr, VOIDmode))
15021 {
15022 if (reg)
15023 {
15024 emit_move_insn (reg, addr);
15025 new_rtx = reg;
15026 }
15027 else
15028 new_rtx = force_reg (Pmode, addr);
15029 }
15030 else if (GET_CODE (addr) == CONST)
15031 {
15032 addr = XEXP (addr, 0);
15033
15034 /* We must match stuff we generate before. Assume the only
15035 unspecs that can get here are ours. Not that we could do
15036 anything with them anyway.... */
15037 if (GET_CODE (addr) == UNSPEC
15038 || (GET_CODE (addr) == PLUS
15039 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
15040 return orig;
15041 gcc_assert (GET_CODE (addr) == PLUS);
15042 }
15043 if (GET_CODE (addr) == PLUS)
15044 {
15045 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
15046
15047 /* Check first to see if this is a constant offset from a @GOTOFF
15048 symbol reference. */
15049 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
15050 && CONST_INT_P (op1))
15051 {
15052 if (!TARGET_64BIT)
15053 {
15054 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
15055 UNSPEC_GOTOFF);
15056 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
15057 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15058 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15059
15060 if (reg != 0)
15061 {
15062 emit_move_insn (reg, new_rtx);
15063 new_rtx = reg;
15064 }
15065 }
15066 else
15067 {
15068 if (INTVAL (op1) < -16*1024*1024
15069 || INTVAL (op1) >= 16*1024*1024)
15070 {
15071 if (!x86_64_immediate_operand (op1, Pmode))
15072 op1 = force_reg (Pmode, op1);
15073 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
15074 }
15075 }
15076 }
15077 else
15078 {
15079 rtx base = legitimize_pic_address (op0, reg);
15080 machine_mode mode = GET_MODE (base);
15081 new_rtx
15082 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
15083
15084 if (CONST_INT_P (new_rtx))
15085 {
15086 if (INTVAL (new_rtx) < -16*1024*1024
15087 || INTVAL (new_rtx) >= 16*1024*1024)
15088 {
15089 if (!x86_64_immediate_operand (new_rtx, mode))
15090 new_rtx = force_reg (mode, new_rtx);
15091 new_rtx
15092 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
15093 }
15094 else
15095 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
15096 }
15097 else
15098 {
15099 /* For %rip addressing, we have to use just disp32, with
15100 neither base nor index. */
15101 if (TARGET_64BIT
15102 && (GET_CODE (base) == SYMBOL_REF
15103 || GET_CODE (base) == LABEL_REF))
15104 base = force_reg (mode, base);
15105 if (GET_CODE (new_rtx) == PLUS
15106 && CONSTANT_P (XEXP (new_rtx, 1)))
15107 {
15108 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
15109 new_rtx = XEXP (new_rtx, 1);
15110 }
15111 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
15112 }
15113 }
15114 }
15115 }
15116 return new_rtx;
15117 }
15118 \f
15119 /* Load the thread pointer. If TO_REG is true, force it into a register. */
15120
15121 static rtx
15122 get_thread_pointer (machine_mode tp_mode, bool to_reg)
15123 {
15124 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
15125
15126 if (GET_MODE (tp) != tp_mode)
15127 {
15128 gcc_assert (GET_MODE (tp) == SImode);
15129 gcc_assert (tp_mode == DImode);
15130
15131 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
15132 }
15133
15134 if (to_reg)
15135 tp = copy_to_mode_reg (tp_mode, tp);
15136
15137 return tp;
15138 }
15139
15140 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15141
15142 static GTY(()) rtx ix86_tls_symbol;
15143
15144 static rtx
15145 ix86_tls_get_addr (void)
15146 {
15147 if (!ix86_tls_symbol)
15148 {
15149 const char *sym
15150 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
15151 ? "___tls_get_addr" : "__tls_get_addr");
15152
15153 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
15154 }
15155
15156 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
15157 {
15158 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
15159 UNSPEC_PLTOFF);
15160 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
15161 gen_rtx_CONST (Pmode, unspec));
15162 }
15163
15164 return ix86_tls_symbol;
15165 }
15166
15167 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15168
15169 static GTY(()) rtx ix86_tls_module_base_symbol;
15170
15171 rtx
15172 ix86_tls_module_base (void)
15173 {
15174 if (!ix86_tls_module_base_symbol)
15175 {
15176 ix86_tls_module_base_symbol
15177 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
15178
15179 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15180 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15181 }
15182
15183 return ix86_tls_module_base_symbol;
15184 }
15185
15186 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
15187 false if we expect this to be used for a memory address and true if
15188 we expect to load the address into a register. */
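/* Illustrative example: for TLS_MODEL_LOCAL_EXEC with GNU TLS the
   returned address has the shape (plus <thread pointer>
   (const (unspec [x] UNSPEC_NTPOFF))), which is typically emitted as a
   %fs:/%gs:-relative access.  */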
15189
15190 static rtx
15191 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
15192 {
15193 rtx dest, base, off;
15194 rtx pic = NULL_RTX, tp = NULL_RTX;
15195 machine_mode tp_mode = Pmode;
15196 int type;
15197
15198 /* Fall back to the global dynamic model if the tool chain cannot support
15199 local dynamic. */
15200 if (TARGET_SUN_TLS && !TARGET_64BIT
15201 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
15202 && model == TLS_MODEL_LOCAL_DYNAMIC)
15203 model = TLS_MODEL_GLOBAL_DYNAMIC;
15204
15205 switch (model)
15206 {
15207 case TLS_MODEL_GLOBAL_DYNAMIC:
15208 dest = gen_reg_rtx (Pmode);
15209
15210 if (!TARGET_64BIT)
15211 {
15212 if (flag_pic && !TARGET_PECOFF)
15213 pic = pic_offset_table_rtx;
15214 else
15215 {
15216 pic = gen_reg_rtx (Pmode);
15217 emit_insn (gen_set_got (pic));
15218 }
15219 }
15220
15221 if (TARGET_GNU2_TLS)
15222 {
15223 if (TARGET_64BIT)
15224 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
15225 else
15226 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
15227
15228 tp = get_thread_pointer (Pmode, true);
15229 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
15230
15231 if (GET_MODE (x) != Pmode)
15232 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15233
15234 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
15235 }
15236 else
15237 {
15238 rtx caddr = ix86_tls_get_addr ();
15239
15240 if (TARGET_64BIT)
15241 {
15242 rtx rax = gen_rtx_REG (Pmode, AX_REG);
15243 rtx_insn *insns;
15244
15245 start_sequence ();
15246 emit_call_insn
15247 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
15248 insns = get_insns ();
15249 end_sequence ();
15250
15251 if (GET_MODE (x) != Pmode)
15252 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15253
15254 RTL_CONST_CALL_P (insns) = 1;
15255 emit_libcall_block (insns, dest, rax, x);
15256 }
15257 else
15258 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
15259 }
15260 break;
15261
15262 case TLS_MODEL_LOCAL_DYNAMIC:
15263 base = gen_reg_rtx (Pmode);
15264
15265 if (!TARGET_64BIT)
15266 {
15267 if (flag_pic)
15268 pic = pic_offset_table_rtx;
15269 else
15270 {
15271 pic = gen_reg_rtx (Pmode);
15272 emit_insn (gen_set_got (pic));
15273 }
15274 }
15275
15276 if (TARGET_GNU2_TLS)
15277 {
15278 rtx tmp = ix86_tls_module_base ();
15279
15280 if (TARGET_64BIT)
15281 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
15282 else
15283 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
15284
15285 tp = get_thread_pointer (Pmode, true);
15286 set_unique_reg_note (get_last_insn (), REG_EQUAL,
15287 gen_rtx_MINUS (Pmode, tmp, tp));
15288 }
15289 else
15290 {
15291 rtx caddr = ix86_tls_get_addr ();
15292
15293 if (TARGET_64BIT)
15294 {
15295 rtx rax = gen_rtx_REG (Pmode, AX_REG);
15296 rtx_insn *insns;
15297 rtx eqv;
15298
15299 start_sequence ();
15300 emit_call_insn
15301 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
15302 insns = get_insns ();
15303 end_sequence ();
15304
15305 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
15306 share the LD_BASE result with other LD model accesses. */
15307 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
15308 UNSPEC_TLS_LD_BASE);
15309
15310 RTL_CONST_CALL_P (insns) = 1;
15311 emit_libcall_block (insns, base, rax, eqv);
15312 }
15313 else
15314 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
15315 }
15316
15317 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
15318 off = gen_rtx_CONST (Pmode, off);
15319
15320 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
15321
15322 if (TARGET_GNU2_TLS)
15323 {
15324 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
15325
15326 if (GET_MODE (x) != Pmode)
15327 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15328
15329 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
15330 }
15331 break;
15332
15333 case TLS_MODEL_INITIAL_EXEC:
15334 if (TARGET_64BIT)
15335 {
15336 if (TARGET_SUN_TLS && !TARGET_X32)
15337 {
15338 /* The Sun linker took the AMD64 TLS spec literally
15339 and can only handle %rax as destination of the
15340 initial executable code sequence. */
15341
15342 dest = gen_reg_rtx (DImode);
15343 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
15344 return dest;
15345 }
15346
15347 /* Generate DImode references to avoid %fs:(%reg32)
15348 problems and the linker IE->LE relaxation bug. */
15349 tp_mode = DImode;
15350 pic = NULL;
15351 type = UNSPEC_GOTNTPOFF;
15352 }
15353 else if (flag_pic)
15354 {
15355 pic = pic_offset_table_rtx;
15356 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
15357 }
15358 else if (!TARGET_ANY_GNU_TLS)
15359 {
15360 pic = gen_reg_rtx (Pmode);
15361 emit_insn (gen_set_got (pic));
15362 type = UNSPEC_GOTTPOFF;
15363 }
15364 else
15365 {
15366 pic = NULL;
15367 type = UNSPEC_INDNTPOFF;
15368 }
15369
15370 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
15371 off = gen_rtx_CONST (tp_mode, off);
15372 if (pic)
15373 off = gen_rtx_PLUS (tp_mode, pic, off);
15374 off = gen_const_mem (tp_mode, off);
15375 set_mem_alias_set (off, ix86_GOT_alias_set ());
15376
15377 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15378 {
15379 base = get_thread_pointer (tp_mode,
15380 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
15381 off = force_reg (tp_mode, off);
15382 return gen_rtx_PLUS (tp_mode, base, off);
15383 }
15384 else
15385 {
15386 base = get_thread_pointer (Pmode, true);
15387 dest = gen_reg_rtx (Pmode);
15388 emit_insn (ix86_gen_sub3 (dest, base, off));
15389 }
15390 break;
15391
15392 case TLS_MODEL_LOCAL_EXEC:
15393 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
15394 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15395 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
15396 off = gen_rtx_CONST (Pmode, off);
15397
15398 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15399 {
15400 base = get_thread_pointer (Pmode,
15401 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
15402 return gen_rtx_PLUS (Pmode, base, off);
15403 }
15404 else
15405 {
15406 base = get_thread_pointer (Pmode, true);
15407 dest = gen_reg_rtx (Pmode);
15408 emit_insn (ix86_gen_sub3 (dest, base, off));
15409 }
15410 break;
15411
15412 default:
15413 gcc_unreachable ();
15414 }
15415
15416 return dest;
15417 }
15418
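/* Illustrative sketch only (the exact sequences depend on the assembler
   and on -mtls-dialect): on x86-64 with GNU TLS the models handled above
   correspond roughly to
     initial-exec:  movq  x@gottpoff(%rip), %rax
                    movq  %fs:(%rax), ...
     local-exec:    movq  %fs:x@tpoff, ...
   while global and local dynamic go through __tls_get_addr (or the
   TLSDESC scheme for -mtls-dialect=gnu2).  */
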
15419 /* Create or return the unique __imp_DECL dllimport symbol corresponding
15420 to symbol DECL if BEIMPORT is true. Otherwise create or return the
15421 unique refptr-DECL symbol corresponding to symbol DECL. */
15422
15423 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
15424 {
15425 static inline hashval_t hash (tree_map *m) { return m->hash; }
15426 static inline bool
15427 equal (tree_map *a, tree_map *b)
15428 {
15429 return a->base.from == b->base.from;
15430 }
15431
15432 static int
15433 keep_cache_entry (tree_map *&m)
15434 {
15435 return ggc_marked_p (m->base.from);
15436 }
15437 };
15438
15439 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
15440
15441 static tree
15442 get_dllimport_decl (tree decl, bool beimport)
15443 {
15444 struct tree_map *h, in;
15445 const char *name;
15446 const char *prefix;
15447 size_t namelen, prefixlen;
15448 char *imp_name;
15449 tree to;
15450 rtx rtl;
15451
15452 if (!dllimport_map)
15453 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
15454
15455 in.hash = htab_hash_pointer (decl);
15456 in.base.from = decl;
15457 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
15458 h = *loc;
15459 if (h)
15460 return h->to;
15461
15462 *loc = h = ggc_alloc<tree_map> ();
15463 h->hash = in.hash;
15464 h->base.from = decl;
15465 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
15466 VAR_DECL, NULL, ptr_type_node);
15467 DECL_ARTIFICIAL (to) = 1;
15468 DECL_IGNORED_P (to) = 1;
15469 DECL_EXTERNAL (to) = 1;
15470 TREE_READONLY (to) = 1;
15471
15472 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
15473 name = targetm.strip_name_encoding (name);
15474 if (beimport)
15475 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
15476 ? "*__imp_" : "*__imp__";
15477 else
15478 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
15479 namelen = strlen (name);
15480 prefixlen = strlen (prefix);
15481 imp_name = (char *) alloca (namelen + prefixlen + 1);
15482 memcpy (imp_name, prefix, prefixlen);
15483 memcpy (imp_name + prefixlen, name, namelen + 1);
15484
15485 name = ggc_alloc_string (imp_name, namelen + prefixlen);
15486 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
15487 SET_SYMBOL_REF_DECL (rtl, to);
15488 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
15489 if (!beimport)
15490 {
15491 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
15492 #ifdef SUB_TARGET_RECORD_STUB
15493 SUB_TARGET_RECORD_STUB (name);
15494 #endif
15495 }
15496
15497 rtl = gen_const_mem (Pmode, rtl);
15498 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
15499
15500 SET_DECL_RTL (to, rtl);
15501 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
15502
15503 return to;
15504 }
15505
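/* Illustration (not emitted literally by this function): a reference to
   a variable declared __declspec(dllimport) int foo is made through the
   __imp_ stub created below, roughly
     movl  __imp__foo, %eax
     movl  (%eax), %eax
   on 32-bit mingw (user symbols carry a leading underscore there); on
   targets with an empty user label prefix the stub is just __imp_foo.  */
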
15506 /* Expand SYMBOL into its corresponding far-address symbol.
15507 WANT_REG is true if we require the result be a register. */
15508
15509 static rtx
15510 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
15511 {
15512 tree imp_decl;
15513 rtx x;
15514
15515 gcc_assert (SYMBOL_REF_DECL (symbol));
15516 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
15517
15518 x = DECL_RTL (imp_decl);
15519 if (want_reg)
15520 x = force_reg (Pmode, x);
15521 return x;
15522 }
15523
15524 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
15525 true if we require the result be a register. */
15526
15527 static rtx
15528 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
15529 {
15530 tree imp_decl;
15531 rtx x;
15532
15533 gcc_assert (SYMBOL_REF_DECL (symbol));
15534 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
15535
15536 x = DECL_RTL (imp_decl);
15537 if (want_reg)
15538 x = force_reg (Pmode, x);
15539 return x;
15540 }
15541
15542 /* Expand ADDR into its corresponding dllimport or refptr symbol. INREG
15543 is true if we require the result be a register. */
15544
15545 static rtx
15546 legitimize_pe_coff_symbol (rtx addr, bool inreg)
15547 {
15548 if (!TARGET_PECOFF)
15549 return NULL_RTX;
15550
15551 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
15552 {
15553 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
15554 return legitimize_dllimport_symbol (addr, inreg);
15555 if (GET_CODE (addr) == CONST
15556 && GET_CODE (XEXP (addr, 0)) == PLUS
15557 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
15558 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
15559 {
15560 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
15561 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
15562 }
15563 }
15564
15565 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
15566 return NULL_RTX;
15567 if (GET_CODE (addr) == SYMBOL_REF
15568 && !is_imported_p (addr)
15569 && SYMBOL_REF_EXTERNAL_P (addr)
15570 && SYMBOL_REF_DECL (addr))
15571 return legitimize_pe_coff_extern_decl (addr, inreg);
15572
15573 if (GET_CODE (addr) == CONST
15574 && GET_CODE (XEXP (addr, 0)) == PLUS
15575 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
15576 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
15577 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
15578 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
15579 {
15580 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
15581 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
15582 }
15583 return NULL_RTX;
15584 }
15585
15586 /* Try machine-dependent ways of modifying an illegitimate address
15587 to be legitimate. If we find one, return the new, valid address.
15588 This macro is used in only one place: `memory_address' in explow.c.
15589
15590 OLDX is the address as it was before break_out_memory_refs was called.
15591 In some cases it is useful to look at this to decide what needs to be done.
15592
15593 It is always safe for this macro to do nothing. It exists to recognize
15594 opportunities to optimize the output.
15595
15596 For the 80386, we handle X+REG by loading X into a register R and
15597 using R+REG. R will go in a general reg and indexing will be used.
15598 However, if REG is a broken-out memory address or multiplication,
15599 nothing needs to be done because REG can certainly go in a general reg.
15600
15601 When -fpic is used, special handling is needed for symbolic references.
15602 See comments by legitimize_pic_address in i386.c for details. */
15603
15604 static rtx
15605 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
15606 {
15607 bool changed = false;
15608 unsigned log;
15609
15610 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
15611 if (log)
15612 return legitimize_tls_address (x, (enum tls_model) log, false);
15613 if (GET_CODE (x) == CONST
15614 && GET_CODE (XEXP (x, 0)) == PLUS
15615 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
15616 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
15617 {
15618 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
15619 (enum tls_model) log, false);
15620 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
15621 }
15622
15623 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
15624 {
15625 rtx tmp = legitimize_pe_coff_symbol (x, true);
15626 if (tmp)
15627 return tmp;
15628 }
15629
15630 if (flag_pic && SYMBOLIC_CONST (x))
15631 return legitimize_pic_address (x, 0);
15632
15633 #if TARGET_MACHO
15634 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
15635 return machopic_indirect_data_reference (x, 0);
15636 #endif
15637
15638 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
15639 if (GET_CODE (x) == ASHIFT
15640 && CONST_INT_P (XEXP (x, 1))
15641 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
15642 {
15643 changed = true;
15644 log = INTVAL (XEXP (x, 1));
15645 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
15646 GEN_INT (1 << log));
15647 }
15648
15649 if (GET_CODE (x) == PLUS)
15650 {
15651 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
15652
15653 if (GET_CODE (XEXP (x, 0)) == ASHIFT
15654 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
15655 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
15656 {
15657 changed = true;
15658 log = INTVAL (XEXP (XEXP (x, 0), 1));
15659 XEXP (x, 0) = gen_rtx_MULT (Pmode,
15660 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
15661 GEN_INT (1 << log));
15662 }
15663
15664 if (GET_CODE (XEXP (x, 1)) == ASHIFT
15665 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
15666 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
15667 {
15668 changed = true;
15669 log = INTVAL (XEXP (XEXP (x, 1), 1));
15670 XEXP (x, 1) = gen_rtx_MULT (Pmode,
15671 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
15672 GEN_INT (1 << log));
15673 }
15674
15675 /* Put multiply first if it isn't already. */
15676 if (GET_CODE (XEXP (x, 1)) == MULT)
15677 {
15678 std::swap (XEXP (x, 0), XEXP (x, 1));
15679 changed = true;
15680 }
15681
15682 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
15683 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
15684 created by virtual register instantiation, register elimination, and
15685 similar optimizations. */
15686 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
15687 {
15688 changed = true;
15689 x = gen_rtx_PLUS (Pmode,
15690 gen_rtx_PLUS (Pmode, XEXP (x, 0),
15691 XEXP (XEXP (x, 1), 0)),
15692 XEXP (XEXP (x, 1), 1));
15693 }
15694
15695 /* Canonicalize
15696 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
15697 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
15698 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
15699 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15700 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
15701 && CONSTANT_P (XEXP (x, 1)))
15702 {
15703 rtx constant;
15704 rtx other = NULL_RTX;
15705
15706 if (CONST_INT_P (XEXP (x, 1)))
15707 {
15708 constant = XEXP (x, 1);
15709 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
15710 }
15711 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
15712 {
15713 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
15714 other = XEXP (x, 1);
15715 }
15716 else
15717 constant = 0;
15718
15719 if (constant)
15720 {
15721 changed = true;
15722 x = gen_rtx_PLUS (Pmode,
15723 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
15724 XEXP (XEXP (XEXP (x, 0), 1), 0)),
15725 plus_constant (Pmode, other,
15726 INTVAL (constant)));
15727 }
15728 }
15729
15730 if (changed && ix86_legitimate_address_p (mode, x, false))
15731 return x;
15732
15733 if (GET_CODE (XEXP (x, 0)) == MULT)
15734 {
15735 changed = true;
15736 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
15737 }
15738
15739 if (GET_CODE (XEXP (x, 1)) == MULT)
15740 {
15741 changed = true;
15742 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
15743 }
15744
15745 if (changed
15746 && REG_P (XEXP (x, 1))
15747 && REG_P (XEXP (x, 0)))
15748 return x;
15749
15750 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
15751 {
15752 changed = true;
15753 x = legitimize_pic_address (x, 0);
15754 }
15755
15756 if (changed && ix86_legitimate_address_p (mode, x, false))
15757 return x;
15758
15759 if (REG_P (XEXP (x, 0)))
15760 {
15761 rtx temp = gen_reg_rtx (Pmode);
15762 rtx val = force_operand (XEXP (x, 1), temp);
15763 if (val != temp)
15764 {
15765 val = convert_to_mode (Pmode, val, 1);
15766 emit_move_insn (temp, val);
15767 }
15768
15769 XEXP (x, 1) = temp;
15770 return x;
15771 }
15772
15773 else if (REG_P (XEXP (x, 1)))
15774 {
15775 rtx temp = gen_reg_rtx (Pmode);
15776 rtx val = force_operand (XEXP (x, 0), temp);
15777 if (val != temp)
15778 {
15779 val = convert_to_mode (Pmode, val, 1);
15780 emit_move_insn (temp, val);
15781 }
15782
15783 XEXP (x, 0) = temp;
15784 return x;
15785 }
15786 }
15787
15788 return x;
15789 }
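
/* Example of the canonicalization performed above (a sketch, not an
   exhaustive list): an address such as
     (plus (ashift (reg R1) (const_int 2)) (reg R2))
   is rewritten to
     (plus (mult (reg R1) (const_int 4)) (reg R2))
   so that it matches the base + index*scale + disp form checked by
   ix86_legitimate_address_p.  */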
15790 \f
15791 /* Print an integer constant expression in assembler syntax. Addition
15792 and subtraction are the only arithmetic that may appear in these
15793 expressions. FILE is the stdio stream to write to, X is the rtx, and
15794 CODE is the operand print code from the output string. */
15795
15796 static void
15797 output_pic_addr_const (FILE *file, rtx x, int code)
15798 {
15799 char buf[256];
15800
15801 switch (GET_CODE (x))
15802 {
15803 case PC:
15804 gcc_assert (flag_pic);
15805 putc ('.', file);
15806 break;
15807
15808 case SYMBOL_REF:
15809 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
15810 output_addr_const (file, x);
15811 else
15812 {
15813 const char *name = XSTR (x, 0);
15814
15815 /* Mark the decl as referenced so that cgraph will
15816 output the function. */
15817 if (SYMBOL_REF_DECL (x))
15818 mark_decl_referenced (SYMBOL_REF_DECL (x));
15819
15820 #if TARGET_MACHO
15821 if (MACHOPIC_INDIRECT
15822 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
15823 name = machopic_indirection_name (x, /*stub_p=*/true);
15824 #endif
15825 assemble_name (file, name);
15826 }
15827 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
15828 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
15829 fputs ("@PLT", file);
15830 break;
15831
15832 case LABEL_REF:
15833 x = XEXP (x, 0);
15834 /* FALLTHRU */
15835 case CODE_LABEL:
15836 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
15837 assemble_name (asm_out_file, buf);
15838 break;
15839
15840 case CONST_INT:
15841 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15842 break;
15843
15844 case CONST:
15845 /* This used to output parentheses around the expression,
15846 but that does not work on the 386 (either ATT or BSD assembler). */
15847 output_pic_addr_const (file, XEXP (x, 0), code);
15848 break;
15849
15850 case CONST_DOUBLE:
15851 /* We can't handle floating point constants;
15852 TARGET_PRINT_OPERAND must handle them. */
15853 output_operand_lossage ("floating constant misused");
15854 break;
15855
15856 case PLUS:
15857 /* Some assemblers need integer constants to appear first. */
15858 if (CONST_INT_P (XEXP (x, 0)))
15859 {
15860 output_pic_addr_const (file, XEXP (x, 0), code);
15861 putc ('+', file);
15862 output_pic_addr_const (file, XEXP (x, 1), code);
15863 }
15864 else
15865 {
15866 gcc_assert (CONST_INT_P (XEXP (x, 1)));
15867 output_pic_addr_const (file, XEXP (x, 1), code);
15868 putc ('+', file);
15869 output_pic_addr_const (file, XEXP (x, 0), code);
15870 }
15871 break;
15872
15873 case MINUS:
15874 if (!TARGET_MACHO)
15875 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
15876 output_pic_addr_const (file, XEXP (x, 0), code);
15877 putc ('-', file);
15878 output_pic_addr_const (file, XEXP (x, 1), code);
15879 if (!TARGET_MACHO)
15880 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
15881 break;
15882
15883 case UNSPEC:
15884 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
15885 {
15886 bool f = i386_asm_output_addr_const_extra (file, x);
15887 gcc_assert (f);
15888 break;
15889 }
15890
15891 gcc_assert (XVECLEN (x, 0) == 1);
15892 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
15893 switch (XINT (x, 1))
15894 {
15895 case UNSPEC_GOT:
15896 fputs ("@GOT", file);
15897 break;
15898 case UNSPEC_GOTOFF:
15899 fputs ("@GOTOFF", file);
15900 break;
15901 case UNSPEC_PLTOFF:
15902 fputs ("@PLTOFF", file);
15903 break;
15904 case UNSPEC_PCREL:
15905 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15906 "(%rip)" : "[rip]", file);
15907 break;
15908 case UNSPEC_GOTPCREL:
15909 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15910 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
15911 break;
15912 case UNSPEC_GOTTPOFF:
15913 /* FIXME: This might be @TPOFF in Sun ld too. */
15914 fputs ("@gottpoff", file);
15915 break;
15916 case UNSPEC_TPOFF:
15917 fputs ("@tpoff", file);
15918 break;
15919 case UNSPEC_NTPOFF:
15920 if (TARGET_64BIT)
15921 fputs ("@tpoff", file);
15922 else
15923 fputs ("@ntpoff", file);
15924 break;
15925 case UNSPEC_DTPOFF:
15926 fputs ("@dtpoff", file);
15927 break;
15928 case UNSPEC_GOTNTPOFF:
15929 if (TARGET_64BIT)
15930 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15931 "@gottpoff(%rip)": "@gottpoff[rip]", file);
15932 else
15933 fputs ("@gotntpoff", file);
15934 break;
15935 case UNSPEC_INDNTPOFF:
15936 fputs ("@indntpoff", file);
15937 break;
15938 #if TARGET_MACHO
15939 case UNSPEC_MACHOPIC_OFFSET:
15940 putc ('-', file);
15941 machopic_output_function_base_name (file);
15942 break;
15943 #endif
15944 default:
15945 output_operand_lossage ("invalid UNSPEC as operand");
15946 break;
15947 }
15948 break;
15949
15950 default:
15951 output_operand_lossage ("invalid expression as operand");
15952 }
15953 }
15954
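/* For example, an UNSPEC_GOTOFF wrapper around foo prints as
   "foo@GOTOFF" above, and with operand code 'P' a non-local
   SYMBOL_REF gets an "@PLT" suffix.  */
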
15955 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
15956 We need to emit DTP-relative relocations. */
15957
15958 static void ATTRIBUTE_UNUSED
15959 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
15960 {
15961 fputs (ASM_LONG, file);
15962 output_addr_const (file, x);
15963 fputs ("@dtpoff", file);
15964 switch (size)
15965 {
15966 case 4:
15967 break;
15968 case 8:
15969 fputs (", 0", file);
15970 break;
15971 default:
15972 gcc_unreachable ();
15973 }
15974 }
15975
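/* E.g. for SIZE == 4 this emits ".long foo@dtpoff" and for SIZE == 8
   ".long foo@dtpoff, 0", assuming ASM_LONG expands to ".long".  */
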
15976 /* Return true if X is a representation of the PIC register. This copes
15977 with calls from ix86_find_base_term, where the register might have
15978 been replaced by a cselib value. */
15979
15980 static bool
15981 ix86_pic_register_p (rtx x)
15982 {
15983 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
15984 return (pic_offset_table_rtx
15985 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
15986 else if (!REG_P (x))
15987 return false;
15988 else if (pic_offset_table_rtx)
15989 {
15990 if (REGNO (x) == REGNO (pic_offset_table_rtx))
15991 return true;
15992 if (HARD_REGISTER_P (x)
15993 && !HARD_REGISTER_P (pic_offset_table_rtx)
15994 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
15995 return true;
15996 return false;
15997 }
15998 else
15999 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
16000 }
16001
16002 /* Helper function for ix86_delegitimize_address.
16003 Attempt to delegitimize TLS local-exec accesses. */
16004
16005 static rtx
16006 ix86_delegitimize_tls_address (rtx orig_x)
16007 {
16008 rtx x = orig_x, unspec;
16009 struct ix86_address addr;
16010
16011 if (!TARGET_TLS_DIRECT_SEG_REFS)
16012 return orig_x;
16013 if (MEM_P (x))
16014 x = XEXP (x, 0);
16015 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
16016 return orig_x;
16017 if (ix86_decompose_address (x, &addr) == 0
16018 || addr.seg != DEFAULT_TLS_SEG_REG
16019 || addr.disp == NULL_RTX
16020 || GET_CODE (addr.disp) != CONST)
16021 return orig_x;
16022 unspec = XEXP (addr.disp, 0);
16023 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
16024 unspec = XEXP (unspec, 0);
16025 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
16026 return orig_x;
16027 x = XVECEXP (unspec, 0, 0);
16028 gcc_assert (GET_CODE (x) == SYMBOL_REF);
16029 if (unspec != XEXP (addr.disp, 0))
16030 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
16031 if (addr.index)
16032 {
16033 rtx idx = addr.index;
16034 if (addr.scale != 1)
16035 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
16036 x = gen_rtx_PLUS (Pmode, idx, x);
16037 }
16038 if (addr.base)
16039 x = gen_rtx_PLUS (Pmode, addr.base, x);
16040 if (MEM_P (orig_x))
16041 x = replace_equiv_address_nv (orig_x, x);
16042 return x;
16043 }
16044
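/* For example, a local-exec access that was legitimized to
     (mem (plus (reg) (const (unspec [foo] UNSPEC_NTPOFF))))
   with the TLS segment register is turned back into a reference to
   foo (plus any base and scaled index), mainly for debug output.  */
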
16045 /* In the name of slightly smaller debug output, and to cater to
16046 general assembler lossage, recognize PIC+GOTOFF and turn it back
16047 into a direct symbol reference.
16048
16049 On Darwin, this is necessary to avoid a crash, because Darwin
16050 has a different PIC label for each routine but the DWARF debugging
16051 information is not associated with any particular routine, so it's
16052 necessary to remove references to the PIC label from RTL stored by
16053 the DWARF output code. */
16054
16055 static rtx
16056 ix86_delegitimize_address (rtx x)
16057 {
16058 rtx orig_x = delegitimize_mem_from_attrs (x);
16059 /* addend is NULL or some rtx if x is something+GOTOFF where
16060 something doesn't include the PIC register. */
16061 rtx addend = NULL_RTX;
16062 /* reg_addend is NULL or a multiple of some register. */
16063 rtx reg_addend = NULL_RTX;
16064 /* const_addend is NULL or a const_int. */
16065 rtx const_addend = NULL_RTX;
16066 /* This is the result, or NULL. */
16067 rtx result = NULL_RTX;
16068
16069 x = orig_x;
16070
16071 if (MEM_P (x))
16072 x = XEXP (x, 0);
16073
16074 if (TARGET_64BIT)
16075 {
16076 if (GET_CODE (x) == CONST
16077 && GET_CODE (XEXP (x, 0)) == PLUS
16078 && GET_MODE (XEXP (x, 0)) == Pmode
16079 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
16080 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
16081 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
16082 {
16083 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
16084 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
16085 if (MEM_P (orig_x))
16086 x = replace_equiv_address_nv (orig_x, x);
16087 return x;
16088 }
16089
16090 if (GET_CODE (x) == CONST
16091 && GET_CODE (XEXP (x, 0)) == UNSPEC
16092 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
16093 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
16094 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
16095 {
16096 x = XVECEXP (XEXP (x, 0), 0, 0);
16097 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
16098 {
16099 x = simplify_gen_subreg (GET_MODE (orig_x), x,
16100 GET_MODE (x), 0);
16101 if (x == NULL_RTX)
16102 return orig_x;
16103 }
16104 return x;
16105 }
16106
16107 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
16108 return ix86_delegitimize_tls_address (orig_x);
16109
16110 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
16111 and -mcmodel=medium -fpic. */
16112 }
16113
16114 if (GET_CODE (x) != PLUS
16115 || GET_CODE (XEXP (x, 1)) != CONST)
16116 return ix86_delegitimize_tls_address (orig_x);
16117
16118 if (ix86_pic_register_p (XEXP (x, 0)))
16119 /* %ebx + GOT/GOTOFF */
16120 ;
16121 else if (GET_CODE (XEXP (x, 0)) == PLUS)
16122 {
16123 /* %ebx + %reg * scale + GOT/GOTOFF */
16124 reg_addend = XEXP (x, 0);
16125 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
16126 reg_addend = XEXP (reg_addend, 1);
16127 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
16128 reg_addend = XEXP (reg_addend, 0);
16129 else
16130 {
16131 reg_addend = NULL_RTX;
16132 addend = XEXP (x, 0);
16133 }
16134 }
16135 else
16136 addend = XEXP (x, 0);
16137
16138 x = XEXP (XEXP (x, 1), 0);
16139 if (GET_CODE (x) == PLUS
16140 && CONST_INT_P (XEXP (x, 1)))
16141 {
16142 const_addend = XEXP (x, 1);
16143 x = XEXP (x, 0);
16144 }
16145
16146 if (GET_CODE (x) == UNSPEC
16147 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
16148 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
16149 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
16150 && !MEM_P (orig_x) && !addend)))
16151 result = XVECEXP (x, 0, 0);
16152
16153 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
16154 && !MEM_P (orig_x))
16155 result = XVECEXP (x, 0, 0);
16156
16157 if (! result)
16158 return ix86_delegitimize_tls_address (orig_x);
16159
16160 if (const_addend)
16161 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
16162 if (reg_addend)
16163 result = gen_rtx_PLUS (Pmode, reg_addend, result);
16164 if (addend)
16165 {
16166 /* If the rest of original X doesn't involve the PIC register, add
16167 addend and subtract pic_offset_table_rtx. This can happen e.g.
16168 for code like:
16169 leal (%ebx, %ecx, 4), %ecx
16170 ...
16171 movl foo@GOTOFF(%ecx), %edx
16172 in which case we return (%ecx - %ebx) + foo
16173 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if a pseudo PIC register
16174 is used and reload has completed. */
16175 if (pic_offset_table_rtx
16176 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
16177 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
16178 pic_offset_table_rtx),
16179 result);
16180 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
16181 {
16182 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
16183 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
16184 result = gen_rtx_PLUS (Pmode, tmp, result);
16185 }
16186 else
16187 return orig_x;
16188 }
16189 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
16190 {
16191 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
16192 if (result == NULL_RTX)
16193 return orig_x;
16194 }
16195 return result;
16196 }
16197
16198 /* If X is a machine specific address (i.e. a symbol or label being
16199 referenced as a displacement from the GOT implemented using an
16200 UNSPEC), then return the base term. Otherwise return X. */
16201
16202 rtx
16203 ix86_find_base_term (rtx x)
16204 {
16205 rtx term;
16206
16207 if (TARGET_64BIT)
16208 {
16209 if (GET_CODE (x) != CONST)
16210 return x;
16211 term = XEXP (x, 0);
16212 if (GET_CODE (term) == PLUS
16213 && CONST_INT_P (XEXP (term, 1)))
16214 term = XEXP (term, 0);
16215 if (GET_CODE (term) != UNSPEC
16216 || (XINT (term, 1) != UNSPEC_GOTPCREL
16217 && XINT (term, 1) != UNSPEC_PCREL))
16218 return x;
16219
16220 return XVECEXP (term, 0, 0);
16221 }
16222
16223 return ix86_delegitimize_address (x);
16224 }
16225 \f
16226 static void
16227 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
16228 bool fp, FILE *file)
16229 {
16230 const char *suffix;
16231
16232 if (mode == CCFPmode || mode == CCFPUmode)
16233 {
16234 code = ix86_fp_compare_code_to_integer (code);
16235 mode = CCmode;
16236 }
16237 if (reverse)
16238 code = reverse_condition (code);
16239
16240 switch (code)
16241 {
16242 case EQ:
16243 switch (mode)
16244 {
16245 case CCAmode:
16246 suffix = "a";
16247 break;
16248 case CCCmode:
16249 suffix = "c";
16250 break;
16251 case CCOmode:
16252 suffix = "o";
16253 break;
16254 case CCPmode:
16255 suffix = "p";
16256 break;
16257 case CCSmode:
16258 suffix = "s";
16259 break;
16260 default:
16261 suffix = "e";
16262 break;
16263 }
16264 break;
16265 case NE:
16266 switch (mode)
16267 {
16268 case CCAmode:
16269 suffix = "na";
16270 break;
16271 case CCCmode:
16272 suffix = "nc";
16273 break;
16274 case CCOmode:
16275 suffix = "no";
16276 break;
16277 case CCPmode:
16278 suffix = "np";
16279 break;
16280 case CCSmode:
16281 suffix = "ns";
16282 break;
16283 default:
16284 suffix = "ne";
16285 break;
16286 }
16287 break;
16288 case GT:
16289 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
16290 suffix = "g";
16291 break;
16292 case GTU:
16293 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
16294 Those same assemblers have the same but opposite lossage on cmov. */
16295 if (mode == CCmode)
16296 suffix = fp ? "nbe" : "a";
16297 else
16298 gcc_unreachable ();
16299 break;
16300 case LT:
16301 switch (mode)
16302 {
16303 case CCNOmode:
16304 case CCGOCmode:
16305 suffix = "s";
16306 break;
16307
16308 case CCmode:
16309 case CCGCmode:
16310 suffix = "l";
16311 break;
16312
16313 default:
16314 gcc_unreachable ();
16315 }
16316 break;
16317 case LTU:
16318 if (mode == CCmode)
16319 suffix = "b";
16320 else if (mode == CCCmode)
16321 suffix = fp ? "b" : "c";
16322 else
16323 gcc_unreachable ();
16324 break;
16325 case GE:
16326 switch (mode)
16327 {
16328 case CCNOmode:
16329 case CCGOCmode:
16330 suffix = "ns";
16331 break;
16332
16333 case CCmode:
16334 case CCGCmode:
16335 suffix = "ge";
16336 break;
16337
16338 default:
16339 gcc_unreachable ();
16340 }
16341 break;
16342 case GEU:
16343 if (mode == CCmode)
16344 suffix = "nb";
16345 else if (mode == CCCmode)
16346 suffix = fp ? "nb" : "nc";
16347 else
16348 gcc_unreachable ();
16349 break;
16350 case LE:
16351 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
16352 suffix = "le";
16353 break;
16354 case LEU:
16355 if (mode == CCmode)
16356 suffix = "be";
16357 else
16358 gcc_unreachable ();
16359 break;
16360 case UNORDERED:
16361 suffix = fp ? "u" : "p";
16362 break;
16363 case ORDERED:
16364 suffix = fp ? "nu" : "np";
16365 break;
16366 default:
16367 gcc_unreachable ();
16368 }
16369 fputs (suffix, file);
16370 }
16371
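/* For example, EQ in CCmode prints as "e" above, so a "j%C1" style
   template becomes "je"; with REVERSE set the same comparison prints
   "ne" instead.  (Illustrative sketch only.)  */
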
16372 /* Print the name of register X to FILE based on its machine mode and number.
16373 If CODE is 'w', pretend the mode is HImode.
16374 If CODE is 'b', pretend the mode is QImode.
16375 If CODE is 'k', pretend the mode is SImode.
16376 If CODE is 'q', pretend the mode is DImode.
16377 If CODE is 'x', pretend the mode is V4SFmode.
16378 If CODE is 't', pretend the mode is V8SFmode.
16379 If CODE is 'g', pretend the mode is V16SFmode.
16380 If CODE is 'h', pretend the reg is the 'high' byte register.
16381 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
16382 If CODE is 'd', duplicate the operand for AVX instruction.
16383 */
16384
16385 void
16386 print_reg (rtx x, int code, FILE *file)
16387 {
16388 const char *reg;
16389 int msize;
16390 unsigned int regno;
16391 bool duplicated;
16392
16393 if (ASSEMBLER_DIALECT == ASM_ATT)
16394 putc ('%', file);
16395
16396 if (x == pc_rtx)
16397 {
16398 gcc_assert (TARGET_64BIT);
16399 fputs ("rip", file);
16400 return;
16401 }
16402
16403 if (code == 'y' && STACK_TOP_P (x))
16404 {
16405 fputs ("st(0)", file);
16406 return;
16407 }
16408
16409 if (code == 'w')
16410 msize = 2;
16411 else if (code == 'b')
16412 msize = 1;
16413 else if (code == 'k')
16414 msize = 4;
16415 else if (code == 'q')
16416 msize = 8;
16417 else if (code == 'h')
16418 msize = 0;
16419 else if (code == 'x')
16420 msize = 16;
16421 else if (code == 't')
16422 msize = 32;
16423 else if (code == 'g')
16424 msize = 64;
16425 else
16426 msize = GET_MODE_SIZE (GET_MODE (x));
16427
16428 regno = true_regnum (x);
16429
16430 gcc_assert (regno != ARG_POINTER_REGNUM
16431 && regno != FRAME_POINTER_REGNUM
16432 && regno != FLAGS_REG
16433 && regno != FPSR_REG
16434 && regno != FPCR_REG);
16435
16436 duplicated = code == 'd' && TARGET_AVX;
16437
16438 switch (msize)
16439 {
16440 case 8:
16441 case 4:
16442 if (LEGACY_INT_REGNO_P (regno))
16443 putc (msize == 8 && TARGET_64BIT ? 'r' : 'e', file);
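/* FALLTHRU */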
16444 case 16:
16445 case 12:
16446 case 2:
16447 normal:
16448 reg = hi_reg_name[regno];
16449 break;
16450 case 1:
16451 if (regno >= ARRAY_SIZE (qi_reg_name))
16452 goto normal;
16453 reg = qi_reg_name[regno];
16454 break;
16455 case 0:
16456 if (regno >= ARRAY_SIZE (qi_high_reg_name))
16457 goto normal;
16458 reg = qi_high_reg_name[regno];
16459 break;
16460 case 32:
16461 case 64:
16462 if (SSE_REGNO_P (regno))
16463 {
16464 gcc_assert (!duplicated);
16465 putc (msize == 32 ? 'y' : 'z', file);
16466 reg = hi_reg_name[regno] + 1;
16467 break;
16468 }
16469 goto normal;
16470 default:
16471 gcc_unreachable ();
16472 }
16473
16474 fputs (reg, file);
16475
16476 /* Irritatingly, AMD extended registers use a
16477 different naming convention: "r%d[bwd]". */
16478 if (REX_INT_REGNO_P (regno))
16479 {
16480 gcc_assert (TARGET_64BIT);
16481 switch (msize)
16482 {
16483 case 0:
16484 error ("extended registers have no high halves");
16485 break;
16486 case 1:
16487 putc ('b', file);
16488 break;
16489 case 2:
16490 putc ('w', file);
16491 break;
16492 case 4:
16493 putc ('d', file);
16494 break;
16495 case 8:
16496 /* no suffix */
16497 break;
16498 default:
16499 error ("unsupported operand size for extended register");
16500 break;
16501 }
16502 return;
16503 }
16504
16505 if (duplicated)
16506 {
16507 if (ASSEMBLER_DIALECT == ASM_ATT)
16508 fprintf (file, ", %%%s", reg);
16509 else
16510 fprintf (file, ", %s", reg);
16511 }
16512 }
16513
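/* For instance, for the hard register AX the CODE letters handled
   above select "al" ('b'), "ah" ('h'), "ax" ('w'), "eax" ('k') or
   "rax" ('q'), with a leading '%' in AT&T syntax.  */
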
16514 /* Meaning of CODE:
16515 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
16516 C -- print opcode suffix for set/cmov insn.
16517 c -- like C, but print reversed condition
16518 F,f -- likewise, but for floating-point.
16519 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
16520 otherwise nothing
16521 R -- print embedded rounding and sae.
16522 r -- print only sae.
16523 z -- print the opcode suffix for the size of the current operand.
16524 Z -- likewise, with special suffixes for x87 instructions.
16525 * -- print a star (in certain assembler syntax)
16526 A -- print an absolute memory reference.
16527 E -- print address with DImode register names if TARGET_64BIT.
16528 w -- print the operand as if it's a "word" (HImode) even if it isn't.
16529 s -- print a shift double count, followed by the assembler's argument
16530 delimiter.
16531 b -- print the QImode name of the register for the indicated operand.
16532 %b0 would print %al if operands[0] is reg 0.
16533 w -- likewise, print the HImode name of the register.
16534 k -- likewise, print the SImode name of the register.
16535 q -- likewise, print the DImode name of the register.
16536 x -- likewise, print the V4SFmode name of the register.
16537 t -- likewise, print the V8SFmode name of the register.
16538 g -- likewise, print the V16SFmode name of the register.
16539 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
16540 y -- print "st(0)" instead of "st" as a register.
16541 d -- print duplicated register operand for AVX instruction.
16542 D -- print condition for SSE cmp instruction.
16543 P -- if PIC, print an @PLT suffix.
16544 p -- print raw symbol name.
16545 X -- don't print any sort of PIC '@' suffix for a symbol.
16546 & -- print some in-use local-dynamic symbol name.
16547 H -- print a memory address offset by 8; used for sse high-parts
16548 Y -- print condition for XOP pcom* instruction.
16549 + -- print a branch hint as 'cs' or 'ds' prefix
16550 ; -- print a semicolon (after prefixes due to bug in older gas).
16551 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
16552 @ -- print a segment register of thread base pointer load
16553 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
16554 ! -- print MPX prefix for jxx/call/ret instructions if required.
16555 */
16556
16557 void
16558 ix86_print_operand (FILE *file, rtx x, int code)
16559 {
16560 if (code)
16561 {
16562 switch (code)
16563 {
16564 case 'A':
16565 switch (ASSEMBLER_DIALECT)
16566 {
16567 case ASM_ATT:
16568 putc ('*', file);
16569 break;
16570
16571 case ASM_INTEL:
16572 /* Intel syntax. For absolute addresses, registers should not
16573 be surrounded by brackets. */
16574 if (!REG_P (x))
16575 {
16576 putc ('[', file);
16577 ix86_print_operand (file, x, 0);
16578 putc (']', file);
16579 return;
16580 }
16581 break;
16582
16583 default:
16584 gcc_unreachable ();
16585 }
16586
16587 ix86_print_operand (file, x, 0);
16588 return;
16589
16590 case 'E':
16591 /* Wrap address in an UNSPEC to declare special handling. */
16592 if (TARGET_64BIT)
16593 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
16594
16595 output_address (x);
16596 return;
16597
16598 case 'L':
16599 if (ASSEMBLER_DIALECT == ASM_ATT)
16600 putc ('l', file);
16601 return;
16602
16603 case 'W':
16604 if (ASSEMBLER_DIALECT == ASM_ATT)
16605 putc ('w', file);
16606 return;
16607
16608 case 'B':
16609 if (ASSEMBLER_DIALECT == ASM_ATT)
16610 putc ('b', file);
16611 return;
16612
16613 case 'Q':
16614 if (ASSEMBLER_DIALECT == ASM_ATT)
16615 putc ('l', file);
16616 return;
16617
16618 case 'S':
16619 if (ASSEMBLER_DIALECT == ASM_ATT)
16620 putc ('s', file);
16621 return;
16622
16623 case 'T':
16624 if (ASSEMBLER_DIALECT == ASM_ATT)
16625 putc ('t', file);
16626 return;
16627
16628 case 'O':
16629 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
16630 if (ASSEMBLER_DIALECT != ASM_ATT)
16631 return;
16632
16633 switch (GET_MODE_SIZE (GET_MODE (x)))
16634 {
16635 case 2:
16636 putc ('w', file);
16637 break;
16638
16639 case 4:
16640 putc ('l', file);
16641 break;
16642
16643 case 8:
16644 putc ('q', file);
16645 break;
16646
16647 default:
16648 output_operand_lossage
16649 ("invalid operand size for operand code 'O'");
16650 return;
16651 }
16652
16653 putc ('.', file);
16654 #endif
16655 return;
16656
16657 case 'z':
16658 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
16659 {
16660 /* Opcodes don't get size suffixes if using Intel opcodes. */
16661 if (ASSEMBLER_DIALECT == ASM_INTEL)
16662 return;
16663
16664 switch (GET_MODE_SIZE (GET_MODE (x)))
16665 {
16666 case 1:
16667 putc ('b', file);
16668 return;
16669
16670 case 2:
16671 putc ('w', file);
16672 return;
16673
16674 case 4:
16675 putc ('l', file);
16676 return;
16677
16678 case 8:
16679 putc ('q', file);
16680 return;
16681
16682 default:
16683 output_operand_lossage
16684 ("invalid operand size for operand code 'z'");
16685 return;
16686 }
16687 }
16688
16689 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16690 warning
16691 (0, "non-integer operand used with operand code 'z'");
16692 /* FALLTHRU */
16693
16694 case 'Z':
16695 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
16696 if (ASSEMBLER_DIALECT == ASM_INTEL)
16697 return;
16698
16699 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
16700 {
16701 switch (GET_MODE_SIZE (GET_MODE (x)))
16702 {
16703 case 2:
16704 #ifdef HAVE_AS_IX86_FILDS
16705 putc ('s', file);
16706 #endif
16707 return;
16708
16709 case 4:
16710 putc ('l', file);
16711 return;
16712
16713 case 8:
16714 #ifdef HAVE_AS_IX86_FILDQ
16715 putc ('q', file);
16716 #else
16717 fputs ("ll", file);
16718 #endif
16719 return;
16720
16721 default:
16722 break;
16723 }
16724 }
16725 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16726 {
16727 /* 387 opcodes don't get size suffixes
16728 if the operands are registers. */
16729 if (STACK_REG_P (x))
16730 return;
16731
16732 switch (GET_MODE_SIZE (GET_MODE (x)))
16733 {
16734 case 4:
16735 putc ('s', file);
16736 return;
16737
16738 case 8:
16739 putc ('l', file);
16740 return;
16741
16742 case 12:
16743 case 16:
16744 putc ('t', file);
16745 return;
16746
16747 default:
16748 break;
16749 }
16750 }
16751 else
16752 {
16753 output_operand_lossage
16754 ("invalid operand type used with operand code 'Z'");
16755 return;
16756 }
16757
16758 output_operand_lossage
16759 ("invalid operand size for operand code 'Z'");
16760 return;
16761
16762 case 'd':
16763 case 'b':
16764 case 'w':
16765 case 'k':
16766 case 'q':
16767 case 'h':
16768 case 't':
16769 case 'g':
16770 case 'y':
16771 case 'x':
16772 case 'X':
16773 case 'P':
16774 case 'p':
16775 break;
16776
16777 case 's':
16778 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
16779 {
16780 ix86_print_operand (file, x, 0);
16781 fputs (", ", file);
16782 }
16783 return;
16784
16785 case 'Y':
16786 switch (GET_CODE (x))
16787 {
16788 case NE:
16789 fputs ("neq", file);
16790 break;
16791 case EQ:
16792 fputs ("eq", file);
16793 break;
16794 case GE:
16795 case GEU:
16796 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
16797 break;
16798 case GT:
16799 case GTU:
16800 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
16801 break;
16802 case LE:
16803 case LEU:
16804 fputs ("le", file);
16805 break;
16806 case LT:
16807 case LTU:
16808 fputs ("lt", file);
16809 break;
16810 case UNORDERED:
16811 fputs ("unord", file);
16812 break;
16813 case ORDERED:
16814 fputs ("ord", file);
16815 break;
16816 case UNEQ:
16817 fputs ("ueq", file);
16818 break;
16819 case UNGE:
16820 fputs ("nlt", file);
16821 break;
16822 case UNGT:
16823 fputs ("nle", file);
16824 break;
16825 case UNLE:
16826 fputs ("ule", file);
16827 break;
16828 case UNLT:
16829 fputs ("ult", file);
16830 break;
16831 case LTGT:
16832 fputs ("une", file);
16833 break;
16834 default:
16835 output_operand_lossage ("operand is not a condition code, "
16836 "invalid operand code 'Y'");
16837 return;
16838 }
16839 return;
16840
16841 case 'D':
16842 /* Little bit of braindamage here. The SSE compare instructions
16843 use completely different names for the comparisons than the
16844 fp conditional moves do. */
16845 switch (GET_CODE (x))
16846 {
16847 case UNEQ:
16848 if (TARGET_AVX)
16849 {
16850 fputs ("eq_us", file);
16851 break;
16852 }
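/* FALLTHRU */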
16853 case EQ:
16854 fputs ("eq", file);
16855 break;
16856 case UNLT:
16857 if (TARGET_AVX)
16858 {
16859 fputs ("nge", file);
16860 break;
16861 }
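/* FALLTHRU */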
16862 case LT:
16863 fputs ("lt", file);
16864 break;
16865 case UNLE:
16866 if (TARGET_AVX)
16867 {
16868 fputs ("ngt", file);
16869 break;
16870 }
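/* FALLTHRU */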
16871 case LE:
16872 fputs ("le", file);
16873 break;
16874 case UNORDERED:
16875 fputs ("unord", file);
16876 break;
16877 case LTGT:
16878 if (TARGET_AVX)
16879 {
16880 fputs ("neq_oq", file);
16881 break;
16882 }
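/* FALLTHRU */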
16883 case NE:
16884 fputs ("neq", file);
16885 break;
16886 case GE:
16887 if (TARGET_AVX)
16888 {
16889 fputs ("ge", file);
16890 break;
16891 }
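/* FALLTHRU */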
16892 case UNGE:
16893 fputs ("nlt", file);
16894 break;
16895 case GT:
16896 if (TARGET_AVX)
16897 {
16898 fputs ("gt", file);
16899 break;
16900 }
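/* FALLTHRU */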
16901 case UNGT:
16902 fputs ("nle", file);
16903 break;
16904 case ORDERED:
16905 fputs ("ord", file);
16906 break;
16907 default:
16908 output_operand_lossage ("operand is not a condition code, "
16909 "invalid operand code 'D'");
16910 return;
16911 }
16912 return;
16913
16914 case 'F':
16915 case 'f':
16916 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
16917 if (ASSEMBLER_DIALECT == ASM_ATT)
16918 putc ('.', file);
16919 #endif
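/* FALLTHRU */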
16920
16921 case 'C':
16922 case 'c':
16923 if (!COMPARISON_P (x))
16924 {
16925 output_operand_lossage ("operand is not a condition code, "
16926 "invalid operand code '%c'", code);
16927 return;
16928 }
16929 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
16930 code == 'c' || code == 'f',
16931 code == 'F' || code == 'f',
16932 file);
16933 return;
16934
16935 case 'H':
16936 if (!offsettable_memref_p (x))
16937 {
16938 output_operand_lossage ("operand is not an offsettable memory "
16939 "reference, invalid operand code 'H'");
16940 return;
16941 }
16942 /* It doesn't actually matter what mode we use here, as we're
16943 only going to use this for printing. */
16944 x = adjust_address_nv (x, DImode, 8);
16945 /* Output 'qword ptr' for intel assembler dialect. */
16946 if (ASSEMBLER_DIALECT == ASM_INTEL)
16947 code = 'q';
16948 break;
16949
16950 case 'K':
16951 gcc_assert (CONST_INT_P (x));
16952
16953 if (INTVAL (x) & IX86_HLE_ACQUIRE)
16954 #ifdef HAVE_AS_IX86_HLE
16955 fputs ("xacquire ", file);
16956 #else
16957 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
16958 #endif
16959 else if (INTVAL (x) & IX86_HLE_RELEASE)
16960 #ifdef HAVE_AS_IX86_HLE
16961 fputs ("xrelease ", file);
16962 #else
16963 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
16964 #endif
16965 /* We do not want to print value of the operand. */
16966 return;
16967
16968 case 'N':
16969 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
16970 fputs ("{z}", file);
16971 return;
16972
16973 case 'r':
16974 gcc_assert (CONST_INT_P (x));
16975 gcc_assert (INTVAL (x) == ROUND_SAE);
16976
16977 if (ASSEMBLER_DIALECT == ASM_INTEL)
16978 fputs (", ", file);
16979
16980 fputs ("{sae}", file);
16981
16982 if (ASSEMBLER_DIALECT == ASM_ATT)
16983 fputs (", ", file);
16984
16985 return;
16986
16987 case 'R':
16988 gcc_assert (CONST_INT_P (x));
16989
16990 if (ASSEMBLER_DIALECT == ASM_INTEL)
16991 fputs (", ", file);
16992
16993 switch (INTVAL (x))
16994 {
16995 case ROUND_NEAREST_INT | ROUND_SAE:
16996 fputs ("{rn-sae}", file);
16997 break;
16998 case ROUND_NEG_INF | ROUND_SAE:
16999 fputs ("{rd-sae}", file);
17000 break;
17001 case ROUND_POS_INF | ROUND_SAE:
17002 fputs ("{ru-sae}", file);
17003 break;
17004 case ROUND_ZERO | ROUND_SAE:
17005 fputs ("{rz-sae}", file);
17006 break;
17007 default:
17008 gcc_unreachable ();
17009 }
17010
17011 if (ASSEMBLER_DIALECT == ASM_ATT)
17012 fputs (", ", file);
17013
17014 return;
17015
17016 case '*':
17017 if (ASSEMBLER_DIALECT == ASM_ATT)
17018 putc ('*', file);
17019 return;
17020
17021 case '&':
17022 {
17023 const char *name = get_some_local_dynamic_name ();
17024 if (name == NULL)
17025 output_operand_lossage ("'%%&' used without any "
17026 "local dynamic TLS references");
17027 else
17028 assemble_name (file, name);
17029 return;
17030 }
17031
17032 case '+':
17033 {
17034 rtx x;
17035
17036 if (!optimize
17037 || optimize_function_for_size_p (cfun)
17038 || !TARGET_BRANCH_PREDICTION_HINTS)
17039 return;
17040
17041 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
17042 if (x)
17043 {
17044 int pred_val = XINT (x, 0);
17045
17046 if (pred_val < REG_BR_PROB_BASE * 45 / 100
17047 || pred_val > REG_BR_PROB_BASE * 55 / 100)
17048 {
17049 bool taken = pred_val > REG_BR_PROB_BASE / 2;
17050 bool cputaken
17051 = final_forward_branch_p (current_output_insn) == 0;
17052
17053 /* Emit hints only in the case default branch prediction
17054 heuristics would fail. */
17055 if (taken != cputaken)
17056 {
17057 /* We use 3e (DS) prefix for taken branches and
17058 2e (CS) prefix for not taken branches. */
17059 if (taken)
17060 fputs ("ds ; ", file);
17061 else
17062 fputs ("cs ; ", file);
17063 }
17064 }
17065 }
17066 return;
17067 }
17068
17069 case ';':
17070 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
17071 putc (';', file);
17072 #endif
17073 return;
17074
17075 case '@':
17076 if (ASSEMBLER_DIALECT == ASM_ATT)
17077 putc ('%', file);
17078
17079 /* The kernel uses a different segment register for performance
17080 reasons; a system call would not have to trash the userspace
17081 segment register, which would be expensive. */
17082 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
17083 fputs ("fs", file);
17084 else
17085 fputs ("gs", file);
17086 return;
17087
17088 case '~':
17089 putc (TARGET_AVX2 ? 'i' : 'f', file);
17090 return;
17091
17092 case '^':
17093 if (TARGET_64BIT && Pmode != word_mode)
17094 fputs ("addr32 ", file);
17095 return;
17096
17097 case '!':
17098 if (ix86_bnd_prefixed_insn_p (current_output_insn))
17099 fputs ("bnd ", file);
17100 return;
17101
17102 default:
17103 output_operand_lossage ("invalid operand code '%c'", code);
17104 }
17105 }
17106
17107 if (REG_P (x))
17108 print_reg (x, code, file);
17109
17110 else if (MEM_P (x))
17111 {
17112 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
17113 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
17114 && GET_MODE (x) != BLKmode)
17115 {
17116 const char * size;
17117 switch (GET_MODE_SIZE (GET_MODE (x)))
17118 {
17119 case 1: size = "BYTE"; break;
17120 case 2: size = "WORD"; break;
17121 case 4: size = "DWORD"; break;
17122 case 8: size = "QWORD"; break;
17123 case 12: size = "TBYTE"; break;
17124 case 16:
17125 if (GET_MODE (x) == XFmode)
17126 size = "TBYTE";
17127 else
17128 size = "XMMWORD";
17129 break;
17130 case 32: size = "YMMWORD"; break;
17131 case 64: size = "ZMMWORD"; break;
17132 default:
17133 gcc_unreachable ();
17134 }
17135
17136 /* Check for explicit size override (codes 'b', 'w', 'k',
17137 'q' and 'x') */
17138 if (code == 'b')
17139 size = "BYTE";
17140 else if (code == 'w')
17141 size = "WORD";
17142 else if (code == 'k')
17143 size = "DWORD";
17144 else if (code == 'q')
17145 size = "QWORD";
17146 else if (code == 'x')
17147 size = "XMMWORD";
17148
17149 fputs (size, file);
17150 fputs (" PTR ", file);
17151 }
17152
17153 x = XEXP (x, 0);
17154 /* Avoid (%rip) for call operands. */
17155 if (CONSTANT_ADDRESS_P (x) && code == 'P'
17156 && !CONST_INT_P (x))
17157 output_addr_const (file, x);
17158 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
17159 output_operand_lossage ("invalid constraints for operand");
17160 else
17161 output_address (x);
17162 }
17163
17164 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
17165 {
17166 long l;
17167
17168 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17169
17170 if (ASSEMBLER_DIALECT == ASM_ATT)
17171 putc ('$', file);
17172 /* Sign extend 32bit SFmode immediate to 8 bytes. */
17173 if (code == 'q')
17174 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
17175 (unsigned long long) (int) l);
17176 else
17177 fprintf (file, "0x%08x", (unsigned int) l);
17178 }
17179
17180 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
17181 {
17182 long l[2];
17183
17184 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17185
17186 if (ASSEMBLER_DIALECT == ASM_ATT)
17187 putc ('$', file);
17188 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
17189 }
17190
17191 /* These float cases don't actually occur as immediate operands. */
17192 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
17193 {
17194 char dstr[30];
17195
17196 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
17197 fputs (dstr, file);
17198 }
17199
17200 else
17201 {
17202 /* We have patterns that allow zero sets of memory, for instance.
17203 In 64-bit mode, we should probably support all 8-byte vectors,
17204 since we can in fact encode that into an immediate. */
17205 if (GET_CODE (x) == CONST_VECTOR)
17206 {
17207 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
17208 x = const0_rtx;
17209 }
17210
17211 if (code != 'P' && code != 'p')
17212 {
17213 if (CONST_INT_P (x))
17214 {
17215 if (ASSEMBLER_DIALECT == ASM_ATT)
17216 putc ('$', file);
17217 }
17218 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
17219 || GET_CODE (x) == LABEL_REF)
17220 {
17221 if (ASSEMBLER_DIALECT == ASM_ATT)
17222 putc ('$', file);
17223 else
17224 fputs ("OFFSET FLAT:", file);
17225 }
17226 }
17227 if (CONST_INT_P (x))
17228 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
17229 else if (flag_pic || MACHOPIC_INDIRECT)
17230 output_pic_addr_const (file, x, code);
17231 else
17232 output_addr_const (file, x);
17233 }
17234 }
17235
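/* For example, a CONST_INT operand prints as "$42" in AT&T syntax and
   as plain "42" in Intel syntax, while a DFmode memory operand gets
   "QWORD PTR " prepended in Intel syntax by the code above.  */
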
17236 static bool
17237 ix86_print_operand_punct_valid_p (unsigned char code)
17238 {
17239 return (code == '@' || code == '*' || code == '+' || code == '&'
17240 || code == ';' || code == '~' || code == '^' || code == '!');
17241 }
17242 \f
17243 /* Print a memory operand whose address is ADDR. */
17244
17245 static void
17246 ix86_print_operand_address (FILE *file, rtx addr)
17247 {
17248 struct ix86_address parts;
17249 rtx base, index, disp;
17250 int scale;
17251 int ok;
17252 bool vsib = false;
17253 int code = 0;
17254
17255 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
17256 {
17257 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
17258 gcc_assert (parts.index == NULL_RTX);
17259 parts.index = XVECEXP (addr, 0, 1);
17260 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
17261 addr = XVECEXP (addr, 0, 0);
17262 vsib = true;
17263 }
17264 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
17265 {
17266 gcc_assert (TARGET_64BIT);
17267 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
17268 code = 'q';
17269 }
17270 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
17271 {
17272 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
17273 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
17274 if (parts.base != NULL_RTX)
17275 {
17276 parts.index = parts.base;
17277 parts.scale = 1;
17278 }
17279 parts.base = XVECEXP (addr, 0, 0);
17280 addr = XVECEXP (addr, 0, 0);
17281 }
17282 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
17283 {
17284 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
17285 gcc_assert (parts.index == NULL_RTX);
17286 parts.index = XVECEXP (addr, 0, 1);
17287 addr = XVECEXP (addr, 0, 0);
17288 }
17289 else
17290 ok = ix86_decompose_address (addr, &parts);
17291
17292 gcc_assert (ok);
17293
17294 base = parts.base;
17295 index = parts.index;
17296 disp = parts.disp;
17297 scale = parts.scale;
17298
17299 switch (parts.seg)
17300 {
17301 case SEG_DEFAULT:
17302 break;
17303 case SEG_FS:
17304 case SEG_GS:
17305 if (ASSEMBLER_DIALECT == ASM_ATT)
17306 putc ('%', file);
17307 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
17308 break;
17309 default:
17310 gcc_unreachable ();
17311 }
17312
17313 /* Use one byte shorter RIP relative addressing for 64bit mode. */
17314 if (TARGET_64BIT && !base && !index)
17315 {
17316 rtx symbol = disp;
17317
17318 if (GET_CODE (disp) == CONST
17319 && GET_CODE (XEXP (disp, 0)) == PLUS
17320 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
17321 symbol = XEXP (XEXP (disp, 0), 0);
17322
17323 if (GET_CODE (symbol) == LABEL_REF
17324 || (GET_CODE (symbol) == SYMBOL_REF
17325 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
17326 base = pc_rtx;
17327 }
17328 if (!base && !index)
17329 {
17330 /* A displacement-only address requires special attention. */
17331
17332 if (CONST_INT_P (disp))
17333 {
17334 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
17335 fputs ("ds:", file);
17336 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
17337 }
17338 else if (flag_pic)
17339 output_pic_addr_const (file, disp, 0);
17340 else
17341 output_addr_const (file, disp);
17342 }
17343 else
17344 {
17345 /* Print SImode register names to force addr32 prefix. */
17346 if (SImode_address_operand (addr, VOIDmode))
17347 {
17348 #ifdef ENABLE_CHECKING
17349 gcc_assert (TARGET_64BIT);
17350 switch (GET_CODE (addr))
17351 {
17352 case SUBREG:
17353 gcc_assert (GET_MODE (addr) == SImode);
17354 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
17355 break;
17356 case ZERO_EXTEND:
17357 case AND:
17358 gcc_assert (GET_MODE (addr) == DImode);
17359 break;
17360 default:
17361 gcc_unreachable ();
17362 }
17363 #endif
17364 gcc_assert (!code);
17365 code = 'k';
17366 }
17367 else if (code == 0
17368 && TARGET_X32
17369 && disp
17370 && CONST_INT_P (disp)
17371 && INTVAL (disp) < -16*1024*1024)
17372 {
17373 /* X32 runs in 64-bit mode, where displacement, DISP, in
17374 address DISP(%r64), is encoded as 32-bit immediate sign-
17375 extended from 32-bit to 64-bit. For -0x40000300(%r64),
17376 address is %r64 + 0xffffffffbffffd00. When %r64 <
17377 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
17378 which is invalid for x32. The correct address is %r64
17379 - 0x40000300 == 0xf7ffdd64. To properly encode
17380 -0x40000300(%r64) for x32, we zero-extend negative
17381 displacement by forcing addr32 prefix which truncates
17382 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
17383 zero-extend all negative displacements, including -1(%rsp).
17384 However, for small negative displacements, sign-extension
17385 won't cause overflow. We only zero-extend negative
17386 displacements if they are < -16*1024*1024, which is also used
17387 to check legitimate address displacements for PIC. */
17388 code = 'k';
17389 }
17390
17391 if (ASSEMBLER_DIALECT == ASM_ATT)
17392 {
17393 if (disp)
17394 {
17395 if (flag_pic)
17396 output_pic_addr_const (file, disp, 0);
17397 else if (GET_CODE (disp) == LABEL_REF)
17398 output_asm_label (disp);
17399 else
17400 output_addr_const (file, disp);
17401 }
17402
17403 putc ('(', file);
17404 if (base)
17405 print_reg (base, code, file);
17406 if (index)
17407 {
17408 putc (',', file);
17409 print_reg (index, vsib ? 0 : code, file);
17410 if (scale != 1 || vsib)
17411 fprintf (file, ",%d", scale);
17412 }
17413 putc (')', file);
17414 }
17415 else
17416 {
17417 rtx offset = NULL_RTX;
17418
17419 if (disp)
17420 {
17421 /* Pull out the offset of a symbol; print any symbol itself. */
17422 if (GET_CODE (disp) == CONST
17423 && GET_CODE (XEXP (disp, 0)) == PLUS
17424 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
17425 {
17426 offset = XEXP (XEXP (disp, 0), 1);
17427 disp = gen_rtx_CONST (VOIDmode,
17428 XEXP (XEXP (disp, 0), 0));
17429 }
17430
17431 if (flag_pic)
17432 output_pic_addr_const (file, disp, 0);
17433 else if (GET_CODE (disp) == LABEL_REF)
17434 output_asm_label (disp);
17435 else if (CONST_INT_P (disp))
17436 offset = disp;
17437 else
17438 output_addr_const (file, disp);
17439 }
17440
17441 putc ('[', file);
17442 if (base)
17443 {
17444 print_reg (base, code, file);
17445 if (offset)
17446 {
17447 if (INTVAL (offset) >= 0)
17448 putc ('+', file);
17449 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
17450 }
17451 }
17452 else if (offset)
17453 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
17454 else
17455 putc ('0', file);
17456
17457 if (index)
17458 {
17459 putc ('+', file);
17460 print_reg (index, vsib ? 0 : code, file);
17461 if (scale != 1 || vsib)
17462 fprintf (file, "*%d", scale);
17463 }
17464 putc (']', file);
17465 }
17466 }
17467 }
17468
17469 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
17470
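/* Rough meaning of the TLS relocation suffixes emitted below (illustrative
   summary; see the ELF TLS ABI documents for the precise definitions):
   @tpoff and @ntpoff request thread-pointer-relative offsets (local exec),
   @gottpoff, @gotntpoff and @indntpoff refer to a GOT slot holding such an
   offset (initial exec), and @dtpoff requests an offset within the module's
   TLS block (dynamic models).  */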
17471 static bool
17472 i386_asm_output_addr_const_extra (FILE *file, rtx x)
17473 {
17474 rtx op;
17475
17476 if (GET_CODE (x) != UNSPEC)
17477 return false;
17478
17479 op = XVECEXP (x, 0, 0);
17480 switch (XINT (x, 1))
17481 {
17482 case UNSPEC_GOTTPOFF:
17483 output_addr_const (file, op);
17484 /* FIXME: This might be @TPOFF in Sun ld. */
17485 fputs ("@gottpoff", file);
17486 break;
17487 case UNSPEC_TPOFF:
17488 output_addr_const (file, op);
17489 fputs ("@tpoff", file);
17490 break;
17491 case UNSPEC_NTPOFF:
17492 output_addr_const (file, op);
17493 if (TARGET_64BIT)
17494 fputs ("@tpoff", file);
17495 else
17496 fputs ("@ntpoff", file);
17497 break;
17498 case UNSPEC_DTPOFF:
17499 output_addr_const (file, op);
17500 fputs ("@dtpoff", file);
17501 break;
17502 case UNSPEC_GOTNTPOFF:
17503 output_addr_const (file, op);
17504 if (TARGET_64BIT)
17505 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
17506 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
17507 else
17508 fputs ("@gotntpoff", file);
17509 break;
17510 case UNSPEC_INDNTPOFF:
17511 output_addr_const (file, op);
17512 fputs ("@indntpoff", file);
17513 break;
17514 #if TARGET_MACHO
17515 case UNSPEC_MACHOPIC_OFFSET:
17516 output_addr_const (file, op);
17517 putc ('-', file);
17518 machopic_output_function_base_name (file);
17519 break;
17520 #endif
17521
17522 case UNSPEC_STACK_CHECK:
17523 {
17524 int offset;
17525
17526 gcc_assert (flag_split_stack);
17527
17528 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
17529 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
17530 #else
17531 gcc_unreachable ();
17532 #endif
17533
17534 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
17535 }
17536 break;
17537
17538 default:
17539 return false;
17540 }
17541
17542 return true;
17543 }
17544 \f
17545 /* Split one or more double-mode RTL references into pairs of half-mode
17546 references. The RTL can be REG, offsettable MEM, integer constant, or
17547 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
17548 split and "num" is its length. lo_half and hi_half are output arrays
17549 that parallel "operands". */
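/* For example (little-endian x86), a DImode immediate 0x1122334455667788
   splits into lo_half 0x55667788 and hi_half 0x11223344, and a DImode MEM
   at address X splits into SImode MEMs at X and X+4.  */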
17550
17551 void
17552 split_double_mode (machine_mode mode, rtx operands[],
17553 int num, rtx lo_half[], rtx hi_half[])
17554 {
17555 machine_mode half_mode;
17556 unsigned int byte;
17557
17558 switch (mode)
17559 {
17560 case TImode:
17561 half_mode = DImode;
17562 break;
17563 case DImode:
17564 half_mode = SImode;
17565 break;
17566 default:
17567 gcc_unreachable ();
17568 }
17569
17570 byte = GET_MODE_SIZE (half_mode);
17571
17572 while (num--)
17573 {
17574 rtx op = operands[num];
17575
17576 /* simplify_subreg refuses to split volatile memory addresses,
17577 but we still have to handle them. */
17578 if (MEM_P (op))
17579 {
17580 lo_half[num] = adjust_address (op, half_mode, 0);
17581 hi_half[num] = adjust_address (op, half_mode, byte);
17582 }
17583 else
17584 {
17585 lo_half[num] = simplify_gen_subreg (half_mode, op,
17586 GET_MODE (op) == VOIDmode
17587 ? mode : GET_MODE (op), 0);
17588 hi_half[num] = simplify_gen_subreg (half_mode, op,
17589 GET_MODE (op) == VOIDmode
17590 ? mode : GET_MODE (op), byte);
17591 }
17592 }
17593 }
17594 \f
17595 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
17596 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
17597 is the expression of the binary operation. The output may either be
17598 emitted here, or returned to the caller, like all output_* functions.
17599
17600 There is no guarantee that the operands are the same mode, as they
17601 might be within FLOAT or FLOAT_EXTEND expressions. */
17602
17603 #ifndef SYSV386_COMPAT
17604 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
17605 wants to fix the assemblers because that causes incompatibility
17606 with gcc. No-one wants to fix gcc because that causes
17607 incompatibility with assemblers... You can use the option of
17608 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
17609 #define SYSV386_COMPAT 1
17610 #endif
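/* Illustrative outputs of output_387_binary_op: for a PLUS with a memory
   operands[2] it returns "fadd%Z2\t%2"; for an SSE SFmode PLUS it returns
   "addss\t{%2, %0|%0, %2}", or "vaddss\t{%2, %1, %0|%0, %1, %2}" with AVX.  */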
17611
17612 const char *
17613 output_387_binary_op (rtx insn, rtx *operands)
17614 {
17615 static char buf[40];
17616 const char *p;
17617 const char *ssep;
17618 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
17619
17620 #ifdef ENABLE_CHECKING
17621 /* Even if we do not want to check the inputs, this documents the input
17622 constraints, which helps in understanding the following code. */
17623 if (STACK_REG_P (operands[0])
17624 && ((REG_P (operands[1])
17625 && REGNO (operands[0]) == REGNO (operands[1])
17626 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
17627 || (REG_P (operands[2])
17628 && REGNO (operands[0]) == REGNO (operands[2])
17629 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
17630 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
17631 ; /* ok */
17632 else
17633 gcc_assert (is_sse);
17634 #endif
17635
17636 switch (GET_CODE (operands[3]))
17637 {
17638 case PLUS:
17639 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17640 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17641 p = "fiadd";
17642 else
17643 p = "fadd";
17644 ssep = "vadd";
17645 break;
17646
17647 case MINUS:
17648 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17649 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17650 p = "fisub";
17651 else
17652 p = "fsub";
17653 ssep = "vsub";
17654 break;
17655
17656 case MULT:
17657 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17658 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17659 p = "fimul";
17660 else
17661 p = "fmul";
17662 ssep = "vmul";
17663 break;
17664
17665 case DIV:
17666 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17667 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17668 p = "fidiv";
17669 else
17670 p = "fdiv";
17671 ssep = "vdiv";
17672 break;
17673
17674 default:
17675 gcc_unreachable ();
17676 }
17677
17678 if (is_sse)
17679 {
17680 if (TARGET_AVX)
17681 {
17682 strcpy (buf, ssep);
17683 if (GET_MODE (operands[0]) == SFmode)
17684 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
17685 else
17686 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
17687 }
17688 else
17689 {
17690 strcpy (buf, ssep + 1);
17691 if (GET_MODE (operands[0]) == SFmode)
17692 strcat (buf, "ss\t{%2, %0|%0, %2}");
17693 else
17694 strcat (buf, "sd\t{%2, %0|%0, %2}");
17695 }
17696 return buf;
17697 }
17698 strcpy (buf, p);
17699
17700 switch (GET_CODE (operands[3]))
17701 {
17702 case MULT:
17703 case PLUS:
17704 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
17705 std::swap (operands[1], operands[2]);
17706
17707 /* We know operands[0] == operands[1]. */
17708
17709 if (MEM_P (operands[2]))
17710 {
17711 p = "%Z2\t%2";
17712 break;
17713 }
17714
17715 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
17716 {
17717 if (STACK_TOP_P (operands[0]))
17718 /* How is it that we are storing to a dead operand[2]?
17719 Well, presumably operands[1] is dead too. We can't
17720 store the result to st(0) as st(0) gets popped on this
17721 instruction. Instead store to operands[2] (which I
17722 think has to be st(1)). st(1) will be popped later.
17723 gcc <= 2.8.1 didn't have this check and generated
17724 assembly code that the Unixware assembler rejected. */
17725 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
17726 else
17727 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
17728 break;
17729 }
17730
17731 if (STACK_TOP_P (operands[0]))
17732 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
17733 else
17734 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
17735 break;
17736
17737 case MINUS:
17738 case DIV:
17739 if (MEM_P (operands[1]))
17740 {
17741 p = "r%Z1\t%1";
17742 break;
17743 }
17744
17745 if (MEM_P (operands[2]))
17746 {
17747 p = "%Z2\t%2";
17748 break;
17749 }
17750
17751 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
17752 {
17753 #if SYSV386_COMPAT
17754 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
17755 derived assemblers, confusingly reverse the direction of
17756 the operation for fsub{r} and fdiv{r} when the
17757 destination register is not st(0). The Intel assembler
17758 doesn't have this brain damage. Read !SYSV386_COMPAT to
17759 figure out what the hardware really does. */
17760 if (STACK_TOP_P (operands[0]))
17761 p = "{p\t%0, %2|rp\t%2, %0}";
17762 else
17763 p = "{rp\t%2, %0|p\t%0, %2}";
17764 #else
17765 if (STACK_TOP_P (operands[0]))
17766 /* As above for fmul/fadd, we can't store to st(0). */
17767 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
17768 else
17769 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
17770 #endif
17771 break;
17772 }
17773
17774 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
17775 {
17776 #if SYSV386_COMPAT
17777 if (STACK_TOP_P (operands[0]))
17778 p = "{rp\t%0, %1|p\t%1, %0}";
17779 else
17780 p = "{p\t%1, %0|rp\t%0, %1}";
17781 #else
17782 if (STACK_TOP_P (operands[0]))
17783 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
17784 else
17785 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
17786 #endif
17787 break;
17788 }
17789
17790 if (STACK_TOP_P (operands[0]))
17791 {
17792 if (STACK_TOP_P (operands[1]))
17793 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
17794 else
17795 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
17796 break;
17797 }
17798 else if (STACK_TOP_P (operands[1]))
17799 {
17800 #if SYSV386_COMPAT
17801 p = "{\t%1, %0|r\t%0, %1}";
17802 #else
17803 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
17804 #endif
17805 }
17806 else
17807 {
17808 #if SYSV386_COMPAT
17809 p = "{r\t%2, %0|\t%0, %2}";
17810 #else
17811 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
17812 #endif
17813 }
17814 break;
17815
17816 default:
17817 gcc_unreachable ();
17818 }
17819
17820 strcat (buf, p);
17821 return buf;
17822 }
17823
17824 /* Check if a 256bit AVX register is referenced inside EXP. */
17825
17826 static bool
17827 ix86_check_avx256_register (const_rtx exp)
17828 {
17829 if (SUBREG_P (exp))
17830 exp = SUBREG_REG (exp);
17831
17832 return (REG_P (exp)
17833 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
17834 }
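
/* Background on the AVX_U128 entity (illustrative summary): mixing 256bit
   AVX code with legacy SSE code incurs a state transition penalty on many
   AVX-capable processors unless the upper 128 bits of the YMM registers are
   zeroed first. The mode switching pass therefore tracks whether the upper
   halves may be non-zero (AVX_U128_DIRTY), are known to be zero
   (AVX_U128_CLEAN), or do not matter (AVX_U128_ANY), and emits vzeroupper
   where a transition to CLEAN is required (see ix86_emit_mode_set below).  */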
17835
17836 /* Return needed mode for entity in optimize_mode_switching pass. */
17837
17838 static int
17839 ix86_avx_u128_mode_needed (rtx_insn *insn)
17840 {
17841 if (CALL_P (insn))
17842 {
17843 rtx link;
17844
17845 /* Needed mode is set to AVX_U128_CLEAN if there are
17846 no 256bit modes used in function arguments. */
17847 for (link = CALL_INSN_FUNCTION_USAGE (insn);
17848 link;
17849 link = XEXP (link, 1))
17850 {
17851 if (GET_CODE (XEXP (link, 0)) == USE)
17852 {
17853 rtx arg = XEXP (XEXP (link, 0), 0);
17854
17855 if (ix86_check_avx256_register (arg))
17856 return AVX_U128_DIRTY;
17857 }
17858 }
17859
17860 return AVX_U128_CLEAN;
17861 }
17862
17863 /* Require DIRTY mode if a 256bit AVX register is referenced. The hardware
17864 changes state only when a 256bit register is written to, but we need
17865 to prevent the compiler from moving the optimal insertion point above
17866 an eventual read from a 256bit register. */
17867 subrtx_iterator::array_type array;
17868 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
17869 if (ix86_check_avx256_register (*iter))
17870 return AVX_U128_DIRTY;
17871
17872 return AVX_U128_ANY;
17873 }
17874
17875 /* Return mode that i387 must be switched into
17876 prior to the execution of insn. */
17877
17878 static int
17879 ix86_i387_mode_needed (int entity, rtx_insn *insn)
17880 {
17881 enum attr_i387_cw mode;
17882
17883 /* The mode UNINITIALIZED is used to store the control word after a
17884 function call or ASM pattern. The mode ANY specifies that the function
17885 has no requirements on the control word and makes no changes in the
17886 bits we are interested in. */
17887
17888 if (CALL_P (insn)
17889 || (NONJUMP_INSN_P (insn)
17890 && (asm_noperands (PATTERN (insn)) >= 0
17891 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
17892 return I387_CW_UNINITIALIZED;
17893
17894 if (recog_memoized (insn) < 0)
17895 return I387_CW_ANY;
17896
17897 mode = get_attr_i387_cw (insn);
17898
17899 switch (entity)
17900 {
17901 case I387_TRUNC:
17902 if (mode == I387_CW_TRUNC)
17903 return mode;
17904 break;
17905
17906 case I387_FLOOR:
17907 if (mode == I387_CW_FLOOR)
17908 return mode;
17909 break;
17910
17911 case I387_CEIL:
17912 if (mode == I387_CW_CEIL)
17913 return mode;
17914 break;
17915
17916 case I387_MASK_PM:
17917 if (mode == I387_CW_MASK_PM)
17918 return mode;
17919 break;
17920
17921 default:
17922 gcc_unreachable ();
17923 }
17924
17925 return I387_CW_ANY;
17926 }
17927
17928 /* Return mode that entity must be switched into
17929 prior to the execution of insn. */
17930
17931 static int
17932 ix86_mode_needed (int entity, rtx_insn *insn)
17933 {
17934 switch (entity)
17935 {
17936 case AVX_U128:
17937 return ix86_avx_u128_mode_needed (insn);
17938 case I387_TRUNC:
17939 case I387_FLOOR:
17940 case I387_CEIL:
17941 case I387_MASK_PM:
17942 return ix86_i387_mode_needed (entity, insn);
17943 default:
17944 gcc_unreachable ();
17945 }
17946 return 0;
17947 }
17948
17949 /* Check if a 256bit AVX register is referenced in stores. */
17950
17951 static void
17952 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
17953 {
17954 if (ix86_check_avx256_register (dest))
17955 {
17956 bool *used = (bool *) data;
17957 *used = true;
17958 }
17959 }
17960
17961 /* Calculate mode of upper 128bit AVX registers after the insn. */
17962
17963 static int
17964 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
17965 {
17966 rtx pat = PATTERN (insn);
17967
17968 if (vzeroupper_operation (pat, VOIDmode)
17969 || vzeroall_operation (pat, VOIDmode))
17970 return AVX_U128_CLEAN;
17971
17972 /* We know that the state is clean after a CALL insn if no 256bit
17973 register is used as the function return register. */
17974 if (CALL_P (insn))
17975 {
17976 bool avx_reg256_found = false;
17977 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
17978
17979 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
17980 }
17981
17982 /* Otherwise, return the current mode. Remember that if the insn
17983 references AVX 256bit registers, the mode was already changed
17984 to DIRTY by MODE_NEEDED. */
17985 return mode;
17986 }
17987
17988 /* Return the mode that an insn results in. */
17989
17990 static int
17991 ix86_mode_after (int entity, int mode, rtx_insn *insn)
17992 {
17993 switch (entity)
17994 {
17995 case AVX_U128:
17996 return ix86_avx_u128_mode_after (mode, insn);
17997 case I387_TRUNC:
17998 case I387_FLOOR:
17999 case I387_CEIL:
18000 case I387_MASK_PM:
18001 return mode;
18002 default:
18003 gcc_unreachable ();
18004 }
18005 }
18006
18007 static int
18008 ix86_avx_u128_mode_entry (void)
18009 {
18010 tree arg;
18011
18012 /* Entry mode is set to AVX_U128_DIRTY if there are
18013 256bit modes used in function arguments. */
18014 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
18015 arg = TREE_CHAIN (arg))
18016 {
18017 rtx incoming = DECL_INCOMING_RTL (arg);
18018
18019 if (incoming && ix86_check_avx256_register (incoming))
18020 return AVX_U128_DIRTY;
18021 }
18022
18023 return AVX_U128_CLEAN;
18024 }
18025
18026 /* Return a mode that ENTITY is assumed to be
18027 switched to at function entry. */
18028
18029 static int
18030 ix86_mode_entry (int entity)
18031 {
18032 switch (entity)
18033 {
18034 case AVX_U128:
18035 return ix86_avx_u128_mode_entry ();
18036 case I387_TRUNC:
18037 case I387_FLOOR:
18038 case I387_CEIL:
18039 case I387_MASK_PM:
18040 return I387_CW_ANY;
18041 default:
18042 gcc_unreachable ();
18043 }
18044 }
18045
18046 static int
18047 ix86_avx_u128_mode_exit (void)
18048 {
18049 rtx reg = crtl->return_rtx;
18050
18051 /* Exit mode is set to AVX_U128_DIRTY if there are
18052 256bit modes used in the function return register. */
18053 if (reg && ix86_check_avx256_register (reg))
18054 return AVX_U128_DIRTY;
18055
18056 return AVX_U128_CLEAN;
18057 }
18058
18059 /* Return a mode that ENTITY is assumed to be
18060 switched to at function exit. */
18061
18062 static int
18063 ix86_mode_exit (int entity)
18064 {
18065 switch (entity)
18066 {
18067 case AVX_U128:
18068 return ix86_avx_u128_mode_exit ();
18069 case I387_TRUNC:
18070 case I387_FLOOR:
18071 case I387_CEIL:
18072 case I387_MASK_PM:
18073 return I387_CW_ANY;
18074 default:
18075 gcc_unreachable ();
18076 }
18077 }
18078
18079 static int
18080 ix86_mode_priority (int, int n)
18081 {
18082 return n;
18083 }
18084
18085 /* Output code to initialize the control word copies used by trunc?f?i and
18086 rounding patterns. The current control word is saved to a stack slot,
18087 modified according to MODE, and stored into the stack slot for MODE. */
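/* The x87 control word layout relevant here: bits 10-11 are the rounding
   control (00 = to nearest, 01 = down, 10 = up, 11 = toward zero) and bit 5
   is the precision exception mask. Hence or-ing 0x0c00 selects truncation,
   0x0400/0x0800 (after clearing 0x0c00) select floor/ceil rounding, and
   or-ing 0x0020 masks the precision exception for nearbyint.  */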
18088
18089 static void
18090 emit_i387_cw_initialization (int mode)
18091 {
18092 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
18093 rtx new_mode;
18094
18095 enum ix86_stack_slot slot;
18096
18097 rtx reg = gen_reg_rtx (HImode);
18098
18099 emit_insn (gen_x86_fnstcw_1 (stored_mode));
18100 emit_move_insn (reg, copy_rtx (stored_mode));
18101
18102 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
18103 || optimize_insn_for_size_p ())
18104 {
18105 switch (mode)
18106 {
18107 case I387_CW_TRUNC:
18108 /* round toward zero (truncate) */
18109 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
18110 slot = SLOT_CW_TRUNC;
18111 break;
18112
18113 case I387_CW_FLOOR:
18114 /* round down toward -oo */
18115 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
18116 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
18117 slot = SLOT_CW_FLOOR;
18118 break;
18119
18120 case I387_CW_CEIL:
18121 /* round up toward +oo */
18122 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
18123 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
18124 slot = SLOT_CW_CEIL;
18125 break;
18126
18127 case I387_CW_MASK_PM:
18128 /* mask precision exception for nearbyint() */
18129 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
18130 slot = SLOT_CW_MASK_PM;
18131 break;
18132
18133 default:
18134 gcc_unreachable ();
18135 }
18136 }
18137 else
18138 {
18139 switch (mode)
18140 {
18141 case I387_CW_TRUNC:
18142 /* round toward zero (truncate) */
18143 emit_insn (gen_insvsi_1 (reg, GEN_INT (0xc)));
18144 slot = SLOT_CW_TRUNC;
18145 break;
18146
18147 case I387_CW_FLOOR:
18148 /* round down toward -oo */
18149 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x4)));
18150 slot = SLOT_CW_FLOOR;
18151 break;
18152
18153 case I387_CW_CEIL:
18154 /* round up toward +oo */
18155 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x8)));
18156 slot = SLOT_CW_CEIL;
18157 break;
18158
18159 case I387_CW_MASK_PM:
18160 /* mask precision exception for nearbyint() */
18161 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
18162 slot = SLOT_CW_MASK_PM;
18163 break;
18164
18165 default:
18166 gcc_unreachable ();
18167 }
18168 }
18169
18170 gcc_assert (slot < MAX_386_STACK_LOCALS);
18171
18172 new_mode = assign_386_stack_local (HImode, slot);
18173 emit_move_insn (new_mode, reg);
18174 }
18175
18176 /* Emit vzeroupper. */
18177
18178 void
18179 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
18180 {
18181 int i;
18182
18183 /* Cancel automatic vzeroupper insertion if there are
18184 live call-saved SSE registers at the insertion point. */
18185
18186 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
18187 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
18188 return;
18189
18190 if (TARGET_64BIT)
18191 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
18192 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
18193 return;
18194
18195 emit_insn (gen_avx_vzeroupper ());
18196 }
18197
18200 /* Generate one or more insns to set ENTITY to MODE. REGS_LIVE
18201 is the set of hard registers live at the point where the insn(s)
18202 are to be inserted. */
18203
18204 static void
18205 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
18206 HARD_REG_SET regs_live)
18207 {
18208 switch (entity)
18209 {
18210 case AVX_U128:
18211 if (mode == AVX_U128_CLEAN)
18212 ix86_avx_emit_vzeroupper (regs_live);
18213 break;
18214 case I387_TRUNC:
18215 case I387_FLOOR:
18216 case I387_CEIL:
18217 case I387_MASK_PM:
18218 if (mode != I387_CW_ANY
18219 && mode != I387_CW_UNINITIALIZED)
18220 emit_i387_cw_initialization (mode);
18221 break;
18222 default:
18223 gcc_unreachable ();
18224 }
18225 }
18226
18227 /* Output code for INSN to convert a float to a signed int. OPERANDS
18228 are the insn operands. The output may be [HSD]Imode and the input
18229 operand may be [SDX]Fmode. */
18230
18231 const char *
18232 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
18233 {
18234 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
18235 int dimode_p = GET_MODE (operands[0]) == DImode;
18236 int round_mode = get_attr_i387_cw (insn);
18237
18238 /* Jump through a hoop or two for DImode, since the hardware has no
18239 non-popping instruction. We used to do this a different way, but
18240 that was somewhat fragile and broke with post-reload splitters. */
18241 if ((dimode_p || fisttp) && !stack_top_dies)
18242 output_asm_insn ("fld\t%y1", operands);
18243
18244 gcc_assert (STACK_TOP_P (operands[1]));
18245 gcc_assert (MEM_P (operands[0]));
18246 gcc_assert (GET_MODE (operands[1]) != TFmode);
18247
18248 if (fisttp)
18249 output_asm_insn ("fisttp%Z0\t%0", operands);
18250 else
18251 {
18252 if (round_mode != I387_CW_ANY)
18253 output_asm_insn ("fldcw\t%3", operands);
18254 if (stack_top_dies || dimode_p)
18255 output_asm_insn ("fistp%Z0\t%0", operands);
18256 else
18257 output_asm_insn ("fist%Z0\t%0", operands);
18258 if (round_mode != I387_CW_ANY)
18259 output_asm_insn ("fldcw\t%2", operands);
18260 }
18261
18262 return "";
18263 }
18264
18265 /* Output code for x87 ffreep insn. The OPNO argument, which may only
18266 have the values zero or one, indicates the ffreep insn's operand
18267 from the OPERANDS array. */
18268
18269 static const char *
18270 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
18271 {
18272 if (TARGET_USE_FFREEP)
18273 #ifdef HAVE_AS_IX86_FFREEP
18274 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
18275 #else
18276 {
18277 static char retval[32];
18278 int regno = REGNO (operands[opno]);
18279
18280 gcc_assert (STACK_REGNO_P (regno));
18281
18282 regno -= FIRST_STACK_REG;
18283
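/* ffreep %st(N) is encoded as the two bytes 0xdf 0xc0+N; ASM_SHORT emits
   a 16bit little-endian value, so e.g. 0xc0df becomes the byte sequence
   df c0, i.e. ffreep %st(0).  */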
18284 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
18285 return retval;
18286 }
18287 #endif
18288
18289 return opno ? "fstp\t%y1" : "fstp\t%y0";
18290 }
18291
18292
18293 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
18294 should be used. UNORDERED_P is true when fucom should be used. */
18295
18296 const char *
18297 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
18298 {
18299 int stack_top_dies;
18300 rtx cmp_op0, cmp_op1;
18301 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
18302
18303 if (eflags_p)
18304 {
18305 cmp_op0 = operands[0];
18306 cmp_op1 = operands[1];
18307 }
18308 else
18309 {
18310 cmp_op0 = operands[1];
18311 cmp_op1 = operands[2];
18312 }
18313
18314 if (is_sse)
18315 {
18316 if (GET_MODE (operands[0]) == SFmode)
18317 if (unordered_p)
18318 return "%vucomiss\t{%1, %0|%0, %1}";
18319 else
18320 return "%vcomiss\t{%1, %0|%0, %1}";
18321 else
18322 if (unordered_p)
18323 return "%vucomisd\t{%1, %0|%0, %1}";
18324 else
18325 return "%vcomisd\t{%1, %0|%0, %1}";
18326 }
18327
18328 gcc_assert (STACK_TOP_P (cmp_op0));
18329
18330 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
18331
18332 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
18333 {
18334 if (stack_top_dies)
18335 {
18336 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
18337 return output_387_ffreep (operands, 1);
18338 }
18339 else
18340 return "ftst\n\tfnstsw\t%0";
18341 }
18342
18343 if (STACK_REG_P (cmp_op1)
18344 && stack_top_dies
18345 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
18346 && REGNO (cmp_op1) != FIRST_STACK_REG)
18347 {
18348 /* If the top of the 387 stack dies, and the other operand
18349 is also a stack register that dies, then this must be a
18350 `fcompp' float compare. */
18351
18352 if (eflags_p)
18353 {
18354 /* There is no double popping fcomi variant. Fortunately,
18355 eflags is immune from the fstp's cc clobbering. */
18356 if (unordered_p)
18357 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
18358 else
18359 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
18360 return output_387_ffreep (operands, 0);
18361 }
18362 else
18363 {
18364 if (unordered_p)
18365 return "fucompp\n\tfnstsw\t%0";
18366 else
18367 return "fcompp\n\tfnstsw\t%0";
18368 }
18369 }
18370 else
18371 {
18372 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
18373
18374 static const char * const alt[16] =
18375 {
18376 "fcom%Z2\t%y2\n\tfnstsw\t%0",
18377 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
18378 "fucom%Z2\t%y2\n\tfnstsw\t%0",
18379 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
18380
18381 "ficom%Z2\t%y2\n\tfnstsw\t%0",
18382 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
18383 NULL,
18384 NULL,
18385
18386 "fcomi\t{%y1, %0|%0, %y1}",
18387 "fcomip\t{%y1, %0|%0, %y1}",
18388 "fucomi\t{%y1, %0|%0, %y1}",
18389 "fucomip\t{%y1, %0|%0, %y1}",
18390
18391 NULL,
18392 NULL,
18393 NULL,
18394 NULL
18395 };
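/* Example: eflags_p = 1, a floating-point (non-integer) operand,
   unordered_p = 0 and a dying stack top give mask 0b1001 = 9, which
   selects "fcomip\t{%y1, %0|%0, %y1}" above.  */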
18396
18397 int mask;
18398 const char *ret;
18399
18400 mask = eflags_p << 3;
18401 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
18402 mask |= unordered_p << 1;
18403 mask |= stack_top_dies;
18404
18405 gcc_assert (mask < 16);
18406 ret = alt[mask];
18407 gcc_assert (ret);
18408
18409 return ret;
18410 }
18411 }
18412
18413 void
18414 ix86_output_addr_vec_elt (FILE *file, int value)
18415 {
18416 const char *directive = ASM_LONG;
18417
18418 #ifdef ASM_QUAD
18419 if (TARGET_LP64)
18420 directive = ASM_QUAD;
18421 #else
18422 gcc_assert (!TARGET_64BIT);
18423 #endif
18424
18425 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
18426 }
18427
18428 void
18429 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
18430 {
18431 const char *directive = ASM_LONG;
18432
18433 #ifdef ASM_QUAD
18434 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
18435 directive = ASM_QUAD;
18436 #else
18437 gcc_assert (!TARGET_64BIT);
18438 #endif
18439 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
18440 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
18441 fprintf (file, "%s%s%d-%s%d\n",
18442 directive, LPREFIX, value, LPREFIX, rel);
18443 else if (HAVE_AS_GOTOFF_IN_DATA)
18444 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
18445 #if TARGET_MACHO
18446 else if (TARGET_MACHO)
18447 {
18448 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
18449 machopic_output_function_base_name (file);
18450 putc ('\n', file);
18451 }
18452 #endif
18453 else
18454 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
18455 GOT_SYMBOL_NAME, LPREFIX, value);
18456 }
18457 \f
18458 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
18459 for the target. */
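/* Background: "xor reg, reg" is two bytes (vs. five for a "mov $0" to a
   32bit register) and breaks dependency chains on most processors, but it
   clobbers the flags; "mov $0, reg" is longer yet leaves the flags intact.
   That is why the xor form below is wrapped in a PARALLEL with a FLAGS_REG
   clobber, and is used unless the tuning prefers mov0 and we are not
   optimizing for size.  */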
18460
18461 void
18462 ix86_expand_clear (rtx dest)
18463 {
18464 rtx tmp;
18465
18466 /* We play register width games, which are only valid after reload. */
18467 gcc_assert (reload_completed);
18468
18469 /* Avoid HImode and its attendant prefix byte. */
18470 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
18471 dest = gen_rtx_REG (SImode, REGNO (dest));
18472 tmp = gen_rtx_SET (dest, const0_rtx);
18473
18474 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
18475 {
18476 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18477 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
18478 }
18479
18480 emit_insn (tmp);
18481 }
18482
18483 /* X is an unchanging MEM. If it is a constant pool reference, return
18484 the constant pool rtx, else NULL. */
18485
18486 rtx
18487 maybe_get_pool_constant (rtx x)
18488 {
18489 x = ix86_delegitimize_address (XEXP (x, 0));
18490
18491 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
18492 return get_pool_constant (x);
18493
18494 return NULL_RTX;
18495 }
18496
18497 void
18498 ix86_expand_move (machine_mode mode, rtx operands[])
18499 {
18500 rtx op0, op1;
18501 enum tls_model model;
18502
18503 op0 = operands[0];
18504 op1 = operands[1];
18505
18506 if (GET_CODE (op1) == SYMBOL_REF)
18507 {
18508 rtx tmp;
18509
18510 model = SYMBOL_REF_TLS_MODEL (op1);
18511 if (model)
18512 {
18513 op1 = legitimize_tls_address (op1, model, true);
18514 op1 = force_operand (op1, op0);
18515 if (op1 == op0)
18516 return;
18517 op1 = convert_to_mode (mode, op1, 1);
18518 }
18519 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
18520 op1 = tmp;
18521 }
18522 else if (GET_CODE (op1) == CONST
18523 && GET_CODE (XEXP (op1, 0)) == PLUS
18524 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
18525 {
18526 rtx addend = XEXP (XEXP (op1, 0), 1);
18527 rtx symbol = XEXP (XEXP (op1, 0), 0);
18528 rtx tmp;
18529
18530 model = SYMBOL_REF_TLS_MODEL (symbol);
18531 if (model)
18532 tmp = legitimize_tls_address (symbol, model, true);
18533 else
18534 tmp = legitimize_pe_coff_symbol (symbol, true);
18535
18536 if (tmp)
18537 {
18538 tmp = force_operand (tmp, NULL);
18539 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
18540 op0, 1, OPTAB_DIRECT);
18541 if (tmp == op0)
18542 return;
18543 op1 = convert_to_mode (mode, tmp, 1);
18544 }
18545 }
18546
18547 if ((flag_pic || MACHOPIC_INDIRECT)
18548 && symbolic_operand (op1, mode))
18549 {
18550 if (TARGET_MACHO && !TARGET_64BIT)
18551 {
18552 #if TARGET_MACHO
18553 /* dynamic-no-pic */
18554 if (MACHOPIC_INDIRECT)
18555 {
18556 rtx temp = (op0 && REG_P (op0) && mode == Pmode)
18557 ? op0 : gen_reg_rtx (Pmode);
18558 op1 = machopic_indirect_data_reference (op1, temp);
18559 if (MACHOPIC_PURE)
18560 op1 = machopic_legitimize_pic_address (op1, mode,
18561 temp == op1 ? 0 : temp);
18562 }
18563 if (op0 != op1 && GET_CODE (op0) != MEM)
18564 {
18565 rtx insn = gen_rtx_SET (op0, op1);
18566 emit_insn (insn);
18567 return;
18568 }
18569 if (GET_CODE (op0) == MEM)
18570 op1 = force_reg (Pmode, op1);
18571 else
18572 {
18573 rtx temp = op0;
18574 if (GET_CODE (temp) != REG)
18575 temp = gen_reg_rtx (Pmode);
18576 temp = legitimize_pic_address (op1, temp);
18577 if (temp == op0)
18578 return;
18579 op1 = temp;
18580 }
18581 /* dynamic-no-pic */
18582 #endif
18583 }
18584 else
18585 {
18586 if (MEM_P (op0))
18587 op1 = force_reg (mode, op1);
18588 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
18589 {
18590 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
18591 op1 = legitimize_pic_address (op1, reg);
18592 if (op0 == op1)
18593 return;
18594 op1 = convert_to_mode (mode, op1, 1);
18595 }
18596 }
18597 }
18598 else
18599 {
18600 if (MEM_P (op0)
18601 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
18602 || !push_operand (op0, mode))
18603 && MEM_P (op1))
18604 op1 = force_reg (mode, op1);
18605
18606 if (push_operand (op0, mode)
18607 && ! general_no_elim_operand (op1, mode))
18608 op1 = copy_to_mode_reg (mode, op1);
18609
18610 /* Force large constants in 64bit compilation into a register
18611 to get them CSEed. */
18612 if (can_create_pseudo_p ()
18613 && (mode == DImode) && TARGET_64BIT
18614 && immediate_operand (op1, mode)
18615 && !x86_64_zext_immediate_operand (op1, VOIDmode)
18616 && !register_operand (op0, mode)
18617 && optimize)
18618 op1 = copy_to_mode_reg (mode, op1);
18619
18620 if (can_create_pseudo_p ()
18621 && CONST_DOUBLE_P (op1))
18622 {
18623 /* If we are loading a floating point constant to a register,
18624 force the value to memory now, since we'll get better code
18625 out of the back end. */
18626
18627 op1 = validize_mem (force_const_mem (mode, op1));
18628 if (!register_operand (op0, mode))
18629 {
18630 rtx temp = gen_reg_rtx (mode);
18631 emit_insn (gen_rtx_SET (temp, op1));
18632 emit_move_insn (op0, temp);
18633 return;
18634 }
18635 }
18636 }
18637
18638 emit_insn (gen_rtx_SET (op0, op1));
18639 }
18640
18641 void
18642 ix86_expand_vector_move (machine_mode mode, rtx operands[])
18643 {
18644 rtx op0 = operands[0], op1 = operands[1];
18645 /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for the IA MCU
18646 psABI, since the biggest alignment is 4 bytes for the IA MCU psABI. */
18647 unsigned int align = (TARGET_IAMCU
18648 ? GET_MODE_BITSIZE (mode)
18649 : GET_MODE_ALIGNMENT (mode));
18650
18651 if (push_operand (op0, VOIDmode))
18652 op0 = emit_move_resolve_push (mode, op0);
18653
18654 /* Force constants other than zero into memory. We do not know how
18655 the instructions used to build constants modify the upper 64 bits
18656 of the register; once we have that information we may be able
18657 to handle some of them more efficiently. */
18658 if (can_create_pseudo_p ()
18659 && register_operand (op0, mode)
18660 && (CONSTANT_P (op1)
18661 || (SUBREG_P (op1)
18662 && CONSTANT_P (SUBREG_REG (op1))))
18663 && !standard_sse_constant_p (op1))
18664 op1 = validize_mem (force_const_mem (mode, op1));
18665
18666 /* We need to check memory alignment for SSE modes since attributes
18667 can make operands unaligned. */
18668 if (can_create_pseudo_p ()
18669 && SSE_REG_MODE_P (mode)
18670 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
18671 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
18672 {
18673 rtx tmp[2];
18674
18675 /* ix86_expand_vector_move_misalign() does not like constants ... */
18676 if (CONSTANT_P (op1)
18677 || (SUBREG_P (op1)
18678 && CONSTANT_P (SUBREG_REG (op1))))
18679 op1 = validize_mem (force_const_mem (mode, op1));
18680
18681 /* ... nor both arguments in memory. */
18682 if (!register_operand (op0, mode)
18683 && !register_operand (op1, mode))
18684 op1 = force_reg (mode, op1);
18685
18686 tmp[0] = op0; tmp[1] = op1;
18687 ix86_expand_vector_move_misalign (mode, tmp);
18688 return;
18689 }
18690
18691 /* Make operand1 a register if it isn't already. */
18692 if (can_create_pseudo_p ()
18693 && !register_operand (op0, mode)
18694 && !register_operand (op1, mode))
18695 {
18696 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
18697 return;
18698 }
18699
18700 emit_insn (gen_rtx_SET (op0, op1));
18701 }
18702
18703 /* Split 32-byte AVX unaligned load and store if needed. */
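/* Tuning note: on some AVX implementations an unaligned 256bit memory
   access is slower than two explicit 128bit accesses, so the
   TARGET_AVX256_SPLIT_UNALIGNED_LOAD/STORE tunings below split such moves
   when optimizing for speed; this is purely a performance heuristic.  */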
18704
18705 static void
18706 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
18707 {
18708 rtx m;
18709 rtx (*extract) (rtx, rtx, rtx);
18710 rtx (*load_unaligned) (rtx, rtx);
18711 rtx (*store_unaligned) (rtx, rtx);
18712 machine_mode mode;
18713
18714 switch (GET_MODE (op0))
18715 {
18716 default:
18717 gcc_unreachable ();
18718 case V32QImode:
18719 extract = gen_avx_vextractf128v32qi;
18720 load_unaligned = gen_avx_loaddquv32qi;
18721 store_unaligned = gen_avx_storedquv32qi;
18722 mode = V16QImode;
18723 break;
18724 case V8SFmode:
18725 extract = gen_avx_vextractf128v8sf;
18726 load_unaligned = gen_avx_loadups256;
18727 store_unaligned = gen_avx_storeups256;
18728 mode = V4SFmode;
18729 break;
18730 case V4DFmode:
18731 extract = gen_avx_vextractf128v4df;
18732 load_unaligned = gen_avx_loadupd256;
18733 store_unaligned = gen_avx_storeupd256;
18734 mode = V2DFmode;
18735 break;
18736 }
18737
18738 if (MEM_P (op1))
18739 {
18740 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
18741 && optimize_insn_for_speed_p ())
18742 {
18743 rtx r = gen_reg_rtx (mode);
18744 m = adjust_address (op1, mode, 0);
18745 emit_move_insn (r, m);
18746 m = adjust_address (op1, mode, 16);
18747 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
18748 emit_move_insn (op0, r);
18749 }
18750 /* Normal *mov<mode>_internal pattern will handle
18751 unaligned loads just fine if misaligned_operand
18752 is true, and without the UNSPEC it can be combined
18753 with arithmetic instructions. */
18754 else if (misaligned_operand (op1, GET_MODE (op1)))
18755 emit_insn (gen_rtx_SET (op0, op1));
18756 else
18757 emit_insn (load_unaligned (op0, op1));
18758 }
18759 else if (MEM_P (op0))
18760 {
18761 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
18762 && optimize_insn_for_speed_p ())
18763 {
18764 m = adjust_address (op0, mode, 0);
18765 emit_insn (extract (m, op1, const0_rtx));
18766 m = adjust_address (op0, mode, 16);
18767 emit_insn (extract (m, op1, const1_rtx));
18768 }
18769 else
18770 emit_insn (store_unaligned (op0, op1));
18771 }
18772 else
18773 gcc_unreachable ();
18774 }
18775
18776 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
18777 straight to ix86_expand_vector_move. */
18778 /* Code generation for scalar reg-reg moves of single and double precision data:
18779 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
18780 movaps reg, reg
18781 else
18782 movss reg, reg
18783 if (x86_sse_partial_reg_dependency == true)
18784 movapd reg, reg
18785 else
18786 movsd reg, reg
18787
18788 Code generation for scalar loads of double precision data:
18789 if (x86_sse_split_regs == true)
18790 movlpd mem, reg (gas syntax)
18791 else
18792 movsd mem, reg
18793
18794 Code generation for unaligned packed loads of single precision data
18795 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
18796 if (x86_sse_unaligned_move_optimal)
18797 movups mem, reg
18798
18799 if (x86_sse_partial_reg_dependency == true)
18800 {
18801 xorps reg, reg
18802 movlps mem, reg
18803 movhps mem+8, reg
18804 }
18805 else
18806 {
18807 movlps mem, reg
18808 movhps mem+8, reg
18809 }
18810
18811 Code generation for unaligned packed loads of double precision data
18812 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
18813 if (x86_sse_unaligned_move_optimal)
18814 movupd mem, reg
18815
18816 if (x86_sse_split_regs == true)
18817 {
18818 movlpd mem, reg
18819 movhpd mem+8, reg
18820 }
18821 else
18822 {
18823 movsd mem, reg
18824 movhpd mem+8, reg
18825 }
18826 */
18827
18828 void
18829 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
18830 {
18831 rtx op0, op1, orig_op0 = NULL_RTX, m;
18832 rtx (*load_unaligned) (rtx, rtx);
18833 rtx (*store_unaligned) (rtx, rtx);
18834
18835 op0 = operands[0];
18836 op1 = operands[1];
18837
18838 if (GET_MODE_SIZE (mode) == 64)
18839 {
18840 switch (GET_MODE_CLASS (mode))
18841 {
18842 case MODE_VECTOR_INT:
18843 case MODE_INT:
18844 if (GET_MODE (op0) != V16SImode)
18845 {
18846 if (!MEM_P (op0))
18847 {
18848 orig_op0 = op0;
18849 op0 = gen_reg_rtx (V16SImode);
18850 }
18851 else
18852 op0 = gen_lowpart (V16SImode, op0);
18853 }
18854 op1 = gen_lowpart (V16SImode, op1);
18855 /* FALLTHRU */
18856
18857 case MODE_VECTOR_FLOAT:
18858 switch (GET_MODE (op0))
18859 {
18860 default:
18861 gcc_unreachable ();
18862 case V16SImode:
18863 load_unaligned = gen_avx512f_loaddquv16si;
18864 store_unaligned = gen_avx512f_storedquv16si;
18865 break;
18866 case V16SFmode:
18867 load_unaligned = gen_avx512f_loadups512;
18868 store_unaligned = gen_avx512f_storeups512;
18869 break;
18870 case V8DFmode:
18871 load_unaligned = gen_avx512f_loadupd512;
18872 store_unaligned = gen_avx512f_storeupd512;
18873 break;
18874 }
18875
18876 if (MEM_P (op1))
18877 emit_insn (load_unaligned (op0, op1));
18878 else if (MEM_P (op0))
18879 emit_insn (store_unaligned (op0, op1));
18880 else
18881 gcc_unreachable ();
18882 if (orig_op0)
18883 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
18884 break;
18885
18886 default:
18887 gcc_unreachable ();
18888 }
18889
18890 return;
18891 }
18892
18893 if (TARGET_AVX
18894 && GET_MODE_SIZE (mode) == 32)
18895 {
18896 switch (GET_MODE_CLASS (mode))
18897 {
18898 case MODE_VECTOR_INT:
18899 case MODE_INT:
18900 if (GET_MODE (op0) != V32QImode)
18901 {
18902 if (!MEM_P (op0))
18903 {
18904 orig_op0 = op0;
18905 op0 = gen_reg_rtx (V32QImode);
18906 }
18907 else
18908 op0 = gen_lowpart (V32QImode, op0);
18909 }
18910 op1 = gen_lowpart (V32QImode, op1);
18911 /* FALLTHRU */
18912
18913 case MODE_VECTOR_FLOAT:
18914 ix86_avx256_split_vector_move_misalign (op0, op1);
18915 if (orig_op0)
18916 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
18917 break;
18918
18919 default:
18920 gcc_unreachable ();
18921 }
18922
18923 return;
18924 }
18925
18926 if (MEM_P (op1))
18927 {
18928 /* Normal *mov<mode>_internal pattern will handle
18929 unaligned loads just fine if misaligned_operand
18930 is true, and without the UNSPEC it can be combined
18931 with arithmetic instructions. */
18932 if (TARGET_AVX
18933 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
18934 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
18935 && misaligned_operand (op1, GET_MODE (op1)))
18936 emit_insn (gen_rtx_SET (op0, op1));
18937 /* ??? If we have typed data, then it would appear that using
18938 movdqu is the only way to get unaligned data loaded with
18939 integer type. */
18940 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
18941 {
18942 if (GET_MODE (op0) != V16QImode)
18943 {
18944 orig_op0 = op0;
18945 op0 = gen_reg_rtx (V16QImode);
18946 }
18947 op1 = gen_lowpart (V16QImode, op1);
18948 /* We will eventually emit movups based on insn attributes. */
18949 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
18950 if (orig_op0)
18951 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
18952 }
18953 else if (TARGET_SSE2 && mode == V2DFmode)
18954 {
18955 rtx zero;
18956
18957 if (TARGET_AVX
18958 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
18959 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
18960 || optimize_insn_for_size_p ())
18961 {
18962 /* We will eventually emit movups based on insn attributes. */
18963 emit_insn (gen_sse2_loadupd (op0, op1));
18964 return;
18965 }
18966
18967 /* When SSE registers are split into halves, we can avoid
18968 writing to the top half twice. */
18969 if (TARGET_SSE_SPLIT_REGS)
18970 {
18971 emit_clobber (op0);
18972 zero = op0;
18973 }
18974 else
18975 {
18976 /* ??? Not sure about the best option for the Intel chips.
18977 The following would seem to satisfy; the register is
18978 entirely cleared, breaking the dependency chain. We
18979 then store to the upper half, with a dependency depth
18980 of one. A rumor has it that Intel recommends two movsd
18981 followed by an unpacklpd, but this is unconfirmed. And
18982 given that the dependency depth of the unpacklpd would
18983 still be one, I'm not sure why this would be better. */
18984 zero = CONST0_RTX (V2DFmode);
18985 }
18986
18987 m = adjust_address (op1, DFmode, 0);
18988 emit_insn (gen_sse2_loadlpd (op0, zero, m));
18989 m = adjust_address (op1, DFmode, 8);
18990 emit_insn (gen_sse2_loadhpd (op0, op0, m));
18991 }
18992 else
18993 {
18994 rtx t;
18995
18996 if (TARGET_AVX
18997 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
18998 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
18999 || optimize_insn_for_size_p ())
19000 {
19001 if (GET_MODE (op0) != V4SFmode)
19002 {
19003 orig_op0 = op0;
19004 op0 = gen_reg_rtx (V4SFmode);
19005 }
19006 op1 = gen_lowpart (V4SFmode, op1);
19007 emit_insn (gen_sse_loadups (op0, op1));
19008 if (orig_op0)
19009 emit_move_insn (orig_op0,
19010 gen_lowpart (GET_MODE (orig_op0), op0));
19011 return;
19012 }
19013
19014 if (mode != V4SFmode)
19015 t = gen_reg_rtx (V4SFmode);
19016 else
19017 t = op0;
19018
19019 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
19020 emit_move_insn (t, CONST0_RTX (V4SFmode));
19021 else
19022 emit_clobber (t);
19023
19024 m = adjust_address (op1, V2SFmode, 0);
19025 emit_insn (gen_sse_loadlps (t, t, m));
19026 m = adjust_address (op1, V2SFmode, 8);
19027 emit_insn (gen_sse_loadhps (t, t, m));
19028 if (mode != V4SFmode)
19029 emit_move_insn (op0, gen_lowpart (mode, t));
19030 }
19031 }
19032 else if (MEM_P (op0))
19033 {
19034 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19035 {
19036 op0 = gen_lowpart (V16QImode, op0);
19037 op1 = gen_lowpart (V16QImode, op1);
19038 /* We will eventually emit movups based on insn attributes. */
19039 emit_insn (gen_sse2_storedquv16qi (op0, op1));
19040 }
19041 else if (TARGET_SSE2 && mode == V2DFmode)
19042 {
19043 if (TARGET_AVX
19044 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
19045 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19046 || optimize_insn_for_size_p ())
19047 /* We will eventually emit movups based on insn attributes. */
19048 emit_insn (gen_sse2_storeupd (op0, op1));
19049 else
19050 {
19051 m = adjust_address (op0, DFmode, 0);
19052 emit_insn (gen_sse2_storelpd (m, op1));
19053 m = adjust_address (op0, DFmode, 8);
19054 emit_insn (gen_sse2_storehpd (m, op1));
19055 }
19056 }
19057 else
19058 {
19059 if (mode != V4SFmode)
19060 op1 = gen_lowpart (V4SFmode, op1);
19061
19062 if (TARGET_AVX
19063 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
19064 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19065 || optimize_insn_for_size_p ())
19066 {
19067 op0 = gen_lowpart (V4SFmode, op0);
19068 emit_insn (gen_sse_storeups (op0, op1));
19069 }
19070 else
19071 {
19072 m = adjust_address (op0, V2SFmode, 0);
19073 emit_insn (gen_sse_storelps (m, op1));
19074 m = adjust_address (op0, V2SFmode, 8);
19075 emit_insn (gen_sse_storehps (m, op1));
19076 }
19077 }
19078 }
19079 else
19080 gcc_unreachable ();
19081 }
19082
19083 /* Helper function of ix86_fixup_binary_operands to canonicalize
19084 operand order. Returns true if the operands should be swapped. */
19085
19086 static bool
19087 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
19088 rtx operands[])
19089 {
19090 rtx dst = operands[0];
19091 rtx src1 = operands[1];
19092 rtx src2 = operands[2];
19093
19094 /* If the operation is not commutative, we can't do anything. */
19095 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
19096 return false;
19097
19098 /* Highest priority is that src1 should match dst. */
19099 if (rtx_equal_p (dst, src1))
19100 return false;
19101 if (rtx_equal_p (dst, src2))
19102 return true;
19103
19104 /* Next highest priority is that immediate constants come second. */
19105 if (immediate_operand (src2, mode))
19106 return false;
19107 if (immediate_operand (src1, mode))
19108 return true;
19109
19110 /* Lowest priority is that memory references should come second. */
19111 if (MEM_P (src2))
19112 return false;
19113 if (MEM_P (src1))
19114 return true;
19115
19116 return false;
19117 }
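
/* For example, with a commutative PLUS where dst and src2 are the same
   register and src1 is a MEM, the operands are swapped so that src1 matches
   dst and the insn can use the matching-destination form, e.g.
   "addl mem, %eax" in AT&T syntax.  */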
19118
19119
19120 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
19121 destination to use for the operation. If different from the true
19122 destination in operands[0], a copy operation will be required. */
19123
19124 rtx
19125 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
19126 rtx operands[])
19127 {
19128 rtx dst = operands[0];
19129 rtx src1 = operands[1];
19130 rtx src2 = operands[2];
19131
19132 /* Canonicalize operand order. */
19133 if (ix86_swap_binary_operands_p (code, mode, operands))
19134 {
19135 /* It is invalid to swap operands of different modes. */
19136 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
19137
19138 std::swap (src1, src2);
19139 }
19140
19141 /* The source operands cannot both be in memory. */
19142 if (MEM_P (src1) && MEM_P (src2))
19143 {
19144 /* Optimization: Only read from memory once. */
19145 if (rtx_equal_p (src1, src2))
19146 {
19147 src2 = force_reg (mode, src2);
19148 src1 = src2;
19149 }
19150 else if (rtx_equal_p (dst, src1))
19151 src2 = force_reg (mode, src2);
19152 else
19153 src1 = force_reg (mode, src1);
19154 }
19155
19156 /* If the destination is memory, and we do not have matching source
19157 operands, do things in registers. */
19158 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
19159 dst = gen_reg_rtx (mode);
19160
19161 /* Source 1 cannot be a constant. */
19162 if (CONSTANT_P (src1))
19163 src1 = force_reg (mode, src1);
19164
19165 /* Source 1 cannot be a non-matching memory. */
19166 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
19167 src1 = force_reg (mode, src1);
19168
19169 /* Improve address combine. */
19170 if (code == PLUS
19171 && GET_MODE_CLASS (mode) == MODE_INT
19172 && MEM_P (src2))
19173 src2 = force_reg (mode, src2);
19174
19175 operands[1] = src1;
19176 operands[2] = src2;
19177 return dst;
19178 }
19179
19180 /* Similarly, but assume that the destination has already been
19181 set up properly. */
19182
19183 void
19184 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
19185 machine_mode mode, rtx operands[])
19186 {
19187 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
19188 gcc_assert (dst == operands[0]);
19189 }
19190
19191 /* Attempt to expand a binary operator. Make the expansion closer to the
19192 actual machine than just general_operand, which would allow 3 separate
19193 memory references (one output, two input) in a single insn. */
19194
19195 void
19196 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
19197 rtx operands[])
19198 {
19199 rtx src1, src2, dst, op, clob;
19200
19201 dst = ix86_fixup_binary_operands (code, mode, operands);
19202 src1 = operands[1];
19203 src2 = operands[2];
19204
19205 /* Emit the instruction. */
19206
19207 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));
19208
19209 if (reload_completed
19210 && code == PLUS
19211 && !rtx_equal_p (dst, src1))
19212 {
19213 /* This is going to be an LEA; avoid splitting it later. */
19214 emit_insn (op);
19215 }
19216 else
19217 {
19218 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19219 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
19220 }
19221
19222 /* Fix up the destination if needed. */
19223 if (dst != operands[0])
19224 emit_move_insn (operands[0], dst);
19225 }
19226
19227 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
19228 the given OPERANDS. */
19229
19230 void
19231 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
19232 rtx operands[])
19233 {
19234 rtx op1 = NULL_RTX, op2 = NULL_RTX;
19235 if (SUBREG_P (operands[1]))
19236 {
19237 op1 = operands[1];
19238 op2 = operands[2];
19239 }
19240 else if (SUBREG_P (operands[2]))
19241 {
19242 op1 = operands[2];
19243 op2 = operands[1];
19244 }
19245 /* Optimize (__m128i) d | (__m128i) e and similar code,
19246 when d and e are float vectors, into a float vector logical
19247 insn. In C/C++ without using intrinsics there is no other way
19248 to express a vector logical operation on float vectors than
19249 to cast them temporarily to integer vectors. */
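/* For V4SF operands, for example, this lets the compiler emit andps/orps/
   xorps directly instead of moving the values through the integer domain
   for pand/por/pxor, avoiding cross-domain bypass delays on many
   microarchitectures.  */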
19250 if (op1
19251 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19252 && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR)
19253 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
19254 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
19255 && SUBREG_BYTE (op1) == 0
19256 && (GET_CODE (op2) == CONST_VECTOR
19257 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
19258 && SUBREG_BYTE (op2) == 0))
19259 && can_create_pseudo_p ())
19260 {
19261 rtx dst;
19262 switch (GET_MODE (SUBREG_REG (op1)))
19263 {
19264 case V4SFmode:
19265 case V8SFmode:
19266 case V16SFmode:
19267 case V2DFmode:
19268 case V4DFmode:
19269 case V8DFmode:
19270 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
19271 if (GET_CODE (op2) == CONST_VECTOR)
19272 {
19273 op2 = gen_lowpart (GET_MODE (dst), op2);
19274 op2 = force_reg (GET_MODE (dst), op2);
19275 }
19276 else
19277 {
19278 op1 = operands[1];
19279 op2 = SUBREG_REG (operands[2]);
19280 if (!nonimmediate_operand (op2, GET_MODE (dst)))
19281 op2 = force_reg (GET_MODE (dst), op2);
19282 }
19283 op1 = SUBREG_REG (op1);
19284 if (!nonimmediate_operand (op1, GET_MODE (dst)))
19285 op1 = force_reg (GET_MODE (dst), op1);
19286 emit_insn (gen_rtx_SET (dst,
19287 gen_rtx_fmt_ee (code, GET_MODE (dst),
19288 op1, op2)));
19289 emit_move_insn (operands[0], gen_lowpart (mode, dst));
19290 return;
19291 default:
19292 break;
19293 }
19294 }
19295 if (!nonimmediate_operand (operands[1], mode))
19296 operands[1] = force_reg (mode, operands[1]);
19297 if (!nonimmediate_operand (operands[2], mode))
19298 operands[2] = force_reg (mode, operands[2]);
19299 ix86_fixup_binary_operands_no_copy (code, mode, operands);
19300 emit_insn (gen_rtx_SET (operands[0],
19301 gen_rtx_fmt_ee (code, mode, operands[1],
19302 operands[2])));
19303 }
19304
19305 /* Return TRUE or FALSE depending on whether the binary operator meets the
19306 appropriate constraints. */
19307
19308 bool
19309 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
19310 rtx operands[3])
19311 {
19312 rtx dst = operands[0];
19313 rtx src1 = operands[1];
19314 rtx src2 = operands[2];
19315
19316 /* The source operands cannot both be in memory. */
19317 if (MEM_P (src1) && MEM_P (src2))
19318 return false;
19319
19320 /* Canonicalize operand order for commutative operators. */
19321 if (ix86_swap_binary_operands_p (code, mode, operands))
19322 std::swap (src1, src2);
19323
19324 /* If the destination is memory, we must have a matching source operand. */
19325 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
19326 return false;
19327
19328 /* Source 1 cannot be a constant. */
19329 if (CONSTANT_P (src1))
19330 return false;
19331
19332 /* Source 1 cannot be a non-matching memory. */
19333 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
19334 /* Support "andhi/andsi/anddi" as a zero-extending move. */
19335 return (code == AND
19336 && (mode == HImode
19337 || mode == SImode
19338 || (TARGET_64BIT && mode == DImode))
19339 && satisfies_constraint_L (src2));
19340
19341 return true;
19342 }
19343
19344 /* Attempt to expand a unary operator. Make the expansion closer to the
19345 actual machine than just general_operand, which would allow 2 separate
19346 memory references (one output, one input) in a single insn. */
19347
19348 void
19349 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
19350 rtx operands[])
19351 {
19352 bool matching_memory = false;
19353 rtx src, dst, op, clob;
19354
19355 dst = operands[0];
19356 src = operands[1];
19357
19358 /* If the destination is memory, and we do not have matching source
19359 operands, do things in registers. */
19360 if (MEM_P (dst))
19361 {
19362 if (rtx_equal_p (dst, src))
19363 matching_memory = true;
19364 else
19365 dst = gen_reg_rtx (mode);
19366 }
19367
19368 /* When source operand is memory, destination must match. */
19369 if (MEM_P (src) && !matching_memory)
19370 src = force_reg (mode, src);
19371
19372 /* Emit the instruction. */
19373
19374 op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));
19375
19376 if (code == NOT)
19377 emit_insn (op);
19378 else
19379 {
19380 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19381 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
19382 }
19383
19384 /* Fix up the destination if needed. */
19385 if (dst != operands[0])
19386 emit_move_insn (operands[0], dst);
19387 }
19388
19389 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
19390 divisor are within the range [0-255]. */
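/* Sketch of the fast path below: the dividend and divisor are IOR-ed into a
   scratch register and tested against -0x100; the result is zero iff both
   values fit in 8 bits. In that case a single HImode-by-QImode unsigned
   divide is used, which leaves the quotient in AL and the remainder in AH,
   and those are extracted with a lowpart and a ZERO_EXTRACT respectively.  */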
19391
19392 void
19393 ix86_split_idivmod (machine_mode mode, rtx operands[],
19394 bool signed_p)
19395 {
19396 rtx_code_label *end_label, *qimode_label;
19397 rtx insn, div, mod;
19398 rtx scratch, tmp0, tmp1, tmp2;
19399 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
19400 rtx (*gen_zero_extend) (rtx, rtx);
19401 rtx (*gen_test_ccno_1) (rtx, rtx);
19402
19403 switch (mode)
19404 {
19405 case SImode:
19406 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
19407 gen_test_ccno_1 = gen_testsi_ccno_1;
19408 gen_zero_extend = gen_zero_extendqisi2;
19409 break;
19410 case DImode:
19411 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
19412 gen_test_ccno_1 = gen_testdi_ccno_1;
19413 gen_zero_extend = gen_zero_extendqidi2;
19414 break;
19415 default:
19416 gcc_unreachable ();
19417 }
19418
19419 end_label = gen_label_rtx ();
19420 qimode_label = gen_label_rtx ();
19421
19422 scratch = gen_reg_rtx (mode);
19423
19424   /* Use 8bit unsigned divmod if dividend and divisor are within
19425      the range [0-255].  */
19426 emit_move_insn (scratch, operands[2]);
19427 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
19428 scratch, 1, OPTAB_DIRECT);
19429 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
19430 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
19431 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
19432 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
19433 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
19434 pc_rtx);
19435 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
19436 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19437 JUMP_LABEL (insn) = qimode_label;
19438
19439   /* Generate the original signed/unsigned divmod.  */
19440 div = gen_divmod4_1 (operands[0], operands[1],
19441 operands[2], operands[3]);
19442 emit_insn (div);
19443
19444 /* Branch to the end. */
19445 emit_jump_insn (gen_jump (end_label));
19446 emit_barrier ();
19447
19448 /* Generate 8bit unsigned divide. */
19449 emit_label (qimode_label);
19450 /* Don't use operands[0] for result of 8bit divide since not all
19451 registers support QImode ZERO_EXTRACT. */
19452 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
19453 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
19454 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
19455 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
19456
19457 if (signed_p)
19458 {
19459 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
19460 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
19461 }
19462 else
19463 {
19464 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
19465 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
19466 }
19467
19468 /* Extract remainder from AH. */
19469 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
19470 if (REG_P (operands[1]))
19471 insn = emit_move_insn (operands[1], tmp1);
19472 else
19473 {
19474 /* Need a new scratch register since the old one has result
19475 of 8bit divide. */
19476 scratch = gen_reg_rtx (mode);
19477 emit_move_insn (scratch, tmp1);
19478 insn = emit_move_insn (operands[1], scratch);
19479 }
19480 set_unique_reg_note (insn, REG_EQUAL, mod);
19481
19482 /* Zero extend quotient from AL. */
19483 tmp1 = gen_lowpart (QImode, tmp0);
19484 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
19485 set_unique_reg_note (insn, REG_EQUAL, div);
19486
19487 emit_label (end_label);
19488 }
19489
19490 #define LEA_MAX_STALL (3)
19491 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
19492
19493 /* Increase the given DISTANCE in half-cycles according to
19494    dependencies between the PREV and NEXT instructions.
19495    Add 1 half-cycle if there is no dependency and
19496    go to the next cycle if there is some dependency.  */
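/* For instance (a sketch of the accounting only, not tied to any particular
   instruction stream): starting from DISTANCE 4, an independent NEXT adds
   one half-cycle (-> 5), whereas a NEXT that reads a register defined by
   PREV is pushed to the start of the next full cycle (-> 6).  */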
19497
19498 static unsigned int
19499 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
19500 {
19501 df_ref def, use;
19502
19503 if (!prev || !next)
19504 return distance + (distance & 1) + 2;
19505
19506 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
19507 return distance + 1;
19508
19509 FOR_EACH_INSN_USE (use, next)
19510 FOR_EACH_INSN_DEF (def, prev)
19511 if (!DF_REF_IS_ARTIFICIAL (def)
19512 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
19513 return distance + (distance & 1) + 2;
19514
19515 return distance + 1;
19516 }
19517
19518 /* Check whether instruction INSN defines register number
19519    REGNO1 or REGNO2.  */
19520
19521 static bool
19522 insn_defines_reg (unsigned int regno1, unsigned int regno2,
19523 rtx_insn *insn)
19524 {
19525 df_ref def;
19526
19527 FOR_EACH_INSN_DEF (def, insn)
19528 if (DF_REF_REG_DEF_P (def)
19529 && !DF_REF_IS_ARTIFICIAL (def)
19530 && (regno1 == DF_REF_REGNO (def)
19531 || regno2 == DF_REF_REGNO (def)))
19532 return true;
19533
19534 return false;
19535 }
19536
19537 /* Check whether instruction INSN uses register number
19538    REGNO as part of an address expression.  */
19539
19540 static bool
19541 insn_uses_reg_mem (unsigned int regno, rtx insn)
19542 {
19543 df_ref use;
19544
19545 FOR_EACH_INSN_USE (use, insn)
19546 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
19547 return true;
19548
19549 return false;
19550 }
19551
19552 /* Search backward for non-agu definition of register number REGNO1
19553 or register number REGNO2 in basic block starting from instruction
19554 START up to head of basic block or instruction INSN.
19555
19556    The function stores true in *FOUND if a definition was found
19557    and false otherwise.
19558
19559    The distance in half-cycles between START and the found instruction
19560    or the head of the BB is added to DISTANCE and returned.  */
19561
19562 static int
19563 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
19564 rtx_insn *insn, int distance,
19565 rtx_insn *start, bool *found)
19566 {
19567 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
19568 rtx_insn *prev = start;
19569 rtx_insn *next = NULL;
19570
19571 *found = false;
19572
19573 while (prev
19574 && prev != insn
19575 && distance < LEA_SEARCH_THRESHOLD)
19576 {
19577 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
19578 {
19579 distance = increase_distance (prev, next, distance);
19580 if (insn_defines_reg (regno1, regno2, prev))
19581 {
19582 if (recog_memoized (prev) < 0
19583 || get_attr_type (prev) != TYPE_LEA)
19584 {
19585 *found = true;
19586 return distance;
19587 }
19588 }
19589
19590 next = prev;
19591 }
19592 if (prev == BB_HEAD (bb))
19593 break;
19594
19595 prev = PREV_INSN (prev);
19596 }
19597
19598 return distance;
19599 }
19600
19601 /* Search backward for non-agu definition of register number REGNO1
19602 or register number REGNO2 in INSN's basic block until
19603 1. Pass LEA_SEARCH_THRESHOLD instructions, or
19604    2. Reach a neighbouring BB's boundary, or
19605 3. Reach agu definition.
19606 Returns the distance between the non-agu definition point and INSN.
19607 If no definition point, returns -1. */
19608
19609 static int
19610 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
19611 rtx_insn *insn)
19612 {
19613 basic_block bb = BLOCK_FOR_INSN (insn);
19614 int distance = 0;
19615 bool found = false;
19616
19617 if (insn != BB_HEAD (bb))
19618 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
19619 distance, PREV_INSN (insn),
19620 &found);
19621
19622 if (!found && distance < LEA_SEARCH_THRESHOLD)
19623 {
19624 edge e;
19625 edge_iterator ei;
19626 bool simple_loop = false;
19627
19628 FOR_EACH_EDGE (e, ei, bb->preds)
19629 if (e->src == bb)
19630 {
19631 simple_loop = true;
19632 break;
19633 }
19634
19635 if (simple_loop)
19636 distance = distance_non_agu_define_in_bb (regno1, regno2,
19637 insn, distance,
19638 BB_END (bb), &found);
19639 else
19640 {
19641 int shortest_dist = -1;
19642 bool found_in_bb = false;
19643
19644 FOR_EACH_EDGE (e, ei, bb->preds)
19645 {
19646 int bb_dist
19647 = distance_non_agu_define_in_bb (regno1, regno2,
19648 insn, distance,
19649 BB_END (e->src),
19650 &found_in_bb);
19651 if (found_in_bb)
19652 {
19653 if (shortest_dist < 0)
19654 shortest_dist = bb_dist;
19655 else if (bb_dist > 0)
19656 shortest_dist = MIN (bb_dist, shortest_dist);
19657
19658 found = true;
19659 }
19660 }
19661
19662 distance = shortest_dist;
19663 }
19664 }
19665
19666 /* get_attr_type may modify recog data. We want to make sure
19667 that recog data is valid for instruction INSN, on which
19668 distance_non_agu_define is called. INSN is unchanged here. */
19669 extract_insn_cached (insn);
19670
19671 if (!found)
19672 return -1;
19673
19674 return distance >> 1;
19675 }
19676
19677 /* Return the distance in half-cycles between INSN and the next
19678    insn that uses register number REGNO in a memory address, added
19679    to DISTANCE.  Return -1 if REGNO is set.
19680
19681    Store true in *FOUND if a register usage was found and
19682    false otherwise.
19683    Store true in *REDEFINED if a register redefinition was
19684    found and false otherwise.  */
19685
19686 static int
19687 distance_agu_use_in_bb (unsigned int regno,
19688 rtx_insn *insn, int distance, rtx_insn *start,
19689 bool *found, bool *redefined)
19690 {
19691 basic_block bb = NULL;
19692 rtx_insn *next = start;
19693 rtx_insn *prev = NULL;
19694
19695 *found = false;
19696 *redefined = false;
19697
19698 if (start != NULL_RTX)
19699 {
19700 bb = BLOCK_FOR_INSN (start);
19701 if (start != BB_HEAD (bb))
19702 /* If insn and start belong to the same bb, set prev to insn,
19703 so the call to increase_distance will increase the distance
19704 between insns by 1. */
19705 prev = insn;
19706 }
19707
19708 while (next
19709 && next != insn
19710 && distance < LEA_SEARCH_THRESHOLD)
19711 {
19712 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
19713 {
19714 distance = increase_distance(prev, next, distance);
19715 if (insn_uses_reg_mem (regno, next))
19716 {
19717 /* Return DISTANCE if OP0 is used in memory
19718 address in NEXT. */
19719 *found = true;
19720 return distance;
19721 }
19722
19723 if (insn_defines_reg (regno, INVALID_REGNUM, next))
19724 {
19725 /* Return -1 if OP0 is set in NEXT. */
19726 *redefined = true;
19727 return -1;
19728 }
19729
19730 prev = next;
19731 }
19732
19733 if (next == BB_END (bb))
19734 break;
19735
19736 next = NEXT_INSN (next);
19737 }
19738
19739 return distance;
19740 }
19741
19742 /* Return the distance between INSN and the next insn that uses
19743    register number REGNO0 in a memory address.  Return -1 if no such
19744    use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */
19745
19746 static int
19747 distance_agu_use (unsigned int regno0, rtx_insn *insn)
19748 {
19749 basic_block bb = BLOCK_FOR_INSN (insn);
19750 int distance = 0;
19751 bool found = false;
19752 bool redefined = false;
19753
19754 if (insn != BB_END (bb))
19755 distance = distance_agu_use_in_bb (regno0, insn, distance,
19756 NEXT_INSN (insn),
19757 &found, &redefined);
19758
19759 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
19760 {
19761 edge e;
19762 edge_iterator ei;
19763 bool simple_loop = false;
19764
19765 FOR_EACH_EDGE (e, ei, bb->succs)
19766 if (e->dest == bb)
19767 {
19768 simple_loop = true;
19769 break;
19770 }
19771
19772 if (simple_loop)
19773 distance = distance_agu_use_in_bb (regno0, insn,
19774 distance, BB_HEAD (bb),
19775 &found, &redefined);
19776 else
19777 {
19778 int shortest_dist = -1;
19779 bool found_in_bb = false;
19780 bool redefined_in_bb = false;
19781
19782 FOR_EACH_EDGE (e, ei, bb->succs)
19783 {
19784 int bb_dist
19785 = distance_agu_use_in_bb (regno0, insn,
19786 distance, BB_HEAD (e->dest),
19787 &found_in_bb, &redefined_in_bb);
19788 if (found_in_bb)
19789 {
19790 if (shortest_dist < 0)
19791 shortest_dist = bb_dist;
19792 else if (bb_dist > 0)
19793 shortest_dist = MIN (bb_dist, shortest_dist);
19794
19795 found = true;
19796 }
19797 }
19798
19799 distance = shortest_dist;
19800 }
19801 }
19802
19803 if (!found || redefined)
19804 return -1;
19805
19806 return distance >> 1;
19807 }
19808
19809 /* Define this macro to tune LEA priority vs ADD; it takes effect when
19810    there is a dilemma of choosing LEA or ADD.
19811    Negative value: ADD is preferred over LEA
19812    Zero: Neutral
19813    Positive value: LEA is preferred over ADD.  */
19814 #define IX86_LEA_PRIORITY 0
19815
19816 /* Return true if use of the lea INSN has a performance advantage
19817    over a sequence of instructions.  The instruction sequence has
19818    SPLIT_COST cycles higher latency than the lea latency.  */
19819
19820 static bool
19821 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
19822 unsigned int regno2, int split_cost, bool has_scale)
19823 {
19824 int dist_define, dist_use;
19825
19826   /* For Silvermont, if a 2-source or 3-source LEA is used for a
19827      non-destructive destination, or for the ability to use
19828      SCALE, the use of LEA is justified.  */
19829 if (TARGET_SILVERMONT || TARGET_INTEL)
19830 {
19831 if (has_scale)
19832 return true;
19833 if (split_cost < 1)
19834 return false;
19835 if (regno0 == regno1 || regno0 == regno2)
19836 return false;
19837 return true;
19838 }
19839
19840 dist_define = distance_non_agu_define (regno1, regno2, insn);
19841 dist_use = distance_agu_use (regno0, insn);
19842
19843 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
19844 {
19845       /* If there is no non-AGU operand definition, no AGU
19846          operand usage and the split cost is 0, then both the lea
19847          and non-lea variants have the same priority.  Currently
19848          we prefer lea for 64-bit code and non-lea for 32-bit
19849          code.  */
19850 if (dist_use < 0 && split_cost == 0)
19851 return TARGET_64BIT || IX86_LEA_PRIORITY;
19852 else
19853 return true;
19854 }
19855
19856   /* With a longer definition distance, lea is preferable.
19857      Here we adjust it to take into account the splitting cost and
19858      lea priority.  */
19859 dist_define += split_cost + IX86_LEA_PRIORITY;
19860
19861   /* If there is no use in a memory address then we just check
19862      that the split cost exceeds the AGU stall.  */
19863 if (dist_use < 0)
19864 return dist_define > LEA_MAX_STALL;
19865
19866   /* If this insn has both a backward non-agu dependence and a forward
19867      agu dependence, the one with the shorter distance takes effect.  */
19868 return dist_define >= dist_use;
19869 }
19870
19871 /* Return true if it is legal to clobber flags by INSN and
19872 false otherwise. */
19873
19874 static bool
19875 ix86_ok_to_clobber_flags (rtx_insn *insn)
19876 {
19877 basic_block bb = BLOCK_FOR_INSN (insn);
19878 df_ref use;
19879 bitmap live;
19880
19881 while (insn)
19882 {
19883 if (NONDEBUG_INSN_P (insn))
19884 {
19885 FOR_EACH_INSN_USE (use, insn)
19886 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
19887 return false;
19888
19889 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
19890 return true;
19891 }
19892
19893 if (insn == BB_END (bb))
19894 break;
19895
19896 insn = NEXT_INSN (insn);
19897 }
19898
19899 live = df_get_live_out(bb);
19900 return !REGNO_REG_SET_P (live, FLAGS_REG);
19901 }
19902
19903 /* Return true if we need to split op0 = op1 + op2 into a sequence of
19904 move and add to avoid AGU stalls. */
19905
19906 bool
19907 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
19908 {
19909 unsigned int regno0, regno1, regno2;
19910
19911 /* Check if we need to optimize. */
19912 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
19913 return false;
19914
19915 /* Check it is correct to split here. */
19916 if (!ix86_ok_to_clobber_flags(insn))
19917 return false;
19918
19919 regno0 = true_regnum (operands[0]);
19920 regno1 = true_regnum (operands[1]);
19921 regno2 = true_regnum (operands[2]);
19922
19923   /* We need to split only adds with a non-destructive
19924      destination operand.  */
19925 if (regno0 == regno1 || regno0 == regno2)
19926 return false;
19927 else
19928 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
19929 }
19930
19931 /* Return true if we should emit an lea instruction instead of a mov
19932    instruction.  */
19933
19934 bool
19935 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
19936 {
19937 unsigned int regno0, regno1;
19938
19939 /* Check if we need to optimize. */
19940 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
19941 return false;
19942
19943 /* Use lea for reg to reg moves only. */
19944 if (!REG_P (operands[0]) || !REG_P (operands[1]))
19945 return false;
19946
19947 regno0 = true_regnum (operands[0]);
19948 regno1 = true_regnum (operands[1]);
19949
19950 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
19951 }
19952
19953 /* Return true if we need to split lea into a sequence of
19954 instructions to avoid AGU stalls. */
19955
19956 bool
19957 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
19958 {
19959 unsigned int regno0, regno1, regno2;
19960 int split_cost;
19961 struct ix86_address parts;
19962 int ok;
19963
19964 /* Check we need to optimize. */
19965 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
19966 return false;
19967
19968 /* The "at least two components" test below might not catch simple
19969 move or zero extension insns if parts.base is non-NULL and parts.disp
19970 is const0_rtx as the only components in the address, e.g. if the
19971 register is %rbp or %r13. As this test is much cheaper and moves or
19972 zero extensions are the common case, do this check first. */
19973 if (REG_P (operands[1])
19974 || (SImode_address_operand (operands[1], VOIDmode)
19975 && REG_P (XEXP (operands[1], 0))))
19976 return false;
19977
19978 /* Check if it is OK to split here. */
19979 if (!ix86_ok_to_clobber_flags (insn))
19980 return false;
19981
19982 ok = ix86_decompose_address (operands[1], &parts);
19983 gcc_assert (ok);
19984
19985 /* There should be at least two components in the address. */
19986 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
19987 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
19988 return false;
19989
19990   /* We should not split into add if a non-legitimate PIC
19991      operand is used as the displacement.  */
19992 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
19993 return false;
19994
19995   regno0 = true_regnum (operands[0]);
19996 regno1 = INVALID_REGNUM;
19997 regno2 = INVALID_REGNUM;
19998
19999 if (parts.base)
20000 regno1 = true_regnum (parts.base);
20001 if (parts.index)
20002 regno2 = true_regnum (parts.index);
20003
20004 split_cost = 0;
20005
20006   /* Compute how many cycles we will add to the execution time
20007      if we split the lea into a sequence of instructions.  */
20008 if (parts.base || parts.index)
20009 {
20010       /* Have to use a mov instruction if the non-destructive
20011          destination form is used.  */
20012 if (regno1 != regno0 && regno2 != regno0)
20013 split_cost += 1;
20014
20015 /* Have to add index to base if both exist. */
20016 if (parts.base && parts.index)
20017 split_cost += 1;
20018
20019 /* Have to use shift and adds if scale is 2 or greater. */
20020 if (parts.scale > 1)
20021 {
20022 if (regno0 != regno1)
20023 split_cost += 1;
20024 else if (regno2 == regno0)
20025 split_cost += 4;
20026 else
20027 split_cost += parts.scale;
20028 }
20029
20030       /* Have to use an add instruction with an immediate if
20031          disp is nonzero.  */
20032 if (parts.disp && parts.disp != const0_rtx)
20033 split_cost += 1;
20034
20035 /* Subtract the price of lea. */
20036 split_cost -= 1;
20037 }
20038
20039 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
20040 parts.scale > 1);
20041 }
20042
20043 /* Emit x86 binary operator CODE in mode MODE, where the first operand
20044    matches the destination.  The RTX includes a clobber of FLAGS_REG.  */
20045
20046 static void
20047 ix86_emit_binop (enum rtx_code code, machine_mode mode,
20048 rtx dst, rtx src)
20049 {
20050 rtx op, clob;
20051
20052 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
20053 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20054
20055 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
20056 }
20057
20058 /* Return true if regno1 def is nearest to the insn. */
20059
20060 static bool
20061 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
20062 {
20063 rtx_insn *prev = insn;
20064 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
20065
20066 if (insn == start)
20067 return false;
20068 while (prev && prev != start)
20069 {
20070 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
20071 {
20072 prev = PREV_INSN (prev);
20073 continue;
20074 }
20075 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
20076 return true;
20077 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
20078 return false;
20079 prev = PREV_INSN (prev);
20080 }
20081
20082 /* None of the regs is defined in the bb. */
20083 return false;
20084 }
20085
20086 /* Split lea instructions into a sequence of instructions
20087    which are executed on the ALU to avoid AGU stalls.
20088    It is assumed that it is allowed to clobber the flags register
20089    at the lea position.  */
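/* As a rough illustration (hypothetical operands, assuming the flags may be
   clobbered), an insn such as

       lea 0x4(%rbx,%rcx,4), %rax

   may be split into ALU instructions along the lines of

       mov %rcx, %rax
       shl $2, %rax
       add %rbx, %rax
       add $0x4, %rax                                                       */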
20090
20091 void
20092 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
20093 {
20094 unsigned int regno0, regno1, regno2;
20095 struct ix86_address parts;
20096 rtx target, tmp;
20097 int ok, adds;
20098
20099 ok = ix86_decompose_address (operands[1], &parts);
20100 gcc_assert (ok);
20101
20102 target = gen_lowpart (mode, operands[0]);
20103
20104 regno0 = true_regnum (target);
20105 regno1 = INVALID_REGNUM;
20106 regno2 = INVALID_REGNUM;
20107
20108 if (parts.base)
20109 {
20110 parts.base = gen_lowpart (mode, parts.base);
20111 regno1 = true_regnum (parts.base);
20112 }
20113
20114 if (parts.index)
20115 {
20116 parts.index = gen_lowpart (mode, parts.index);
20117 regno2 = true_regnum (parts.index);
20118 }
20119
20120 if (parts.disp)
20121 parts.disp = gen_lowpart (mode, parts.disp);
20122
20123 if (parts.scale > 1)
20124 {
20125 /* Case r1 = r1 + ... */
20126 if (regno1 == regno0)
20127 {
20128       /* If we have the case r1 = r1 + C * r2 then we
20129          would have to use multiplication, which is very
20130          expensive.  Assume the cost model is wrong if we
20131          get such a case here.  */
20132 gcc_assert (regno2 != regno0);
20133
20134 for (adds = parts.scale; adds > 0; adds--)
20135 ix86_emit_binop (PLUS, mode, target, parts.index);
20136 }
20137 else
20138 {
20139 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
20140 if (regno0 != regno2)
20141 emit_insn (gen_rtx_SET (target, parts.index));
20142
20143 /* Use shift for scaling. */
20144 ix86_emit_binop (ASHIFT, mode, target,
20145 GEN_INT (exact_log2 (parts.scale)));
20146
20147 if (parts.base)
20148 ix86_emit_binop (PLUS, mode, target, parts.base);
20149
20150 if (parts.disp && parts.disp != const0_rtx)
20151 ix86_emit_binop (PLUS, mode, target, parts.disp);
20152 }
20153 }
20154 else if (!parts.base && !parts.index)
20155 {
20156 gcc_assert(parts.disp);
20157 emit_insn (gen_rtx_SET (target, parts.disp));
20158 }
20159 else
20160 {
20161 if (!parts.base)
20162 {
20163 if (regno0 != regno2)
20164 emit_insn (gen_rtx_SET (target, parts.index));
20165 }
20166 else if (!parts.index)
20167 {
20168 if (regno0 != regno1)
20169 emit_insn (gen_rtx_SET (target, parts.base));
20170 }
20171 else
20172 {
20173 if (regno0 == regno1)
20174 tmp = parts.index;
20175 else if (regno0 == regno2)
20176 tmp = parts.base;
20177 else
20178 {
20179 rtx tmp1;
20180
20181 /* Find better operand for SET instruction, depending
20182 on which definition is farther from the insn. */
20183 if (find_nearest_reg_def (insn, regno1, regno2))
20184 tmp = parts.index, tmp1 = parts.base;
20185 else
20186 tmp = parts.base, tmp1 = parts.index;
20187
20188 emit_insn (gen_rtx_SET (target, tmp));
20189
20190 if (parts.disp && parts.disp != const0_rtx)
20191 ix86_emit_binop (PLUS, mode, target, parts.disp);
20192
20193 ix86_emit_binop (PLUS, mode, target, tmp1);
20194 return;
20195 }
20196
20197 ix86_emit_binop (PLUS, mode, target, tmp);
20198 }
20199
20200 if (parts.disp && parts.disp != const0_rtx)
20201 ix86_emit_binop (PLUS, mode, target, parts.disp);
20202 }
20203 }
20204
20205 /* Return true if it is ok to optimize an ADD operation to an LEA
20206    operation to avoid flag register consumption.  For most processors,
20207    ADD is faster than LEA.  For processors like BONNELL, if the
20208    destination register of the LEA holds an actual address which will be
20209    used soon, LEA is better; otherwise ADD is better.  */
20210
20211 bool
20212 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
20213 {
20214 unsigned int regno0 = true_regnum (operands[0]);
20215 unsigned int regno1 = true_regnum (operands[1]);
20216 unsigned int regno2 = true_regnum (operands[2]);
20217
20218 /* If a = b + c, (a!=b && a!=c), must use lea form. */
20219 if (regno0 != regno1 && regno0 != regno2)
20220 return true;
20221
20222 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20223 return false;
20224
20225 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
20226 }
20227
20228 /* Return true if destination reg of SET_BODY is shift count of
20229 USE_BODY. */
20230
20231 static bool
20232 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
20233 {
20234 rtx set_dest;
20235 rtx shift_rtx;
20236 int i;
20237
20238 /* Retrieve destination of SET_BODY. */
20239 switch (GET_CODE (set_body))
20240 {
20241 case SET:
20242 set_dest = SET_DEST (set_body);
20243 if (!set_dest || !REG_P (set_dest))
20244 return false;
20245 break;
20246 case PARALLEL:
20247 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
20248 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
20249 use_body))
20250 return true;
20251 default:
20252 return false;
20253 break;
20254 }
20255
20256 /* Retrieve shift count of USE_BODY. */
20257 switch (GET_CODE (use_body))
20258 {
20259 case SET:
20260 shift_rtx = XEXP (use_body, 1);
20261 break;
20262 case PARALLEL:
20263 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
20264 if (ix86_dep_by_shift_count_body (set_body,
20265 XVECEXP (use_body, 0, i)))
20266 return true;
20267 default:
20268 return false;
20269 break;
20270 }
20271
20272 if (shift_rtx
20273 && (GET_CODE (shift_rtx) == ASHIFT
20274 || GET_CODE (shift_rtx) == LSHIFTRT
20275 || GET_CODE (shift_rtx) == ASHIFTRT
20276 || GET_CODE (shift_rtx) == ROTATE
20277 || GET_CODE (shift_rtx) == ROTATERT))
20278 {
20279 rtx shift_count = XEXP (shift_rtx, 1);
20280
20281 /* Return true if shift count is dest of SET_BODY. */
20282 if (REG_P (shift_count))
20283 {
20284           /* Add this check since it can be invoked before register
20285              allocation in the pre-reload scheduler.  */
20286 if (reload_completed
20287 && true_regnum (set_dest) == true_regnum (shift_count))
20288 return true;
20289 else if (REGNO(set_dest) == REGNO(shift_count))
20290 return true;
20291 }
20292 }
20293
20294 return false;
20295 }
20296
20297 /* Return true if destination reg of SET_INSN is shift count of
20298 USE_INSN. */
20299
20300 bool
20301 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
20302 {
20303 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
20304 PATTERN (use_insn));
20305 }
20306
20307 /* Return TRUE or FALSE depending on whether the unary operator meets the
20308 appropriate constraints. */
20309
20310 bool
20311 ix86_unary_operator_ok (enum rtx_code,
20312 machine_mode,
20313 rtx operands[2])
20314 {
20315 /* If one of operands is memory, source and destination must match. */
20316 if ((MEM_P (operands[0])
20317 || MEM_P (operands[1]))
20318 && ! rtx_equal_p (operands[0], operands[1]))
20319 return false;
20320 return true;
20321 }
20322
20323 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
20324 are ok, keeping in mind the possible movddup alternative. */
20325
20326 bool
20327 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
20328 {
20329 if (MEM_P (operands[0]))
20330 return rtx_equal_p (operands[0], operands[1 + high]);
20331 if (MEM_P (operands[1]) && MEM_P (operands[2]))
20332 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
20333 return true;
20334 }
20335
20336 /* Post-reload splitter for converting an SF or DFmode value in an
20337    SSE register into an unsigned SImode value.  */
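/* In outline (a sketch of the idea, not the exact insn sequence): values
   below 2**31 are converted directly with the signed truncating conversion;
   for values >= 2**31 we first subtract 2**31, convert, and then xor the
   result with 0x80000000 to restore the high bit.  E.g. 3e9 -> 3e9 - 2**31
   = 852516352 -> converted -> xor 0x80000000 -> 3000000000.  */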
20338
20339 void
20340 ix86_split_convert_uns_si_sse (rtx operands[])
20341 {
20342 machine_mode vecmode;
20343 rtx value, large, zero_or_two31, input, two31, x;
20344
20345 large = operands[1];
20346 zero_or_two31 = operands[2];
20347 input = operands[3];
20348 two31 = operands[4];
20349 vecmode = GET_MODE (large);
20350 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
20351
20352 /* Load up the value into the low element. We must ensure that the other
20353 elements are valid floats -- zero is the easiest such value. */
20354 if (MEM_P (input))
20355 {
20356 if (vecmode == V4SFmode)
20357 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
20358 else
20359 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
20360 }
20361 else
20362 {
20363 input = gen_rtx_REG (vecmode, REGNO (input));
20364 emit_move_insn (value, CONST0_RTX (vecmode));
20365 if (vecmode == V4SFmode)
20366 emit_insn (gen_sse_movss (value, value, input));
20367 else
20368 emit_insn (gen_sse2_movsd (value, value, input));
20369 }
20370
20371 emit_move_insn (large, two31);
20372 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
20373
20374 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
20375 emit_insn (gen_rtx_SET (large, x));
20376
20377 x = gen_rtx_AND (vecmode, zero_or_two31, large);
20378 emit_insn (gen_rtx_SET (zero_or_two31, x));
20379
20380 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
20381 emit_insn (gen_rtx_SET (value, x));
20382
20383 large = gen_rtx_REG (V4SImode, REGNO (large));
20384 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
20385
20386 x = gen_rtx_REG (V4SImode, REGNO (value));
20387 if (vecmode == V4SFmode)
20388 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
20389 else
20390 emit_insn (gen_sse2_cvttpd2dq (x, value));
20391 value = x;
20392
20393 emit_insn (gen_xorv4si3 (value, value, large));
20394 }
20395
20396 /* Convert an unsigned DImode value into a DFmode, using only SSE.
20397 Expects the 64-bit DImode to be supplied in a pair of integral
20398 registers. Requires SSE2; will use SSE3 if available. For x86_32,
20399 -mfpmath=sse, !optimize_size only. */
20400
20401 void
20402 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
20403 {
20404 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
20405 rtx int_xmm, fp_xmm;
20406 rtx biases, exponents;
20407 rtx x;
20408
20409 int_xmm = gen_reg_rtx (V4SImode);
20410 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
20411 emit_insn (gen_movdi_to_sse (int_xmm, input));
20412 else if (TARGET_SSE_SPLIT_REGS)
20413 {
20414 emit_clobber (int_xmm);
20415 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
20416 }
20417 else
20418 {
20419 x = gen_reg_rtx (V2DImode);
20420 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
20421 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
20422 }
20423
20424 x = gen_rtx_CONST_VECTOR (V4SImode,
20425 gen_rtvec (4, GEN_INT (0x43300000UL),
20426 GEN_INT (0x45300000UL),
20427 const0_rtx, const0_rtx));
20428 exponents = validize_mem (force_const_mem (V4SImode, x));
20429
20430 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
20431 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
20432
20433   /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_lo_xmm)
20434 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
20435 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
20436 (0x1.0p84 + double(fp_value_hi_xmm)).
20437 Note these exponents differ by 32. */
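  /* A worked example of the trick (values chosen purely for illustration):
     for the 64-bit input 0x100000003 we have hi = 1, lo = 3, so the two
     doubles are (0x1.0p52 + 3) and (0x1.0p84 + 1 * 0x1.0p32); after
     subtracting the biases they become 3 and 0x1.0p32, and their sum is
     4294967299.0 == (double) 0x100000003.  */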
20438
20439 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
20440
20441 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
20442 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
20443 real_ldexp (&bias_lo_rvt, &dconst1, 52);
20444 real_ldexp (&bias_hi_rvt, &dconst1, 84);
20445 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
20446 x = const_double_from_real_value (bias_hi_rvt, DFmode);
20447 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
20448 biases = validize_mem (force_const_mem (V2DFmode, biases));
20449 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
20450
20451 /* Add the upper and lower DFmode values together. */
20452 if (TARGET_SSE3)
20453 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
20454 else
20455 {
20456 x = copy_to_mode_reg (V2DFmode, fp_xmm);
20457 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
20458 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
20459 }
20460
20461 ix86_expand_vector_extract (false, target, fp_xmm, 0);
20462 }
20463
20464 /* Not used, but eases macroization of patterns. */
20465 void
20466 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
20467 {
20468 gcc_unreachable ();
20469 }
20470
20471 /* Convert an unsigned SImode value into a DFmode value.  Currently only
20472    used for SSE, but applicable anywhere.  */
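/* The idea, sketched with a concrete value (illustrative only): the input is
   first shifted into signed range by adding -2**31 (i.e. flipping the sign
   bit), then converted with the signed SImode->DFmode conversion, and finally
   2**31.0 is added back.  E.g. input 0x80000005 -> + (-2**31) -> 5 ->
   5.0 -> + 2**31.0 -> 2147483653.0.  */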
20473
20474 void
20475 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
20476 {
20477 REAL_VALUE_TYPE TWO31r;
20478 rtx x, fp;
20479
20480 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
20481 NULL, 1, OPTAB_DIRECT);
20482
20483 fp = gen_reg_rtx (DFmode);
20484 emit_insn (gen_floatsidf2 (fp, x));
20485
20486 real_ldexp (&TWO31r, &dconst1, 31);
20487 x = const_double_from_real_value (TWO31r, DFmode);
20488
20489 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
20490 if (x != target)
20491 emit_move_insn (target, x);
20492 }
20493
20494 /* Convert a signed DImode value into a DFmode. Only used for SSE in
20495 32-bit mode; otherwise we have a direct convert instruction. */
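/* Sketched with a concrete value (illustrative only): for the signed input
   -3 = 0xfffffffffffffffd the high word converts to -1.0, which is scaled
   to -1.0 * 0x1.0p32 = -4294967296.0; the low word converts (as unsigned)
   to 4294967293.0, and the sum is -3.0.  */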
20496
20497 void
20498 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
20499 {
20500 REAL_VALUE_TYPE TWO32r;
20501 rtx fp_lo, fp_hi, x;
20502
20503 fp_lo = gen_reg_rtx (DFmode);
20504 fp_hi = gen_reg_rtx (DFmode);
20505
20506 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
20507
20508 real_ldexp (&TWO32r, &dconst1, 32);
20509 x = const_double_from_real_value (TWO32r, DFmode);
20510 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
20511
20512 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
20513
20514 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
20515 0, OPTAB_DIRECT);
20516 if (x != target)
20517 emit_move_insn (target, x);
20518 }
20519
20520 /* Convert an unsigned SImode value into a SFmode, using only SSE.
20521 For x86_32, -mfpmath=sse, !optimize_size only. */
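/* A small worked example of the scheme (illustrative only): the input is
   split as hi = input >> 16 and lo = input & 0xffff, each part is converted
   exactly to SFmode, and the result is hi * 65536.0 + lo.  E.g. 0x12345678
   -> hi = 0x1234, lo = 0x5678 -> 4660 * 65536.0 + 22136 = 305419896.0
   (subject to the final SFmode rounding).  */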
20522 void
20523 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
20524 {
20525 REAL_VALUE_TYPE ONE16r;
20526 rtx fp_hi, fp_lo, int_hi, int_lo, x;
20527
20528 real_ldexp (&ONE16r, &dconst1, 16);
20529 x = const_double_from_real_value (ONE16r, SFmode);
20530 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
20531 NULL, 0, OPTAB_DIRECT);
20532 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
20533 NULL, 0, OPTAB_DIRECT);
20534 fp_hi = gen_reg_rtx (SFmode);
20535 fp_lo = gen_reg_rtx (SFmode);
20536 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
20537 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
20538 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
20539 0, OPTAB_DIRECT);
20540 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
20541 0, OPTAB_DIRECT);
20542 if (!rtx_equal_p (target, fp_hi))
20543 emit_move_insn (target, fp_hi);
20544 }
20545
20546 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
20547 a vector of unsigned ints VAL to vector of floats TARGET. */
20548
20549 void
20550 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
20551 {
20552 rtx tmp[8];
20553 REAL_VALUE_TYPE TWO16r;
20554 machine_mode intmode = GET_MODE (val);
20555 machine_mode fltmode = GET_MODE (target);
20556 rtx (*cvt) (rtx, rtx);
20557
20558 if (intmode == V4SImode)
20559 cvt = gen_floatv4siv4sf2;
20560 else
20561 cvt = gen_floatv8siv8sf2;
20562 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
20563 tmp[0] = force_reg (intmode, tmp[0]);
20564 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
20565 OPTAB_DIRECT);
20566 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
20567 NULL_RTX, 1, OPTAB_DIRECT);
20568 tmp[3] = gen_reg_rtx (fltmode);
20569 emit_insn (cvt (tmp[3], tmp[1]));
20570 tmp[4] = gen_reg_rtx (fltmode);
20571 emit_insn (cvt (tmp[4], tmp[2]));
20572 real_ldexp (&TWO16r, &dconst1, 16);
20573 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
20574 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
20575 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
20576 OPTAB_DIRECT);
20577 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
20578 OPTAB_DIRECT);
20579 if (tmp[7] != target)
20580 emit_move_insn (target, tmp[7]);
20581 }
20582
20583 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
20584 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
20585 This is done by doing just signed conversion if < 0x1p31, and otherwise by
20586 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
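/* Sketch for one vector lane (purely illustrative): for val = 3e9, which is
   >= 0x1p31, the compare mask selects 0x1p31, so the returned value for that
   lane is 3e9 - 0x1p31 = 852516352.0 and the corresponding *XORP lane gets
   0x80000000; for val = 5.0 the mask is zero, the value is returned unchanged
   and the *XORP lane is 0.  */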
20587
20588 rtx
20589 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
20590 {
20591 REAL_VALUE_TYPE TWO31r;
20592 rtx two31r, tmp[4];
20593 machine_mode mode = GET_MODE (val);
20594 machine_mode scalarmode = GET_MODE_INNER (mode);
20595 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
20596 rtx (*cmp) (rtx, rtx, rtx, rtx);
20597 int i;
20598
20599 for (i = 0; i < 3; i++)
20600 tmp[i] = gen_reg_rtx (mode);
20601 real_ldexp (&TWO31r, &dconst1, 31);
20602 two31r = const_double_from_real_value (TWO31r, scalarmode);
20603 two31r = ix86_build_const_vector (mode, 1, two31r);
20604 two31r = force_reg (mode, two31r);
20605 switch (mode)
20606 {
20607 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
20608 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
20609 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
20610 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
20611 default: gcc_unreachable ();
20612 }
20613 tmp[3] = gen_rtx_LE (mode, two31r, val);
20614 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
20615 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
20616 0, OPTAB_DIRECT);
20617 if (intmode == V4SImode || TARGET_AVX2)
20618 *xorp = expand_simple_binop (intmode, ASHIFT,
20619 gen_lowpart (intmode, tmp[0]),
20620 GEN_INT (31), NULL_RTX, 0,
20621 OPTAB_DIRECT);
20622 else
20623 {
20624 rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
20625 two31 = ix86_build_const_vector (intmode, 1, two31);
20626 *xorp = expand_simple_binop (intmode, AND,
20627 gen_lowpart (intmode, tmp[0]),
20628 two31, NULL_RTX, 0,
20629 OPTAB_DIRECT);
20630 }
20631 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
20632 0, OPTAB_DIRECT);
20633 }
20634
20635 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
20636 then replicate the value for all elements of the vector
20637 register. */
20638
20639 rtx
20640 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
20641 {
20642 int i, n_elt;
20643 rtvec v;
20644 machine_mode scalar_mode;
20645
20646 switch (mode)
20647 {
20648 case V64QImode:
20649 case V32QImode:
20650 case V16QImode:
20651 case V32HImode:
20652 case V16HImode:
20653 case V8HImode:
20654 case V16SImode:
20655 case V8SImode:
20656 case V4SImode:
20657 case V8DImode:
20658 case V4DImode:
20659 case V2DImode:
20660 gcc_assert (vect);
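      /* FALLTHRU */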
20661 case V16SFmode:
20662 case V8SFmode:
20663 case V4SFmode:
20664 case V8DFmode:
20665 case V4DFmode:
20666 case V2DFmode:
20667 n_elt = GET_MODE_NUNITS (mode);
20668 v = rtvec_alloc (n_elt);
20669 scalar_mode = GET_MODE_INNER (mode);
20670
20671 RTVEC_ELT (v, 0) = value;
20672
20673 for (i = 1; i < n_elt; ++i)
20674 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
20675
20676 return gen_rtx_CONST_VECTOR (mode, v);
20677
20678 default:
20679 gcc_unreachable ();
20680 }
20681 }
20682
20683 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
20684 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
20685 for an SSE register. If VECT is true, then replicate the mask for
20686 all elements of the vector register. If INVERT is true, then create
20687 a mask excluding the sign bit. */
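/* For example (not an exhaustive list of cases): for V4SFmode with VECT true
   and INVERT false this yields the vector constant
   { 0x80000000, 0x80000000, 0x80000000, 0x80000000 } reinterpreted as floats,
   while INVERT true gives the complementary 0x7fffffff mask.  */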
20688
20689 rtx
20690 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
20691 {
20692 machine_mode vec_mode, imode;
20693 wide_int w;
20694 rtx mask, v;
20695
20696 switch (mode)
20697 {
20698 case V16SImode:
20699 case V16SFmode:
20700 case V8SImode:
20701 case V4SImode:
20702 case V8SFmode:
20703 case V4SFmode:
20704 vec_mode = mode;
20705 imode = SImode;
20706 break;
20707
20708 case V8DImode:
20709 case V4DImode:
20710 case V2DImode:
20711 case V8DFmode:
20712 case V4DFmode:
20713 case V2DFmode:
20714 vec_mode = mode;
20715 imode = DImode;
20716 break;
20717
20718 case TImode:
20719 case TFmode:
20720 vec_mode = VOIDmode;
20721 imode = TImode;
20722 break;
20723
20724 default:
20725 gcc_unreachable ();
20726 }
20727
20728 machine_mode inner_mode = GET_MODE_INNER (mode);
20729 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
20730 GET_MODE_BITSIZE (inner_mode));
20731 if (invert)
20732 w = wi::bit_not (w);
20733
20734 /* Force this value into the low part of a fp vector constant. */
20735 mask = immed_wide_int_const (w, imode);
20736 mask = gen_lowpart (inner_mode, mask);
20737
20738 if (vec_mode == VOIDmode)
20739 return force_reg (inner_mode, mask);
20740
20741 v = ix86_build_const_vector (vec_mode, vect, mask);
20742 return force_reg (vec_mode, v);
20743 }
20744
20745 /* Generate code for floating point ABS or NEG. */
20746
20747 void
20748 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
20749 rtx operands[])
20750 {
20751 rtx mask, set, dst, src;
20752 bool use_sse = false;
20753 bool vector_mode = VECTOR_MODE_P (mode);
20754 machine_mode vmode = mode;
20755
20756 if (vector_mode)
20757 use_sse = true;
20758 else if (mode == TFmode)
20759 use_sse = true;
20760 else if (TARGET_SSE_MATH)
20761 {
20762 use_sse = SSE_FLOAT_MODE_P (mode);
20763 if (mode == SFmode)
20764 vmode = V4SFmode;
20765 else if (mode == DFmode)
20766 vmode = V2DFmode;
20767 }
20768
20769 /* NEG and ABS performed with SSE use bitwise mask operations.
20770 Create the appropriate mask now. */
20771 if (use_sse)
20772 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
20773 else
20774 mask = NULL_RTX;
20775
20776 dst = operands[0];
20777 src = operands[1];
20778
20779 set = gen_rtx_fmt_e (code, mode, src);
20780 set = gen_rtx_SET (dst, set);
20781
20782 if (mask)
20783 {
20784 rtx use, clob;
20785 rtvec par;
20786
20787 use = gen_rtx_USE (VOIDmode, mask);
20788 if (vector_mode)
20789 par = gen_rtvec (2, set, use);
20790 else
20791 {
20792 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20793 par = gen_rtvec (3, set, use, clob);
20794 }
20795 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
20796 }
20797 else
20798 emit_insn (set);
20799 }
20800
20801 /* Expand a copysign operation. Special case operand 0 being a constant. */
20802
20803 void
20804 ix86_expand_copysign (rtx operands[])
20805 {
20806 machine_mode mode, vmode;
20807 rtx dest, op0, op1, mask, nmask;
20808
20809 dest = operands[0];
20810 op0 = operands[1];
20811 op1 = operands[2];
20812
20813 mode = GET_MODE (dest);
20814
20815 if (mode == SFmode)
20816 vmode = V4SFmode;
20817 else if (mode == DFmode)
20818 vmode = V2DFmode;
20819 else
20820 vmode = mode;
20821
20822 if (CONST_DOUBLE_P (op0))
20823 {
20824 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
20825
20826 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
20827 op0 = simplify_unary_operation (ABS, mode, op0, mode);
20828
20829 if (mode == SFmode || mode == DFmode)
20830 {
20831 if (op0 == CONST0_RTX (mode))
20832 op0 = CONST0_RTX (vmode);
20833 else
20834 {
20835 rtx v = ix86_build_const_vector (vmode, false, op0);
20836
20837 op0 = force_reg (vmode, v);
20838 }
20839 }
20840 else if (op0 != CONST0_RTX (mode))
20841 op0 = force_reg (mode, op0);
20842
20843 mask = ix86_build_signbit_mask (vmode, 0, 0);
20844
20845 if (mode == SFmode)
20846 copysign_insn = gen_copysignsf3_const;
20847 else if (mode == DFmode)
20848 copysign_insn = gen_copysigndf3_const;
20849 else
20850 copysign_insn = gen_copysigntf3_const;
20851
20852 emit_insn (copysign_insn (dest, op0, op1, mask));
20853 }
20854 else
20855 {
20856 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
20857
20858 nmask = ix86_build_signbit_mask (vmode, 0, 1);
20859 mask = ix86_build_signbit_mask (vmode, 0, 0);
20860
20861 if (mode == SFmode)
20862 copysign_insn = gen_copysignsf3_var;
20863 else if (mode == DFmode)
20864 copysign_insn = gen_copysigndf3_var;
20865 else
20866 copysign_insn = gen_copysigntf3_var;
20867
20868 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
20869 }
20870 }
20871
20872 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
20873 be a constant, and so has already been expanded into a vector constant. */
20874
20875 void
20876 ix86_split_copysign_const (rtx operands[])
20877 {
20878 machine_mode mode, vmode;
20879 rtx dest, op0, mask, x;
20880
20881 dest = operands[0];
20882 op0 = operands[1];
20883 mask = operands[3];
20884
20885 mode = GET_MODE (dest);
20886 vmode = GET_MODE (mask);
20887
20888 dest = simplify_gen_subreg (vmode, dest, mode, 0);
20889 x = gen_rtx_AND (vmode, dest, mask);
20890 emit_insn (gen_rtx_SET (dest, x));
20891
20892 if (op0 != CONST0_RTX (vmode))
20893 {
20894 x = gen_rtx_IOR (vmode, dest, op0);
20895 emit_insn (gen_rtx_SET (dest, x));
20896 }
20897 }
20898
20899 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
20900 so we have to do two masks. */
20901
20902 void
20903 ix86_split_copysign_var (rtx operands[])
20904 {
20905 machine_mode mode, vmode;
20906 rtx dest, scratch, op0, op1, mask, nmask, x;
20907
20908 dest = operands[0];
20909 scratch = operands[1];
20910 op0 = operands[2];
20911 op1 = operands[3];
20912 nmask = operands[4];
20913 mask = operands[5];
20914
20915 mode = GET_MODE (dest);
20916 vmode = GET_MODE (mask);
20917
20918 if (rtx_equal_p (op0, op1))
20919 {
20920 /* Shouldn't happen often (it's useless, obviously), but when it does
20921 we'd generate incorrect code if we continue below. */
20922 emit_move_insn (dest, op0);
20923 return;
20924 }
20925
20926 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
20927 {
20928 gcc_assert (REGNO (op1) == REGNO (scratch));
20929
20930 x = gen_rtx_AND (vmode, scratch, mask);
20931 emit_insn (gen_rtx_SET (scratch, x));
20932
20933 dest = mask;
20934 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
20935 x = gen_rtx_NOT (vmode, dest);
20936 x = gen_rtx_AND (vmode, x, op0);
20937 emit_insn (gen_rtx_SET (dest, x));
20938 }
20939 else
20940 {
20941 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
20942 {
20943 x = gen_rtx_AND (vmode, scratch, mask);
20944 }
20945 else /* alternative 2,4 */
20946 {
20947 gcc_assert (REGNO (mask) == REGNO (scratch));
20948 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
20949 x = gen_rtx_AND (vmode, scratch, op1);
20950 }
20951 emit_insn (gen_rtx_SET (scratch, x));
20952
20953 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
20954 {
20955 dest = simplify_gen_subreg (vmode, op0, mode, 0);
20956 x = gen_rtx_AND (vmode, dest, nmask);
20957 }
20958 else /* alternative 3,4 */
20959 {
20960 gcc_assert (REGNO (nmask) == REGNO (dest));
20961 dest = nmask;
20962 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
20963 x = gen_rtx_AND (vmode, dest, op0);
20964 }
20965 emit_insn (gen_rtx_SET (dest, x));
20966 }
20967
20968 x = gen_rtx_IOR (vmode, dest, scratch);
20969 emit_insn (gen_rtx_SET (dest, x));
20970 }
20971
20972 /* Return TRUE or FALSE depending on whether the first SET in INSN
20973    has a source and destination with matching CC modes, and whether the
20974    CC mode is at least as constrained as REQ_MODE.  */
20975
20976 bool
20977 ix86_match_ccmode (rtx insn, machine_mode req_mode)
20978 {
20979 rtx set;
20980 machine_mode set_mode;
20981
20982 set = PATTERN (insn);
20983 if (GET_CODE (set) == PARALLEL)
20984 set = XVECEXP (set, 0, 0);
20985 gcc_assert (GET_CODE (set) == SET);
20986 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
20987
20988 set_mode = GET_MODE (SET_DEST (set));
20989 switch (set_mode)
20990 {
20991 case CCNOmode:
20992 if (req_mode != CCNOmode
20993 && (req_mode != CCmode
20994 || XEXP (SET_SRC (set), 1) != const0_rtx))
20995 return false;
20996 break;
20997 case CCmode:
20998 if (req_mode == CCGCmode)
20999 return false;
21000 /* FALLTHRU */
21001 case CCGCmode:
21002 if (req_mode == CCGOCmode || req_mode == CCNOmode)
21003 return false;
21004 /* FALLTHRU */
21005 case CCGOCmode:
21006 if (req_mode == CCZmode)
21007 return false;
21008 /* FALLTHRU */
21009 case CCZmode:
21010 break;
21011
21012 case CCAmode:
21013 case CCCmode:
21014 case CCOmode:
21015 case CCPmode:
21016 case CCSmode:
21017 if (set_mode != req_mode)
21018 return false;
21019 break;
21020
21021 default:
21022 gcc_unreachable ();
21023 }
21024
21025 return GET_MODE (SET_SRC (set)) == set_mode;
21026 }
21027
21028 /* Generate insn patterns to do an integer compare of OPERANDS. */
21029
21030 static rtx
21031 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
21032 {
21033 machine_mode cmpmode;
21034 rtx tmp, flags;
21035
21036 cmpmode = SELECT_CC_MODE (code, op0, op1);
21037 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
21038
21039 /* This is very simple, but making the interface the same as in the
21040 FP case makes the rest of the code easier. */
21041 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
21042 emit_insn (gen_rtx_SET (flags, tmp));
21043
21044 /* Return the test that should be put into the flags user, i.e.
21045 the bcc, scc, or cmov instruction. */
21046 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
21047 }
21048
21049 /* Figure out whether to use ordered or unordered fp comparisons.
21050 Return the appropriate mode to use. */
21051
21052 machine_mode
21053 ix86_fp_compare_mode (enum rtx_code)
21054 {
21055 /* ??? In order to make all comparisons reversible, we do all comparisons
21056 non-trapping when compiling for IEEE. Once gcc is able to distinguish
21057      all forms of trapping and nontrapping comparisons, we can make inequality
21058 comparisons trapping again, since it results in better code when using
21059 FCOM based compares. */
21060 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
21061 }
21062
21063 machine_mode
21064 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
21065 {
21066 machine_mode mode = GET_MODE (op0);
21067
21068 if (SCALAR_FLOAT_MODE_P (mode))
21069 {
21070 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
21071 return ix86_fp_compare_mode (code);
21072 }
21073
21074 switch (code)
21075 {
21076 /* Only zero flag is needed. */
21077 case EQ: /* ZF=0 */
21078 case NE: /* ZF!=0 */
21079 return CCZmode;
21080 /* Codes needing carry flag. */
21081 case GEU: /* CF=0 */
21082 case LTU: /* CF=1 */
21083 /* Detect overflow checks. They need just the carry flag. */
21084 if (GET_CODE (op0) == PLUS
21085 && rtx_equal_p (op1, XEXP (op0, 0)))
21086 return CCCmode;
21087 else
21088 return CCmode;
21089 case GTU: /* CF=0 & ZF=0 */
21090 case LEU: /* CF=1 | ZF=1 */
21091 return CCmode;
21092 /* Codes possibly doable only with sign flag when
21093 comparing against zero. */
21094 case GE: /* SF=OF or SF=0 */
21095 case LT: /* SF<>OF or SF=1 */
21096 if (op1 == const0_rtx)
21097 return CCGOCmode;
21098 else
21099 /* For other cases Carry flag is not required. */
21100 return CCGCmode;
21101       /* Codes doable only with the sign flag when comparing
21102          against zero, but we lack a jump instruction for it,
21103          so we need to use relational tests against the overflow
21104          flag, which thus needs to be zero.  */
21105 case GT: /* ZF=0 & SF=OF */
21106 case LE: /* ZF=1 | SF<>OF */
21107 if (op1 == const0_rtx)
21108 return CCNOmode;
21109 else
21110 return CCGCmode;
21111       /* The strcmp pattern does (use flags), and combine may ask us for the
21112          proper mode.  */
21113 case USE:
21114 return CCmode;
21115 default:
21116 gcc_unreachable ();
21117 }
21118 }
21119
21120 /* Return the fixed registers used for condition codes. */
21121
21122 static bool
21123 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
21124 {
21125 *p1 = FLAGS_REG;
21126 *p2 = FPSR_REG;
21127 return true;
21128 }
21129
21130 /* If two condition code modes are compatible, return a condition code
21131 mode which is compatible with both. Otherwise, return
21132 VOIDmode. */
21133
21134 static machine_mode
21135 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
21136 {
21137 if (m1 == m2)
21138 return m1;
21139
21140 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
21141 return VOIDmode;
21142
21143 if ((m1 == CCGCmode && m2 == CCGOCmode)
21144 || (m1 == CCGOCmode && m2 == CCGCmode))
21145 return CCGCmode;
21146
21147 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
21148 return m2;
21149 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
21150 return m1;
21151
21152 switch (m1)
21153 {
21154 default:
21155 gcc_unreachable ();
21156
21157 case CCmode:
21158 case CCGCmode:
21159 case CCGOCmode:
21160 case CCNOmode:
21161 case CCAmode:
21162 case CCCmode:
21163 case CCOmode:
21164 case CCPmode:
21165 case CCSmode:
21166 case CCZmode:
21167 switch (m2)
21168 {
21169 default:
21170 return VOIDmode;
21171
21172 case CCmode:
21173 case CCGCmode:
21174 case CCGOCmode:
21175 case CCNOmode:
21176 case CCAmode:
21177 case CCCmode:
21178 case CCOmode:
21179 case CCPmode:
21180 case CCSmode:
21181 case CCZmode:
21182 return CCmode;
21183 }
21184
21185 case CCFPmode:
21186 case CCFPUmode:
21187 /* These are only compatible with themselves, which we already
21188 checked above. */
21189 return VOIDmode;
21190 }
21191 }
21192
21193
21194 /* Return a comparison we can do that is equivalent to
21195    swap_condition (code), apart possibly from orderedness.
21196    But never change orderedness if TARGET_IEEE_FP, returning
21197    UNKNOWN in that case if necessary.  */
21198
21199 static enum rtx_code
21200 ix86_fp_swap_condition (enum rtx_code code)
21201 {
21202 switch (code)
21203 {
21204 case GT: /* GTU - CF=0 & ZF=0 */
21205 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
21206 case GE: /* GEU - CF=0 */
21207 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
21208 case UNLT: /* LTU - CF=1 */
21209 return TARGET_IEEE_FP ? UNKNOWN : GT;
21210 case UNLE: /* LEU - CF=1 | ZF=1 */
21211 return TARGET_IEEE_FP ? UNKNOWN : GE;
21212 default:
21213 return swap_condition (code);
21214 }
21215 }
21216
21217 /* Return the cost of comparison CODE using the best strategy for performance.
21218    All following functions use the number of instructions as a cost metric.
21219    In the future this should be tweaked to compute bytes for optimize_size and
21220    take into account the performance of various instructions on various CPUs.  */
21221
21222 static int
21223 ix86_fp_comparison_cost (enum rtx_code code)
21224 {
21225 int arith_cost;
21226
21227 /* The cost of code using bit-twiddling on %ah. */
21228 switch (code)
21229 {
21230 case UNLE:
21231 case UNLT:
21232 case LTGT:
21233 case GT:
21234 case GE:
21235 case UNORDERED:
21236 case ORDERED:
21237 case UNEQ:
21238 arith_cost = 4;
21239 break;
21240 case LT:
21241 case NE:
21242 case EQ:
21243 case UNGE:
21244 arith_cost = TARGET_IEEE_FP ? 5 : 4;
21245 break;
21246 case LE:
21247 case UNGT:
21248 arith_cost = TARGET_IEEE_FP ? 6 : 4;
21249 break;
21250 default:
21251 gcc_unreachable ();
21252 }
21253
21254 switch (ix86_fp_comparison_strategy (code))
21255 {
21256 case IX86_FPCMP_COMI:
21257 return arith_cost > 4 ? 3 : 2;
21258 case IX86_FPCMP_SAHF:
21259 return arith_cost > 4 ? 4 : 3;
21260 default:
21261 return arith_cost;
21262 }
21263 }
21264
21265 /* Return the strategy to use for floating-point comparisons.  We assume that
21266    fcomi is always preferable where available, since that is also true when
21267    looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */
21268
21269 enum ix86_fpcmp_strategy
21270 ix86_fp_comparison_strategy (enum rtx_code)
21271 {
21272 /* Do fcomi/sahf based test when profitable. */
21273
21274 if (TARGET_CMOVE)
21275 return IX86_FPCMP_COMI;
21276
21277 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
21278 return IX86_FPCMP_SAHF;
21279
21280 return IX86_FPCMP_ARITH;
21281 }
21282
21283 /* Swap, force into registers, or otherwise massage the two operands
21284 to a fp comparison. The operands are updated in place; the new
21285 comparison code is returned. */
21286
21287 static enum rtx_code
21288 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
21289 {
21290 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
21291 rtx op0 = *pop0, op1 = *pop1;
21292 machine_mode op_mode = GET_MODE (op0);
21293 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
21294
21295 /* All of the unordered compare instructions only work on registers.
21296 The same is true of the fcomi compare instructions. The XFmode
21297 compare instructions require registers except when comparing
21298 against zero or when converting operand 1 from fixed point to
21299 floating point. */
21300
21301 if (!is_sse
21302 && (fpcmp_mode == CCFPUmode
21303 || (op_mode == XFmode
21304 && ! (standard_80387_constant_p (op0) == 1
21305 || standard_80387_constant_p (op1) == 1)
21306 && GET_CODE (op1) != FLOAT)
21307 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
21308 {
21309 op0 = force_reg (op_mode, op0);
21310 op1 = force_reg (op_mode, op1);
21311 }
21312 else
21313 {
21314 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
21315 things around if they appear profitable, otherwise force op0
21316 into a register. */
21317
21318 if (standard_80387_constant_p (op0) == 0
21319 || (MEM_P (op0)
21320 && ! (standard_80387_constant_p (op1) == 0
21321 || MEM_P (op1))))
21322 {
21323 enum rtx_code new_code = ix86_fp_swap_condition (code);
21324 if (new_code != UNKNOWN)
21325 {
21326 std::swap (op0, op1);
21327 code = new_code;
21328 }
21329 }
21330
21331 if (!REG_P (op0))
21332 op0 = force_reg (op_mode, op0);
21333
21334 if (CONSTANT_P (op1))
21335 {
21336 int tmp = standard_80387_constant_p (op1);
21337 if (tmp == 0)
21338 op1 = validize_mem (force_const_mem (op_mode, op1));
21339 else if (tmp == 1)
21340 {
21341 if (TARGET_CMOVE)
21342 op1 = force_reg (op_mode, op1);
21343 }
21344 else
21345 op1 = force_reg (op_mode, op1);
21346 }
21347 }
21348
21349 /* Try to rearrange the comparison to make it cheaper. */
21350 if (ix86_fp_comparison_cost (code)
21351 > ix86_fp_comparison_cost (swap_condition (code))
21352 && (REG_P (op1) || can_create_pseudo_p ()))
21353 {
21354 std::swap (op0, op1);
21355 code = swap_condition (code);
21356 if (!REG_P (op0))
21357 op0 = force_reg (op_mode, op0);
21358 }
21359
21360 *pop0 = op0;
21361 *pop1 = op1;
21362 return code;
21363 }
21364
21365 /* Convert the comparison codes we use to represent FP comparisons to the
21366 integer codes that will result in a proper branch. Return UNKNOWN if no
21367 such code is available. */
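/* A sketch of why this mapping works: comi/fcomi (and sahf after fnstsw)
   set ZF, PF and CF from C3, C2 and C0, so the resulting flags look like
   those of an unsigned integer compare, with PF marking the unordered
   case.  Hence GT maps to GTU ("above"), UNLT to LTU ("below"), UNEQ to
   EQ, and so on. */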
21368
21369 enum rtx_code
21370 ix86_fp_compare_code_to_integer (enum rtx_code code)
21371 {
21372 switch (code)
21373 {
21374 case GT:
21375 return GTU;
21376 case GE:
21377 return GEU;
21378 case ORDERED:
21379 case UNORDERED:
21380 return code;
21381 break;
21382 case UNEQ:
21383 return EQ;
21384 break;
21385 case UNLT:
21386 return LTU;
21387 break;
21388 case UNLE:
21389 return LEU;
21390 break;
21391 case LTGT:
21392 return NE;
21393 break;
21394 default:
21395 return UNKNOWN;
21396 }
21397 }
21398
21399 /* Generate insn patterns to do a floating point compare of OPERANDS. */
21400
21401 static rtx
21402 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
21403 {
21404 machine_mode fpcmp_mode, intcmp_mode;
21405 rtx tmp, tmp2;
21406
21407 fpcmp_mode = ix86_fp_compare_mode (code);
21408 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
21409
21410 /* Do fcomi/sahf based test when profitable. */
21411 switch (ix86_fp_comparison_strategy (code))
21412 {
21413 case IX86_FPCMP_COMI:
21414 intcmp_mode = fpcmp_mode;
21415 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
21416 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
21417 emit_insn (tmp);
21418 break;
21419
21420 case IX86_FPCMP_SAHF:
21421 intcmp_mode = fpcmp_mode;
21422 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
21423 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
21424
21425 if (!scratch)
21426 scratch = gen_reg_rtx (HImode);
21427 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
21428 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
21429 break;
21430
21431 case IX86_FPCMP_ARITH:
21432 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
21433 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
21434 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
21435 if (!scratch)
21436 scratch = gen_reg_rtx (HImode);
21437 emit_insn (gen_rtx_SET (scratch, tmp2));
21438
21439 /* In the unordered case, we have to check C2 for NaNs, which
21440 doesn't work out to anything nice combination-wise.
21441 So do some bit twiddling on the value we've got in AH to come
21442 up with an appropriate set of condition codes. */
21443
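/* For reference: after fnstsw the x87 condition bits land in %ah as
   C0 = 0x01, C2 = 0x04 and C3 = 0x40.  A compare sets C0 for "less",
   C3 for "equal" and C0|C2|C3 for "unordered", so the masks below are
   0x45 = C3|C2|C0, 0x44 = C3|C2, 0x40 = C3, 0x05 = C2|C0, 0x01 = C0. */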
21444 intcmp_mode = CCNOmode;
21445 switch (code)
21446 {
21447 case GT:
21448 case UNGT:
21449 if (code == GT || !TARGET_IEEE_FP)
21450 {
21451 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
21452 code = EQ;
21453 }
21454 else
21455 {
21456 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21457 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
21458 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
21459 intcmp_mode = CCmode;
21460 code = GEU;
21461 }
21462 break;
21463 case LT:
21464 case UNLT:
21465 if (code == LT && TARGET_IEEE_FP)
21466 {
21467 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21468 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
21469 intcmp_mode = CCmode;
21470 code = EQ;
21471 }
21472 else
21473 {
21474 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
21475 code = NE;
21476 }
21477 break;
21478 case GE:
21479 case UNGE:
21480 if (code == GE || !TARGET_IEEE_FP)
21481 {
21482 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
21483 code = EQ;
21484 }
21485 else
21486 {
21487 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21488 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
21489 code = NE;
21490 }
21491 break;
21492 case LE:
21493 case UNLE:
21494 if (code == LE && TARGET_IEEE_FP)
21495 {
21496 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21497 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
21498 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
21499 intcmp_mode = CCmode;
21500 code = LTU;
21501 }
21502 else
21503 {
21504 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
21505 code = NE;
21506 }
21507 break;
21508 case EQ:
21509 case UNEQ:
21510 if (code == EQ && TARGET_IEEE_FP)
21511 {
21512 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21513 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
21514 intcmp_mode = CCmode;
21515 code = EQ;
21516 }
21517 else
21518 {
21519 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
21520 code = NE;
21521 }
21522 break;
21523 case NE:
21524 case LTGT:
21525 if (code == NE && TARGET_IEEE_FP)
21526 {
21527 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21528 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
21529 GEN_INT (0x40)));
21530 code = NE;
21531 }
21532 else
21533 {
21534 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
21535 code = EQ;
21536 }
21537 break;
21538
21539 case UNORDERED:
21540 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
21541 code = NE;
21542 break;
21543 case ORDERED:
21544 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
21545 code = EQ;
21546 break;
21547
21548 default:
21549 gcc_unreachable ();
21550 }
21551 break;
21552
21553 default:
21554 gcc_unreachable ();
21555 }
21556
21557 /* Return the test that should be put into the flags user, i.e.
21558 the bcc, scc, or cmov instruction. */
21559 return gen_rtx_fmt_ee (code, VOIDmode,
21560 gen_rtx_REG (intcmp_mode, FLAGS_REG),
21561 const0_rtx);
21562 }
21563
21564 static rtx
21565 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
21566 {
21567 rtx ret;
21568
21569 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
21570 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
21571
21572 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
21573 {
21574 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
21575 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
21576 }
21577 else
21578 ret = ix86_expand_int_compare (code, op0, op1);
21579
21580 return ret;
21581 }
21582
21583 void
21584 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
21585 {
21586 machine_mode mode = GET_MODE (op0);
21587 rtx tmp;
21588
21589 switch (mode)
21590 {
21591 case SFmode:
21592 case DFmode:
21593 case XFmode:
21594 case QImode:
21595 case HImode:
21596 case SImode:
21597 simple:
21598 tmp = ix86_expand_compare (code, op0, op1);
21599 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21600 gen_rtx_LABEL_REF (VOIDmode, label),
21601 pc_rtx);
21602 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
21603 return;
21604
21605 case DImode:
21606 if (TARGET_64BIT)
21607 goto simple;
21608 case TImode:
21609 /* Expand DImode/TImode branch into multiple compare+branch. */
21610 {
21611 rtx lo[2], hi[2];
21612 rtx_code_label *label2;
21613 enum rtx_code code1, code2, code3;
21614 machine_mode submode;
21615
21616 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
21617 {
21618 std::swap (op0, op1);
21619 code = swap_condition (code);
21620 }
21621
21622 split_double_mode (mode, &op0, 1, lo+0, hi+0);
21623 split_double_mode (mode, &op1, 1, lo+1, hi+1);
21624
21625 submode = mode == DImode ? SImode : DImode;
21626
21627 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
21628 avoid two branches. This costs one extra insn, so disable when
21629 optimizing for size. */
21630
21631 if ((code == EQ || code == NE)
21632 && (!optimize_insn_for_size_p ()
21633 || hi[1] == const0_rtx || lo[1] == const0_rtx))
21634 {
21635 rtx xor0, xor1;
21636
21637 xor1 = hi[0];
21638 if (hi[1] != const0_rtx)
21639 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
21640 NULL_RTX, 0, OPTAB_WIDEN);
21641
21642 xor0 = lo[0];
21643 if (lo[1] != const0_rtx)
21644 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
21645 NULL_RTX, 0, OPTAB_WIDEN);
21646
21647 tmp = expand_binop (submode, ior_optab, xor1, xor0,
21648 NULL_RTX, 0, OPTAB_WIDEN);
21649
21650 ix86_expand_branch (code, tmp, const0_rtx, label);
21651 return;
21652 }
21653
21654 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
21655 op1 is a constant and the low word is zero, then we can just
21656 examine the high word. Similarly for a low word of -1 and
21657 less-or-equal or greater-than. */
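/* For example, with lo(op1) == 0, (hi0:lo0) < (hi1:0) is exactly
   hi0 < hi1: when the high words are equal the test degenerates to
   lo0 < 0, which is always false for the unsigned low-word compare.
   The lo(op1) == -1 cases are the mirror image for LE/GT. */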
21658
21659 if (CONST_INT_P (hi[1]))
21660 switch (code)
21661 {
21662 case LT: case LTU: case GE: case GEU:
21663 if (lo[1] == const0_rtx)
21664 {
21665 ix86_expand_branch (code, hi[0], hi[1], label);
21666 return;
21667 }
21668 break;
21669 case LE: case LEU: case GT: case GTU:
21670 if (lo[1] == constm1_rtx)
21671 {
21672 ix86_expand_branch (code, hi[0], hi[1], label);
21673 return;
21674 }
21675 break;
21676 default:
21677 break;
21678 }
21679
21680 /* Otherwise, we need two or three jumps. */
21681
21682 label2 = gen_label_rtx ();
21683
21684 code1 = code;
21685 code2 = swap_condition (code);
21686 code3 = unsigned_condition (code);
21687
21688 switch (code)
21689 {
21690 case LT: case GT: case LTU: case GTU:
21691 break;
21692
21693 case LE: code1 = LT; code2 = GT; break;
21694 case GE: code1 = GT; code2 = LT; break;
21695 case LEU: code1 = LTU; code2 = GTU; break;
21696 case GEU: code1 = GTU; code2 = LTU; break;
21697
21698 case EQ: code1 = UNKNOWN; code2 = NE; break;
21699 case NE: code2 = UNKNOWN; break;
21700
21701 default:
21702 gcc_unreachable ();
21703 }
21704
21705 /*
21706 * a < b =>
21707 * if (hi(a) < hi(b)) goto true;
21708 * if (hi(a) > hi(b)) goto false;
21709 * if (lo(a) < lo(b)) goto true;
21710 * false:
21711 */
21712
21713 if (code1 != UNKNOWN)
21714 ix86_expand_branch (code1, hi[0], hi[1], label);
21715 if (code2 != UNKNOWN)
21716 ix86_expand_branch (code2, hi[0], hi[1], label2);
21717
21718 ix86_expand_branch (code3, lo[0], lo[1], label);
21719
21720 if (code2 != UNKNOWN)
21721 emit_label (label2);
21722 return;
21723 }
21724
21725 default:
21726 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
21727 goto simple;
21728 }
21729 }
21730
21731 /* Split branch based on floating point condition. */
21732 void
21733 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
21734 rtx target1, rtx target2, rtx tmp)
21735 {
21736 rtx condition;
21737 rtx i;
21738
21739 if (target2 != pc_rtx)
21740 {
21741 std::swap (target1, target2);
21742 code = reverse_condition_maybe_unordered (code);
21743 }
21744
21745 condition = ix86_expand_fp_compare (code, op1, op2,
21746 tmp);
21747
21748 i = emit_jump_insn (gen_rtx_SET
21749 (pc_rtx,
21750 gen_rtx_IF_THEN_ELSE (VOIDmode,
21751 condition, target1, target2)));
21752 if (split_branch_probability >= 0)
21753 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
21754 }
21755
21756 void
21757 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
21758 {
21759 rtx ret;
21760
21761 gcc_assert (GET_MODE (dest) == QImode);
21762
21763 ret = ix86_expand_compare (code, op0, op1);
21764 PUT_MODE (ret, QImode);
21765 emit_insn (gen_rtx_SET (dest, ret));
21766 }
21767
21768 /* Expand a comparison setting or clearing the carry flag. Return true when
21769 successful, and set *POP to the comparison operation. */
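/* Such compares are interesting because the caller can then use the
   sbb idiom: after the compare, "sbb reg,reg" materializes 0 or -1
   depending on the carry flag (see ix86_expand_int_movcc below). */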
21770 static bool
21771 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
21772 {
21773 machine_mode mode =
21774 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
21775
21776 /* Do not handle double-mode compares that go through a special path. */
21777 if (mode == (TARGET_64BIT ? TImode : DImode))
21778 return false;
21779
21780 if (SCALAR_FLOAT_MODE_P (mode))
21781 {
21782 rtx compare_op;
21783 rtx_insn *compare_seq;
21784
21785 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
21786
21787 /* Shortcut: the following common codes never translate
21788 into carry flag compares. */
21789 if (code == EQ || code == NE || code == UNEQ || code == LTGT
21790 || code == ORDERED || code == UNORDERED)
21791 return false;
21792
21793 /* These comparisons require the zero flag; swap the operands so they won't. */
21794 if ((code == GT || code == UNLE || code == LE || code == UNGT)
21795 && !TARGET_IEEE_FP)
21796 {
21797 std::swap (op0, op1);
21798 code = swap_condition (code);
21799 }
21800
21801 /* Try to expand the comparison and verify that we end up with
21802 a carry flag based comparison. This fails only when we decide
21803 to expand the comparison using arithmetic, which is not a
21804 common scenario. */
21805 start_sequence ();
21806 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
21807 compare_seq = get_insns ();
21808 end_sequence ();
21809
21810 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
21811 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
21812 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
21813 else
21814 code = GET_CODE (compare_op);
21815
21816 if (code != LTU && code != GEU)
21817 return false;
21818
21819 emit_insn (compare_seq);
21820 *pop = compare_op;
21821 return true;
21822 }
21823
21824 if (!INTEGRAL_MODE_P (mode))
21825 return false;
21826
21827 switch (code)
21828 {
21829 case LTU:
21830 case GEU:
21831 break;
21832
21833 /* Convert a==0 into (unsigned)a<1. */
21834 case EQ:
21835 case NE:
21836 if (op1 != const0_rtx)
21837 return false;
21838 op1 = const1_rtx;
21839 code = (code == EQ ? LTU : GEU);
21840 break;
21841
21842 /* Convert a>b into b<a or a>=b+1. */
21843 case GTU:
21844 case LEU:
21845 if (CONST_INT_P (op1))
21846 {
21847 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
21848 /* Bail out on overflow. We could still swap the operands, but
21849 that would force loading the constant into a register. */
21850 if (op1 == const0_rtx
21851 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
21852 return false;
21853 code = (code == GTU ? GEU : LTU);
21854 }
21855 else
21856 {
21857 std::swap (op0, op1);
21858 code = (code == GTU ? LTU : GEU);
21859 }
21860 break;
21861
21862 /* Convert a>=0 into (unsigned)a<0x80000000. */
21863 case LT:
21864 case GE:
21865 if (mode == DImode || op1 != const0_rtx)
21866 return false;
21867 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
21868 code = (code == LT ? GEU : LTU);
21869 break;
21870 case LE:
21871 case GT:
21872 if (mode == DImode || op1 != constm1_rtx)
21873 return false;
21874 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
21875 code = (code == LE ? GEU : LTU);
21876 break;
21877
21878 default:
21879 return false;
21880 }
21881 /* Swapping operands may cause a constant to appear as the first operand. */
21882 if (!nonimmediate_operand (op0, VOIDmode))
21883 {
21884 if (!can_create_pseudo_p ())
21885 return false;
21886 op0 = force_reg (mode, op0);
21887 }
21888 *pop = ix86_expand_compare (code, op0, op1);
21889 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
21890 return true;
21891 }
21892
21893 bool
21894 ix86_expand_int_movcc (rtx operands[])
21895 {
21896 enum rtx_code code = GET_CODE (operands[1]), compare_code;
21897 rtx_insn *compare_seq;
21898 rtx compare_op;
21899 machine_mode mode = GET_MODE (operands[0]);
21900 bool sign_bit_compare_p = false;
21901 rtx op0 = XEXP (operands[1], 0);
21902 rtx op1 = XEXP (operands[1], 1);
21903
21904 if (GET_MODE (op0) == TImode
21905 || (GET_MODE (op0) == DImode
21906 && !TARGET_64BIT))
21907 return false;
21908
21909 start_sequence ();
21910 compare_op = ix86_expand_compare (code, op0, op1);
21911 compare_seq = get_insns ();
21912 end_sequence ();
21913
21914 compare_code = GET_CODE (compare_op);
21915
21916 if ((op1 == const0_rtx && (code == GE || code == LT))
21917 || (op1 == constm1_rtx && (code == GT || code == LE)))
21918 sign_bit_compare_p = true;
21919
21920 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
21921 HImode insns, we'd be swallowed in word prefix ops. */
21922
21923 if ((mode != HImode || TARGET_FAST_PREFIX)
21924 && (mode != (TARGET_64BIT ? TImode : DImode))
21925 && CONST_INT_P (operands[2])
21926 && CONST_INT_P (operands[3]))
21927 {
21928 rtx out = operands[0];
21929 HOST_WIDE_INT ct = INTVAL (operands[2]);
21930 HOST_WIDE_INT cf = INTVAL (operands[3]);
21931 HOST_WIDE_INT diff;
21932
21933 diff = ct - cf;
21934 /* Sign bit compares are better done using shifts than by using
21935 sbb. */
21936 if (sign_bit_compare_p
21937 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
21938 {
21939 /* Detect overlap between destination and compare sources. */
21940 rtx tmp = out;
21941
21942 if (!sign_bit_compare_p)
21943 {
21944 rtx flags;
21945 bool fpcmp = false;
21946
21947 compare_code = GET_CODE (compare_op);
21948
21949 flags = XEXP (compare_op, 0);
21950
21951 if (GET_MODE (flags) == CCFPmode
21952 || GET_MODE (flags) == CCFPUmode)
21953 {
21954 fpcmp = true;
21955 compare_code
21956 = ix86_fp_compare_code_to_integer (compare_code);
21957 }
21958
21959 /* To simplify the rest of the code, restrict to the GEU case. */
21960 if (compare_code == LTU)
21961 {
21962 std::swap (ct, cf);
21963 compare_code = reverse_condition (compare_code);
21964 code = reverse_condition (code);
21965 }
21966 else
21967 {
21968 if (fpcmp)
21969 PUT_CODE (compare_op,
21970 reverse_condition_maybe_unordered
21971 (GET_CODE (compare_op)));
21972 else
21973 PUT_CODE (compare_op,
21974 reverse_condition (GET_CODE (compare_op)));
21975 }
21976 diff = ct - cf;
21977
21978 if (reg_overlap_mentioned_p (out, op0)
21979 || reg_overlap_mentioned_p (out, op1))
21980 tmp = gen_reg_rtx (mode);
21981
21982 if (mode == DImode)
21983 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
21984 else
21985 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
21986 flags, compare_op));
21987 }
21988 else
21989 {
21990 if (code == GT || code == GE)
21991 code = reverse_condition (code);
21992 else
21993 {
21994 std::swap (ct, cf);
21995 diff = ct - cf;
21996 }
21997 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
21998 }
21999
22000 if (diff == 1)
22001 {
22002 /*
22003 * cmpl op0,op1
22004 * sbbl dest,dest
22005 * [addl dest, ct]
22006 *
22007 * Size 5 - 8.
22008 */
22009 if (ct)
22010 tmp = expand_simple_binop (mode, PLUS,
22011 tmp, GEN_INT (ct),
22012 copy_rtx (tmp), 1, OPTAB_DIRECT);
22013 }
22014 else if (cf == -1)
22015 {
22016 /*
22017 * cmpl op0,op1
22018 * sbbl dest,dest
22019 * orl $ct, dest
22020 *
22021 * Size 8.
22022 */
22023 tmp = expand_simple_binop (mode, IOR,
22024 tmp, GEN_INT (ct),
22025 copy_rtx (tmp), 1, OPTAB_DIRECT);
22026 }
22027 else if (diff == -1 && ct)
22028 {
22029 /*
22030 * cmpl op0,op1
22031 * sbbl dest,dest
22032 * notl dest
22033 * [addl dest, cf]
22034 *
22035 * Size 8 - 11.
22036 */
22037 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
22038 if (cf)
22039 tmp = expand_simple_binop (mode, PLUS,
22040 copy_rtx (tmp), GEN_INT (cf),
22041 copy_rtx (tmp), 1, OPTAB_DIRECT);
22042 }
22043 else
22044 {
22045 /*
22046 * cmpl op0,op1
22047 * sbbl dest,dest
22048 * [notl dest]
22049 * andl cf - ct, dest
22050 * [addl dest, ct]
22051 *
22052 * Size 8 - 11.
22053 */
22054
22055 if (cf == 0)
22056 {
22057 cf = ct;
22058 ct = 0;
22059 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
22060 }
22061
22062 tmp = expand_simple_binop (mode, AND,
22063 copy_rtx (tmp),
22064 gen_int_mode (cf - ct, mode),
22065 copy_rtx (tmp), 1, OPTAB_DIRECT);
22066 if (ct)
22067 tmp = expand_simple_binop (mode, PLUS,
22068 copy_rtx (tmp), GEN_INT (ct),
22069 copy_rtx (tmp), 1, OPTAB_DIRECT);
22070 }
22071
22072 if (!rtx_equal_p (tmp, out))
22073 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
22074
22075 return true;
22076 }
22077
22078 if (diff < 0)
22079 {
22080 machine_mode cmp_mode = GET_MODE (op0);
22081 enum rtx_code new_code;
22082
22083 if (SCALAR_FLOAT_MODE_P (cmp_mode))
22084 {
22085 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
22086
22087 /* We may be reversing an unordered compare to a normal compare, which
22088 is not valid in general (we may convert a non-trapping condition
22089 to a trapping one); however, on i386 we currently emit all
22090 comparisons unordered. */
22091 new_code = reverse_condition_maybe_unordered (code);
22092 }
22093 else
22094 new_code = ix86_reverse_condition (code, cmp_mode);
22095 if (new_code != UNKNOWN)
22096 {
22097 std::swap (ct, cf);
22098 diff = -diff;
22099 code = new_code;
22100 }
22101 }
22102
22103 compare_code = UNKNOWN;
22104 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
22105 && CONST_INT_P (op1))
22106 {
22107 if (op1 == const0_rtx
22108 && (code == LT || code == GE))
22109 compare_code = code;
22110 else if (op1 == constm1_rtx)
22111 {
22112 if (code == LE)
22113 compare_code = LT;
22114 else if (code == GT)
22115 compare_code = GE;
22116 }
22117 }
22118
22119 /* Optimize dest = (op0 < 0) ? -1 : cf. */
22120 if (compare_code != UNKNOWN
22121 && GET_MODE (op0) == GET_MODE (out)
22122 && (cf == -1 || ct == -1))
22123 {
22124 /* If the lea code below could be used, only optimize
22125 if it results in a 2-insn sequence. */
22126
22127 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
22128 || diff == 3 || diff == 5 || diff == 9)
22129 || (compare_code == LT && ct == -1)
22130 || (compare_code == GE && cf == -1))
22131 {
22132 /*
22133 * notl op1 (if necessary)
22134 * sarl $31, op1
22135 * orl cf, op1
22136 */
22137 if (ct != -1)
22138 {
22139 cf = ct;
22140 ct = -1;
22141 code = reverse_condition (code);
22142 }
22143
22144 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
22145
22146 out = expand_simple_binop (mode, IOR,
22147 out, GEN_INT (cf),
22148 out, 1, OPTAB_DIRECT);
22149 if (out != operands[0])
22150 emit_move_insn (operands[0], out);
22151
22152 return true;
22153 }
22154 }
22155
22156
22157 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
22158 || diff == 3 || diff == 5 || diff == 9)
22159 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
22160 && (mode != DImode
22161 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
22162 {
22163 /*
22164 * xorl dest,dest
22165 * cmpl op1,op2
22166 * setcc dest
22167 * lea cf(dest*(ct-cf)),dest
22168 *
22169 * Size 14.
22170 *
22171 * This also catches the degenerate setcc-only case.
22172 */
22173
22174 rtx tmp;
22175 int nops;
22176
22177 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
22178
22179 nops = 0;
22180 /* On x86_64 the lea instruction operates on Pmode, so we need
22181 to get the arithmetic done in the proper mode to match. */
22182 if (diff == 1)
22183 tmp = copy_rtx (out);
22184 else
22185 {
22186 rtx out1;
22187 out1 = copy_rtx (out);
22188 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
22189 nops++;
22190 if (diff & 1)
22191 {
22192 tmp = gen_rtx_PLUS (mode, tmp, out1);
22193 nops++;
22194 }
22195 }
22196 if (cf != 0)
22197 {
22198 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
22199 nops++;
22200 }
22201 if (!rtx_equal_p (tmp, out))
22202 {
22203 if (nops == 1)
22204 out = force_operand (tmp, copy_rtx (out));
22205 else
22206 emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
22207 }
22208 if (!rtx_equal_p (out, operands[0]))
22209 emit_move_insn (operands[0], copy_rtx (out));
22210
22211 return true;
22212 }
22213
22214 /*
22215 * General case: Jumpful:
22216 * xorl dest,dest cmpl op1, op2
22217 * cmpl op1, op2 movl ct, dest
22218 * setcc dest jcc 1f
22219 * decl dest movl cf, dest
22220 * andl (cf-ct),dest 1:
22221 * addl ct,dest
22222 *
22223 * Size 20. Size 14.
22224 *
22225 * This is reasonably steep, but branch mispredict costs are
22226 * high on modern cpus, so consider failing only if optimizing
22227 * for space.
22228 */
22229
22230 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
22231 && BRANCH_COST (optimize_insn_for_speed_p (),
22232 false) >= 2)
22233 {
22234 if (cf == 0)
22235 {
22236 machine_mode cmp_mode = GET_MODE (op0);
22237 enum rtx_code new_code;
22238
22239 if (SCALAR_FLOAT_MODE_P (cmp_mode))
22240 {
22241 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
22242
22243 /* We may be reversing an unordered compare to a normal compare,
22244 which is not valid in general (we may convert a non-trapping
22245 condition to a trapping one); however, on i386 we currently
22246 emit all comparisons unordered. */
22247 new_code = reverse_condition_maybe_unordered (code);
22248 }
22249 else
22250 {
22251 new_code = ix86_reverse_condition (code, cmp_mode);
22252 if (compare_code != UNKNOWN && new_code != UNKNOWN)
22253 compare_code = reverse_condition (compare_code);
22254 }
22255
22256 if (new_code != UNKNOWN)
22257 {
22258 cf = ct;
22259 ct = 0;
22260 code = new_code;
22261 }
22262 }
22263
22264 if (compare_code != UNKNOWN)
22265 {
22266 /* notl op1 (if needed)
22267 sarl $31, op1
22268 andl (cf-ct), op1
22269 addl ct, op1
22270
22271 For x < 0 (resp. x <= -1) there will be no notl,
22272 so if possible swap the constants to get rid of the
22273 complement.
22274 True/false will be -1/0 while code below (store flag
22275 followed by decrement) is 0/-1, so the constants need
22276 to be exchanged once more. */
22277
22278 if (compare_code == GE || !cf)
22279 {
22280 code = reverse_condition (code);
22281 compare_code = LT;
22282 }
22283 else
22284 std::swap (ct, cf);
22285
22286 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
22287 }
22288 else
22289 {
22290 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
22291
22292 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
22293 constm1_rtx,
22294 copy_rtx (out), 1, OPTAB_DIRECT);
22295 }
22296
22297 out = expand_simple_binop (mode, AND, copy_rtx (out),
22298 gen_int_mode (cf - ct, mode),
22299 copy_rtx (out), 1, OPTAB_DIRECT);
22300 if (ct)
22301 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
22302 copy_rtx (out), 1, OPTAB_DIRECT);
22303 if (!rtx_equal_p (out, operands[0]))
22304 emit_move_insn (operands[0], copy_rtx (out));
22305
22306 return true;
22307 }
22308 }
22309
22310 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
22311 {
22312 /* Try a few things more with specific constants and a variable. */
22313
22314 optab op;
22315 rtx var, orig_out, out, tmp;
22316
22317 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
22318 return false;
22319
22320 /* If one of the two operands is an interesting constant, use the code
22321 above (via a recursive call) to build a 0/-1 mask and combine the variable with it using a logical operation. */
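/* As an illustration of the transformation:
     r = c ? 0 : y   becomes   tmp = c ? 0 : -1;  r = y & tmp
     r = c ? -1 : y  becomes   tmp = c ? -1 : 0;  r = y | tmp
   and symmetrically when the constant is operands[3]. */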
22322
22323 if (CONST_INT_P (operands[2]))
22324 {
22325 var = operands[3];
22326 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
22327 operands[3] = constm1_rtx, op = and_optab;
22328 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
22329 operands[3] = const0_rtx, op = ior_optab;
22330 else
22331 return false;
22332 }
22333 else if (CONST_INT_P (operands[3]))
22334 {
22335 var = operands[2];
22336 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
22337 operands[2] = constm1_rtx, op = and_optab;
22338 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
22339 operands[2] = const0_rtx, op = ior_optab;
22340 else
22341 return false;
22342 }
22343 else
22344 return false;
22345
22346 orig_out = operands[0];
22347 tmp = gen_reg_rtx (mode);
22348 operands[0] = tmp;
22349
22350 /* Recurse to get the constant loaded. */
22351 if (!ix86_expand_int_movcc (operands))
22352 return false;
22353
22354 /* Mask in the interesting variable. */
22355 out = expand_binop (mode, op, var, tmp, orig_out, 0,
22356 OPTAB_WIDEN);
22357 if (!rtx_equal_p (out, orig_out))
22358 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
22359
22360 return true;
22361 }
22362
22363 /*
22364 * For comparison with above,
22365 *
22366 * movl cf,dest
22367 * movl ct,tmp
22368 * cmpl op1,op2
22369 * cmovcc tmp,dest
22370 *
22371 * Size 15.
22372 */
22373
22374 if (! nonimmediate_operand (operands[2], mode))
22375 operands[2] = force_reg (mode, operands[2]);
22376 if (! nonimmediate_operand (operands[3], mode))
22377 operands[3] = force_reg (mode, operands[3]);
22378
22379 if (! register_operand (operands[2], VOIDmode)
22380 && (mode == QImode
22381 || ! register_operand (operands[3], VOIDmode)))
22382 operands[2] = force_reg (mode, operands[2]);
22383
22384 if (mode == QImode
22385 && ! register_operand (operands[3], VOIDmode))
22386 operands[3] = force_reg (mode, operands[3]);
22387
22388 emit_insn (compare_seq);
22389 emit_insn (gen_rtx_SET (operands[0],
22390 gen_rtx_IF_THEN_ELSE (mode,
22391 compare_op, operands[2],
22392 operands[3])));
22393 return true;
22394 }
22395
22396 /* Swap, force into registers, or otherwise massage the two operands
22397 to an sse comparison with a mask result. Thus we differ a bit from
22398 ix86_prepare_fp_compare_args which expects to produce a flags result.
22399
22400 The DEST operand exists to help determine whether to commute commutative
22401 operators. The POP0/POP1 operands are updated in place. The new
22402 comparison code is returned, or UNKNOWN if not implementable. */
22403
22404 static enum rtx_code
22405 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
22406 rtx *pop0, rtx *pop1)
22407 {
22408 switch (code)
22409 {
22410 case LTGT:
22411 case UNEQ:
22412 /* AVX supports all the needed comparisons. */
22413 if (TARGET_AVX)
22414 break;
22415 /* We have no LTGT as an operator. We could implement it with
22416 NE & ORDERED, but this requires an extra temporary. It's
22417 not clear that it's worth it. */
22418 return UNKNOWN;
22419
22420 case LT:
22421 case LE:
22422 case UNGT:
22423 case UNGE:
22424 /* These are supported directly. */
22425 break;
22426
22427 case EQ:
22428 case NE:
22429 case UNORDERED:
22430 case ORDERED:
22431 /* AVX has 3 operand comparisons, no need to swap anything. */
22432 if (TARGET_AVX)
22433 break;
22434 /* For commutative operators, try to canonicalize the destination
22435 operand to be first in the comparison - this helps reload to
22436 avoid extra moves. */
22437 if (!dest || !rtx_equal_p (dest, *pop1))
22438 break;
22439 /* FALLTHRU */
22440
22441 case GE:
22442 case GT:
22443 case UNLE:
22444 case UNLT:
22445 /* These are not supported directly before AVX, and furthermore
22446 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
22447 comparison operands to transform into something that is
22448 supported. */
22449 std::swap (*pop0, *pop1);
22450 code = swap_condition (code);
22451 break;
22452
22453 default:
22454 gcc_unreachable ();
22455 }
22456
22457 return code;
22458 }
22459
22460 /* Detect conditional moves that exactly match min/max operational
22461 semantics. Note that this is IEEE safe, as long as we don't
22462 interchange the operands.
22463
22464 Returns FALSE if this conditional move doesn't match a MIN/MAX,
22465 and TRUE if the operation is successful and instructions are emitted. */
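/* As an illustration of the IEEE-safety argument: for "a < b ? a : b"
   a NaN operand makes the comparison false, so b is selected; the SSE
   min/max instructions make the same choice only for one particular
   operand order, which is why the operands must not be interchanged. */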
22466
22467 static bool
22468 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
22469 rtx cmp_op1, rtx if_true, rtx if_false)
22470 {
22471 machine_mode mode;
22472 bool is_min;
22473 rtx tmp;
22474
22475 if (code == LT)
22476 ;
22477 else if (code == UNGE)
22478 std::swap (if_true, if_false);
22479 else
22480 return false;
22481
22482 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
22483 is_min = true;
22484 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
22485 is_min = false;
22486 else
22487 return false;
22488
22489 mode = GET_MODE (dest);
22490
22491 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
22492 but MODE may be a vector mode and thus not appropriate. */
22493 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
22494 {
22495 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
22496 rtvec v;
22497
22498 if_true = force_reg (mode, if_true);
22499 v = gen_rtvec (2, if_true, if_false);
22500 tmp = gen_rtx_UNSPEC (mode, v, u);
22501 }
22502 else
22503 {
22504 code = is_min ? SMIN : SMAX;
22505 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
22506 }
22507
22508 emit_insn (gen_rtx_SET (dest, tmp));
22509 return true;
22510 }
22511
22512 /* Expand an sse vector comparison. Return the register with the result. */
22513
22514 static rtx
22515 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
22516 rtx op_true, rtx op_false)
22517 {
22518 machine_mode mode = GET_MODE (dest);
22519 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
22520
22521 /* In the general case the result of the comparison can differ from the operands' type. */
22522 machine_mode cmp_mode;
22523
22524 /* In AVX512F the result of comparison is an integer mask. */
22525 bool maskcmp = false;
22526 rtx x;
22527
22528 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
22529 {
22530 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
22531 gcc_assert (cmp_mode != BLKmode);
22532
22533 maskcmp = true;
22534 }
22535 else
22536 cmp_mode = cmp_ops_mode;
22537
22538
22539 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
22540 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
22541 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
22542
22543 if (optimize
22544 || reg_overlap_mentioned_p (dest, op_true)
22545 || reg_overlap_mentioned_p (dest, op_false))
22546 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
22547
22548 /* Compare patterns for int modes are unspec in AVX512F only. */
22549 if (maskcmp && (code == GT || code == EQ))
22550 {
22551 rtx (*gen)(rtx, rtx, rtx);
22552
22553 switch (cmp_ops_mode)
22554 {
22555 case V64QImode:
22556 gcc_assert (TARGET_AVX512BW);
22557 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
22558 break;
22559 case V32HImode:
22560 gcc_assert (TARGET_AVX512BW);
22561 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
22562 break;
22563 case V16SImode:
22564 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
22565 break;
22566 case V8DImode:
22567 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
22568 break;
22569 default:
22570 gen = NULL;
22571 }
22572
22573 if (gen)
22574 {
22575 emit_insn (gen (dest, cmp_op0, cmp_op1));
22576 return dest;
22577 }
22578 }
22579 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
22580
22581 if (cmp_mode != mode && !maskcmp)
22582 {
22583 x = force_reg (cmp_ops_mode, x);
22584 convert_move (dest, x, false);
22585 }
22586 else
22587 emit_insn (gen_rtx_SET (dest, x));
22588
22589 return dest;
22590 }
22591
22592 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
22593 operations. This is used for both scalar and vector conditional moves. */
22594
22595 static void
22596 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
22597 {
22598 machine_mode mode = GET_MODE (dest);
22599 machine_mode cmpmode = GET_MODE (cmp);
22600
22601 /* In AVX512F the result of comparison is an integer mask. */
22602 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
22603
22604 rtx t2, t3, x;
22605
22606 if (vector_all_ones_operand (op_true, mode)
22607 && rtx_equal_p (op_false, CONST0_RTX (mode))
22608 && !maskcmp)
22609 {
22610 emit_insn (gen_rtx_SET (dest, cmp));
22611 }
22612 else if (op_false == CONST0_RTX (mode)
22613 && !maskcmp)
22614 {
22615 op_true = force_reg (mode, op_true);
22616 x = gen_rtx_AND (mode, cmp, op_true);
22617 emit_insn (gen_rtx_SET (dest, x));
22618 }
22619 else if (op_true == CONST0_RTX (mode)
22620 && !maskcmp)
22621 {
22622 op_false = force_reg (mode, op_false);
22623 x = gen_rtx_NOT (mode, cmp);
22624 x = gen_rtx_AND (mode, x, op_false);
22625 emit_insn (gen_rtx_SET (dest, x));
22626 }
22627 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
22628 && !maskcmp)
22629 {
22630 op_false = force_reg (mode, op_false);
22631 x = gen_rtx_IOR (mode, cmp, op_false);
22632 emit_insn (gen_rtx_SET (dest, x));
22633 }
22634 else if (TARGET_XOP
22635 && !maskcmp)
22636 {
22637 op_true = force_reg (mode, op_true);
22638
22639 if (!nonimmediate_operand (op_false, mode))
22640 op_false = force_reg (mode, op_false);
22641
22642 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
22643 op_true,
22644 op_false)));
22645 }
22646 else
22647 {
22648 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
22649 rtx d = dest;
22650
22651 if (!nonimmediate_operand (op_true, mode))
22652 op_true = force_reg (mode, op_true);
22653
22654 op_false = force_reg (mode, op_false);
22655
22656 switch (mode)
22657 {
22658 case V4SFmode:
22659 if (TARGET_SSE4_1)
22660 gen = gen_sse4_1_blendvps;
22661 break;
22662 case V2DFmode:
22663 if (TARGET_SSE4_1)
22664 gen = gen_sse4_1_blendvpd;
22665 break;
22666 case V16QImode:
22667 case V8HImode:
22668 case V4SImode:
22669 case V2DImode:
22670 if (TARGET_SSE4_1)
22671 {
22672 gen = gen_sse4_1_pblendvb;
22673 if (mode != V16QImode)
22674 d = gen_reg_rtx (V16QImode);
22675 op_false = gen_lowpart (V16QImode, op_false);
22676 op_true = gen_lowpart (V16QImode, op_true);
22677 cmp = gen_lowpart (V16QImode, cmp);
22678 }
22679 break;
22680 case V8SFmode:
22681 if (TARGET_AVX)
22682 gen = gen_avx_blendvps256;
22683 break;
22684 case V4DFmode:
22685 if (TARGET_AVX)
22686 gen = gen_avx_blendvpd256;
22687 break;
22688 case V32QImode:
22689 case V16HImode:
22690 case V8SImode:
22691 case V4DImode:
22692 if (TARGET_AVX2)
22693 {
22694 gen = gen_avx2_pblendvb;
22695 if (mode != V32QImode)
22696 d = gen_reg_rtx (V32QImode);
22697 op_false = gen_lowpart (V32QImode, op_false);
22698 op_true = gen_lowpart (V32QImode, op_true);
22699 cmp = gen_lowpart (V32QImode, cmp);
22700 }
22701 break;
22702
22703 case V64QImode:
22704 gen = gen_avx512bw_blendmv64qi;
22705 break;
22706 case V32HImode:
22707 gen = gen_avx512bw_blendmv32hi;
22708 break;
22709 case V16SImode:
22710 gen = gen_avx512f_blendmv16si;
22711 break;
22712 case V8DImode:
22713 gen = gen_avx512f_blendmv8di;
22714 break;
22715 case V8DFmode:
22716 gen = gen_avx512f_blendmv8df;
22717 break;
22718 case V16SFmode:
22719 gen = gen_avx512f_blendmv16sf;
22720 break;
22721
22722 default:
22723 break;
22724 }
22725
22726 if (gen != NULL)
22727 {
22728 emit_insn (gen (d, op_false, op_true, cmp));
22729 if (d != dest)
22730 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
22731 }
22732 else
22733 {
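/* Generic fallback: dest = (op_true & cmp) | (op_false & ~cmp). */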
22734 op_true = force_reg (mode, op_true);
22735
22736 t2 = gen_reg_rtx (mode);
22737 if (optimize)
22738 t3 = gen_reg_rtx (mode);
22739 else
22740 t3 = dest;
22741
22742 x = gen_rtx_AND (mode, op_true, cmp);
22743 emit_insn (gen_rtx_SET (t2, x));
22744
22745 x = gen_rtx_NOT (mode, cmp);
22746 x = gen_rtx_AND (mode, x, op_false);
22747 emit_insn (gen_rtx_SET (t3, x));
22748
22749 x = gen_rtx_IOR (mode, t3, t2);
22750 emit_insn (gen_rtx_SET (dest, x));
22751 }
22752 }
22753 }
22754
22755 /* Expand a floating-point conditional move. Return true if successful. */
22756
22757 bool
22758 ix86_expand_fp_movcc (rtx operands[])
22759 {
22760 machine_mode mode = GET_MODE (operands[0]);
22761 enum rtx_code code = GET_CODE (operands[1]);
22762 rtx tmp, compare_op;
22763 rtx op0 = XEXP (operands[1], 0);
22764 rtx op1 = XEXP (operands[1], 1);
22765
22766 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
22767 {
22768 machine_mode cmode;
22769
22770 /* Since we have no cmove for SSE registers, don't force bad register
22771 allocation just to gain access to it. Deny the movcc when the
22772 comparison mode doesn't match the move mode. */
22773 cmode = GET_MODE (op0);
22774 if (cmode == VOIDmode)
22775 cmode = GET_MODE (op1);
22776 if (cmode != mode)
22777 return false;
22778
22779 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
22780 if (code == UNKNOWN)
22781 return false;
22782
22783 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
22784 operands[2], operands[3]))
22785 return true;
22786
22787 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
22788 operands[2], operands[3]);
22789 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
22790 return true;
22791 }
22792
22793 if (GET_MODE (op0) == TImode
22794 || (GET_MODE (op0) == DImode
22795 && !TARGET_64BIT))
22796 return false;
22797
22798 /* The floating point conditional move instructions don't directly
22799 support conditions resulting from a signed integer comparison. */
22800
22801 compare_op = ix86_expand_compare (code, op0, op1);
22802 if (!fcmov_comparison_operator (compare_op, VOIDmode))
22803 {
22804 tmp = gen_reg_rtx (QImode);
22805 ix86_expand_setcc (tmp, code, op0, op1);
22806
22807 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
22808 }
22809
22810 emit_insn (gen_rtx_SET (operands[0],
22811 gen_rtx_IF_THEN_ELSE (mode, compare_op,
22812 operands[2], operands[3])));
22813
22814 return true;
22815 }
22816
22817 /* Expand a floating-point vector conditional move; a vcond operation
22818 rather than a movcc operation. */
22819
22820 bool
22821 ix86_expand_fp_vcond (rtx operands[])
22822 {
22823 enum rtx_code code = GET_CODE (operands[3]);
22824 rtx cmp;
22825
22826 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
22827 &operands[4], &operands[5]);
22828 if (code == UNKNOWN)
22829 {
22830 rtx temp;
22831 switch (GET_CODE (operands[3]))
22832 {
22833 case LTGT:
22834 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
22835 operands[5], operands[0], operands[0]);
22836 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
22837 operands[5], operands[1], operands[2]);
22838 code = AND;
22839 break;
22840 case UNEQ:
22841 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
22842 operands[5], operands[0], operands[0]);
22843 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
22844 operands[5], operands[1], operands[2]);
22845 code = IOR;
22846 break;
22847 default:
22848 gcc_unreachable ();
22849 }
22850 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
22851 OPTAB_DIRECT);
22852 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
22853 return true;
22854 }
22855
22856 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
22857 operands[5], operands[1], operands[2]))
22858 return true;
22859
22860 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
22861 operands[1], operands[2]);
22862 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
22863 return true;
22864 }
22865
22866 /* Expand a signed/unsigned integral vector conditional move. */
22867
22868 bool
22869 ix86_expand_int_vcond (rtx operands[])
22870 {
22871 machine_mode data_mode = GET_MODE (operands[0]);
22872 machine_mode mode = GET_MODE (operands[4]);
22873 enum rtx_code code = GET_CODE (operands[3]);
22874 bool negate = false;
22875 rtx x, cop0, cop1;
22876
22877 cop0 = operands[4];
22878 cop1 = operands[5];
22879
22880 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
22881 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
22882 if ((code == LT || code == GE)
22883 && data_mode == mode
22884 && cop1 == CONST0_RTX (mode)
22885 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
22886 && GET_MODE_UNIT_SIZE (data_mode) > 1
22887 && GET_MODE_UNIT_SIZE (data_mode) <= 8
22888 && (GET_MODE_SIZE (data_mode) == 16
22889 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
22890 {
22891 rtx negop = operands[2 - (code == LT)];
22892 int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1;
22893 if (negop == CONST1_RTX (data_mode))
22894 {
22895 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
22896 operands[0], 1, OPTAB_DIRECT);
22897 if (res != operands[0])
22898 emit_move_insn (operands[0], res);
22899 return true;
22900 }
22901 else if (GET_MODE_INNER (data_mode) != DImode
22902 && vector_all_ones_operand (negop, data_mode))
22903 {
22904 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
22905 operands[0], 0, OPTAB_DIRECT);
22906 if (res != operands[0])
22907 emit_move_insn (operands[0], res);
22908 return true;
22909 }
22910 }
22911
22912 if (!nonimmediate_operand (cop1, mode))
22913 cop1 = force_reg (mode, cop1);
22914 if (!general_operand (operands[1], data_mode))
22915 operands[1] = force_reg (data_mode, operands[1]);
22916 if (!general_operand (operands[2], data_mode))
22917 operands[2] = force_reg (data_mode, operands[2]);
22918
22919 /* XOP supports all of the comparisons on all 128-bit vector int types. */
22920 if (TARGET_XOP
22921 && (mode == V16QImode || mode == V8HImode
22922 || mode == V4SImode || mode == V2DImode))
22923 ;
22924 else
22925 {
22926 /* Canonicalize the comparison to EQ, GT, GTU. */
22927 switch (code)
22928 {
22929 case EQ:
22930 case GT:
22931 case GTU:
22932 break;
22933
22934 case NE:
22935 case LE:
22936 case LEU:
22937 code = reverse_condition (code);
22938 negate = true;
22939 break;
22940
22941 case GE:
22942 case GEU:
22943 code = reverse_condition (code);
22944 negate = true;
22945 /* FALLTHRU */
22946
22947 case LT:
22948 case LTU:
22949 std::swap (cop0, cop1);
22950 code = swap_condition (code);
22951 break;
22952
22953 default:
22954 gcc_unreachable ();
22955 }
22956
22957 /* Only SSE4.1/SSE4.2 support V2DImode. */
22958 if (mode == V2DImode)
22959 {
22960 switch (code)
22961 {
22962 case EQ:
22963 /* SSE4.1 supports EQ. */
22964 if (!TARGET_SSE4_1)
22965 return false;
22966 break;
22967
22968 case GT:
22969 case GTU:
22970 /* SSE4.2 supports GT/GTU. */
22971 if (!TARGET_SSE4_2)
22972 return false;
22973 break;
22974
22975 default:
22976 gcc_unreachable ();
22977 }
22978 }
22979
22980 /* Unsigned parallel compare is not supported by the hardware.
22981 Play some tricks to turn this into a signed comparison
22982 against 0. */
22983 if (code == GTU)
22984 {
22985 cop0 = force_reg (mode, cop0);
22986
22987 switch (mode)
22988 {
22989 case V16SImode:
22990 case V8DImode:
22991 case V8SImode:
22992 case V4DImode:
22993 case V4SImode:
22994 case V2DImode:
22995 {
22996 rtx t1, t2, mask;
22997 rtx (*gen_sub3) (rtx, rtx, rtx);
22998
22999 switch (mode)
23000 {
23001 case V16SImode: gen_sub3 = gen_subv16si3; break;
23002 case V8DImode: gen_sub3 = gen_subv8di3; break;
23003 case V8SImode: gen_sub3 = gen_subv8si3; break;
23004 case V4DImode: gen_sub3 = gen_subv4di3; break;
23005 case V4SImode: gen_sub3 = gen_subv4si3; break;
23006 case V2DImode: gen_sub3 = gen_subv2di3; break;
23007 default:
23008 gcc_unreachable ();
23009 }
23010 /* Subtract (-(INT MAX) - 1) from both operands to make
23011 them signed. */
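/* A note on why this works: subtraction is modulo 2**n, so subtracting
   0x80...0 simply flips the sign bit, and a >u b iff
   (a ^ 0x80...0) >s (b ^ 0x80...0). */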
23012 mask = ix86_build_signbit_mask (mode, true, false);
23013 t1 = gen_reg_rtx (mode);
23014 emit_insn (gen_sub3 (t1, cop0, mask));
23015
23016 t2 = gen_reg_rtx (mode);
23017 emit_insn (gen_sub3 (t2, cop1, mask));
23018
23019 cop0 = t1;
23020 cop1 = t2;
23021 code = GT;
23022 }
23023 break;
23024
23025 case V64QImode:
23026 case V32HImode:
23027 case V32QImode:
23028 case V16HImode:
23029 case V16QImode:
23030 case V8HImode:
23031 /* Perform a parallel unsigned saturating subtraction. */
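/* This works because a >u b iff the saturating difference (a -us b)
   is nonzero; compute the EQ-with-zero mask and invert it via the
   NEGATE flag below. */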
23032 x = gen_reg_rtx (mode);
23033 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0, cop1)));
23034
23035 cop0 = x;
23036 cop1 = CONST0_RTX (mode);
23037 code = EQ;
23038 negate = !negate;
23039 break;
23040
23041 default:
23042 gcc_unreachable ();
23043 }
23044 }
23045 }
23046
23047 /* Allow the comparison to be done in one mode, but the movcc to
23048 happen in another mode. */
23049 if (data_mode == mode)
23050 {
23051 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
23052 operands[1+negate], operands[2-negate]);
23053 }
23054 else
23055 {
23056 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
23057 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
23058 operands[1+negate], operands[2-negate]);
23059 if (GET_MODE (x) == mode)
23060 x = gen_lowpart (data_mode, x);
23061 }
23062
23063 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
23064 operands[2-negate]);
23065 return true;
23066 }
23067
23068 /* AVX512F does support 64-byte integer vector operations,
23069 thus the longest vector we are faced with is V64QImode. */
23070 #define MAX_VECT_LEN 64
23071
23072 struct expand_vec_perm_d
23073 {
23074 rtx target, op0, op1;
23075 unsigned char perm[MAX_VECT_LEN];
23076 machine_mode vmode;
23077 unsigned char nelt;
23078 bool one_operand_p;
23079 bool testing_p;
23080 };
23081
23082 static bool
23083 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
23084 struct expand_vec_perm_d *d)
23085 {
23086 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
23087 expander, so args are either in d, or in op0, op1 etc. */
23088 machine_mode mode = GET_MODE (d ? d->op0 : op0);
23089 machine_mode maskmode = mode;
23090 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
23091
23092 switch (mode)
23093 {
23094 case V8HImode:
23095 if (TARGET_AVX512VL && TARGET_AVX512BW)
23096 gen = gen_avx512vl_vpermi2varv8hi3;
23097 break;
23098 case V16HImode:
23099 if (TARGET_AVX512VL && TARGET_AVX512BW)
23100 gen = gen_avx512vl_vpermi2varv16hi3;
23101 break;
23102 case V64QImode:
23103 if (TARGET_AVX512VBMI)
23104 gen = gen_avx512bw_vpermi2varv64qi3;
23105 break;
23106 case V32HImode:
23107 if (TARGET_AVX512BW)
23108 gen = gen_avx512bw_vpermi2varv32hi3;
23109 break;
23110 case V4SImode:
23111 if (TARGET_AVX512VL)
23112 gen = gen_avx512vl_vpermi2varv4si3;
23113 break;
23114 case V8SImode:
23115 if (TARGET_AVX512VL)
23116 gen = gen_avx512vl_vpermi2varv8si3;
23117 break;
23118 case V16SImode:
23119 if (TARGET_AVX512F)
23120 gen = gen_avx512f_vpermi2varv16si3;
23121 break;
23122 case V4SFmode:
23123 if (TARGET_AVX512VL)
23124 {
23125 gen = gen_avx512vl_vpermi2varv4sf3;
23126 maskmode = V4SImode;
23127 }
23128 break;
23129 case V8SFmode:
23130 if (TARGET_AVX512VL)
23131 {
23132 gen = gen_avx512vl_vpermi2varv8sf3;
23133 maskmode = V8SImode;
23134 }
23135 break;
23136 case V16SFmode:
23137 if (TARGET_AVX512F)
23138 {
23139 gen = gen_avx512f_vpermi2varv16sf3;
23140 maskmode = V16SImode;
23141 }
23142 break;
23143 case V2DImode:
23144 if (TARGET_AVX512VL)
23145 gen = gen_avx512vl_vpermi2varv2di3;
23146 break;
23147 case V4DImode:
23148 if (TARGET_AVX512VL)
23149 gen = gen_avx512vl_vpermi2varv4di3;
23150 break;
23151 case V8DImode:
23152 if (TARGET_AVX512F)
23153 gen = gen_avx512f_vpermi2varv8di3;
23154 break;
23155 case V2DFmode:
23156 if (TARGET_AVX512VL)
23157 {
23158 gen = gen_avx512vl_vpermi2varv2df3;
23159 maskmode = V2DImode;
23160 }
23161 break;
23162 case V4DFmode:
23163 if (TARGET_AVX512VL)
23164 {
23165 gen = gen_avx512vl_vpermi2varv4df3;
23166 maskmode = V4DImode;
23167 }
23168 break;
23169 case V8DFmode:
23170 if (TARGET_AVX512F)
23171 {
23172 gen = gen_avx512f_vpermi2varv8df3;
23173 maskmode = V8DImode;
23174 }
23175 break;
23176 default:
23177 break;
23178 }
23179
23180 if (gen == NULL)
23181 return false;
23182
23183 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
23184 expander, so args are either in d, or in op0, op1 etc. */
23185 if (d)
23186 {
23187 rtx vec[64];
23188 target = d->target;
23189 op0 = d->op0;
23190 op1 = d->op1;
23191 for (int i = 0; i < d->nelt; ++i)
23192 vec[i] = GEN_INT (d->perm[i]);
23193 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
23194 }
23195
23196 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
23197 return true;
23198 }
23199
23200 /* Expand a variable vector permutation. */
23201
23202 void
23203 ix86_expand_vec_perm (rtx operands[])
23204 {
23205 rtx target = operands[0];
23206 rtx op0 = operands[1];
23207 rtx op1 = operands[2];
23208 rtx mask = operands[3];
23209 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
23210 machine_mode mode = GET_MODE (op0);
23211 machine_mode maskmode = GET_MODE (mask);
23212 int w, e, i;
23213 bool one_operand_shuffle = rtx_equal_p (op0, op1);
23214
23215 /* Number of elements in the vector. */
23216 w = GET_MODE_NUNITS (mode);
23217 e = GET_MODE_UNIT_SIZE (mode);
23218 gcc_assert (w <= 64);
23219
23220 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
23221 return;
23222
23223 if (TARGET_AVX2)
23224 {
23225 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
23226 {
23227 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
23228 a constant shuffle operand. With a tiny bit of effort we can
23229 use VPERMD instead. A re-interpretation stall for V4DFmode is
23230 unfortunate but there's no avoiding it.
23231 Similarly for V16HImode we don't have instructions for variable
23232 shuffling, while for V32QImode we can use vpshufb; vpshufb; vpermq;
23233 vpor after preparing suitable masks. */
23234
23235 if (mode == V16HImode)
23236 {
23237 maskmode = mode = V32QImode;
23238 w = 32;
23239 e = 1;
23240 }
23241 else
23242 {
23243 maskmode = mode = V8SImode;
23244 w = 8;
23245 e = 4;
23246 }
23247 t1 = gen_reg_rtx (maskmode);
23248
23249 /* Replicate the low bits of the V4DImode mask into V8SImode:
23250 mask = { A B C D }
23251 t1 = { A A B B C C D D }. */
23252 for (i = 0; i < w / 2; ++i)
23253 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
23254 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23255 vt = force_reg (maskmode, vt);
23256 mask = gen_lowpart (maskmode, mask);
23257 if (maskmode == V8SImode)
23258 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
23259 else
23260 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
23261
23262 /* Multiply the shuffle indices by two. */
23263 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
23264 OPTAB_DIRECT);
23265
23266 /* Add one to the odd shuffle indices:
23267 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
23268 for (i = 0; i < w / 2; ++i)
23269 {
23270 vec[i * 2] = const0_rtx;
23271 vec[i * 2 + 1] = const1_rtx;
23272 }
23273 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23274 vt = validize_mem (force_const_mem (maskmode, vt));
23275 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
23276 OPTAB_DIRECT);
23277
23278 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
23279 operands[3] = mask = t1;
23280 target = gen_reg_rtx (mode);
23281 op0 = gen_lowpart (mode, op0);
23282 op1 = gen_lowpart (mode, op1);
23283 }
23284
23285 switch (mode)
23286 {
23287 case V8SImode:
23288 /* The VPERMD and VPERMPS instructions already properly ignore
23289 the high bits of the shuffle elements. No need for us to
23290 perform an AND ourselves. */
23291 if (one_operand_shuffle)
23292 {
23293 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
23294 if (target != operands[0])
23295 emit_move_insn (operands[0],
23296 gen_lowpart (GET_MODE (operands[0]), target));
23297 }
23298 else
23299 {
23300 t1 = gen_reg_rtx (V8SImode);
23301 t2 = gen_reg_rtx (V8SImode);
23302 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
23303 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
23304 goto merge_two;
23305 }
23306 return;
23307
23308 case V8SFmode:
23309 mask = gen_lowpart (V8SImode, mask);
23310 if (one_operand_shuffle)
23311 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
23312 else
23313 {
23314 t1 = gen_reg_rtx (V8SFmode);
23315 t2 = gen_reg_rtx (V8SFmode);
23316 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
23317 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
23318 goto merge_two;
23319 }
23320 return;
23321
23322 case V4SImode:
23323 /* By combining the two 128-bit input vectors into one 256-bit
23324 input vector, we can use VPERMD and VPERMPS for the full
23325 two-operand shuffle. */
23326 t1 = gen_reg_rtx (V8SImode);
23327 t2 = gen_reg_rtx (V8SImode);
23328 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
23329 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
23330 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
23331 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
23332 return;
23333
23334 case V4SFmode:
23335 t1 = gen_reg_rtx (V8SFmode);
23336 t2 = gen_reg_rtx (V8SImode);
23337 mask = gen_lowpart (V4SImode, mask);
23338 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
23339 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
23340 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
23341 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
23342 return;
23343
23344 case V32QImode:
23345 t1 = gen_reg_rtx (V32QImode);
23346 t2 = gen_reg_rtx (V32QImode);
23347 t3 = gen_reg_rtx (V32QImode);
23348 vt2 = GEN_INT (-128);
23349 for (i = 0; i < 32; i++)
23350 vec[i] = vt2;
23351 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
23352 vt = force_reg (V32QImode, vt);
23353 for (i = 0; i < 32; i++)
23354 vec[i] = i < 16 ? vt2 : const0_rtx;
23355 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
23356 vt2 = force_reg (V32QImode, vt2);
23357 /* From mask create two adjusted masks, which contain the same
23358 bits as mask in the low 7 bits of each vector element.
23359 The first mask will have the most significant bit clear
23360 if it requests an element from the same 128-bit lane
23361 and MSB set if it requests an element from the other 128-bit lane.
23362 The second mask will have the opposite values of the MSB,
23363 and additionally will have its 128-bit lanes swapped.
23364 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
23365 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
23366 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
23367 stands for the other 12 bytes. */
23368 /* The bit that tells whether an element is from the same lane or the
23369 other lane is bit 4, so shift it up by 3 to the MSB position. */
23370 t5 = gen_reg_rtx (V4DImode);
23371 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
23372 GEN_INT (3)));
23373 /* Clear MSB bits from the mask just in case it had them set. */
23374 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
23375 /* After this t1 will have MSB set for elements from the other lane. */
23376 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
23377 /* Clear bits other than MSB. */
23378 emit_insn (gen_andv32qi3 (t1, t1, vt));
23379 /* Or in the lower bits from mask into t3. */
23380 emit_insn (gen_iorv32qi3 (t3, t1, t2));
23381 /* And invert MSB bits in t1, so MSB is set for elements from the same
23382 lane. */
23383 emit_insn (gen_xorv32qi3 (t1, t1, vt));
23384 /* Swap 128-bit lanes in t3. */
23385 t6 = gen_reg_rtx (V4DImode);
23386 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
23387 const2_rtx, GEN_INT (3),
23388 const0_rtx, const1_rtx));
23389 /* And or in the lower bits from mask into t1. */
23390 emit_insn (gen_iorv32qi3 (t1, t1, t2));
23391 if (one_operand_shuffle)
23392 {
23393 /* Each of these shuffles will put 0s in places where an
23394 element from the other 128-bit lane is needed; otherwise
23395 it will shuffle in the requested value. */
23396 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
23397 gen_lowpart (V32QImode, t6)));
23398 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
23399 /* For t3 the 128-bit lanes are swapped again. */
23400 t7 = gen_reg_rtx (V4DImode);
23401 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
23402 const2_rtx, GEN_INT (3),
23403 const0_rtx, const1_rtx));
23404 /* And oring both together leads to the result. */
23405 emit_insn (gen_iorv32qi3 (target, t1,
23406 gen_lowpart (V32QImode, t7)));
23407 if (target != operands[0])
23408 emit_move_insn (operands[0],
23409 gen_lowpart (GET_MODE (operands[0]), target));
23410 return;
23411 }
23412
23413 t4 = gen_reg_rtx (V32QImode);
23414 /* Similar to the above one_operand_shuffle code, just
23415 repeated twice, once for each operand. The merge_two:
23416 code below then merges the two results together. */
23417 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
23418 gen_lowpart (V32QImode, t6)));
23419 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
23420 gen_lowpart (V32QImode, t6)));
23421 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
23422 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
23423 t7 = gen_reg_rtx (V4DImode);
23424 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
23425 const2_rtx, GEN_INT (3),
23426 const0_rtx, const1_rtx));
23427 t8 = gen_reg_rtx (V4DImode);
23428 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
23429 const2_rtx, GEN_INT (3),
23430 const0_rtx, const1_rtx));
23431 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
23432 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
23433 t1 = t4;
23434 t2 = t3;
23435 goto merge_two;
23436
23437 default:
23438 gcc_assert (GET_MODE_SIZE (mode) <= 16);
23439 break;
23440 }
23441 }
23442
23443 if (TARGET_XOP)
23444 {
23445 /* The XOP VPPERM insn supports three inputs. By ignoring the
23446 one_operand_shuffle special case, we avoid creating another
23447 set of constant vectors in memory. */
23448 one_operand_shuffle = false;
23449
23450 /* mask = mask & {2*w-1, ...} */
23451 vt = GEN_INT (2*w - 1);
23452 }
23453 else
23454 {
23455 /* mask = mask & {w-1, ...} */
23456 vt = GEN_INT (w - 1);
23457 }
23458
23459 for (i = 0; i < w; i++)
23460 vec[i] = vt;
23461 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23462 mask = expand_simple_binop (maskmode, AND, mask, vt,
23463 NULL_RTX, 0, OPTAB_DIRECT);
23464
23465 /* For non-QImode operations, convert the word permutation control
23466 into a byte permutation control. */
23467 if (mode != V16QImode)
23468 {
23469 mask = expand_simple_binop (maskmode, ASHIFT, mask,
23470 GEN_INT (exact_log2 (e)),
23471 NULL_RTX, 0, OPTAB_DIRECT);
23472
23473 /* Convert mask to vector of chars. */
23474 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
23475
23476 /* Replicate each of the input bytes into byte positions:
23477 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
23478 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
23479 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
23480 for (i = 0; i < 16; ++i)
23481 vec[i] = GEN_INT (i/e * e);
23482 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
23483 vt = validize_mem (force_const_mem (V16QImode, vt));
23484 if (TARGET_XOP)
23485 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
23486 else
23487 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
23488
23489 /* Convert it into the byte positions by doing
23490 mask = mask + {0,1,..,e-1, 0,1,..,e-1, ...} */
23491 for (i = 0; i < 16; ++i)
23492 vec[i] = GEN_INT (i % e);
23493 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
23494 vt = validize_mem (force_const_mem (V16QImode, vt));
23495 emit_insn (gen_addv16qi3 (mask, mask, vt));
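/* E.g. for V4SImode (E == 4) a word index 3 in the original mask has now
   become the byte indices { 12, 13, 14, 15 } for the byte shuffle below. */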
23496 }
23497
23498 /* The actual shuffle operations all operate on V16QImode. */
23499 op0 = gen_lowpart (V16QImode, op0);
23500 op1 = gen_lowpart (V16QImode, op1);
23501
23502 if (TARGET_XOP)
23503 {
23504 if (GET_MODE (target) != V16QImode)
23505 target = gen_reg_rtx (V16QImode);
23506 emit_insn (gen_xop_pperm (target, op0, op1, mask));
23507 if (target != operands[0])
23508 emit_move_insn (operands[0],
23509 gen_lowpart (GET_MODE (operands[0]), target));
23510 }
23511 else if (one_operand_shuffle)
23512 {
23513 if (GET_MODE (target) != V16QImode)
23514 target = gen_reg_rtx (V16QImode);
23515 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
23516 if (target != operands[0])
23517 emit_move_insn (operands[0],
23518 gen_lowpart (GET_MODE (operands[0]), target));
23519 }
23520 else
23521 {
23522 rtx xops[6];
23523 bool ok;
23524
23525 /* Shuffle the two input vectors independently. */
23526 t1 = gen_reg_rtx (V16QImode);
23527 t2 = gen_reg_rtx (V16QImode);
23528 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
23529 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
23530
23531 merge_two:
23532 /* Then merge them together. The key is whether any given control
23533 element contained a bit set that indicates the second word. */
23534 mask = operands[3];
23535 vt = GEN_INT (w);
23536 if (maskmode == V2DImode && !TARGET_SSE4_1)
23537 {
23538 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
23539 more shuffle to convert the V2DI input mask into a V4SI
23540 input mask. At that point the masking that expand_int_vcond
23541 performs will work as desired. */
23542 rtx t3 = gen_reg_rtx (V4SImode);
23543 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
23544 const0_rtx, const0_rtx,
23545 const2_rtx, const2_rtx));
23546 mask = t3;
23547 maskmode = V4SImode;
23548 e = w = 4;
23549 }
23550
23551 for (i = 0; i < w; i++)
23552 vec[i] = vt;
23553 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23554 vt = force_reg (maskmode, vt);
23555 mask = expand_simple_binop (maskmode, AND, mask, vt,
23556 NULL_RTX, 0, OPTAB_DIRECT);
23557
23558 if (GET_MODE (target) != mode)
23559 target = gen_reg_rtx (mode);
23560 xops[0] = target;
23561 xops[1] = gen_lowpart (mode, t2);
23562 xops[2] = gen_lowpart (mode, t1);
23563 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
23564 xops[4] = mask;
23565 xops[5] = vt;
23566 ok = ix86_expand_int_vcond (xops);
23567 gcc_assert (ok);
23568 if (target != operands[0])
23569 emit_move_insn (operands[0],
23570 gen_lowpart (GET_MODE (operands[0]), target));
23571 }
23572 }
23573
23574 /* Unpack SRC into DEST as the next wider integer vector type. UNSIGNED_P is
23575 true if we should do zero extension, else sign extension. HIGH_P is
23576 true if we want the N/2 high elements, else the low elements. */
23577
23578 void
23579 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
23580 {
23581 machine_mode imode = GET_MODE (src);
23582 rtx tmp;
23583
23584 if (TARGET_SSE4_1)
23585 {
23586 rtx (*unpack)(rtx, rtx);
23587 rtx (*extract)(rtx, rtx) = NULL;
23588 machine_mode halfmode = BLKmode;
23589
23590 switch (imode)
23591 {
23592 case V64QImode:
23593 if (unsigned_p)
23594 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
23595 else
23596 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
23597 halfmode = V32QImode;
23598 extract
23599 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
23600 break;
23601 case V32QImode:
23602 if (unsigned_p)
23603 unpack = gen_avx2_zero_extendv16qiv16hi2;
23604 else
23605 unpack = gen_avx2_sign_extendv16qiv16hi2;
23606 halfmode = V16QImode;
23607 extract
23608 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
23609 break;
23610 case V32HImode:
23611 if (unsigned_p)
23612 unpack = gen_avx512f_zero_extendv16hiv16si2;
23613 else
23614 unpack = gen_avx512f_sign_extendv16hiv16si2;
23615 halfmode = V16HImode;
23616 extract
23617 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
23618 break;
23619 case V16HImode:
23620 if (unsigned_p)
23621 unpack = gen_avx2_zero_extendv8hiv8si2;
23622 else
23623 unpack = gen_avx2_sign_extendv8hiv8si2;
23624 halfmode = V8HImode;
23625 extract
23626 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
23627 break;
23628 case V16SImode:
23629 if (unsigned_p)
23630 unpack = gen_avx512f_zero_extendv8siv8di2;
23631 else
23632 unpack = gen_avx512f_sign_extendv8siv8di2;
23633 halfmode = V8SImode;
23634 extract
23635 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
23636 break;
23637 case V8SImode:
23638 if (unsigned_p)
23639 unpack = gen_avx2_zero_extendv4siv4di2;
23640 else
23641 unpack = gen_avx2_sign_extendv4siv4di2;
23642 halfmode = V4SImode;
23643 extract
23644 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
23645 break;
23646 case V16QImode:
23647 if (unsigned_p)
23648 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
23649 else
23650 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
23651 break;
23652 case V8HImode:
23653 if (unsigned_p)
23654 unpack = gen_sse4_1_zero_extendv4hiv4si2;
23655 else
23656 unpack = gen_sse4_1_sign_extendv4hiv4si2;
23657 break;
23658 case V4SImode:
23659 if (unsigned_p)
23660 unpack = gen_sse4_1_zero_extendv2siv2di2;
23661 else
23662 unpack = gen_sse4_1_sign_extendv2siv2di2;
23663 break;
23664 default:
23665 gcc_unreachable ();
23666 }
23667
23668 if (GET_MODE_SIZE (imode) >= 32)
23669 {
23670 tmp = gen_reg_rtx (halfmode);
23671 emit_insn (extract (tmp, src));
23672 }
23673 else if (high_p)
23674 {
23675 /* Shift higher 8 bytes to lower 8 bytes. */
23676 tmp = gen_reg_rtx (V1TImode);
23677 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
23678 GEN_INT (64)));
23679 tmp = gen_lowpart (imode, tmp);
23680 }
23681 else
23682 tmp = src;
23683
23684 emit_insn (unpack (dest, tmp));
23685 }
23686 else
23687 {
23688 rtx (*unpack)(rtx, rtx, rtx);
23689
23690 switch (imode)
23691 {
23692 case V16QImode:
23693 if (high_p)
23694 unpack = gen_vec_interleave_highv16qi;
23695 else
23696 unpack = gen_vec_interleave_lowv16qi;
23697 break;
23698 case V8HImode:
23699 if (high_p)
23700 unpack = gen_vec_interleave_highv8hi;
23701 else
23702 unpack = gen_vec_interleave_lowv8hi;
23703 break;
23704 case V4SImode:
23705 if (high_p)
23706 unpack = gen_vec_interleave_highv4si;
23707 else
23708 unpack = gen_vec_interleave_lowv4si;
23709 break;
23710 default:
23711 gcc_unreachable ();
23712 }
23713
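/* Interleaving SRC with a vector of zeros yields zero extension of the
   selected half; interleaving it with a vector of its own sign bits
   (computed below as 0 > SRC) yields sign extension. */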
23714 if (unsigned_p)
23715 tmp = force_reg (imode, CONST0_RTX (imode));
23716 else
23717 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
23718 src, pc_rtx, pc_rtx);
23719
23720 rtx tmp2 = gen_reg_rtx (imode);
23721 emit_insn (unpack (tmp2, src, tmp));
23722 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
23723 }
23724 }
23725
23726 /* Expand conditional increment or decrement using adc/sbb instructions.
23727 The default case using setcc followed by the conditional move can be
23728 done by generic code. */
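/* For example, an unsigned "x += (a < b)" can be expanded roughly as
      cmp a, b
      adc x, 0
   since the carry flag produced by the compare is folded directly into
   the addition. */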
23729 bool
23730 ix86_expand_int_addcc (rtx operands[])
23731 {
23732 enum rtx_code code = GET_CODE (operands[1]);
23733 rtx flags;
23734 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
23735 rtx compare_op;
23736 rtx val = const0_rtx;
23737 bool fpcmp = false;
23738 machine_mode mode;
23739 rtx op0 = XEXP (operands[1], 0);
23740 rtx op1 = XEXP (operands[1], 1);
23741
23742 if (operands[3] != const1_rtx
23743 && operands[3] != constm1_rtx)
23744 return false;
23745 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
23746 return false;
23747 code = GET_CODE (compare_op);
23748
23749 flags = XEXP (compare_op, 0);
23750
23751 if (GET_MODE (flags) == CCFPmode
23752 || GET_MODE (flags) == CCFPUmode)
23753 {
23754 fpcmp = true;
23755 code = ix86_fp_compare_code_to_integer (code);
23756 }
23757
23758 if (code != LTU)
23759 {
23760 val = constm1_rtx;
23761 if (fpcmp)
23762 PUT_CODE (compare_op,
23763 reverse_condition_maybe_unordered
23764 (GET_CODE (compare_op)));
23765 else
23766 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
23767 }
23768
23769 mode = GET_MODE (operands[0]);
23770
23771 /* Construct either adc or sbb insn. */
23772 if ((code == LTU) == (operands[3] == constm1_rtx))
23773 {
23774 switch (mode)
23775 {
23776 case QImode:
23777 insn = gen_subqi3_carry;
23778 break;
23779 case HImode:
23780 insn = gen_subhi3_carry;
23781 break;
23782 case SImode:
23783 insn = gen_subsi3_carry;
23784 break;
23785 case DImode:
23786 insn = gen_subdi3_carry;
23787 break;
23788 default:
23789 gcc_unreachable ();
23790 }
23791 }
23792 else
23793 {
23794 switch (mode)
23795 {
23796 case QImode:
23797 insn = gen_addqi3_carry;
23798 break;
23799 case HImode:
23800 insn = gen_addhi3_carry;
23801 break;
23802 case SImode:
23803 insn = gen_addsi3_carry;
23804 break;
23805 case DImode:
23806 insn = gen_adddi3_carry;
23807 break;
23808 default:
23809 gcc_unreachable ();
23810 }
23811 }
23812 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
23813
23814 return true;
23815 }
23816
23817
23818 /* Split OPERAND into half-mode parts. Similar to split_double_mode,
23819 but works for floating point parameters and non-offsettable memories.
23820 For pushes, it returns just stack offsets; the values will be saved
23821 in the right order. At most four parts are generated. */
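/* For example, on a 32-bit target a DFmode operand is split into two
   SImode parts, an XFmode operand into three and a TFmode operand into
   four; on a 64-bit target XFmode and TFmode are split into two parts. */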
23822
23823 static int
23824 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
23825 {
23826 int size;
23827
23828 if (!TARGET_64BIT)
23829 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
23830 else
23831 size = (GET_MODE_SIZE (mode) + 4) / 8;
23832
23833 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
23834 gcc_assert (size >= 2 && size <= 4);
23835
23836 /* Optimize constant pool references to immediates. This is used by fp
23837 moves that force all constants to memory to allow combining. */
23838 if (MEM_P (operand) && MEM_READONLY_P (operand))
23839 {
23840 rtx tmp = maybe_get_pool_constant (operand);
23841 if (tmp)
23842 operand = tmp;
23843 }
23844
23845 if (MEM_P (operand) && !offsettable_memref_p (operand))
23846 {
23847 /* The only non-offsettable memories we handle are pushes. */
23848 int ok = push_operand (operand, VOIDmode);
23849
23850 gcc_assert (ok);
23851
23852 operand = copy_rtx (operand);
23853 PUT_MODE (operand, word_mode);
23854 parts[0] = parts[1] = parts[2] = parts[3] = operand;
23855 return size;
23856 }
23857
23858 if (GET_CODE (operand) == CONST_VECTOR)
23859 {
23860 machine_mode imode = int_mode_for_mode (mode);
23861 /* Caution: if we looked through a constant pool memory above,
23862 the operand may actually have a different mode now. That's
23863 ok, since we want to pun this all the way back to an integer. */
23864 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
23865 gcc_assert (operand != NULL);
23866 mode = imode;
23867 }
23868
23869 if (!TARGET_64BIT)
23870 {
23871 if (mode == DImode)
23872 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
23873 else
23874 {
23875 int i;
23876
23877 if (REG_P (operand))
23878 {
23879 gcc_assert (reload_completed);
23880 for (i = 0; i < size; i++)
23881 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
23882 }
23883 else if (offsettable_memref_p (operand))
23884 {
23885 operand = adjust_address (operand, SImode, 0);
23886 parts[0] = operand;
23887 for (i = 1; i < size; i++)
23888 parts[i] = adjust_address (operand, SImode, 4 * i);
23889 }
23890 else if (CONST_DOUBLE_P (operand))
23891 {
23892 const REAL_VALUE_TYPE *r;
23893 long l[4];
23894
23895 r = CONST_DOUBLE_REAL_VALUE (operand);
23896 switch (mode)
23897 {
23898 case TFmode:
23899 real_to_target (l, r, mode);
23900 parts[3] = gen_int_mode (l[3], SImode);
23901 parts[2] = gen_int_mode (l[2], SImode);
23902 break;
23903 case XFmode:
23904 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
23905 long double may not be 80-bit. */
23906 real_to_target (l, r, mode);
23907 parts[2] = gen_int_mode (l[2], SImode);
23908 break;
23909 case DFmode:
23910 REAL_VALUE_TO_TARGET_DOUBLE (*r, l);
23911 break;
23912 default:
23913 gcc_unreachable ();
23914 }
23915 parts[1] = gen_int_mode (l[1], SImode);
23916 parts[0] = gen_int_mode (l[0], SImode);
23917 }
23918 else
23919 gcc_unreachable ();
23920 }
23921 }
23922 else
23923 {
23924 if (mode == TImode)
23925 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
23926 if (mode == XFmode || mode == TFmode)
23927 {
23928 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
23929 if (REG_P (operand))
23930 {
23931 gcc_assert (reload_completed);
23932 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
23933 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
23934 }
23935 else if (offsettable_memref_p (operand))
23936 {
23937 operand = adjust_address (operand, DImode, 0);
23938 parts[0] = operand;
23939 parts[1] = adjust_address (operand, upper_mode, 8);
23940 }
23941 else if (CONST_DOUBLE_P (operand))
23942 {
23943 long l[4];
23944
23945 real_to_target (l, CONST_DOUBLE_REAL_VALUE (operand), mode);
23946
23947 /* real_to_target puts 32-bit pieces in each long. */
23948 parts[0] =
23949 gen_int_mode
23950 ((l[0] & (HOST_WIDE_INT) 0xffffffff)
23951 | ((l[1] & (HOST_WIDE_INT) 0xffffffff) << 32),
23952 DImode);
23953
23954 if (upper_mode == SImode)
23955 parts[1] = gen_int_mode (l[2], SImode);
23956 else
23957 parts[1] =
23958 gen_int_mode
23959 ((l[2] & (HOST_WIDE_INT) 0xffffffff)
23960 | ((l[3] & (HOST_WIDE_INT) 0xffffffff) << 32),
23961 DImode);
23962 }
23963 else
23964 gcc_unreachable ();
23965 }
23966 }
23967
23968 return size;
23969 }
23970
23971 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
23972 All required insns are emitted here. Operands 2-5 receive the
23973 destination parts in the correct order; operands 6-9 receive the
23974 corresponding source parts. */
23975
23976 void
23977 ix86_split_long_move (rtx operands[])
23978 {
23979 rtx part[2][4];
23980 int nparts, i, j;
23981 int push = 0;
23982 int collisions = 0;
23983 machine_mode mode = GET_MODE (operands[0]);
23984 bool collisionparts[4];
23985
23986 /* The DFmode expanders may ask us to move a double.
23987 For a 64-bit target this is a single move. By hiding that fact
23988 here we simplify the i386.md splitters. */
23989 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
23990 {
23991 /* Optimize constant pool references to immediates. This is used by
23992 fp moves that force all constants to memory to allow combining. */
23993
23994 if (MEM_P (operands[1])
23995 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
23996 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
23997 operands[1] = get_pool_constant (XEXP (operands[1], 0));
23998 if (push_operand (operands[0], VOIDmode))
23999 {
24000 operands[0] = copy_rtx (operands[0]);
24001 PUT_MODE (operands[0], word_mode);
24002 }
24003 else
24004 operands[0] = gen_lowpart (DImode, operands[0]);
24005 operands[1] = gen_lowpart (DImode, operands[1]);
24006 emit_move_insn (operands[0], operands[1]);
24007 return;
24008 }
24009
24010 /* The only non-offsettable memory we handle is push. */
24011 if (push_operand (operands[0], VOIDmode))
24012 push = 1;
24013 else
24014 gcc_assert (!MEM_P (operands[0])
24015 || offsettable_memref_p (operands[0]));
24016
24017 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
24018 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
24019
24020 /* When emitting a push, take care of source operands on the stack. */
24021 if (push && MEM_P (operands[1])
24022 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
24023 {
24024 rtx src_base = XEXP (part[1][nparts - 1], 0);
24025
24026 /* Compensate for the stack decrement by 4. */
24027 if (!TARGET_64BIT && nparts == 3
24028 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
24029 src_base = plus_constant (Pmode, src_base, 4);
24030
24031 /* src_base refers to the stack pointer and is
24032 automatically decreased by the emitted pushes. */
24033 for (i = 0; i < nparts; i++)
24034 part[1][i] = change_address (part[1][i],
24035 GET_MODE (part[1][i]), src_base);
24036 }
24037
24038 /* We need to do the copy in the right order in case an address register
24039 of the source overlaps the destination. */
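/* E.g. when splitting a DImode load whose address uses %eax into %eax and
   %edx on ia32, the part that overwrites %eax must be moved last. */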
24040 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
24041 {
24042 rtx tmp;
24043
24044 for (i = 0; i < nparts; i++)
24045 {
24046 collisionparts[i]
24047 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
24048 if (collisionparts[i])
24049 collisions++;
24050 }
24051
24052 /* Collision in the middle part can be handled by reordering. */
24053 if (collisions == 1 && nparts == 3 && collisionparts [1])
24054 {
24055 std::swap (part[0][1], part[0][2]);
24056 std::swap (part[1][1], part[1][2]);
24057 }
24058 else if (collisions == 1
24059 && nparts == 4
24060 && (collisionparts [1] || collisionparts [2]))
24061 {
24062 if (collisionparts [1])
24063 {
24064 std::swap (part[0][1], part[0][2]);
24065 std::swap (part[1][1], part[1][2]);
24066 }
24067 else
24068 {
24069 std::swap (part[0][2], part[0][3]);
24070 std::swap (part[1][2], part[1][3]);
24071 }
24072 }
24073
24074 /* If there are more collisions, we can't handle it by reordering.
24075 Do an lea to the last part and use only one colliding move. */
24076 else if (collisions > 1)
24077 {
24078 rtx base, addr, tls_base = NULL_RTX;
24079
24080 collisions = 1;
24081
24082 base = part[0][nparts - 1];
24083
24084 /* Handle the case when the last part isn't valid for lea.
24085 Happens in 64-bit mode storing the 12-byte XFmode. */
24086 if (GET_MODE (base) != Pmode)
24087 base = gen_rtx_REG (Pmode, REGNO (base));
24088
24089 addr = XEXP (part[1][0], 0);
24090 if (TARGET_TLS_DIRECT_SEG_REFS)
24091 {
24092 struct ix86_address parts;
24093 int ok = ix86_decompose_address (addr, &parts);
24094 gcc_assert (ok);
24095 if (parts.seg == DEFAULT_TLS_SEG_REG)
24096 {
24097 /* It is not valid to use %gs: or %fs: in
24098 lea though, so we need to remove it from the
24099 address used for lea and add it to each individual
24100 memory load instead. */
24101 addr = copy_rtx (addr);
24102 rtx *x = &addr;
24103 while (GET_CODE (*x) == PLUS)
24104 {
24105 for (i = 0; i < 2; i++)
24106 {
24107 rtx u = XEXP (*x, i);
24108 if (GET_CODE (u) == ZERO_EXTEND)
24109 u = XEXP (u, 0);
24110 if (GET_CODE (u) == UNSPEC
24111 && XINT (u, 1) == UNSPEC_TP)
24112 {
24113 tls_base = XEXP (*x, i);
24114 *x = XEXP (*x, 1 - i);
24115 break;
24116 }
24117 }
24118 if (tls_base)
24119 break;
24120 x = &XEXP (*x, 0);
24121 }
24122 gcc_assert (tls_base);
24123 }
24124 }
24125 emit_insn (gen_rtx_SET (base, addr));
24126 if (tls_base)
24127 base = gen_rtx_PLUS (GET_MODE (base), base, tls_base);
24128 part[1][0] = replace_equiv_address (part[1][0], base);
24129 for (i = 1; i < nparts; i++)
24130 {
24131 if (tls_base)
24132 base = copy_rtx (base);
24133 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
24134 part[1][i] = replace_equiv_address (part[1][i], tmp);
24135 }
24136 }
24137 }
24138
24139 if (push)
24140 {
24141 if (!TARGET_64BIT)
24142 {
24143 if (nparts == 3)
24144 {
24145 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
24146 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
24147 stack_pointer_rtx, GEN_INT (-4)));
24148 emit_move_insn (part[0][2], part[1][2]);
24149 }
24150 else if (nparts == 4)
24151 {
24152 emit_move_insn (part[0][3], part[1][3]);
24153 emit_move_insn (part[0][2], part[1][2]);
24154 }
24155 }
24156 else
24157 {
24158 /* In 64-bit mode we don't have a 32-bit push available. In case this is
24159 a register, that is OK - we will just use the larger counterpart. We also
24160 retype memory - this comes from an attempt to avoid a REX prefix on
24161 moving the second half of a TFmode value. */
24162 if (GET_MODE (part[1][1]) == SImode)
24163 {
24164 switch (GET_CODE (part[1][1]))
24165 {
24166 case MEM:
24167 part[1][1] = adjust_address (part[1][1], DImode, 0);
24168 break;
24169
24170 case REG:
24171 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
24172 break;
24173
24174 default:
24175 gcc_unreachable ();
24176 }
24177
24178 if (GET_MODE (part[1][0]) == SImode)
24179 part[1][0] = part[1][1];
24180 }
24181 }
24182 emit_move_insn (part[0][1], part[1][1]);
24183 emit_move_insn (part[0][0], part[1][0]);
24184 return;
24185 }
24186
24187 /* Choose correct order to not overwrite the source before it is copied. */
24188 if ((REG_P (part[0][0])
24189 && REG_P (part[1][1])
24190 && (REGNO (part[0][0]) == REGNO (part[1][1])
24191 || (nparts == 3
24192 && REGNO (part[0][0]) == REGNO (part[1][2]))
24193 || (nparts == 4
24194 && REGNO (part[0][0]) == REGNO (part[1][3]))))
24195 || (collisions > 0
24196 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
24197 {
24198 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
24199 {
24200 operands[2 + i] = part[0][j];
24201 operands[6 + i] = part[1][j];
24202 }
24203 }
24204 else
24205 {
24206 for (i = 0; i < nparts; i++)
24207 {
24208 operands[2 + i] = part[0][i];
24209 operands[6 + i] = part[1][i];
24210 }
24211 }
24212
24213 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
24214 if (optimize_insn_for_size_p ())
24215 {
24216 for (j = 0; j < nparts - 1; j++)
24217 if (CONST_INT_P (operands[6 + j])
24218 && operands[6 + j] != const0_rtx
24219 && REG_P (operands[2 + j]))
24220 for (i = j; i < nparts - 1; i++)
24221 if (CONST_INT_P (operands[7 + i])
24222 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
24223 operands[7 + i] = operands[2 + j];
24224 }
24225
24226 for (i = 0; i < nparts; i++)
24227 emit_move_insn (operands[2 + i], operands[6 + i]);
24228
24229 return;
24230 }
24231
24232 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
24233 left shift by a constant, either using a single shift or
24234 a sequence of add instructions. */
24235
24236 static void
24237 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
24238 {
24239 rtx (*insn)(rtx, rtx, rtx);
24240
24241 if (count == 1
24242 || (count * ix86_cost->add <= ix86_cost->shift_const
24243 && !optimize_insn_for_size_p ()))
24244 {
24245 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
24246 while (count-- > 0)
24247 emit_insn (insn (operand, operand, operand));
24248 }
24249 else
24250 {
24251 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
24252 emit_insn (insn (operand, operand, GEN_INT (count)));
24253 }
24254 }
24255
24256 void
24257 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
24258 {
24259 rtx (*gen_ashl3)(rtx, rtx, rtx);
24260 rtx (*gen_shld)(rtx, rtx, rtx);
24261 int half_width = GET_MODE_BITSIZE (mode) >> 1;
24262
24263 rtx low[2], high[2];
24264 int count;
24265
24266 if (CONST_INT_P (operands[2]))
24267 {
24268 split_double_mode (mode, operands, 2, low, high);
24269 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
24270
24271 if (count >= half_width)
24272 {
24273 emit_move_insn (high[0], low[1]);
24274 emit_move_insn (low[0], const0_rtx);
24275
24276 if (count > half_width)
24277 ix86_expand_ashl_const (high[0], count - half_width, mode);
24278 }
24279 else
24280 {
24281 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
24282
24283 if (!rtx_equal_p (operands[0], operands[1]))
24284 emit_move_insn (operands[0], operands[1]);
24285
24286 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
24287 ix86_expand_ashl_const (low[0], count, mode);
24288 }
24289 return;
24290 }
24291
24292 split_double_mode (mode, operands, 1, low, high);
24293
24294 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
24295
24296 if (operands[1] == const1_rtx)
24297 {
24298 /* Assuming we've chosen QImode-capable registers, then 1 << N
24299 can be done with two 32/64-bit shifts, no branches, no cmoves. */
24300 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
24301 {
24302 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
24303
24304 ix86_expand_clear (low[0]);
24305 ix86_expand_clear (high[0]);
24306 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
24307
24308 d = gen_lowpart (QImode, low[0]);
24309 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
24310 s = gen_rtx_EQ (QImode, flags, const0_rtx);
24311 emit_insn (gen_rtx_SET (d, s));
24312
24313 d = gen_lowpart (QImode, high[0]);
24314 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
24315 s = gen_rtx_NE (QImode, flags, const0_rtx);
24316 emit_insn (gen_rtx_SET (d, s));
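/* At this point low = 1 and high = 0 when the count's half-width bit
   (bit 5 for DImode, bit 6 for TImode) is clear, and the reverse when it
   is set; the two shifts emitted below then move the single set bit
   into place. */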
24317 }
24318
24319 /* Otherwise, we can get the same results by manually performing
24320 a bit extract operation on bit 5/6, and then performing the two
24321 shifts. The two methods of getting 0/1 into low/high are exactly
24322 the same size. Avoiding the shift in the bit extract case helps
24323 pentium4 a bit; no one else seems to care much either way. */
24324 else
24325 {
24326 machine_mode half_mode;
24327 rtx (*gen_lshr3)(rtx, rtx, rtx);
24328 rtx (*gen_and3)(rtx, rtx, rtx);
24329 rtx (*gen_xor3)(rtx, rtx, rtx);
24330 HOST_WIDE_INT bits;
24331 rtx x;
24332
24333 if (mode == DImode)
24334 {
24335 half_mode = SImode;
24336 gen_lshr3 = gen_lshrsi3;
24337 gen_and3 = gen_andsi3;
24338 gen_xor3 = gen_xorsi3;
24339 bits = 5;
24340 }
24341 else
24342 {
24343 half_mode = DImode;
24344 gen_lshr3 = gen_lshrdi3;
24345 gen_and3 = gen_anddi3;
24346 gen_xor3 = gen_xordi3;
24347 bits = 6;
24348 }
24349
24350 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
24351 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
24352 else
24353 x = gen_lowpart (half_mode, operands[2]);
24354 emit_insn (gen_rtx_SET (high[0], x));
24355
24356 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
24357 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
24358 emit_move_insn (low[0], high[0]);
24359 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
24360 }
24361
24362 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
24363 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
24364 return;
24365 }
24366
24367 if (operands[1] == constm1_rtx)
24368 {
24369 /* For -1 << N, we can avoid the shld instruction, because we
24370 know that we're shifting 0...31/63 ones into a -1. */
24371 emit_move_insn (low[0], constm1_rtx);
24372 if (optimize_insn_for_size_p ())
24373 emit_move_insn (high[0], low[0]);
24374 else
24375 emit_move_insn (high[0], constm1_rtx);
24376 }
24377 else
24378 {
24379 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
24380
24381 if (!rtx_equal_p (operands[0], operands[1]))
24382 emit_move_insn (operands[0], operands[1]);
24383
24384 split_double_mode (mode, operands, 1, low, high);
24385 emit_insn (gen_shld (high[0], low[0], operands[2]));
24386 }
24387
24388 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
24389
24390 if (TARGET_CMOVE && scratch)
24391 {
24392 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
24393 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
24394
24395 ix86_expand_clear (scratch);
24396 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
24397 }
24398 else
24399 {
24400 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
24401 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
24402
24403 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
24404 }
24405 }
24406
24407 void
24408 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
24409 {
24410 rtx (*gen_ashr3)(rtx, rtx, rtx)
24411 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
24412 rtx (*gen_shrd)(rtx, rtx, rtx);
24413 int half_width = GET_MODE_BITSIZE (mode) >> 1;
24414
24415 rtx low[2], high[2];
24416 int count;
24417
24418 if (CONST_INT_P (operands[2]))
24419 {
24420 split_double_mode (mode, operands, 2, low, high);
24421 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
24422
24423 if (count == GET_MODE_BITSIZE (mode) - 1)
24424 {
24425 emit_move_insn (high[0], high[1]);
24426 emit_insn (gen_ashr3 (high[0], high[0],
24427 GEN_INT (half_width - 1)));
24428 emit_move_insn (low[0], high[0]);
24429
24430 }
24431 else if (count >= half_width)
24432 {
24433 emit_move_insn (low[0], high[1]);
24434 emit_move_insn (high[0], low[0]);
24435 emit_insn (gen_ashr3 (high[0], high[0],
24436 GEN_INT (half_width - 1)));
24437
24438 if (count > half_width)
24439 emit_insn (gen_ashr3 (low[0], low[0],
24440 GEN_INT (count - half_width)));
24441 }
24442 else
24443 {
24444 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24445
24446 if (!rtx_equal_p (operands[0], operands[1]))
24447 emit_move_insn (operands[0], operands[1]);
24448
24449 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
24450 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
24451 }
24452 }
24453 else
24454 {
24455 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24456
24457 if (!rtx_equal_p (operands[0], operands[1]))
24458 emit_move_insn (operands[0], operands[1]);
24459
24460 split_double_mode (mode, operands, 1, low, high);
24461
24462 emit_insn (gen_shrd (low[0], high[0], operands[2]));
24463 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
24464
24465 if (TARGET_CMOVE && scratch)
24466 {
24467 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
24468 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
24469
24470 emit_move_insn (scratch, high[0]);
24471 emit_insn (gen_ashr3 (scratch, scratch,
24472 GEN_INT (half_width - 1)));
24473 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
24474 scratch));
24475 }
24476 else
24477 {
24478 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
24479 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
24480
24481 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
24482 }
24483 }
24484 }
24485
24486 void
24487 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
24488 {
24489 rtx (*gen_lshr3)(rtx, rtx, rtx)
24490 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
24491 rtx (*gen_shrd)(rtx, rtx, rtx);
24492 int half_width = GET_MODE_BITSIZE (mode) >> 1;
24493
24494 rtx low[2], high[2];
24495 int count;
24496
24497 if (CONST_INT_P (operands[2]))
24498 {
24499 split_double_mode (mode, operands, 2, low, high);
24500 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
24501
24502 if (count >= half_width)
24503 {
24504 emit_move_insn (low[0], high[1]);
24505 ix86_expand_clear (high[0]);
24506
24507 if (count > half_width)
24508 emit_insn (gen_lshr3 (low[0], low[0],
24509 GEN_INT (count - half_width)));
24510 }
24511 else
24512 {
24513 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24514
24515 if (!rtx_equal_p (operands[0], operands[1]))
24516 emit_move_insn (operands[0], operands[1]);
24517
24518 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
24519 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
24520 }
24521 }
24522 else
24523 {
24524 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24525
24526 if (!rtx_equal_p (operands[0], operands[1]))
24527 emit_move_insn (operands[0], operands[1]);
24528
24529 split_double_mode (mode, operands, 1, low, high);
24530
24531 emit_insn (gen_shrd (low[0], high[0], operands[2]));
24532 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
24533
24534 if (TARGET_CMOVE && scratch)
24535 {
24536 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
24537 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
24538
24539 ix86_expand_clear (scratch);
24540 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
24541 scratch));
24542 }
24543 else
24544 {
24545 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
24546 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
24547
24548 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
24549 }
24550 }
24551 }
24552
24553 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
24554 static void
24555 predict_jump (int prob)
24556 {
24557 rtx insn = get_last_insn ();
24558 gcc_assert (JUMP_P (insn));
24559 add_int_reg_note (insn, REG_BR_PROB, prob);
24560 }
24561
24562 /* Helper function for the string operations below. Test VARIABLE for
24563 whether it is aligned to VALUE bytes. If true, jump to the label. */
24564 static rtx_code_label *
24565 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
24566 {
24567 rtx_code_label *label = gen_label_rtx ();
24568 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
24569 if (GET_MODE (variable) == DImode)
24570 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
24571 else
24572 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
24573 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
24574 1, label);
24575 if (epilogue)
24576 predict_jump (REG_BR_PROB_BASE * 50 / 100);
24577 else
24578 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24579 return label;
24580 }
24581
24582 /* Decrease COUNTREG by VALUE. */
24583 static void
24584 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
24585 {
24586 rtx (*gen_add)(rtx, rtx, rtx)
24587 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
24588
24589 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
24590 }
24591
24592 /* Zero-extend the possibly-SImode EXP to a Pmode register. */
24593 rtx
24594 ix86_zero_extend_to_Pmode (rtx exp)
24595 {
24596 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
24597 }
24598
24599 /* Divide COUNTREG by SCALE. */
24600 static rtx
24601 scale_counter (rtx countreg, int scale)
24602 {
24603 rtx sc;
24604
24605 if (scale == 1)
24606 return countreg;
24607 if (CONST_INT_P (countreg))
24608 return GEN_INT (INTVAL (countreg) / scale);
24609 gcc_assert (REG_P (countreg));
24610
24611 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
24612 GEN_INT (exact_log2 (scale)),
24613 NULL, 1, OPTAB_DIRECT);
24614 return sc;
24615 }
24616
24617 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
24618 DImode for constant loop counts. */
24619
24620 static machine_mode
24621 counter_mode (rtx count_exp)
24622 {
24623 if (GET_MODE (count_exp) != VOIDmode)
24624 return GET_MODE (count_exp);
24625 if (!CONST_INT_P (count_exp))
24626 return Pmode;
24627 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
24628 return DImode;
24629 return SImode;
24630 }
24631
24632 /* Copy the address to a Pmode register. This is used for x32 to
24633 truncate a DImode TLS address to an SImode register. */
24634
24635 static rtx
24636 ix86_copy_addr_to_reg (rtx addr)
24637 {
24638 rtx reg;
24639 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
24640 {
24641 reg = copy_addr_to_reg (addr);
24642 REG_POINTER (reg) = 1;
24643 return reg;
24644 }
24645 else
24646 {
24647 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
24648 reg = copy_to_mode_reg (DImode, addr);
24649 REG_POINTER (reg) = 1;
24650 return gen_rtx_SUBREG (SImode, reg, 0);
24651 }
24652 }
24653
24654 /* When ISSETMEM is FALSE, output a simple loop that moves memory from the
24655 pointer SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the
24656 overall size is COUNT bytes. When ISSETMEM is TRUE, output the equivalent
24657 loop that sets memory to VALUE (supposed to be in MODE).
24658
24659 The size is rounded down to a whole number of chunks moved at once.
24660 SRCMEM and DESTMEM provide the MEM rtxes to feed proper aliasing info. */
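/* Roughly, the emitted code has the shape

     size = count & -(UNROLL * GET_MODE_SIZE (mode));
     iter = 0;
   top:
     copy or set UNROLL chunks of MODE at DESTPTR + iter (and SRCPTR + iter);
     iter += UNROLL * GET_MODE_SIZE (mode);
     if (iter < size) goto top;
     DESTPTR += iter;  SRCPTR += iter;  */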
24661
24662
24663 static void
24664 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
24665 rtx destptr, rtx srcptr, rtx value,
24666 rtx count, machine_mode mode, int unroll,
24667 int expected_size, bool issetmem)
24668 {
24669 rtx_code_label *out_label, *top_label;
24670 rtx iter, tmp;
24671 machine_mode iter_mode = counter_mode (count);
24672 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
24673 rtx piece_size = GEN_INT (piece_size_n);
24674 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
24675 rtx size;
24676 int i;
24677
24678 top_label = gen_label_rtx ();
24679 out_label = gen_label_rtx ();
24680 iter = gen_reg_rtx (iter_mode);
24681
24682 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
24683 NULL, 1, OPTAB_DIRECT);
24684 /* Those two should combine. */
24685 if (piece_size == const1_rtx)
24686 {
24687 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
24688 true, out_label);
24689 predict_jump (REG_BR_PROB_BASE * 10 / 100);
24690 }
24691 emit_move_insn (iter, const0_rtx);
24692
24693 emit_label (top_label);
24694
24695 tmp = convert_modes (Pmode, iter_mode, iter, true);
24696
24697 /* This assert could be relaxed - in that case we'll need to compute
24698 the smallest power of two containing PIECE_SIZE_N and pass it to
24699 offset_address. */
24700 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
24701 destmem = offset_address (destmem, tmp, piece_size_n);
24702 destmem = adjust_address (destmem, mode, 0);
24703
24704 if (!issetmem)
24705 {
24706 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
24707 srcmem = adjust_address (srcmem, mode, 0);
24708
24709 /* When unrolling for chips that reorder memory reads and writes,
24710 we can save registers by using a single temporary.
24711 Using 4 temporaries is also overkill in 32-bit mode. */
24712 if (!TARGET_64BIT && 0)
24713 {
24714 for (i = 0; i < unroll; i++)
24715 {
24716 if (i)
24717 {
24718 destmem =
24719 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
24720 srcmem =
24721 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
24722 }
24723 emit_move_insn (destmem, srcmem);
24724 }
24725 }
24726 else
24727 {
24728 rtx tmpreg[4];
24729 gcc_assert (unroll <= 4);
24730 for (i = 0; i < unroll; i++)
24731 {
24732 tmpreg[i] = gen_reg_rtx (mode);
24733 if (i)
24734 {
24735 srcmem =
24736 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
24737 }
24738 emit_move_insn (tmpreg[i], srcmem);
24739 }
24740 for (i = 0; i < unroll; i++)
24741 {
24742 if (i)
24743 {
24744 destmem =
24745 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
24746 }
24747 emit_move_insn (destmem, tmpreg[i]);
24748 }
24749 }
24750 }
24751 else
24752 for (i = 0; i < unroll; i++)
24753 {
24754 if (i)
24755 destmem =
24756 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
24757 emit_move_insn (destmem, value);
24758 }
24759
24760 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
24761 true, OPTAB_LIB_WIDEN);
24762 if (tmp != iter)
24763 emit_move_insn (iter, tmp);
24764
24765 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
24766 true, top_label);
24767 if (expected_size != -1)
24768 {
24769 expected_size /= GET_MODE_SIZE (mode) * unroll;
24770 if (expected_size == 0)
24771 predict_jump (0);
24772 else if (expected_size > REG_BR_PROB_BASE)
24773 predict_jump (REG_BR_PROB_BASE - 1);
24774 else
24775 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
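/* i.e. the backward branch is predicted taken with probability
   roughly (expected_size - 1) / expected_size. */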
24776 }
24777 else
24778 predict_jump (REG_BR_PROB_BASE * 80 / 100);
24779 iter = ix86_zero_extend_to_Pmode (iter);
24780 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
24781 true, OPTAB_LIB_WIDEN);
24782 if (tmp != destptr)
24783 emit_move_insn (destptr, tmp);
24784 if (!issetmem)
24785 {
24786 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
24787 true, OPTAB_LIB_WIDEN);
24788 if (tmp != srcptr)
24789 emit_move_insn (srcptr, tmp);
24790 }
24791 emit_label (out_label);
24792 }
24793
24794 /* Output a "rep; mov" or "rep; stos" instruction depending on the ISSETMEM
24795 argument. When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
24796 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
24797 In the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
24798 ORIG_VALUE is the original value passed to memset to fill the memory with.
24799 Other arguments have the same meaning as for the previous function. */
24800
24801 static void
24802 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
24803 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
24804 rtx count,
24805 machine_mode mode, bool issetmem)
24806 {
24807 rtx destexp;
24808 rtx srcexp;
24809 rtx countreg;
24810 HOST_WIDE_INT rounded_count;
24811
24812 /* If possible, it is shorter to use rep movs.
24813 TODO: Maybe it is better to move this logic to decide_alg. */
24814 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
24815 && (!issetmem || orig_value == const0_rtx))
24816 mode = SImode;
24817
24818 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
24819 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
24820
24821 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
24822 GET_MODE_SIZE (mode)));
24823 if (mode != QImode)
24824 {
24825 destexp = gen_rtx_ASHIFT (Pmode, countreg,
24826 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
24827 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
24828 }
24829 else
24830 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
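/* DESTEXP (and SRCEXP below) describe the value the corresponding pointer
   register holds after the rep insn: the original pointer advanced by
   COUNTREG chunks of MODE. */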
24831 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
24832 {
24833 rounded_count
24834 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
24835 destmem = shallow_copy_rtx (destmem);
24836 set_mem_size (destmem, rounded_count);
24837 }
24838 else if (MEM_SIZE_KNOWN_P (destmem))
24839 clear_mem_size (destmem);
24840
24841 if (issetmem)
24842 {
24843 value = force_reg (mode, gen_lowpart (mode, value));
24844 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
24845 }
24846 else
24847 {
24848 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
24849 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
24850 if (mode != QImode)
24851 {
24852 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
24853 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
24854 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
24855 }
24856 else
24857 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
24858 if (CONST_INT_P (count))
24859 {
24860 rounded_count
24861 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
24862 srcmem = shallow_copy_rtx (srcmem);
24863 set_mem_size (srcmem, rounded_count);
24864 }
24865 else
24866 {
24867 if (MEM_SIZE_KNOWN_P (srcmem))
24868 clear_mem_size (srcmem);
24869 }
24870 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
24871 destexp, srcexp));
24872 }
24873 }
24874
24875 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
24876 DESTMEM.
24877 SRCMEM is passed by pointer so that it can be updated on return.
24878 The return value is the updated DESTMEM. */
24879 static rtx
24880 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
24881 HOST_WIDE_INT size_to_move)
24882 {
24883 rtx dst = destmem, src = *srcmem, adjust, tempreg;
24884 enum insn_code code;
24885 machine_mode move_mode;
24886 int piece_size, i;
24887
24888 /* Find the widest mode in which we could perform moves.
24889 Start with the largest power of 2 not exceeding SIZE_TO_MOVE and halve
24890 it until a move of that size is supported. */
24891 piece_size = 1 << floor_log2 (size_to_move);
24892 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
24893 code = optab_handler (mov_optab, move_mode);
24894 while (code == CODE_FOR_nothing && piece_size > 1)
24895 {
24896 piece_size >>= 1;
24897 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
24898 code = optab_handler (mov_optab, move_mode);
24899 }
24900
24901 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24902 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24903 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24904 {
24905 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24906 move_mode = mode_for_vector (word_mode, nunits);
24907 code = optab_handler (mov_optab, move_mode);
24908 if (code == CODE_FOR_nothing)
24909 {
24910 move_mode = word_mode;
24911 piece_size = GET_MODE_SIZE (move_mode);
24912 code = optab_handler (mov_optab, move_mode);
24913 }
24914 }
24915 gcc_assert (code != CODE_FOR_nothing);
24916
24917 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
24918 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
24919
24920 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
24921 gcc_assert (size_to_move % piece_size == 0);
24922 adjust = GEN_INT (piece_size);
24923 for (i = 0; i < size_to_move; i += piece_size)
24924 {
24925 /* We move from memory to memory, so we'll need to do it via
24926 a temporary register. */
24927 tempreg = gen_reg_rtx (move_mode);
24928 emit_insn (GEN_FCN (code) (tempreg, src));
24929 emit_insn (GEN_FCN (code) (dst, tempreg));
24930
24931 emit_move_insn (destptr,
24932 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
24933 emit_move_insn (srcptr,
24934 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
24935
24936 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
24937 piece_size);
24938 src = adjust_automodify_address_nv (src, move_mode, srcptr,
24939 piece_size);
24940 }
24941
24942 /* Update DST and SRC rtx. */
24943 *srcmem = src;
24944 return dst;
24945 }
24946
24947 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
24948 static void
24949 expand_movmem_epilogue (rtx destmem, rtx srcmem,
24950 rtx destptr, rtx srcptr, rtx count, int max_size)
24951 {
24952 rtx src, dest;
24953 if (CONST_INT_P (count))
24954 {
24955 HOST_WIDE_INT countval = INTVAL (count);
24956 HOST_WIDE_INT epilogue_size = countval % max_size;
24957 int i;
24958
24959 /* For now MAX_SIZE should be a power of 2. This assert could be
24960 relaxed, but it'll require a bit more complicated epilogue
24961 expanding. */
24962 gcc_assert ((max_size & (max_size - 1)) == 0);
24963 for (i = max_size; i >= 1; i >>= 1)
24964 {
24965 if (epilogue_size & i)
24966 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
24967 }
24968 return;
24969 }
24970 if (max_size > 8)
24971 {
24972 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
24973 count, 1, OPTAB_DIRECT);
24974 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
24975 count, QImode, 1, 4, false);
24976 return;
24977 }
24978
24979 /* When there are stringops, we can cheaply increase dest and src pointers.
24980 Otherwise we save code size by maintaining an offset (zero is readily
24981 available from the preceding rep operation) and using x86 addressing
24982 modes. */
24983 if (TARGET_SINGLE_STRINGOP)
24984 {
24985 if (max_size > 4)
24986 {
24987 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
24988 src = change_address (srcmem, SImode, srcptr);
24989 dest = change_address (destmem, SImode, destptr);
24990 emit_insn (gen_strmov (destptr, dest, srcptr, src));
24991 emit_label (label);
24992 LABEL_NUSES (label) = 1;
24993 }
24994 if (max_size > 2)
24995 {
24996 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
24997 src = change_address (srcmem, HImode, srcptr);
24998 dest = change_address (destmem, HImode, destptr);
24999 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25000 emit_label (label);
25001 LABEL_NUSES (label) = 1;
25002 }
25003 if (max_size > 1)
25004 {
25005 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
25006 src = change_address (srcmem, QImode, srcptr);
25007 dest = change_address (destmem, QImode, destptr);
25008 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25009 emit_label (label);
25010 LABEL_NUSES (label) = 1;
25011 }
25012 }
25013 else
25014 {
25015 rtx offset = force_reg (Pmode, const0_rtx);
25016 rtx tmp;
25017
25018 if (max_size > 4)
25019 {
25020 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
25021 src = change_address (srcmem, SImode, srcptr);
25022 dest = change_address (destmem, SImode, destptr);
25023 emit_move_insn (dest, src);
25024 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
25025 true, OPTAB_LIB_WIDEN);
25026 if (tmp != offset)
25027 emit_move_insn (offset, tmp);
25028 emit_label (label);
25029 LABEL_NUSES (label) = 1;
25030 }
25031 if (max_size > 2)
25032 {
25033 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
25034 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
25035 src = change_address (srcmem, HImode, tmp);
25036 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
25037 dest = change_address (destmem, HImode, tmp);
25038 emit_move_insn (dest, src);
25039 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
25040 true, OPTAB_LIB_WIDEN);
25041 if (tmp != offset)
25042 emit_move_insn (offset, tmp);
25043 emit_label (label);
25044 LABEL_NUSES (label) = 1;
25045 }
25046 if (max_size > 1)
25047 {
25048 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
25049 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
25050 src = change_address (srcmem, QImode, tmp);
25051 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
25052 dest = change_address (destmem, QImode, tmp);
25053 emit_move_insn (dest, src);
25054 emit_label (label);
25055 LABEL_NUSES (label) = 1;
25056 }
25057 }
25058 }
25059
25060 /* This function emits moves to fill SIZE_TO_MOVE bytes starting at DESTMEM
25061 with the value PROMOTED_VAL.
25062 The return value is the updated DESTMEM. */
25064 static rtx
25065 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
25066 HOST_WIDE_INT size_to_move)
25067 {
25068 rtx dst = destmem, adjust;
25069 enum insn_code code;
25070 machine_mode move_mode;
25071 int piece_size, i;
25072
25073 /* Find the widest mode in which we could perform moves.
25074 Start with the largest power of 2 not exceeding SIZE_TO_MOVE and halve
25075 it until a move of that size is supported. */
25076 move_mode = GET_MODE (promoted_val);
25077 if (move_mode == VOIDmode)
25078 move_mode = QImode;
25079 if (size_to_move < GET_MODE_SIZE (move_mode))
25080 {
25081 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
25082 promoted_val = gen_lowpart (move_mode, promoted_val);
25083 }
25084 piece_size = GET_MODE_SIZE (move_mode);
25085 code = optab_handler (mov_optab, move_mode);
25086 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
25087
25088 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
25089
25090 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
25091 gcc_assert (size_to_move % piece_size == 0);
25092 adjust = GEN_INT (piece_size);
25093 for (i = 0; i < size_to_move; i += piece_size)
25094 {
25095 if (piece_size <= GET_MODE_SIZE (word_mode))
25096 {
25097 emit_insn (gen_strset (destptr, dst, promoted_val));
25098 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
25099 piece_size);
25100 continue;
25101 }
25102
25103 emit_insn (GEN_FCN (code) (dst, promoted_val));
25104
25105 emit_move_insn (destptr,
25106 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
25107
25108 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
25109 piece_size);
25110 }
25111
25112 /* Update DST rtx. */
25113 return dst;
25114 }
25115 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
25116 static void
25117 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
25118 rtx count, int max_size)
25119 {
25120 count =
25121 expand_simple_binop (counter_mode (count), AND, count,
25122 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
25123 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
25124 gen_lowpart (QImode, value), count, QImode,
25125 1, max_size / 2, true);
25126 }
25127
25128 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
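/* For a constant COUNT the residual COUNT % MAX_SIZE bytes are stored with
   straight-line code, largest power-of-two pieces first; for a variable COUNT
   either a small loop (MAX_SIZE > 32) or a chain of tests on the low bits of
   COUNT is emitted.  */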
25129 static void
25130 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
25131 rtx count, int max_size)
25132 {
25133 rtx dest;
25134
25135 if (CONST_INT_P (count))
25136 {
25137 HOST_WIDE_INT countval = INTVAL (count);
25138 HOST_WIDE_INT epilogue_size = countval % max_size;
25139 int i;
25140
25141 /* For now MAX_SIZE should be a power of 2. This assert could be
25142 relaxed, but it would require a more complicated epilogue
25143 expansion. */
25144 gcc_assert ((max_size & (max_size - 1)) == 0);
25145 for (i = max_size; i >= 1; i >>= 1)
25146 {
25147 if (epilogue_size & i)
25148 {
25149 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
25150 destmem = emit_memset (destmem, destptr, vec_value, i);
25151 else
25152 destmem = emit_memset (destmem, destptr, value, i);
25153 }
25154 }
25155 return;
25156 }
25157 if (max_size > 32)
25158 {
25159 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
25160 return;
25161 }
25162 if (max_size > 16)
25163 {
25164 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
25165 if (TARGET_64BIT)
25166 {
25167 dest = change_address (destmem, DImode, destptr);
25168 emit_insn (gen_strset (destptr, dest, value));
25169 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
25170 emit_insn (gen_strset (destptr, dest, value));
25171 }
25172 else
25173 {
25174 dest = change_address (destmem, SImode, destptr);
25175 emit_insn (gen_strset (destptr, dest, value));
25176 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
25177 emit_insn (gen_strset (destptr, dest, value));
25178 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
25179 emit_insn (gen_strset (destptr, dest, value));
25180 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
25181 emit_insn (gen_strset (destptr, dest, value));
25182 }
25183 emit_label (label);
25184 LABEL_NUSES (label) = 1;
25185 }
25186 if (max_size > 8)
25187 {
25188 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
25189 if (TARGET_64BIT)
25190 {
25191 dest = change_address (destmem, DImode, destptr);
25192 emit_insn (gen_strset (destptr, dest, value));
25193 }
25194 else
25195 {
25196 dest = change_address (destmem, SImode, destptr);
25197 emit_insn (gen_strset (destptr, dest, value));
25198 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
25199 emit_insn (gen_strset (destptr, dest, value));
25200 }
25201 emit_label (label);
25202 LABEL_NUSES (label) = 1;
25203 }
25204 if (max_size > 4)
25205 {
25206 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
25207 dest = change_address (destmem, SImode, destptr);
25208 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
25209 emit_label (label);
25210 LABEL_NUSES (label) = 1;
25211 }
25212 if (max_size > 2)
25213 {
25214 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
25215 dest = change_address (destmem, HImode, destptr);
25216 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
25217 emit_label (label);
25218 LABEL_NUSES (label) = 1;
25219 }
25220 if (max_size > 1)
25221 {
25222 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
25223 dest = change_address (destmem, QImode, destptr);
25224 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
25225 emit_label (label);
25226 LABEL_NUSES (label) = 1;
25227 }
25228 }
25229
25230 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
25231 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
25232 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
25233 ignored.
25234 Return value is updated DESTMEM. */
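/* For instance, with ALIGN == 1 and DESIRED_ALIGNMENT == 8 the loop below
   tests DESTPTR against 1, 2 and 4 in turn and conditionally copies (or
   stores) 1, 2 and 4 bytes respectively, decreasing COUNT by the amount
   handled.  */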
25235 static rtx
25236 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
25237 rtx destptr, rtx srcptr, rtx value,
25238 rtx vec_value, rtx count, int align,
25239 int desired_alignment, bool issetmem)
25240 {
25241 int i;
25242 for (i = 1; i < desired_alignment; i <<= 1)
25243 {
25244 if (align <= i)
25245 {
25246 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
25247 if (issetmem)
25248 {
25249 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
25250 destmem = emit_memset (destmem, destptr, vec_value, i);
25251 else
25252 destmem = emit_memset (destmem, destptr, value, i);
25253 }
25254 else
25255 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
25256 ix86_adjust_counter (count, i);
25257 emit_label (label);
25258 LABEL_NUSES (label) = 1;
25259 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
25260 }
25261 }
25262 return destmem;
25263 }
25264
25265 /* Test if COUNT&SIZE is nonzero and if so, expand a movmem
25266 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
25267 and jump to DONE_LABEL. */
25268 static void
25269 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
25270 rtx destptr, rtx srcptr,
25271 rtx value, rtx vec_value,
25272 rtx count, int size,
25273 rtx done_label, bool issetmem)
25274 {
25275 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
25276 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
25277 rtx modesize;
25278 int n;
25279
25280 /* If we do not have a vector value to copy, we must reduce the size. */
25281 if (issetmem)
25282 {
25283 if (!vec_value)
25284 {
25285 if (GET_MODE (value) == VOIDmode && size > 8)
25286 mode = Pmode;
25287 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
25288 mode = GET_MODE (value);
25289 }
25290 else
25291 mode = GET_MODE (vec_value), value = vec_value;
25292 }
25293 else
25294 {
25295 /* Choose appropriate vector mode. */
25296 if (size >= 32)
25297 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
25298 else if (size >= 16)
25299 mode = TARGET_SSE ? V16QImode : DImode;
25300 srcmem = change_address (srcmem, mode, srcptr);
25301 }
25302 destmem = change_address (destmem, mode, destptr);
25303 modesize = GEN_INT (GET_MODE_SIZE (mode));
25304 gcc_assert (GET_MODE_SIZE (mode) <= size);
25305 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
25306 {
25307 if (issetmem)
25308 emit_move_insn (destmem, gen_lowpart (mode, value));
25309 else
25310 {
25311 emit_move_insn (destmem, srcmem);
25312 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
25313 }
25314 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
25315 }
25316
25317 destmem = offset_address (destmem, count, 1);
25318 destmem = offset_address (destmem, GEN_INT (-2 * size),
25319 GET_MODE_SIZE (mode));
25320 if (!issetmem)
25321 {
25322 srcmem = offset_address (srcmem, count, 1);
25323 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
25324 GET_MODE_SIZE (mode));
25325 }
25326 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
25327 {
25328 if (issetmem)
25329 emit_move_insn (destmem, gen_lowpart (mode, value));
25330 else
25331 {
25332 emit_move_insn (destmem, srcmem);
25333 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
25334 }
25335 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
25336 }
25337 emit_jump_insn (gen_jump (done_label));
25338 emit_barrier ();
25339
25340 emit_label (label);
25341 LABEL_NUSES (label) = 1;
25342 }
25343
25344 /* Handle small memcpy (up to SIZE, which is supposed to be a small power of 2)
25345 and get ready for the main memcpy loop by copying initial DESIRED_ALIGN-ALIGN
25346 bytes and last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way that we can
25347 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
25348 DONE_LABEL is a label after the whole copying sequence. The label is created
25349 on demand if *DONE_LABEL is NULL.
25350 MIN_SIZE is the minimal size of the block copied. This value gets adjusted for
25351 new bounds after the initial copies.
25352
25353 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
25354 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
25355 we will dispatch to a library call for large blocks.
25356
25357 In pseudocode we do:
25358
25359 if (COUNT < SIZE)
25360 {
25361 Assume that SIZE is 4. Bigger sizes are handled analogously
25362 if (COUNT & 4)
25363 {
25364 copy 4 bytes from SRCPTR to DESTPTR
25365 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
25366 goto done_label
25367 }
25368 if (!COUNT)
25369 goto done_label;
25370 copy 1 byte from SRCPTR to DESTPTR
25371 if (COUNT & 2)
25372 {
25373 copy 2 bytes from SRCPTR to DESTPTR
25374 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
25375 }
25376 }
25377 else
25378 {
25379 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
25380 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT - SIZE
25381
25382 OLD_DESTPTR = DESTPTR;
25383 Align DESTPTR up to DESIRED_ALIGN
25384 SRCPTR += DESTPTR - OLD_DESTPTR
25385 COUNT -= DESTPTR - OLD_DESTPTR
25386 if (DYNAMIC_CHECK)
25387 Round COUNT down to multiple of SIZE
25388 << optional caller supplied zero size guard is here >>
25389 << optional caller supplied dynamic check is here >>
25390 << caller supplied main copy loop is here >>
25391 }
25392 done_label:
25393 */
25394 static void
25395 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
25396 rtx *destptr, rtx *srcptr,
25397 machine_mode mode,
25398 rtx value, rtx vec_value,
25399 rtx *count,
25400 rtx_code_label **done_label,
25401 int size,
25402 int desired_align,
25403 int align,
25404 unsigned HOST_WIDE_INT *min_size,
25405 bool dynamic_check,
25406 bool issetmem)
25407 {
25408 rtx_code_label *loop_label = NULL, *label;
25409 int n;
25410 rtx modesize;
25411 int prolog_size = 0;
25412 rtx mode_value;
25413
25414 /* Choose the proper value to copy. */
25415 if (issetmem && VECTOR_MODE_P (mode))
25416 mode_value = vec_value;
25417 else
25418 mode_value = value;
25419 gcc_assert (GET_MODE_SIZE (mode) <= size);
25420
25421 /* See if block is big or small, handle small blocks. */
25422 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
25423 {
25424 int size2 = size;
25425 loop_label = gen_label_rtx ();
25426
25427 if (!*done_label)
25428 *done_label = gen_label_rtx ();
25429
25430 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
25431 1, loop_label);
25432 size2 >>= 1;
25433
25434 /* Handle sizes > 3. */
25435 for (;size2 > 2; size2 >>= 1)
25436 expand_small_movmem_or_setmem (destmem, srcmem,
25437 *destptr, *srcptr,
25438 value, vec_value,
25439 *count,
25440 size2, *done_label, issetmem);
25441 /* Nothing to copy? Jump to DONE_LABEL if so. */
25442 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
25443 1, *done_label);
25444
25445 /* Do a byte copy. */
25446 destmem = change_address (destmem, QImode, *destptr);
25447 if (issetmem)
25448 emit_move_insn (destmem, gen_lowpart (QImode, value));
25449 else
25450 {
25451 srcmem = change_address (srcmem, QImode, *srcptr);
25452 emit_move_insn (destmem, srcmem);
25453 }
25454
25455 /* Handle sizes 2 and 3. */
25456 label = ix86_expand_aligntest (*count, 2, false);
25457 destmem = change_address (destmem, HImode, *destptr);
25458 destmem = offset_address (destmem, *count, 1);
25459 destmem = offset_address (destmem, GEN_INT (-2), 2);
25460 if (issetmem)
25461 emit_move_insn (destmem, gen_lowpart (HImode, value));
25462 else
25463 {
25464 srcmem = change_address (srcmem, HImode, *srcptr);
25465 srcmem = offset_address (srcmem, *count, 1);
25466 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
25467 emit_move_insn (destmem, srcmem);
25468 }
25469
25470 emit_label (label);
25471 LABEL_NUSES (label) = 1;
25472 emit_jump_insn (gen_jump (*done_label));
25473 emit_barrier ();
25474 }
25475 else
25476 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
25477 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
25478
25479 /* Start memcpy for COUNT >= SIZE. */
25480 if (loop_label)
25481 {
25482 emit_label (loop_label);
25483 LABEL_NUSES (loop_label) = 1;
25484 }
25485
25486 /* Copy first desired_align bytes. */
25487 if (!issetmem)
25488 srcmem = change_address (srcmem, mode, *srcptr);
25489 destmem = change_address (destmem, mode, *destptr);
25490 modesize = GEN_INT (GET_MODE_SIZE (mode));
25491 for (n = 0; prolog_size < desired_align - align; n++)
25492 {
25493 if (issetmem)
25494 emit_move_insn (destmem, mode_value);
25495 else
25496 {
25497 emit_move_insn (destmem, srcmem);
25498 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
25499 }
25500 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
25501 prolog_size += GET_MODE_SIZE (mode);
25502 }
25503
25504
25505 /* Copy last SIZE bytes. */
25506 destmem = offset_address (destmem, *count, 1);
25507 destmem = offset_address (destmem,
25508 GEN_INT (-size - prolog_size),
25509 1);
25510 if (issetmem)
25511 emit_move_insn (destmem, mode_value);
25512 else
25513 {
25514 srcmem = offset_address (srcmem, *count, 1);
25515 srcmem = offset_address (srcmem,
25516 GEN_INT (-size - prolog_size),
25517 1);
25518 emit_move_insn (destmem, srcmem);
25519 }
25520 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
25521 {
25522 destmem = offset_address (destmem, modesize, 1);
25523 if (issetmem)
25524 emit_move_insn (destmem, mode_value);
25525 else
25526 {
25527 srcmem = offset_address (srcmem, modesize, 1);
25528 emit_move_insn (destmem, srcmem);
25529 }
25530 }
25531
25532 /* Align destination. */
25533 if (desired_align > 1 && desired_align > align)
25534 {
25535 rtx saveddest = *destptr;
25536
25537 gcc_assert (desired_align <= size);
25538 /* Align destptr up, placing it in a new register. */
25539 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
25540 GEN_INT (prolog_size),
25541 NULL_RTX, 1, OPTAB_DIRECT);
25542 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
25543 REG_POINTER (*destptr) = 1;
25544 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
25545 GEN_INT (-desired_align),
25546 *destptr, 1, OPTAB_DIRECT);
25547 /* See how many bytes we skipped. */
25548 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
25549 *destptr,
25550 saveddest, 1, OPTAB_DIRECT);
25551 /* Adjust srcptr and count. */
25552 if (!issetmem)
25553 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
25554 saveddest, *srcptr, 1, OPTAB_DIRECT);
25555 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
25556 saveddest, *count, 1, OPTAB_DIRECT);
25557 /* We copied at most size + prolog_size. */
25558 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
25559 *min_size
25560 = ROUND_DOWN (*min_size - size, (unsigned HOST_WIDE_INT)size);
25561 else
25562 *min_size = 0;
25563
25564 /* Our loops always round down the block size, but for dispatch to the library
25565 we need the precise value. */
25566 if (dynamic_check)
25567 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
25568 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
25569 }
25570 else
25571 {
25572 gcc_assert (prolog_size == 0);
25573 /* Decrease count, so we won't end up copying last word twice. */
25574 if (!CONST_INT_P (*count))
25575 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
25576 constm1_rtx, *count, 1, OPTAB_DIRECT);
25577 else
25578 *count = GEN_INT (ROUND_DOWN (UINTVAL (*count) - 1,
25579 (unsigned HOST_WIDE_INT)size));
25580 if (*min_size)
25581 *min_size = ROUND_DOWN (*min_size - 1, (unsigned HOST_WIDE_INT)size);
25582 }
25583 }
25584
25585
25586 /* This function is like the previous one, except here we know how many bytes
25587 need to be copied. That allows us to update alignment not only of DST, which
25588 is returned, but also of SRC, which is passed as a pointer for that
25589 reason. */
25590 static rtx
25591 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
25592 rtx srcreg, rtx value, rtx vec_value,
25593 int desired_align, int align_bytes,
25594 bool issetmem)
25595 {
25596 rtx src = NULL;
25597 rtx orig_dst = dst;
25598 rtx orig_src = NULL;
25599 int piece_size = 1;
25600 int copied_bytes = 0;
25601
25602 if (!issetmem)
25603 {
25604 gcc_assert (srcp != NULL);
25605 src = *srcp;
25606 orig_src = src;
25607 }
25608
25609 for (piece_size = 1;
25610 piece_size <= desired_align && copied_bytes < align_bytes;
25611 piece_size <<= 1)
25612 {
25613 if (align_bytes & piece_size)
25614 {
25615 if (issetmem)
25616 {
25617 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
25618 dst = emit_memset (dst, destreg, vec_value, piece_size);
25619 else
25620 dst = emit_memset (dst, destreg, value, piece_size);
25621 }
25622 else
25623 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
25624 copied_bytes += piece_size;
25625 }
25626 }
25627 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
25628 set_mem_align (dst, desired_align * BITS_PER_UNIT);
25629 if (MEM_SIZE_KNOWN_P (orig_dst))
25630 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
25631
25632 if (!issetmem)
25633 {
25634 int src_align_bytes = get_mem_align_offset (src, desired_align
25635 * BITS_PER_UNIT);
25636 if (src_align_bytes >= 0)
25637 src_align_bytes = desired_align - src_align_bytes;
25638 if (src_align_bytes >= 0)
25639 {
25640 unsigned int src_align;
25641 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
25642 {
25643 if ((src_align_bytes & (src_align - 1))
25644 == (align_bytes & (src_align - 1)))
25645 break;
25646 }
25647 if (src_align > (unsigned int) desired_align)
25648 src_align = desired_align;
25649 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
25650 set_mem_align (src, src_align * BITS_PER_UNIT);
25651 }
25652 if (MEM_SIZE_KNOWN_P (orig_src))
25653 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
25654 *srcp = src;
25655 }
25656
25657 return dst;
25658 }
25659
25660 /* Return true if ALG can be used in current context.
25661 Assume we expand memset if MEMSET is true. */
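/* For instance, the rep-prefixed variants are rejected for memcpy when the
   user has made %esi a fixed register (e.g. via -ffixed-esi), since the
   expansion needs it.  */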
25662 static bool
25663 alg_usable_p (enum stringop_alg alg, bool memset)
25664 {
25665 if (alg == no_stringop)
25666 return false;
25667 if (alg == vector_loop)
25668 return TARGET_SSE || TARGET_AVX;
25669 /* Algorithms using the rep prefix want at least edi and ecx;
25670 additionally, memset wants eax and memcpy wants esi. Don't
25671 consider such algorithms if the user has appropriated those
25672 registers for their own purposes. */
25673 if (alg == rep_prefix_1_byte
25674 || alg == rep_prefix_4_byte
25675 || alg == rep_prefix_8_byte)
25676 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
25677 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
25678 return true;
25679 }
25680
25681 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
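/* The choice is made roughly in this order: a usable algorithm requested with
   -mstringop-strategy wins; when optimizing for size a rep-prefixed variant
   (or a plain loop) is picked; otherwise the per-size table of the active
   cost model is consulted, with a heuristic fallback when only a libcall
   entry applies but inlining was requested.  */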
25682 static enum stringop_alg
25683 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
25684 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
25685 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
25686 {
25687 const struct stringop_algs * algs;
25688 bool optimize_for_speed;
25689 int max = 0;
25690 const struct processor_costs *cost;
25691 int i;
25692 bool any_alg_usable_p = false;
25693
25694 *noalign = false;
25695 *dynamic_check = -1;
25696
25697 /* Even if the string operation call is cold, we still might spend a lot
25698 of time processing large blocks. */
25699 if (optimize_function_for_size_p (cfun)
25700 || (optimize_insn_for_size_p ()
25701 && (max_size < 256
25702 || (expected_size != -1 && expected_size < 256))))
25703 optimize_for_speed = false;
25704 else
25705 optimize_for_speed = true;
25706
25707 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
25708 if (memset)
25709 algs = &cost->memset[TARGET_64BIT != 0];
25710 else
25711 algs = &cost->memcpy[TARGET_64BIT != 0];
25712
25713 /* Find the maximal size covered by a usable, non-libcall algorithm. */
25714 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
25715 {
25716 enum stringop_alg candidate = algs->size[i].alg;
25717 bool usable = alg_usable_p (candidate, memset);
25718 any_alg_usable_p |= usable;
25719
25720 if (candidate != libcall && candidate && usable)
25721 max = algs->size[i].max;
25722 }
25723
25724 /* If the expected size is not known but the max size is small enough
25725 so that the inline version is a win, set the expected size into
25726 the range. */
25727 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
25728 && expected_size == -1)
25729 expected_size = min_size / 2 + max_size / 2;
25730
25731 /* If the user specified the algorithm, honor it if possible. */
25732 if (ix86_stringop_alg != no_stringop
25733 && alg_usable_p (ix86_stringop_alg, memset))
25734 return ix86_stringop_alg;
25735 /* rep; movq or rep; movl is the smallest variant. */
25736 else if (!optimize_for_speed)
25737 {
25738 *noalign = true;
25739 if (!count || (count & 3) || (memset && !zero_memset))
25740 return alg_usable_p (rep_prefix_1_byte, memset)
25741 ? rep_prefix_1_byte : loop_1_byte;
25742 else
25743 return alg_usable_p (rep_prefix_4_byte, memset)
25744 ? rep_prefix_4_byte : loop;
25745 }
25746 /* Very tiny blocks are best handled via the loop; REP is expensive to
25747 set up. */
25748 else if (expected_size != -1 && expected_size < 4)
25749 return loop_1_byte;
25750 else if (expected_size != -1)
25751 {
25752 enum stringop_alg alg = libcall;
25753 bool alg_noalign = false;
25754 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
25755 {
25756 /* We get here if the algorithms that were not libcall-based
25757 were rep-prefix based and we are unable to use rep prefixes
25758 based on global register usage. Break out of the loop and
25759 use the heuristic below. */
25760 if (algs->size[i].max == 0)
25761 break;
25762 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
25763 {
25764 enum stringop_alg candidate = algs->size[i].alg;
25765
25766 if (candidate != libcall && alg_usable_p (candidate, memset))
25767 {
25768 alg = candidate;
25769 alg_noalign = algs->size[i].noalign;
25770 }
25771 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
25772 last non-libcall inline algorithm. */
25773 if (TARGET_INLINE_ALL_STRINGOPS)
25774 {
25775 /* When the current size is best to be copied by a libcall,
25776 but we are still forced to inline, run the heuristic below
25777 that will pick code for medium sized blocks. */
25778 if (alg != libcall)
25779 {
25780 *noalign = alg_noalign;
25781 return alg;
25782 }
25783 else if (!any_alg_usable_p)
25784 break;
25785 }
25786 else if (alg_usable_p (candidate, memset))
25787 {
25788 *noalign = algs->size[i].noalign;
25789 return candidate;
25790 }
25791 }
25792 }
25793 }
25794 /* When asked to inline the call anyway, try to pick a meaningful choice.
25795 We look for the maximal size of block that is faster to copy by hand and
25796 take blocks of at most that size, guessing that the average size will
25797 be roughly half of the block.
25798
25799 If this turns out to be bad, we might simply specify the preferred
25800 choice in ix86_costs. */
25801 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
25802 && (algs->unknown_size == libcall
25803 || !alg_usable_p (algs->unknown_size, memset)))
25804 {
25805 enum stringop_alg alg;
25806
25807 /* If there aren't any usable algorithms, then recursing on
25808 smaller sizes isn't going to find anything. Just return the
25809 simple byte-at-a-time copy loop. */
25810 if (!any_alg_usable_p)
25811 {
25812 /* Pick something reasonable. */
25813 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
25814 *dynamic_check = 128;
25815 return loop_1_byte;
25816 }
25817 if (max <= 0)
25818 max = 4096;
25819 alg = decide_alg (count, max / 2, min_size, max_size, memset,
25820 zero_memset, dynamic_check, noalign);
25821 gcc_assert (*dynamic_check == -1);
25822 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
25823 *dynamic_check = max;
25824 else
25825 gcc_assert (alg != libcall);
25826 return alg;
25827 }
25828 return (alg_usable_p (algs->unknown_size, memset)
25829 ? algs->unknown_size : libcall);
25830 }
25831
25832 /* Decide on alignment. We know that the operand is already aligned to ALIGN
25833 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
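/* For example, a vector_loop moving V16QImode chunks asks for 16-byte
   alignment and the rep-prefixed variants on PentiumPro prefer 8 bytes,
   while no extra alignment is requested when optimizing for size.  */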
25834 static int
25835 decide_alignment (int align,
25836 enum stringop_alg alg,
25837 int expected_size,
25838 machine_mode move_mode)
25839 {
25840 int desired_align = 0;
25841
25842 gcc_assert (alg != no_stringop);
25843
25844 if (alg == libcall)
25845 return 0;
25846 if (move_mode == VOIDmode)
25847 return 0;
25848
25849 desired_align = GET_MODE_SIZE (move_mode);
25850 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
25851 copying a whole cache line at once. */
25852 if (TARGET_PENTIUMPRO
25853 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
25854 desired_align = 8;
25855
25856 if (optimize_size)
25857 desired_align = 1;
25858 if (desired_align < align)
25859 desired_align = align;
25860 if (expected_size != -1 && expected_size < 4)
25861 desired_align = align;
25862
25863 return desired_align;
25864 }
25865
25866
25867 /* Helper function for memset. For QImode value 0xXY produce
25868 0xXYXYXYXY of the width specified by MODE. This is essentially
25869 a * 0x01010101, but we can do slightly better than
25870 synth_mult by unwinding the sequence by hand on CPUs with
25871 slow multiply. */
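/* E.g. VAL == 0xAB yields 0xABABABAB in SImode and 0xABABABABABABABAB in
   DImode, built either by multiplying with a replicated 0x01 constant or by
   the explicit shift/or sequence below, whichever the cost model prefers.  */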
25872 static rtx
25873 promote_duplicated_reg (machine_mode mode, rtx val)
25874 {
25875 machine_mode valmode = GET_MODE (val);
25876 rtx tmp;
25877 int nops = mode == DImode ? 3 : 2;
25878
25879 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
25880 if (val == const0_rtx)
25881 return copy_to_mode_reg (mode, CONST0_RTX (mode));
25882 if (CONST_INT_P (val))
25883 {
25884 HOST_WIDE_INT v = INTVAL (val) & 255;
25885
25886 v |= v << 8;
25887 v |= v << 16;
25888 if (mode == DImode)
25889 v |= (v << 16) << 16;
25890 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
25891 }
25892
25893 if (valmode == VOIDmode)
25894 valmode = QImode;
25895 if (valmode != QImode)
25896 val = gen_lowpart (QImode, val);
25897 if (mode == QImode)
25898 return val;
25899 if (!TARGET_PARTIAL_REG_STALL)
25900 nops--;
25901 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
25902 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
25903 <= (ix86_cost->shift_const + ix86_cost->add) * nops
25904 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
25905 {
25906 rtx reg = convert_modes (mode, QImode, val, true);
25907 tmp = promote_duplicated_reg (mode, const1_rtx);
25908 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
25909 OPTAB_DIRECT);
25910 }
25911 else
25912 {
25913 rtx reg = convert_modes (mode, QImode, val, true);
25914
25915 if (!TARGET_PARTIAL_REG_STALL)
25916 if (mode == SImode)
25917 emit_insn (gen_insvsi_1 (reg, reg));
25918 else
25919 emit_insn (gen_insvdi_1 (reg, reg));
25920 else
25921 {
25922 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
25923 NULL, 1, OPTAB_DIRECT);
25924 reg =
25925 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
25926 }
25927 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
25928 NULL, 1, OPTAB_DIRECT);
25929 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
25930 if (mode == SImode)
25931 return reg;
25932 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
25933 NULL, 1, OPTAB_DIRECT);
25934 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
25935 return reg;
25936 }
25937 }
25938
25939 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
25940 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
25941 getting alignment from ALIGN to DESIRED_ALIGN. */
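/* For instance, a 64-bit memset storing 16-byte chunks (SIZE_NEEDED == 16)
   replicates VAL into a full DImode register, while SIZE_NEEDED == 2 only
   requires an HImode replica.  */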
25942 static rtx
25943 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
25944 int align)
25945 {
25946 rtx promoted_val;
25947
25948 if (TARGET_64BIT
25949 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
25950 promoted_val = promote_duplicated_reg (DImode, val);
25951 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
25952 promoted_val = promote_duplicated_reg (SImode, val);
25953 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
25954 promoted_val = promote_duplicated_reg (HImode, val);
25955 else
25956 promoted_val = val;
25957
25958 return promoted_val;
25959 }
25960
25961 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
25962 operations when profitable. The code depends upon architecture, block size
25963 and alignment, but always has one of the following overall structures:
25964
25965 Aligned move sequence:
25966
25967 1) Prologue guard: Conditional that jumps up to epilogues for small
25968 blocks that can be handled by epilogue alone. This is faster
25969 but also needed for correctness, since the prologue assumes the block
25970 is larger than the desired alignment.
25971
25972 Optional dynamic check for size and libcall for large
25973 blocks is emitted here too, with -minline-stringops-dynamically.
25974
25975 2) Prologue: copy first few bytes in order to get destination
25976 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
25977 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
25978 copied. We emit either a jump tree on power of two sized
25979 blocks, or a byte loop.
25980
25981 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
25982 with specified algorithm.
25983
25984 4) Epilogue: code copying tail of the block that is too small to be
25985 handled by main body (or up to size guarded by prologue guard).
25986
25987 Misaligned move sequence
25988
25989 1) misaligned move prologue/epilogue containing:
25990 a) Prologue handling small memory blocks and jumping to done_label
25991 (skipped if blocks are known to be large enough)
25992 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
25993 needed, done by a single possibly misaligned move
25994 (skipped if alignment is not needed)
25995 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
25996
25997 2) Zero size guard dispatching to done_label, if needed
25998
25999 3) dispatch to library call, if needed.
26000
26001 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
26002 with specified algorithm. */
26003 bool
26004 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
26005 rtx align_exp, rtx expected_align_exp,
26006 rtx expected_size_exp, rtx min_size_exp,
26007 rtx max_size_exp, rtx probable_max_size_exp,
26008 bool issetmem)
26009 {
26010 rtx destreg;
26011 rtx srcreg = NULL;
26012 rtx_code_label *label = NULL;
26013 rtx tmp;
26014 rtx_code_label *jump_around_label = NULL;
26015 HOST_WIDE_INT align = 1;
26016 unsigned HOST_WIDE_INT count = 0;
26017 HOST_WIDE_INT expected_size = -1;
26018 int size_needed = 0, epilogue_size_needed;
26019 int desired_align = 0, align_bytes = 0;
26020 enum stringop_alg alg;
26021 rtx promoted_val = NULL;
26022 rtx vec_promoted_val = NULL;
26023 bool force_loopy_epilogue = false;
26024 int dynamic_check;
26025 bool need_zero_guard = false;
26026 bool noalign;
26027 machine_mode move_mode = VOIDmode;
26028 int unroll_factor = 1;
26029 /* TODO: Once value ranges are available, fill in proper data. */
26030 unsigned HOST_WIDE_INT min_size = 0;
26031 unsigned HOST_WIDE_INT max_size = -1;
26032 unsigned HOST_WIDE_INT probable_max_size = -1;
26033 bool misaligned_prologue_used = false;
26034
26035 if (CONST_INT_P (align_exp))
26036 align = INTVAL (align_exp);
26037 /* i386 can do misaligned access at a reasonably increased cost. */
26038 if (CONST_INT_P (expected_align_exp)
26039 && INTVAL (expected_align_exp) > align)
26040 align = INTVAL (expected_align_exp);
26041 /* ALIGN is the minimum of destination and source alignment, but we care here
26042 just about destination alignment. */
26043 else if (!issetmem
26044 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
26045 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
26046
26047 if (CONST_INT_P (count_exp))
26048 {
26049 min_size = max_size = probable_max_size = count = expected_size
26050 = INTVAL (count_exp);
26051 /* When COUNT is 0, there is nothing to do. */
26052 if (!count)
26053 return true;
26054 }
26055 else
26056 {
26057 if (min_size_exp)
26058 min_size = INTVAL (min_size_exp);
26059 if (max_size_exp)
26060 max_size = INTVAL (max_size_exp);
26061 if (probable_max_size_exp)
26062 probable_max_size = INTVAL (probable_max_size_exp);
26063 if (CONST_INT_P (expected_size_exp))
26064 expected_size = INTVAL (expected_size_exp);
26065 }
26066
26067 /* Make sure we don't need to care about overflow later on. */
26068 if (count > (HOST_WIDE_INT_1U << 30))
26069 return false;
26070
26071 /* Step 0: Decide on preferred algorithm, desired alignment and
26072 size of chunks to be copied by main loop. */
26073 alg = decide_alg (count, expected_size, min_size, probable_max_size,
26074 issetmem,
26075 issetmem && val_exp == const0_rtx,
26076 &dynamic_check, &noalign);
26077 if (alg == libcall)
26078 return false;
26079 gcc_assert (alg != no_stringop);
26080
26081 /* For now the vector version of memset is generated only for memory zeroing, as
26082 creating the promoted vector value is very cheap in this case. */
26083 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
26084 alg = unrolled_loop;
26085
26086 if (!count)
26087 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
26088 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
26089 if (!issetmem)
26090 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
26091
26092 unroll_factor = 1;
26093 move_mode = word_mode;
26094 switch (alg)
26095 {
26096 case libcall:
26097 case no_stringop:
26098 case last_alg:
26099 gcc_unreachable ();
26100 case loop_1_byte:
26101 need_zero_guard = true;
26102 move_mode = QImode;
26103 break;
26104 case loop:
26105 need_zero_guard = true;
26106 break;
26107 case unrolled_loop:
26108 need_zero_guard = true;
26109 unroll_factor = (TARGET_64BIT ? 4 : 2);
26110 break;
26111 case vector_loop:
26112 need_zero_guard = true;
26113 unroll_factor = 4;
26114 /* Find the widest supported mode. */
26115 move_mode = word_mode;
26116 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
26117 != CODE_FOR_nothing)
26118 move_mode = GET_MODE_WIDER_MODE (move_mode);
26119
26120 /* Find the corresponding vector mode with the same size as MOVE_MODE.
26121 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
26122 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
26123 {
26124 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
26125 move_mode = mode_for_vector (word_mode, nunits);
26126 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
26127 move_mode = word_mode;
26128 }
26129 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
26130 break;
26131 case rep_prefix_8_byte:
26132 move_mode = DImode;
26133 break;
26134 case rep_prefix_4_byte:
26135 move_mode = SImode;
26136 break;
26137 case rep_prefix_1_byte:
26138 move_mode = QImode;
26139 break;
26140 }
26141 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
26142 epilogue_size_needed = size_needed;
26143
26144 desired_align = decide_alignment (align, alg, expected_size, move_mode);
26145 if (!TARGET_ALIGN_STRINGOPS || noalign)
26146 align = desired_align;
26147
26148 /* Step 1: Prologue guard. */
26149
26150 /* Alignment code needs count to be in register. */
26151 if (CONST_INT_P (count_exp) && desired_align > align)
26152 {
26153 if (INTVAL (count_exp) > desired_align
26154 && INTVAL (count_exp) > size_needed)
26155 {
26156 align_bytes
26157 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
26158 if (align_bytes <= 0)
26159 align_bytes = 0;
26160 else
26161 align_bytes = desired_align - align_bytes;
26162 }
26163 if (align_bytes == 0)
26164 count_exp = force_reg (counter_mode (count_exp), count_exp);
26165 }
26166 gcc_assert (desired_align >= 1 && align >= 1);
26167
26168 /* Misaligned move sequences handle both prologue and epilogue at once.
26169 Default code generation results in smaller code for large alignments
26170 and also avoids redundant work when sizes are known precisely. */
26171 misaligned_prologue_used
26172 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
26173 && MAX (desired_align, epilogue_size_needed) <= 32
26174 && desired_align <= epilogue_size_needed
26175 && ((desired_align > align && !align_bytes)
26176 || (!count && epilogue_size_needed > 1)));
26177
26178 /* Do the cheap promotion to allow better CSE across the
26179 main loop and epilogue (i.e. one load of the big constant in
26180 front of all code).
26181 For now the misaligned move sequences do not have a fast path
26182 without broadcasting. */
26183 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
26184 {
26185 if (alg == vector_loop)
26186 {
26187 gcc_assert (val_exp == const0_rtx);
26188 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
26189 promoted_val = promote_duplicated_reg_to_size (val_exp,
26190 GET_MODE_SIZE (word_mode),
26191 desired_align, align);
26192 }
26193 else
26194 {
26195 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
26196 desired_align, align);
26197 }
26198 }
26199 /* Misaligned move sequences handle both prologues and epilogues at once.
26200 Default code generation results in smaller code for large alignments and
26201 also avoids redundant work when sizes are known precisely. */
26202 if (misaligned_prologue_used)
26203 {
26204 /* The misaligned move prologue handles small blocks by itself. */
26205 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
26206 (dst, src, &destreg, &srcreg,
26207 move_mode, promoted_val, vec_promoted_val,
26208 &count_exp,
26209 &jump_around_label,
26210 desired_align < align
26211 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
26212 desired_align, align, &min_size, dynamic_check, issetmem);
26213 if (!issetmem)
26214 src = change_address (src, BLKmode, srcreg);
26215 dst = change_address (dst, BLKmode, destreg);
26216 set_mem_align (dst, desired_align * BITS_PER_UNIT);
26217 epilogue_size_needed = 0;
26218 if (need_zero_guard
26219 && min_size < (unsigned HOST_WIDE_INT) size_needed)
26220 {
26221 /* It is possible that we copied enough so the main loop will not
26222 execute. */
26223 gcc_assert (size_needed > 1);
26224 if (jump_around_label == NULL_RTX)
26225 jump_around_label = gen_label_rtx ();
26226 emit_cmp_and_jump_insns (count_exp,
26227 GEN_INT (size_needed),
26228 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
26229 if (expected_size == -1
26230 || expected_size < (desired_align - align) / 2 + size_needed)
26231 predict_jump (REG_BR_PROB_BASE * 20 / 100);
26232 else
26233 predict_jump (REG_BR_PROB_BASE * 60 / 100);
26234 }
26235 }
26236 /* Ensure that alignment prologue won't copy past end of block. */
26237 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
26238 {
26239 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
26240 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
26241 Make sure it is power of 2. */
26242 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
26243
26244 /* To improve performance of small blocks, we jump around the VAL
26245 promoting code. This means that if the promoted VAL is not constant,
26246 we might not use it in the epilogue and have to use the byte
26247 loop variant. */
26248 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
26249 force_loopy_epilogue = true;
26250 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
26251 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
26252 {
26253 /* If main algorithm works on QImode, no epilogue is needed.
26254 For small sizes just don't align anything. */
26255 if (size_needed == 1)
26256 desired_align = align;
26257 else
26258 goto epilogue;
26259 }
26260 else if (!count
26261 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
26262 {
26263 label = gen_label_rtx ();
26264 emit_cmp_and_jump_insns (count_exp,
26265 GEN_INT (epilogue_size_needed),
26266 LTU, 0, counter_mode (count_exp), 1, label);
26267 if (expected_size == -1 || expected_size < epilogue_size_needed)
26268 predict_jump (REG_BR_PROB_BASE * 60 / 100);
26269 else
26270 predict_jump (REG_BR_PROB_BASE * 20 / 100);
26271 }
26272 }
26273
26274 /* Emit code to decide at runtime whether a library call or inline code should
26275 be used. */
26276 if (dynamic_check != -1)
26277 {
26278 if (!issetmem && CONST_INT_P (count_exp))
26279 {
26280 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
26281 {
26282 emit_block_move_via_libcall (dst, src, count_exp, false);
26283 count_exp = const0_rtx;
26284 goto epilogue;
26285 }
26286 }
26287 else
26288 {
26289 rtx_code_label *hot_label = gen_label_rtx ();
26290 if (jump_around_label == NULL_RTX)
26291 jump_around_label = gen_label_rtx ();
26292 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
26293 LEU, 0, counter_mode (count_exp),
26294 1, hot_label);
26295 predict_jump (REG_BR_PROB_BASE * 90 / 100);
26296 if (issetmem)
26297 set_storage_via_libcall (dst, count_exp, val_exp, false);
26298 else
26299 emit_block_move_via_libcall (dst, src, count_exp, false);
26300 emit_jump (jump_around_label);
26301 emit_label (hot_label);
26302 }
26303 }
26304
26305 /* Step 2: Alignment prologue. */
26306 /* Do the expensive promotion once we branched off the small blocks. */
26307 if (issetmem && !promoted_val)
26308 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
26309 desired_align, align);
26310
26311 if (desired_align > align && !misaligned_prologue_used)
26312 {
26313 if (align_bytes == 0)
26314 {
26315 /* Except for the first move in the prologue, we no longer know
26316 the constant offset in aliasing info. It doesn't seem worth
26317 the pain to maintain it for the first move, so throw away
26318 the info early. */
26319 dst = change_address (dst, BLKmode, destreg);
26320 if (!issetmem)
26321 src = change_address (src, BLKmode, srcreg);
26322 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
26323 promoted_val, vec_promoted_val,
26324 count_exp, align, desired_align,
26325 issetmem);
26326 /* At most desired_align - align bytes are copied. */
26327 if (min_size < (unsigned)(desired_align - align))
26328 min_size = 0;
26329 else
26330 min_size -= desired_align - align;
26331 }
26332 else
26333 {
26334 /* If we know how many bytes need to be stored before dst is
26335 sufficiently aligned, maintain aliasing info accurately. */
26336 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
26337 srcreg,
26338 promoted_val,
26339 vec_promoted_val,
26340 desired_align,
26341 align_bytes,
26342 issetmem);
26343
26344 count_exp = plus_constant (counter_mode (count_exp),
26345 count_exp, -align_bytes);
26346 count -= align_bytes;
26347 min_size -= align_bytes;
26348 max_size -= align_bytes;
26349 }
26350 if (need_zero_guard
26351 && min_size < (unsigned HOST_WIDE_INT) size_needed
26352 && (count < (unsigned HOST_WIDE_INT) size_needed
26353 || (align_bytes == 0
26354 && count < ((unsigned HOST_WIDE_INT) size_needed
26355 + desired_align - align))))
26356 {
26357 /* It is possible that we copied enough so the main loop will not
26358 execute. */
26359 gcc_assert (size_needed > 1);
26360 if (label == NULL_RTX)
26361 label = gen_label_rtx ();
26362 emit_cmp_and_jump_insns (count_exp,
26363 GEN_INT (size_needed),
26364 LTU, 0, counter_mode (count_exp), 1, label);
26365 if (expected_size == -1
26366 || expected_size < (desired_align - align) / 2 + size_needed)
26367 predict_jump (REG_BR_PROB_BASE * 20 / 100);
26368 else
26369 predict_jump (REG_BR_PROB_BASE * 60 / 100);
26370 }
26371 }
26372 if (label && size_needed == 1)
26373 {
26374 emit_label (label);
26375 LABEL_NUSES (label) = 1;
26376 label = NULL;
26377 epilogue_size_needed = 1;
26378 if (issetmem)
26379 promoted_val = val_exp;
26380 }
26381 else if (label == NULL_RTX && !misaligned_prologue_used)
26382 epilogue_size_needed = size_needed;
26383
26384 /* Step 3: Main loop. */
26385
26386 switch (alg)
26387 {
26388 case libcall:
26389 case no_stringop:
26390 case last_alg:
26391 gcc_unreachable ();
26392 case loop_1_byte:
26393 case loop:
26394 case unrolled_loop:
26395 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
26396 count_exp, move_mode, unroll_factor,
26397 expected_size, issetmem);
26398 break;
26399 case vector_loop:
26400 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
26401 vec_promoted_val, count_exp, move_mode,
26402 unroll_factor, expected_size, issetmem);
26403 break;
26404 case rep_prefix_8_byte:
26405 case rep_prefix_4_byte:
26406 case rep_prefix_1_byte:
26407 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
26408 val_exp, count_exp, move_mode, issetmem);
26409 break;
26410 }
26411 /* Properly adjust the offset of src and dest memory for aliasing. */
26412 if (CONST_INT_P (count_exp))
26413 {
26414 if (!issetmem)
26415 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
26416 (count / size_needed) * size_needed);
26417 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
26418 (count / size_needed) * size_needed);
26419 }
26420 else
26421 {
26422 if (!issetmem)
26423 src = change_address (src, BLKmode, srcreg);
26424 dst = change_address (dst, BLKmode, destreg);
26425 }
26426
26427 /* Step 4: Epilogue to copy the remaining bytes. */
26428 epilogue:
26429 if (label)
26430 {
26431 /* When the main loop is done, COUNT_EXP might hold original count,
26432 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
26433 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
26434 bytes. Compensate if needed. */
26435
26436 if (size_needed < epilogue_size_needed)
26437 {
26438 tmp =
26439 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
26440 GEN_INT (size_needed - 1), count_exp, 1,
26441 OPTAB_DIRECT);
26442 if (tmp != count_exp)
26443 emit_move_insn (count_exp, tmp);
26444 }
26445 emit_label (label);
26446 LABEL_NUSES (label) = 1;
26447 }
26448
26449 if (count_exp != const0_rtx && epilogue_size_needed > 1)
26450 {
26451 if (force_loopy_epilogue)
26452 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
26453 epilogue_size_needed);
26454 else
26455 {
26456 if (issetmem)
26457 expand_setmem_epilogue (dst, destreg, promoted_val,
26458 vec_promoted_val, count_exp,
26459 epilogue_size_needed);
26460 else
26461 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
26462 epilogue_size_needed);
26463 }
26464 }
26465 if (jump_around_label)
26466 emit_label (jump_around_label);
26467 return true;
26468 }
26469
26470
26471 /* Expand the appropriate insns for doing strlen if not just doing
26472 repnz; scasb
26473
26474 out = result, initialized with the start address
26475 align_rtx = alignment of the address.
26476 scratch = scratch register, initialized with the start address when
26477 not aligned, otherwise undefined
26478
26479 This is just the body. It needs the initializations mentioned above and
26480 some address computing at the end. These things are done in i386.md. */
26481
26482 static void
26483 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
26484 {
26485 int align;
26486 rtx tmp;
26487 rtx_code_label *align_2_label = NULL;
26488 rtx_code_label *align_3_label = NULL;
26489 rtx_code_label *align_4_label = gen_label_rtx ();
26490 rtx_code_label *end_0_label = gen_label_rtx ();
26491 rtx mem;
26492 rtx tmpreg = gen_reg_rtx (SImode);
26493 rtx scratch = gen_reg_rtx (SImode);
26494 rtx cmp;
26495
26496 align = 0;
26497 if (CONST_INT_P (align_rtx))
26498 align = INTVAL (align_rtx);
26499
26500 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
26501
26502 /* Is there a known alignment and is it less than 4? */
26503 if (align < 4)
26504 {
26505 rtx scratch1 = gen_reg_rtx (Pmode);
26506 emit_move_insn (scratch1, out);
26507 /* Is there a known alignment and is it not 2? */
26508 if (align != 2)
26509 {
26510 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
26511 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
26512
26513 /* Leave just the 3 lower bits. */
26514 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
26515 NULL_RTX, 0, OPTAB_WIDEN);
26516
26517 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
26518 Pmode, 1, align_4_label);
26519 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
26520 Pmode, 1, align_2_label);
26521 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
26522 Pmode, 1, align_3_label);
26523 }
26524 else
26525 {
26526 /* Since the alignment is 2, we have to check 2 or 0 bytes;
26527 check if it is aligned to 4 bytes. */
26528
26529 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
26530 NULL_RTX, 0, OPTAB_WIDEN);
26531
26532 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
26533 Pmode, 1, align_4_label);
26534 }
26535
26536 mem = change_address (src, QImode, out);
26537
26538 /* Now compare the bytes. */
26539
26540 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
26541 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
26542 QImode, 1, end_0_label);
26543
26544 /* Increment the address. */
26545 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
26546
26547 /* Not needed with an alignment of 2 */
26548 if (align != 2)
26549 {
26550 emit_label (align_2_label);
26551
26552 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
26553 end_0_label);
26554
26555 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
26556
26557 emit_label (align_3_label);
26558 }
26559
26560 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
26561 end_0_label);
26562
26563 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
26564 }
26565
26566 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
26567 align this loop; that only makes programs bigger and does not help
26568 speed them up. */
26569 emit_label (align_4_label);
26570
26571 mem = change_address (src, SImode, out);
26572 emit_move_insn (scratch, mem);
26573 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
26574
26575 /* This formula yields a nonzero result iff one of the bytes is zero.
26576 This saves three branches inside the loop and many cycles. */
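/* With W being the word just loaded, the insns below compute
   (W - 0x01010101) & ~W & 0x80808080,
   which is nonzero exactly when some byte of W is zero.  */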
26577
26578 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
26579 emit_insn (gen_one_cmplsi2 (scratch, scratch));
26580 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
26581 emit_insn (gen_andsi3 (tmpreg, tmpreg,
26582 gen_int_mode (0x80808080, SImode)));
26583 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
26584 align_4_label);
26585
26586 if (TARGET_CMOVE)
26587 {
26588 rtx reg = gen_reg_rtx (SImode);
26589 rtx reg2 = gen_reg_rtx (Pmode);
26590 emit_move_insn (reg, tmpreg);
26591 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
26592
26593 /* If zero is not in the first two bytes, move two bytes forward. */
26594 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
26595 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
26596 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
26597 emit_insn (gen_rtx_SET (tmpreg,
26598 gen_rtx_IF_THEN_ELSE (SImode, tmp,
26599 reg,
26600 tmpreg)));
26601 /* Emit lea manually to avoid clobbering of flags. */
26602 emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));
26603
26604 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
26605 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
26606 emit_insn (gen_rtx_SET (out,
26607 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
26608 reg2,
26609 out)));
26610 }
26611 else
26612 {
26613 rtx_code_label *end_2_label = gen_label_rtx ();
26614 /* Is zero in the first two bytes? */
26615
26616 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
26617 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
26618 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
26619 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
26620 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
26621 pc_rtx);
26622 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
26623 JUMP_LABEL (tmp) = end_2_label;
26624
26625 /* Not in the first two. Move two bytes forward. */
26626 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
26627 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
26628
26629 emit_label (end_2_label);
26630
26631 }
26632
26633 /* Avoid branch in fixing the byte. */
26634 tmpreg = gen_lowpart (QImode, tmpreg);
26635 emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg));
26636 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
26637 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
26638 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
26639
26640 emit_label (end_0_label);
26641 }
26642
26643 /* Expand strlen. */
26644
26645 bool
26646 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
26647 {
26648 rtx addr, scratch1, scratch2, scratch3, scratch4;
26649
26650 /* The generic case of the strlen expander is long. Avoid expanding it
26651 unless TARGET_INLINE_ALL_STRINGOPS. */
26652
26653 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
26654 && !TARGET_INLINE_ALL_STRINGOPS
26655 && !optimize_insn_for_size_p ()
26656 && (!CONST_INT_P (align) || INTVAL (align) < 4))
26657 return false;
26658
26659 addr = force_reg (Pmode, XEXP (src, 0));
26660 scratch1 = gen_reg_rtx (Pmode);
26661
26662 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
26663 && !optimize_insn_for_size_p ())
26664 {
26665 /* It seems that some optimizers do not combine a call like
26666 foo(strlen(bar), strlen(bar));
26667 when the move and the subtraction are done here. The length is
26668 calculated just once when these instructions are done inside
26669 output_strlen_unroll(). But since &bar[strlen(bar)] is
26670 often used and this uses one fewer register for the lifetime of
26671 output_strlen_unroll(), this is better. */
26672
26673 emit_move_insn (out, addr);
26674
26675 ix86_expand_strlensi_unroll_1 (out, src, align);
26676
26677 /* strlensi_unroll_1 returns the address of the zero at the end of
26678 the string, like memchr(), so compute the length by subtracting
26679 the start address. */
26680 emit_insn (ix86_gen_sub3 (out, out, addr));
26681 }
26682 else
26683 {
26684 rtx unspec;
26685
26686 /* Can't use this if the user has appropriated eax, ecx, or edi. */
26687 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
26688 return false;
26689
26690 scratch2 = gen_reg_rtx (Pmode);
26691 scratch3 = gen_reg_rtx (Pmode);
26692 scratch4 = force_reg (Pmode, constm1_rtx);
26693
26694 emit_move_insn (scratch3, addr);
26695 eoschar = force_reg (QImode, eoschar);
26696
26697 src = replace_equiv_address_nv (src, scratch3);
26698
26699 /* If .md starts supporting :P, this can be done in .md. */
26700 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
26701 scratch4), UNSPEC_SCAS);
26702 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
26703 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
26704 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
26705 }
26706 return true;
26707 }
26708
26709 /* For a given symbol (function) construct code to compute the address of its
26710 PLT entry in the large x86-64 PIC model. */
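/* The emitted sequence is simply a move of symbol@PLTOFF (as a 64-bit
   constant) into a fresh register followed by an add of the PIC base
   register.  */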
26711 static rtx
26712 construct_plt_address (rtx symbol)
26713 {
26714 rtx tmp, unspec;
26715
26716 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
26717 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
26718 gcc_assert (Pmode == DImode);
26719
26720 tmp = gen_reg_rtx (Pmode);
26721 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
26722
26723 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
26724 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
26725 return tmp;
26726 }
26727
26728 rtx
26729 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
26730 rtx callarg2,
26731 rtx pop, bool sibcall)
26732 {
26733 rtx vec[3];
26734 rtx use = NULL, call;
26735 unsigned int vec_len = 0;
26736
26737 if (pop == const0_rtx)
26738 pop = NULL;
26739 gcc_assert (!TARGET_64BIT || !pop);
26740
26741 if (TARGET_MACHO && !TARGET_64BIT)
26742 {
26743 #if TARGET_MACHO
26744 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
26745 fnaddr = machopic_indirect_call_target (fnaddr);
26746 #endif
26747 }
26748 else
26749 {
26750 /* Static functions and indirect calls don't need the pic register. Also,
26751 check if PLT was explicitly avoided via no-plt or "noplt" attribute, making
26752 it an indirect call. */
26753 if (flag_pic
26754 && (!TARGET_64BIT
26755 || (ix86_cmodel == CM_LARGE_PIC
26756 && DEFAULT_ABI != MS_ABI))
26757 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
26758 && !SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))
26759 && flag_plt
26760 && (SYMBOL_REF_DECL ((XEXP (fnaddr, 0))) == NULL_TREE
26761 || !lookup_attribute ("noplt",
26762 DECL_ATTRIBUTES (SYMBOL_REF_DECL (XEXP (fnaddr, 0))))))
26763 {
26764 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
26765 if (ix86_use_pseudo_pic_reg ())
26766 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
26767 pic_offset_table_rtx);
26768 }
26769 }
26770
26771 /* Skip setting up RAX register for -mskip-rax-setup when there are no
26772 parameters passed in vector registers. */
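  /* For a varargs call the x86-64 SysV ABI passes an upper bound on the
     number of vector registers used in %al; e.g. printf ("%f", x) is
     normally called with al = 1, and al = 0 when no SSE arguments are
     passed.  */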
26773 if (TARGET_64BIT
26774 && (INTVAL (callarg2) > 0
26775 || (INTVAL (callarg2) == 0
26776 && (TARGET_SSE || !flag_skip_rax_setup))))
26777 {
26778 rtx al = gen_rtx_REG (QImode, AX_REG);
26779 emit_move_insn (al, callarg2);
26780 use_reg (&use, al);
26781 }
26782
26783 if (ix86_cmodel == CM_LARGE_PIC
26784 && !TARGET_PECOFF
26785 && MEM_P (fnaddr)
26786 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
26787 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
26788 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
26789 else if (sibcall
26790 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
26791 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
26792 {
26793 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
26794 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
26795 }
26796
26797 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
26798
26799 if (retval)
26800 {
26801 /* Add the bound registers to the destination in case a
26802 pointer with bounds may be returned. */
26803 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
26804 {
26805 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
26806 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
26807 if (GET_CODE (retval) == PARALLEL)
26808 {
26809 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
26810 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
26811 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
26812 retval = chkp_join_splitted_slot (retval, par);
26813 }
26814 else
26815 {
26816 retval = gen_rtx_PARALLEL (VOIDmode,
26817 gen_rtvec (3, retval, b0, b1));
26818 chkp_put_regs_to_expr_list (retval);
26819 }
26820 }
26821
26822 call = gen_rtx_SET (retval, call);
26823 }
26824 vec[vec_len++] = call;
26825
26826 if (pop)
26827 {
26828 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
26829 pop = gen_rtx_SET (stack_pointer_rtx, pop);
26830 vec[vec_len++] = pop;
26831 }
26832
26833 if (TARGET_64BIT_MS_ABI
26834 && (!callarg2 || INTVAL (callarg2) != -2))
26835 {
26836 int const cregs_size
26837 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
26838 int i;
26839
26840 for (i = 0; i < cregs_size; i++)
26841 {
26842 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
26843 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
26844
26845 clobber_reg (&use, gen_rtx_REG (mode, regno));
26846 }
26847 }
26848
26849 if (vec_len > 1)
26850 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
26851 call = emit_call_insn (call);
26852 if (use)
26853 CALL_INSN_FUNCTION_USAGE (call) = use;
26854
26855 return call;
26856 }
26857
26858 /* Return true if the function being called was marked with attribute "noplt"
26859 or using -fno-plt and we are compiling for non-PIC and x86_64. We need to
26860 handle the non-PIC case in the backend because there is no easy interface
26861 for the front-end to force non-PLT calls to use the GOT. This is currently
26862 used only with 64-bit ELF targets to call the function marked "noplt"
26863 indirectly. */
26864
26865 static bool
26866 ix86_nopic_noplt_attribute_p (rtx call_op)
26867 {
26868 if (flag_pic || ix86_cmodel == CM_LARGE
26869 || !TARGET_64BIT || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
26870 || SYMBOL_REF_LOCAL_P (call_op))
26871 return false;
26872
26873 tree symbol_decl = SYMBOL_REF_DECL (call_op);
26874
26875 if (!flag_plt
26876 || (symbol_decl != NULL_TREE
26877 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
26878 return true;
26879
26880 return false;
26881 }
26882
26883 /* Output the assembly for a call instruction. */
26884
26885 const char *
26886 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
26887 {
26888 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
26889 bool seh_nop_p = false;
26890 const char *xasm;
26891
26892 if (SIBLING_CALL_P (insn))
26893 {
26894 if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
26895 xasm = "%!jmp\t*%p0@GOTPCREL(%%rip)";
26896 else if (direct_p)
26897 xasm = "%!jmp\t%P0";
26898 /* SEH epilogue detection requires the indirect branch case
26899 to include REX.W. */
26900 else if (TARGET_SEH)
26901 xasm = "%!rex.W jmp %A0";
26902 else
26903 xasm = "%!jmp\t%A0";
26904
26905 output_asm_insn (xasm, &call_op);
26906 return "";
26907 }
26908
26909 /* SEH unwinding can require an extra nop to be emitted in several
26910 circumstances. Determine if we have one of those. */
26911 if (TARGET_SEH)
26912 {
26913 rtx_insn *i;
26914
26915 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
26916 {
26917 /* If we get to another real insn, we don't need the nop. */
26918 if (INSN_P (i))
26919 break;
26920
26921 /* If we get to the epilogue note, prevent a catch region from
26922 being adjacent to the standard epilogue sequence. If non-
26923 call-exceptions, we'll have done this during epilogue emission. */
26924 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
26925 && !flag_non_call_exceptions
26926 && !can_throw_internal (insn))
26927 {
26928 seh_nop_p = true;
26929 break;
26930 }
26931 }
26932
26933 /* If we didn't find a real insn following the call, prevent the
26934 unwinder from looking into the next function. */
26935 if (i == NULL)
26936 seh_nop_p = true;
26937 }
26938
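  /* For a "noplt"/-fno-plt function in non-PIC 64-bit code the call goes
     through the GOT slot, e.g. "call *foo@GOTPCREL(%rip)"; otherwise a
     direct "call foo" or an indirect "call *%reg" / "call *mem" is
     emitted below.  */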
26939 if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
26940 xasm = "%!call\t*%p0@GOTPCREL(%%rip)";
26941 else if (direct_p)
26942 xasm = "%!call\t%P0";
26943 else
26944 xasm = "%!call\t%A0";
26945
26946 output_asm_insn (xasm, &call_op);
26947
26948 if (seh_nop_p)
26949 return "nop";
26950
26951 return "";
26952 }
26953 \f
26954 /* Clear stack slot assignments remembered from previous functions.
26955 This is called from INIT_EXPANDERS once before RTL is emitted for each
26956 function. */
26957
26958 static struct machine_function *
26959 ix86_init_machine_status (void)
26960 {
26961 struct machine_function *f;
26962
26963 f = ggc_cleared_alloc<machine_function> ();
26964 f->use_fast_prologue_epilogue_nregs = -1;
26965 f->call_abi = ix86_abi;
26966
26967 return f;
26968 }
26969
26970 /* Return a MEM corresponding to a stack slot with mode MODE.
26971 Allocate a new slot if necessary.
26972
26973 The RTL for a function can have several slots available: N is
26974 which slot to use. */
26975
26976 rtx
26977 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
26978 {
26979 struct stack_local_entry *s;
26980
26981 gcc_assert (n < MAX_386_STACK_LOCALS);
26982
26983 for (s = ix86_stack_locals; s; s = s->next)
26984 if (s->mode == mode && s->n == n)
26985 return validize_mem (copy_rtx (s->rtl));
26986
26987 s = ggc_alloc<stack_local_entry> ();
26988 s->n = n;
26989 s->mode = mode;
26990 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
26991
26992 s->next = ix86_stack_locals;
26993 ix86_stack_locals = s;
26994 return validize_mem (copy_rtx (s->rtl));
26995 }
26996
26997 static void
26998 ix86_instantiate_decls (void)
26999 {
27000 struct stack_local_entry *s;
27001
27002 for (s = ix86_stack_locals; s; s = s->next)
27003 if (s->rtl != NULL_RTX)
27004 instantiate_decl_rtl (s->rtl);
27005 }
27006 \f
27007 /* Check whether x86 address PARTS is a pc-relative address. */
27008
27009 static bool
27010 rip_relative_addr_p (struct ix86_address *parts)
27011 {
27012 rtx base, index, disp;
27013
27014 base = parts->base;
27015 index = parts->index;
27016 disp = parts->disp;
27017
27018 if (disp && !base && !index)
27019 {
27020 if (TARGET_64BIT)
27021 {
27022 rtx symbol = disp;
27023
27024 if (GET_CODE (disp) == CONST)
27025 symbol = XEXP (disp, 0);
27026 if (GET_CODE (symbol) == PLUS
27027 && CONST_INT_P (XEXP (symbol, 1)))
27028 symbol = XEXP (symbol, 0);
27029
27030 if (GET_CODE (symbol) == LABEL_REF
27031 || (GET_CODE (symbol) == SYMBOL_REF
27032 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
27033 || (GET_CODE (symbol) == UNSPEC
27034 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
27035 || XINT (symbol, 1) == UNSPEC_PCREL
27036 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
27037 return true;
27038 }
27039 }
27040 return false;
27041 }
27042
27043 /* Calculate the length of the memory address in the instruction encoding.
27044 Includes the addr32 prefix, but does not include the one-byte modrm, opcode,
27045 or other prefixes.  We never generate an addr32 prefix for the LEA insn. */
27046
27047 int
27048 memory_address_length (rtx addr, bool lea)
27049 {
27050 struct ix86_address parts;
27051 rtx base, index, disp;
27052 int len;
27053 int ok;
27054
27055 if (GET_CODE (addr) == PRE_DEC
27056 || GET_CODE (addr) == POST_INC
27057 || GET_CODE (addr) == PRE_MODIFY
27058 || GET_CODE (addr) == POST_MODIFY)
27059 return 0;
27060
27061 ok = ix86_decompose_address (addr, &parts);
27062 gcc_assert (ok);
27063
27064 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
27065
27066 /* If this is not LEA instruction, add the length of addr32 prefix. */
27067 if (TARGET_64BIT && !lea
27068 && (SImode_address_operand (addr, VOIDmode)
27069 || (parts.base && GET_MODE (parts.base) == SImode)
27070 || (parts.index && GET_MODE (parts.index) == SImode)))
27071 len++;
27072
27073 base = parts.base;
27074 index = parts.index;
27075 disp = parts.disp;
27076
27077 if (base && SUBREG_P (base))
27078 base = SUBREG_REG (base);
27079 if (index && SUBREG_P (index))
27080 index = SUBREG_REG (index);
27081
27082 gcc_assert (base == NULL_RTX || REG_P (base));
27083 gcc_assert (index == NULL_RTX || REG_P (index));
27084
27085 /* Rule of thumb:
27086 - esp as the base always wants an index,
27087 - ebp as the base always wants a displacement,
27088 - r12 as the base always wants an index,
27089 - r13 as the base always wants a displacement. */
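  /* For example (counting only the address bytes as this function does):
     4(%esp) is 2 bytes, a disp8 plus the SIB byte that an esp base forces;
     symbol(%rip) is 4 bytes, while an absolute disp32 in 64-bit code needs
     an extra SIB byte, i.e. 5.  */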
27090
27091 /* Register Indirect. */
27092 if (base && !index && !disp)
27093 {
27094 /* esp (for its index) and ebp (for its displacement) need
27095 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
27096 code. */
27097 if (base == arg_pointer_rtx
27098 || base == frame_pointer_rtx
27099 || REGNO (base) == SP_REG
27100 || REGNO (base) == BP_REG
27101 || REGNO (base) == R12_REG
27102 || REGNO (base) == R13_REG)
27103 len++;
27104 }
27105
27106 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
27107 is not disp32, but disp32(%rip), so for disp32
27108 SIB byte is needed, unless print_operand_address
27109 optimizes it into disp32(%rip) or (%rip) is implied
27110 by UNSPEC. */
27111 else if (disp && !base && !index)
27112 {
27113 len += 4;
27114 if (TARGET_64BIT && !rip_relative_addr_p (&parts))
27115 len++;
27116 }
27117 else
27118 {
27119 /* Find the length of the displacement constant. */
27120 if (disp)
27121 {
27122 if (base && satisfies_constraint_K (disp))
27123 len += 1;
27124 else
27125 len += 4;
27126 }
27127 /* ebp always wants a displacement. Similarly r13. */
27128 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
27129 len++;
27130
27131 /* An index requires the two-byte modrm form.... */
27132 if (index
27133 /* ...like esp (or r12), which always wants an index. */
27134 || base == arg_pointer_rtx
27135 || base == frame_pointer_rtx
27136 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
27137 len++;
27138 }
27139
27140 return len;
27141 }
27142
27143 /* Compute the default value for the "length_immediate" attribute.  When
27144 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
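/* For example, "addl $200, %eax" needs a 4-byte imm32, while
   "addl $100, %eax" matched against a short-form alternative is counted
   as a single imm8 byte, since 100 fits in [-128, 127].  */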
27145 int
27146 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
27147 {
27148 int len = 0;
27149 int i;
27150 extract_insn_cached (insn);
27151 for (i = recog_data.n_operands - 1; i >= 0; --i)
27152 if (CONSTANT_P (recog_data.operand[i]))
27153 {
27154 enum attr_mode mode = get_attr_mode (insn);
27155
27156 gcc_assert (!len);
27157 if (shortform && CONST_INT_P (recog_data.operand[i]))
27158 {
27159 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
27160 switch (mode)
27161 {
27162 case MODE_QI:
27163 len = 1;
27164 continue;
27165 case MODE_HI:
27166 ival = trunc_int_for_mode (ival, HImode);
27167 break;
27168 case MODE_SI:
27169 ival = trunc_int_for_mode (ival, SImode);
27170 break;
27171 default:
27172 break;
27173 }
27174 if (IN_RANGE (ival, -128, 127))
27175 {
27176 len = 1;
27177 continue;
27178 }
27179 }
27180 switch (mode)
27181 {
27182 case MODE_QI:
27183 len = 1;
27184 break;
27185 case MODE_HI:
27186 len = 2;
27187 break;
27188 case MODE_SI:
27189 len = 4;
27190 break;
27191 /* Immediates for DImode instructions are encoded
27192 as 32bit sign extended values. */
27193 case MODE_DI:
27194 len = 4;
27195 break;
27196 default:
27197 fatal_insn ("unknown insn mode", insn);
27198 }
27199 }
27200 return len;
27201 }
27202
27203 /* Compute default value for "length_address" attribute. */
27204 int
27205 ix86_attr_length_address_default (rtx_insn *insn)
27206 {
27207 int i;
27208
27209 if (get_attr_type (insn) == TYPE_LEA)
27210 {
27211 rtx set = PATTERN (insn), addr;
27212
27213 if (GET_CODE (set) == PARALLEL)
27214 set = XVECEXP (set, 0, 0);
27215
27216 gcc_assert (GET_CODE (set) == SET);
27217
27218 addr = SET_SRC (set);
27219
27220 return memory_address_length (addr, true);
27221 }
27222
27223 extract_insn_cached (insn);
27224 for (i = recog_data.n_operands - 1; i >= 0; --i)
27225 if (MEM_P (recog_data.operand[i]))
27226 {
27227 constrain_operands_cached (insn, reload_completed);
27228 if (which_alternative != -1)
27229 {
27230 const char *constraints = recog_data.constraints[i];
27231 int alt = which_alternative;
27232
27233 while (*constraints == '=' || *constraints == '+')
27234 constraints++;
27235 while (alt-- > 0)
27236 while (*constraints++ != ',')
27237 ;
27238 /* Skip ignored operands. */
27239 if (*constraints == 'X')
27240 continue;
27241 }
27242 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
27243 }
27244 return 0;
27245 }
27246
27247 /* Compute default value for "length_vex" attribute. It includes
27248 2 or 3 byte VEX prefix and 1 opcode byte. */
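/* Roughly: the 2-byte (0xC5) VEX form can only encode REX.R, so VEX.W,
   an extended index/base register in a memory operand (REX.X/REX.B), or
   a non-0f opcode map forces the 3-byte (0xC4) form.  */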
27249
27250 int
27251 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
27252 bool has_vex_w)
27253 {
27254 int i;
27255
27256 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX.W bit
27257 requires the 3-byte VEX prefix. */
27258 if (!has_0f_opcode || has_vex_w)
27259 return 3 + 1;
27260
27261 /* We can always use 2 byte VEX prefix in 32bit. */
27262 if (!TARGET_64BIT)
27263 return 2 + 1;
27264
27265 extract_insn_cached (insn);
27266
27267 for (i = recog_data.n_operands - 1; i >= 0; --i)
27268 if (REG_P (recog_data.operand[i]))
27269 {
27270 /* REX.W bit uses 3 byte VEX prefix. */
27271 if (GET_MODE (recog_data.operand[i]) == DImode
27272 && GENERAL_REG_P (recog_data.operand[i]))
27273 return 3 + 1;
27274 }
27275 else
27276 {
27277 /* REX.X or REX.B bits use 3 byte VEX prefix. */
27278 if (MEM_P (recog_data.operand[i])
27279 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
27280 return 3 + 1;
27281 }
27282
27283 return 2 + 1;
27284 }
27285 \f
27286 /* Return the maximum number of instructions a cpu can issue. */
27287
27288 static int
27289 ix86_issue_rate (void)
27290 {
27291 switch (ix86_tune)
27292 {
27293 case PROCESSOR_PENTIUM:
27294 case PROCESSOR_LAKEMONT:
27295 case PROCESSOR_BONNELL:
27296 case PROCESSOR_SILVERMONT:
27297 case PROCESSOR_KNL:
27298 case PROCESSOR_INTEL:
27299 case PROCESSOR_K6:
27300 case PROCESSOR_BTVER2:
27301 case PROCESSOR_PENTIUM4:
27302 case PROCESSOR_NOCONA:
27303 return 2;
27304
27305 case PROCESSOR_PENTIUMPRO:
27306 case PROCESSOR_ATHLON:
27307 case PROCESSOR_K8:
27308 case PROCESSOR_AMDFAM10:
27309 case PROCESSOR_GENERIC:
27310 case PROCESSOR_BTVER1:
27311 return 3;
27312
27313 case PROCESSOR_BDVER1:
27314 case PROCESSOR_BDVER2:
27315 case PROCESSOR_BDVER3:
27316 case PROCESSOR_BDVER4:
27317 case PROCESSOR_ZNVER1:
27318 case PROCESSOR_CORE2:
27319 case PROCESSOR_NEHALEM:
27320 case PROCESSOR_SANDYBRIDGE:
27321 case PROCESSOR_HASWELL:
27322 return 4;
27323
27324 default:
27325 return 1;
27326 }
27327 }
27328
27329 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
27330 by DEP_INSN and nothing else set by DEP_INSN. */
27331
27332 static bool
27333 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
27334 {
27335 rtx set, set2;
27336
27337 /* Simplify the test for uninteresting insns. */
27338 if (insn_type != TYPE_SETCC
27339 && insn_type != TYPE_ICMOV
27340 && insn_type != TYPE_FCMOV
27341 && insn_type != TYPE_IBR)
27342 return false;
27343
27344 if ((set = single_set (dep_insn)) != 0)
27345 {
27346 set = SET_DEST (set);
27347 set2 = NULL_RTX;
27348 }
27349 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
27350 && XVECLEN (PATTERN (dep_insn), 0) == 2
27351 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
27352 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
27353 {
27354 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
27355 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
27356 }
27357 else
27358 return false;
27359
27360 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
27361 return false;
27362
27363 /* This test is true if the dependent insn reads the flags but
27364 not any other potentially set register. */
27365 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
27366 return false;
27367
27368 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
27369 return false;
27370
27371 return true;
27372 }
27373
27374 /* Return true iff USE_INSN has a memory address with operands set by
27375 SET_INSN. */
27376
27377 bool
27378 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
27379 {
27380 int i;
27381 extract_insn_cached (use_insn);
27382 for (i = recog_data.n_operands - 1; i >= 0; --i)
27383 if (MEM_P (recog_data.operand[i]))
27384 {
27385 rtx addr = XEXP (recog_data.operand[i], 0);
27386 return modified_in_p (addr, set_insn) != 0;
27387 }
27388 return false;
27389 }
27390
27391 /* Helper function for exact_store_load_dependency.
27392 Return true if addr is found in insn. */
27393 static bool
27394 exact_dependency_1 (rtx addr, rtx insn)
27395 {
27396 enum rtx_code code;
27397 const char *format_ptr;
27398 int i, j;
27399
27400 code = GET_CODE (insn);
27401 switch (code)
27402 {
27403 case MEM:
27404 if (rtx_equal_p (addr, insn))
27405 return true;
27406 break;
27407 case REG:
27408 CASE_CONST_ANY:
27409 case SYMBOL_REF:
27410 case CODE_LABEL:
27411 case PC:
27412 case CC0:
27413 case EXPR_LIST:
27414 return false;
27415 default:
27416 break;
27417 }
27418
27419 format_ptr = GET_RTX_FORMAT (code);
27420 for (i = 0; i < GET_RTX_LENGTH (code); i++)
27421 {
27422 switch (*format_ptr++)
27423 {
27424 case 'e':
27425 if (exact_dependency_1 (addr, XEXP (insn, i)))
27426 return true;
27427 break;
27428 case 'E':
27429 for (j = 0; j < XVECLEN (insn, i); j++)
27430 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
27431 return true;
27432 break;
27433 }
27434 }
27435 return false;
27436 }
27437
27438 /* Return true if there exists exact dependency for store & load, i.e.
27439 the same memory address is used in them. */
27440 static bool
27441 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
27442 {
27443 rtx set1, set2;
27444
27445 set1 = single_set (store);
27446 if (!set1)
27447 return false;
27448 if (!MEM_P (SET_DEST (set1)))
27449 return false;
27450 set2 = single_set (load);
27451 if (!set2)
27452 return false;
27453 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
27454 return true;
27455 return false;
27456 }
27457
27458 static int
27459 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
27460 {
27461 enum attr_type insn_type, dep_insn_type;
27462 enum attr_memory memory;
27463 rtx set, set2;
27464 int dep_insn_code_number;
27465
27466 /* Anti and output dependencies have zero cost on all CPUs. */
27467 if (REG_NOTE_KIND (link) != 0)
27468 return 0;
27469
27470 dep_insn_code_number = recog_memoized (dep_insn);
27471
27472 /* If we can't recognize the insns, we can't really do anything. */
27473 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
27474 return cost;
27475
27476 insn_type = get_attr_type (insn);
27477 dep_insn_type = get_attr_type (dep_insn);
27478
27479 switch (ix86_tune)
27480 {
27481 case PROCESSOR_PENTIUM:
27482 case PROCESSOR_LAKEMONT:
27483 /* Address Generation Interlock adds a cycle of latency. */
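      /* Classic example: "addl $4, %ebx" immediately followed by
	 "movl (%ebx), %eax" stalls the address generation of the load
	 by one cycle on the original Pentium.  */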
27484 if (insn_type == TYPE_LEA)
27485 {
27486 rtx addr = PATTERN (insn);
27487
27488 if (GET_CODE (addr) == PARALLEL)
27489 addr = XVECEXP (addr, 0, 0);
27490
27491 gcc_assert (GET_CODE (addr) == SET);
27492
27493 addr = SET_SRC (addr);
27494 if (modified_in_p (addr, dep_insn))
27495 cost += 1;
27496 }
27497 else if (ix86_agi_dependent (dep_insn, insn))
27498 cost += 1;
27499
27500 /* ??? Compares pair with jump/setcc. */
27501 if (ix86_flags_dependent (insn, dep_insn, insn_type))
27502 cost = 0;
27503
27504 /* Floating point stores require value to be ready one cycle earlier. */
27505 if (insn_type == TYPE_FMOV
27506 && get_attr_memory (insn) == MEMORY_STORE
27507 && !ix86_agi_dependent (dep_insn, insn))
27508 cost += 1;
27509 break;
27510
27511 case PROCESSOR_PENTIUMPRO:
27512 /* INT->FP conversion is expensive. */
27513 if (get_attr_fp_int_src (dep_insn))
27514 cost += 5;
27515
27516 /* There is one cycle extra latency between an FP op and a store. */
27517 if (insn_type == TYPE_FMOV
27518 && (set = single_set (dep_insn)) != NULL_RTX
27519 && (set2 = single_set (insn)) != NULL_RTX
27520 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
27521 && MEM_P (SET_DEST (set2)))
27522 cost += 1;
27523
27524 memory = get_attr_memory (insn);
27525
27526 /* Show ability of reorder buffer to hide latency of load by executing
27527 in parallel with previous instruction in case
27528 previous instruction is not needed to compute the address. */
27529 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
27530 && !ix86_agi_dependent (dep_insn, insn))
27531 {
27532 /* Claim that moves take one cycle, as the core can issue one load
27533 at a time and the next load can start a cycle later. */
27534 if (dep_insn_type == TYPE_IMOV
27535 || dep_insn_type == TYPE_FMOV)
27536 cost = 1;
27537 else if (cost > 1)
27538 cost--;
27539 }
27540 break;
27541
27542 case PROCESSOR_K6:
27543 /* The esp dependency is resolved before
27544 the instruction is really finished. */
27545 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
27546 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
27547 return 1;
27548
27549 /* INT->FP conversion is expensive. */
27550 if (get_attr_fp_int_src (dep_insn))
27551 cost += 5;
27552
27553 memory = get_attr_memory (insn);
27554
27555 /* Show ability of reorder buffer to hide latency of load by executing
27556 in parallel with previous instruction in case
27557 previous instruction is not needed to compute the address. */
27558 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
27559 && !ix86_agi_dependent (dep_insn, insn))
27560 {
27561 /* Claim that moves take one cycle, as the core can issue one load
27562 at a time and the next load can start a cycle later. */
27563 if (dep_insn_type == TYPE_IMOV
27564 || dep_insn_type == TYPE_FMOV)
27565 cost = 1;
27566 else if (cost > 2)
27567 cost -= 2;
27568 else
27569 cost = 1;
27570 }
27571 break;
27572
27573 case PROCESSOR_AMDFAM10:
27574 case PROCESSOR_BDVER1:
27575 case PROCESSOR_BDVER2:
27576 case PROCESSOR_BDVER3:
27577 case PROCESSOR_BDVER4:
27578 case PROCESSOR_ZNVER1:
27579 case PROCESSOR_BTVER1:
27580 case PROCESSOR_BTVER2:
27581 case PROCESSOR_GENERIC:
27582 /* The stack engine allows push and pop instructions to execute in parallel. */
27583 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
27584 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
27585 return 0;
27586 /* FALLTHRU */
27587
27588 case PROCESSOR_ATHLON:
27589 case PROCESSOR_K8:
27590 memory = get_attr_memory (insn);
27591
27592 /* Show ability of reorder buffer to hide latency of load by executing
27593 in parallel with previous instruction in case
27594 previous instruction is not needed to compute the address. */
27595 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
27596 && !ix86_agi_dependent (dep_insn, insn))
27597 {
27598 enum attr_unit unit = get_attr_unit (insn);
27599 int loadcost = 3;
27600
27601 /* Because of the difference between the length of integer and
27602 floating unit pipeline preparation stages, the memory operands
27603 for floating point are cheaper.
27604
27605 ??? For Athlon the difference is most probably 2. */
27606 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
27607 loadcost = 3;
27608 else
27609 loadcost = TARGET_ATHLON ? 2 : 0;
27610
27611 if (cost >= loadcost)
27612 cost -= loadcost;
27613 else
27614 cost = 0;
27615 }
27616 break;
27617
27618 case PROCESSOR_CORE2:
27619 case PROCESSOR_NEHALEM:
27620 case PROCESSOR_SANDYBRIDGE:
27621 case PROCESSOR_HASWELL:
27622 /* The stack engine allows push and pop instructions to execute in parallel. */
27623 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
27624 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
27625 return 0;
27626
27627 memory = get_attr_memory (insn);
27628
27629 /* Show ability of reorder buffer to hide latency of load by executing
27630 in parallel with previous instruction in case
27631 previous instruction is not needed to compute the address. */
27632 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
27633 && !ix86_agi_dependent (dep_insn, insn))
27634 {
27635 if (cost >= 4)
27636 cost -= 4;
27637 else
27638 cost = 0;
27639 }
27640 break;
27641
27642 case PROCESSOR_SILVERMONT:
27643 case PROCESSOR_KNL:
27644 case PROCESSOR_INTEL:
27645 if (!reload_completed)
27646 return cost;
27647
27648 /* Increase cost of integer loads. */
27649 memory = get_attr_memory (dep_insn);
27650 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
27651 {
27652 enum attr_unit unit = get_attr_unit (dep_insn);
27653 if (unit == UNIT_INTEGER && cost == 1)
27654 {
27655 if (memory == MEMORY_LOAD)
27656 cost = 3;
27657 else
27658 {
27659 /* Increase cost of ld/st for short int types only
27660 because of store forwarding issue. */
27661 rtx set = single_set (dep_insn);
27662 if (set && (GET_MODE (SET_DEST (set)) == QImode
27663 || GET_MODE (SET_DEST (set)) == HImode))
27664 {
27665 /* Increase cost of store/load insn if exact
27666 dependence exists and it is load insn. */
27667 enum attr_memory insn_memory = get_attr_memory (insn);
27668 if (insn_memory == MEMORY_LOAD
27669 && exact_store_load_dependency (dep_insn, insn))
27670 cost = 3;
27671 }
27672 }
27673 }
27674 }
27675
27676 default:
27677 break;
27678 }
27679
27680 return cost;
27681 }
27682
27683 /* How many alternative schedules to try. This should be as wide as the
27684 scheduling freedom in the DFA, but no wider. Making this value too
27685 large results in extra work for the scheduler. */
27686
27687 static int
27688 ia32_multipass_dfa_lookahead (void)
27689 {
27690 switch (ix86_tune)
27691 {
27692 case PROCESSOR_PENTIUM:
27693 case PROCESSOR_LAKEMONT:
27694 return 2;
27695
27696 case PROCESSOR_PENTIUMPRO:
27697 case PROCESSOR_K6:
27698 return 1;
27699
27700 case PROCESSOR_BDVER1:
27701 case PROCESSOR_BDVER2:
27702 case PROCESSOR_BDVER3:
27703 case PROCESSOR_BDVER4:
27704 /* We use lookahead value 4 for BD both before and after reload
27705 schedules. Plan is to have value 8 included for O3. */
27706 return 4;
27707
27708 case PROCESSOR_CORE2:
27709 case PROCESSOR_NEHALEM:
27710 case PROCESSOR_SANDYBRIDGE:
27711 case PROCESSOR_HASWELL:
27712 case PROCESSOR_BONNELL:
27713 case PROCESSOR_SILVERMONT:
27714 case PROCESSOR_KNL:
27715 case PROCESSOR_INTEL:
27716 /* Generally, we want haifa-sched:max_issue() to look ahead as far
27717 as many instructions can be executed on a cycle, i.e.,
27718 issue_rate. I wonder why tuning for many CPUs does not do this. */
27719 if (reload_completed)
27720 return ix86_issue_rate ();
27721 /* Don't use lookahead for pre-reload schedule to save compile time. */
27722 return 0;
27723
27724 default:
27725 return 0;
27726 }
27727 }
27728
27729 /* Return true if target platform supports macro-fusion. */
27730
27731 static bool
27732 ix86_macro_fusion_p ()
27733 {
27734 return TARGET_FUSE_CMP_AND_BRANCH;
27735 }
27736
27737 /* Check whether the current microarchitecture supports macro fusion
27738 for insn pair "CONDGEN + CONDJMP". Refer to
27739 "Intel Architectures Optimization Reference Manual". */
27740
27741 static bool
27742 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
27743 {
27744 rtx src, dest;
27745 enum rtx_code ccode;
27746 rtx compare_set = NULL_RTX, test_if, cond;
27747 rtx alu_set = NULL_RTX, addr = NULL_RTX;
27748
27749 if (!any_condjump_p (condjmp))
27750 return false;
27751
27752 if (get_attr_type (condgen) != TYPE_TEST
27753 && get_attr_type (condgen) != TYPE_ICMP
27754 && get_attr_type (condgen) != TYPE_INCDEC
27755 && get_attr_type (condgen) != TYPE_ALU)
27756 return false;
27757
27758 compare_set = single_set (condgen);
27759 if (compare_set == NULL_RTX
27760 && !TARGET_FUSE_ALU_AND_BRANCH)
27761 return false;
27762
27763 if (compare_set == NULL_RTX)
27764 {
27765 int i;
27766 rtx pat = PATTERN (condgen);
27767 for (i = 0; i < XVECLEN (pat, 0); i++)
27768 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
27769 {
27770 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
27771 if (GET_CODE (set_src) == COMPARE)
27772 compare_set = XVECEXP (pat, 0, i);
27773 else
27774 alu_set = XVECEXP (pat, 0, i);
27775 }
27776 }
27777 if (compare_set == NULL_RTX)
27778 return false;
27779 src = SET_SRC (compare_set);
27780 if (GET_CODE (src) != COMPARE)
27781 return false;
27782
27783 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
27784 supported. */
27785 if ((MEM_P (XEXP (src, 0))
27786 && CONST_INT_P (XEXP (src, 1)))
27787 || (MEM_P (XEXP (src, 1))
27788 && CONST_INT_P (XEXP (src, 0))))
27789 return false;
27790
27791 /* No fusion for RIP-relative address. */
27792 if (MEM_P (XEXP (src, 0)))
27793 addr = XEXP (XEXP (src, 0), 0);
27794 else if (MEM_P (XEXP (src, 1)))
27795 addr = XEXP (XEXP (src, 1), 0);
27796
27797 if (addr) {
27798 ix86_address parts;
27799 int ok = ix86_decompose_address (addr, &parts);
27800 gcc_assert (ok);
27801
27802 if (rip_relative_addr_p (&parts))
27803 return false;
27804 }
27805
27806 test_if = SET_SRC (pc_set (condjmp));
27807 cond = XEXP (test_if, 0);
27808 ccode = GET_CODE (cond);
27809 /* Check whether the conditional jump uses the Sign or Overflow flags. */
27810 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
27811 && (ccode == GE
27812 || ccode == GT
27813 || ccode == LE
27814 || ccode == LT))
27815 return false;
27816
27817 /* Return true for TYPE_TEST and TYPE_ICMP. */
27818 if (get_attr_type (condgen) == TYPE_TEST
27819 || get_attr_type (condgen) == TYPE_ICMP)
27820 return true;
27821
27822 /* The following handles the case of macro-fusion for alu + jmp. */
27823 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
27824 return false;
27825
27826 /* No fusion for alu op with memory destination operand. */
27827 dest = SET_DEST (alu_set);
27828 if (MEM_P (dest))
27829 return false;
27830
27831 /* Macro-fusion for inc/dec + unsigned conditional jump is not
27832 supported. */
27833 if (get_attr_type (condgen) == TYPE_INCDEC
27834 && (ccode == GEU
27835 || ccode == GTU
27836 || ccode == LEU
27837 || ccode == LTU))
27838 return false;
27839
27840 return true;
27841 }
27842
27843 /* Try to reorder the ready list to take advantage of Atom pipelined IMUL
27844 execution.  It is applied if
27845 (1) an IMUL instruction is on the top of the list;
27846 (2) there is exactly one producer of an independent IMUL instruction in
27847 the ready list.
27848 Return the index of the IMUL producer if it was found and -1 otherwise. */
27849 static int
27850 do_reorder_for_imul (rtx_insn **ready, int n_ready)
27851 {
27852 rtx_insn *insn;
27853 rtx set, insn1, insn2;
27854 sd_iterator_def sd_it;
27855 dep_t dep;
27856 int index = -1;
27857 int i;
27858
27859 if (!TARGET_BONNELL)
27860 return index;
27861
27862 /* Check that IMUL instruction is on the top of ready list. */
27863 insn = ready[n_ready - 1];
27864 set = single_set (insn);
27865 if (!set)
27866 return index;
27867 if (!(GET_CODE (SET_SRC (set)) == MULT
27868 && GET_MODE (SET_SRC (set)) == SImode))
27869 return index;
27870
27871 /* Search for producer of independent IMUL instruction. */
27872 for (i = n_ready - 2; i >= 0; i--)
27873 {
27874 insn = ready[i];
27875 if (!NONDEBUG_INSN_P (insn))
27876 continue;
27877 /* Skip IMUL instruction. */
27878 insn2 = PATTERN (insn);
27879 if (GET_CODE (insn2) == PARALLEL)
27880 insn2 = XVECEXP (insn2, 0, 0);
27881 if (GET_CODE (insn2) == SET
27882 && GET_CODE (SET_SRC (insn2)) == MULT
27883 && GET_MODE (SET_SRC (insn2)) == SImode)
27884 continue;
27885
27886 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
27887 {
27888 rtx con;
27889 con = DEP_CON (dep);
27890 if (!NONDEBUG_INSN_P (con))
27891 continue;
27892 insn1 = PATTERN (con);
27893 if (GET_CODE (insn1) == PARALLEL)
27894 insn1 = XVECEXP (insn1, 0, 0);
27895
27896 if (GET_CODE (insn1) == SET
27897 && GET_CODE (SET_SRC (insn1)) == MULT
27898 && GET_MODE (SET_SRC (insn1)) == SImode)
27899 {
27900 sd_iterator_def sd_it1;
27901 dep_t dep1;
27902 /* Check if there is no other dependee for IMUL. */
27903 index = i;
27904 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
27905 {
27906 rtx pro;
27907 pro = DEP_PRO (dep1);
27908 if (!NONDEBUG_INSN_P (pro))
27909 continue;
27910 if (pro != insn)
27911 index = -1;
27912 }
27913 if (index >= 0)
27914 break;
27915 }
27916 }
27917 if (index >= 0)
27918 break;
27919 }
27920 return index;
27921 }
27922
27923 /* Try to find the best candidate on the top of ready list if two insns
27924 have the same priority - candidate is best if its dependees were
27925 scheduled earlier. Applied for Silvermont only.
27926 Return true if top 2 insns must be interchanged. */
27927 static bool
27928 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
27929 {
27930 rtx_insn *top = ready[n_ready - 1];
27931 rtx_insn *next = ready[n_ready - 2];
27932 rtx set;
27933 sd_iterator_def sd_it;
27934 dep_t dep;
27935 int clock1 = -1;
27936 int clock2 = -1;
27937 #define INSN_TICK(INSN) (HID (INSN)->tick)
27938
27939 if (!TARGET_SILVERMONT && !TARGET_INTEL)
27940 return false;
27941
27942 if (!NONDEBUG_INSN_P (top))
27943 return false;
27944 if (!NONJUMP_INSN_P (top))
27945 return false;
27946 if (!NONDEBUG_INSN_P (next))
27947 return false;
27948 if (!NONJUMP_INSN_P (next))
27949 return false;
27950 set = single_set (top);
27951 if (!set)
27952 return false;
27953 set = single_set (next);
27954 if (!set)
27955 return false;
27956
27957 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
27958 {
27959 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
27960 return false;
27961 /* Determine the winner more precisely. */
27962 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
27963 {
27964 rtx pro;
27965 pro = DEP_PRO (dep);
27966 if (!NONDEBUG_INSN_P (pro))
27967 continue;
27968 if (INSN_TICK (pro) > clock1)
27969 clock1 = INSN_TICK (pro);
27970 }
27971 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
27972 {
27973 rtx pro;
27974 pro = DEP_PRO (dep);
27975 if (!NONDEBUG_INSN_P (pro))
27976 continue;
27977 if (INSN_TICK (pro) > clock2)
27978 clock2 = INSN_TICK (pro);
27979 }
27980
27981 if (clock1 == clock2)
27982 {
27983 /* Determine winner - load must win. */
27984 enum attr_memory memory1, memory2;
27985 memory1 = get_attr_memory (top);
27986 memory2 = get_attr_memory (next);
27987 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
27988 return true;
27989 }
27990 return (bool) (clock2 < clock1);
27991 }
27992 return false;
27993 #undef INSN_TICK
27994 }
27995
27996 /* Perform possible reordering of the ready list for Atom/Silvermont only.
27997 Return the issue rate. */
27998 static int
27999 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
28000 int *pn_ready, int clock_var)
28001 {
28002 int issue_rate = -1;
28003 int n_ready = *pn_ready;
28004 int i;
28005 rtx_insn *insn;
28006 int index = -1;
28007
28008 /* Set up issue rate. */
28009 issue_rate = ix86_issue_rate ();
28010
28011 /* Do reordering for BONNELL/SILVERMONT only. */
28012 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
28013 return issue_rate;
28014
28015 /* Nothing to do if ready list contains only 1 instruction. */
28016 if (n_ready <= 1)
28017 return issue_rate;
28018
28019 /* Do reordering for the post-reload scheduler only. */
28020 if (!reload_completed)
28021 return issue_rate;
28022
28023 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
28024 {
28025 if (sched_verbose > 1)
28026 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
28027 INSN_UID (ready[index]));
28028
28029 /* Put IMUL producer (ready[index]) at the top of ready list. */
28030 insn = ready[index];
28031 for (i = index; i < n_ready - 1; i++)
28032 ready[i] = ready[i + 1];
28033 ready[n_ready - 1] = insn;
28034 return issue_rate;
28035 }
28036
28037 /* Skip selective scheduling since HID is not populated in it. */
28038 if (clock_var != 0
28039 && !sel_sched_p ()
28040 && swap_top_of_ready_list (ready, n_ready))
28041 {
28042 if (sched_verbose > 1)
28043 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
28044 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
28045 /* Swap 2 top elements of ready list. */
28046 insn = ready[n_ready - 1];
28047 ready[n_ready - 1] = ready[n_ready - 2];
28048 ready[n_ready - 2] = insn;
28049 }
28050 return issue_rate;
28051 }
28052
28053 static bool
28054 ix86_class_likely_spilled_p (reg_class_t);
28055
28056 /* Return true if the lhs of INSN is a HW function argument register, and set
28057 *IS_SPILLED to true if it is a likely-spilled HW register. */
28058 static bool
28059 insn_is_function_arg (rtx insn, bool* is_spilled)
28060 {
28061 rtx dst;
28062
28063 if (!NONDEBUG_INSN_P (insn))
28064 return false;
28065 /* Call instructions are not movable; ignore them. */
28066 if (CALL_P (insn))
28067 return false;
28068 insn = PATTERN (insn);
28069 if (GET_CODE (insn) == PARALLEL)
28070 insn = XVECEXP (insn, 0, 0);
28071 if (GET_CODE (insn) != SET)
28072 return false;
28073 dst = SET_DEST (insn);
28074 if (REG_P (dst) && HARD_REGISTER_P (dst)
28075 && ix86_function_arg_regno_p (REGNO (dst)))
28076 {
28077 /* Is it likely spilled HW register? */
28078 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
28079 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
28080 *is_spilled = true;
28081 return true;
28082 }
28083 return false;
28084 }
28085
28086 /* Add output dependencies for a chain of adjacent function arguments, but
28087 only if there is a move to a likely-spilled HW register.  Return the first
28088 argument if at least one dependence was added, or NULL otherwise. */
28089 static rtx_insn *
28090 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
28091 {
28092 rtx_insn *insn;
28093 rtx_insn *last = call;
28094 rtx_insn *first_arg = NULL;
28095 bool is_spilled = false;
28096
28097 head = PREV_INSN (head);
28098
28099 /* Find the argument-passing instruction nearest to the call. */
28100 while (true)
28101 {
28102 last = PREV_INSN (last);
28103 if (last == head)
28104 return NULL;
28105 if (!NONDEBUG_INSN_P (last))
28106 continue;
28107 if (insn_is_function_arg (last, &is_spilled))
28108 break;
28109 return NULL;
28110 }
28111
28112 first_arg = last;
28113 while (true)
28114 {
28115 insn = PREV_INSN (last);
28116 if (!INSN_P (insn))
28117 break;
28118 if (insn == head)
28119 break;
28120 if (!NONDEBUG_INSN_P (insn))
28121 {
28122 last = insn;
28123 continue;
28124 }
28125 if (insn_is_function_arg (insn, &is_spilled))
28126 {
28127 /* Add an output dependence between two function arguments if the chain
28128 of output arguments contains likely-spilled HW registers. */
28129 if (is_spilled)
28130 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
28131 first_arg = last = insn;
28132 }
28133 else
28134 break;
28135 }
28136 if (!is_spilled)
28137 return NULL;
28138 return first_arg;
28139 }
28140
28141 /* Add output or anti dependency from insn to first_arg to restrict its code
28142 motion. */
28143 static void
28144 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
28145 {
28146 rtx set;
28147 rtx tmp;
28148
28149 /* Add anti dependencies for bounds stores. */
28150 if (INSN_P (insn)
28151 && GET_CODE (PATTERN (insn)) == PARALLEL
28152 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
28153 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
28154 {
28155 add_dependence (first_arg, insn, REG_DEP_ANTI);
28156 return;
28157 }
28158
28159 set = single_set (insn);
28160 if (!set)
28161 return;
28162 tmp = SET_DEST (set);
28163 if (REG_P (tmp))
28164 {
28165 /* Add output dependency to the first function argument. */
28166 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
28167 return;
28168 }
28169 /* Add anti dependency. */
28170 add_dependence (first_arg, insn, REG_DEP_ANTI);
28171 }
28172
28173 /* Avoid cross block motion of function argument through adding dependency
28174 from the first non-jump instruction in bb. */
28175 static void
28176 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
28177 {
28178 rtx_insn *insn = BB_END (bb);
28179
28180 while (insn)
28181 {
28182 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
28183 {
28184 rtx set = single_set (insn);
28185 if (set)
28186 {
28187 avoid_func_arg_motion (arg, insn);
28188 return;
28189 }
28190 }
28191 if (insn == BB_HEAD (bb))
28192 return;
28193 insn = PREV_INSN (insn);
28194 }
28195 }
28196
28197 /* Hook for pre-reload schedule - avoid motion of function arguments
28198 passed in likely spilled HW registers. */
28199 static void
28200 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
28201 {
28202 rtx_insn *insn;
28203 rtx_insn *first_arg = NULL;
28204 if (reload_completed)
28205 return;
28206 while (head != tail && DEBUG_INSN_P (head))
28207 head = NEXT_INSN (head);
28208 for (insn = tail; insn != head; insn = PREV_INSN (insn))
28209 if (INSN_P (insn) && CALL_P (insn))
28210 {
28211 first_arg = add_parameter_dependencies (insn, head);
28212 if (first_arg)
28213 {
28214 /* Add a dependee for the first argument to predecessors, but only
28215 if the region contains more than one block. */
28216 basic_block bb = BLOCK_FOR_INSN (insn);
28217 int rgn = CONTAINING_RGN (bb->index);
28218 int nr_blks = RGN_NR_BLOCKS (rgn);
28219 /* Skip trivial regions and region head blocks that can have
28220 predecessors outside of region. */
28221 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
28222 {
28223 edge e;
28224 edge_iterator ei;
28225
28226 /* Regions are SCCs with the exception of selective
28227 scheduling with pipelining of outer blocks enabled.
28228 So also check that immediate predecessors of a non-head
28229 block are in the same region. */
28230 FOR_EACH_EDGE (e, ei, bb->preds)
28231 {
28232 /* Avoid creating loop-carried dependencies by using
28233 the topological ordering in the region. */
28234 if (rgn == CONTAINING_RGN (e->src->index)
28235 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
28236 add_dependee_for_func_arg (first_arg, e->src);
28237 }
28238 }
28239 insn = first_arg;
28240 if (insn == head)
28241 break;
28242 }
28243 }
28244 else if (first_arg)
28245 avoid_func_arg_motion (first_arg, insn);
28246 }
28247
28248 /* Hook for pre-reload schedule - set priority of moves from likely spilled
28249 HW registers to maximum, to schedule them as soon as possible.  These are
28250 moves from function argument registers at the top of the function entry
28251 and moves from function return value registers after call. */
28252 static int
28253 ix86_adjust_priority (rtx_insn *insn, int priority)
28254 {
28255 rtx set;
28256
28257 if (reload_completed)
28258 return priority;
28259
28260 if (!NONDEBUG_INSN_P (insn))
28261 return priority;
28262
28263 set = single_set (insn);
28264 if (set)
28265 {
28266 rtx tmp = SET_SRC (set);
28267 if (REG_P (tmp)
28268 && HARD_REGISTER_P (tmp)
28269 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
28270 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
28271 return current_sched_info->sched_max_insns_priority;
28272 }
28273
28274 return priority;
28275 }
28276
28277 /* Model decoder of Core 2/i7.
28278 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
28279 track the instruction fetch block boundaries and make sure that long
28280 (9+ bytes) instructions are assigned to D0. */
28281
28282 /* Maximum length of an insn that can be handled by
28283 a secondary decoder unit. '8' for Core 2/i7. */
28284 static int core2i7_secondary_decoder_max_insn_size;
28285
28286 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
28287 '16' for Core 2/i7. */
28288 static int core2i7_ifetch_block_size;
28289
28290 /* Maximum number of instructions decoder can handle per cycle.
28291 '6' for Core 2/i7. */
28292 static int core2i7_ifetch_block_max_insns;
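/* With the defaults set in ix86_sched_init_global (8/16/6), an insn longer
   than 8 bytes can only be taken as the first insn of a cycle, and once
   16 bytes or 6 insns have been issued the remaining ready candidates are
   filtered out until the next cycle.  */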
28293
28294 typedef struct ix86_first_cycle_multipass_data_ *
28295 ix86_first_cycle_multipass_data_t;
28296 typedef const struct ix86_first_cycle_multipass_data_ *
28297 const_ix86_first_cycle_multipass_data_t;
28298
28299 /* A variable to store target state across calls to max_issue within
28300 one cycle. */
28301 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
28302 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
28303
28304 /* Initialize DATA. */
28305 static void
28306 core2i7_first_cycle_multipass_init (void *_data)
28307 {
28308 ix86_first_cycle_multipass_data_t data
28309 = (ix86_first_cycle_multipass_data_t) _data;
28310
28311 data->ifetch_block_len = 0;
28312 data->ifetch_block_n_insns = 0;
28313 data->ready_try_change = NULL;
28314 data->ready_try_change_size = 0;
28315 }
28316
28317 /* Advancing the cycle; reset ifetch block counts. */
28318 static void
28319 core2i7_dfa_post_advance_cycle (void)
28320 {
28321 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
28322
28323 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
28324
28325 data->ifetch_block_len = 0;
28326 data->ifetch_block_n_insns = 0;
28327 }
28328
28329 static int min_insn_size (rtx_insn *);
28330
28331 /* Filter out insns from ready_try that the core will not be able to issue
28332 on current cycle due to decoder. */
28333 static void
28334 core2i7_first_cycle_multipass_filter_ready_try
28335 (const_ix86_first_cycle_multipass_data_t data,
28336 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
28337 {
28338 while (n_ready--)
28339 {
28340 rtx_insn *insn;
28341 int insn_size;
28342
28343 if (ready_try[n_ready])
28344 continue;
28345
28346 insn = get_ready_element (n_ready);
28347 insn_size = min_insn_size (insn);
28348
28349 if (/* If this is too long an insn for a secondary decoder ... */
28350 (!first_cycle_insn_p
28351 && insn_size > core2i7_secondary_decoder_max_insn_size)
28352 /* ... or it would not fit into the ifetch block ... */
28353 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
28354 /* ... or the decoder is full already ... */
28355 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
28356 /* ... mask the insn out. */
28357 {
28358 ready_try[n_ready] = 1;
28359
28360 if (data->ready_try_change)
28361 bitmap_set_bit (data->ready_try_change, n_ready);
28362 }
28363 }
28364 }
28365
28366 /* Prepare for a new round of multipass lookahead scheduling. */
28367 static void
28368 core2i7_first_cycle_multipass_begin (void *_data,
28369 signed char *ready_try, int n_ready,
28370 bool first_cycle_insn_p)
28371 {
28372 ix86_first_cycle_multipass_data_t data
28373 = (ix86_first_cycle_multipass_data_t) _data;
28374 const_ix86_first_cycle_multipass_data_t prev_data
28375 = ix86_first_cycle_multipass_data;
28376
28377 /* Restore the state from the end of the previous round. */
28378 data->ifetch_block_len = prev_data->ifetch_block_len;
28379 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
28380
28381 /* Filter instructions that cannot be issued on current cycle due to
28382 decoder restrictions. */
28383 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
28384 first_cycle_insn_p);
28385 }
28386
28387 /* INSN is being issued in current solution. Account for its impact on
28388 the decoder model. */
28389 static void
28390 core2i7_first_cycle_multipass_issue (void *_data,
28391 signed char *ready_try, int n_ready,
28392 rtx_insn *insn, const void *_prev_data)
28393 {
28394 ix86_first_cycle_multipass_data_t data
28395 = (ix86_first_cycle_multipass_data_t) _data;
28396 const_ix86_first_cycle_multipass_data_t prev_data
28397 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
28398
28399 int insn_size = min_insn_size (insn);
28400
28401 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
28402 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
28403 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
28404 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
28405
28406 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
28407 if (!data->ready_try_change)
28408 {
28409 data->ready_try_change = sbitmap_alloc (n_ready);
28410 data->ready_try_change_size = n_ready;
28411 }
28412 else if (data->ready_try_change_size < n_ready)
28413 {
28414 data->ready_try_change = sbitmap_resize (data->ready_try_change,
28415 n_ready, 0);
28416 data->ready_try_change_size = n_ready;
28417 }
28418 bitmap_clear (data->ready_try_change);
28419
28420 /* Filter out insns from ready_try that the core will not be able to issue
28421 on current cycle due to decoder. */
28422 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
28423 false);
28424 }
28425
28426 /* Revert the effect on ready_try. */
28427 static void
28428 core2i7_first_cycle_multipass_backtrack (const void *_data,
28429 signed char *ready_try,
28430 int n_ready ATTRIBUTE_UNUSED)
28431 {
28432 const_ix86_first_cycle_multipass_data_t data
28433 = (const_ix86_first_cycle_multipass_data_t) _data;
28434 unsigned int i = 0;
28435 sbitmap_iterator sbi;
28436
28437 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
28438 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
28439 {
28440 ready_try[i] = 0;
28441 }
28442 }
28443
28444 /* Save the result of multipass lookahead scheduling for the next round. */
28445 static void
28446 core2i7_first_cycle_multipass_end (const void *_data)
28447 {
28448 const_ix86_first_cycle_multipass_data_t data
28449 = (const_ix86_first_cycle_multipass_data_t) _data;
28450 ix86_first_cycle_multipass_data_t next_data
28451 = ix86_first_cycle_multipass_data;
28452
28453 if (data != NULL)
28454 {
28455 next_data->ifetch_block_len = data->ifetch_block_len;
28456 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
28457 }
28458 }
28459
28460 /* Deallocate target data. */
28461 static void
28462 core2i7_first_cycle_multipass_fini (void *_data)
28463 {
28464 ix86_first_cycle_multipass_data_t data
28465 = (ix86_first_cycle_multipass_data_t) _data;
28466
28467 if (data->ready_try_change)
28468 {
28469 sbitmap_free (data->ready_try_change);
28470 data->ready_try_change = NULL;
28471 data->ready_try_change_size = 0;
28472 }
28473 }
28474
28475 /* Prepare for scheduling pass. */
28476 static void
28477 ix86_sched_init_global (FILE *, int, int)
28478 {
28479 /* Install scheduling hooks for current CPU. Some of these hooks are used
28480 in time-critical parts of the scheduler, so we only set them up when
28481 they are actually used. */
28482 switch (ix86_tune)
28483 {
28484 case PROCESSOR_CORE2:
28485 case PROCESSOR_NEHALEM:
28486 case PROCESSOR_SANDYBRIDGE:
28487 case PROCESSOR_HASWELL:
28488 /* Do not perform multipass scheduling for pre-reload schedule
28489 to save compile time. */
28490 if (reload_completed)
28491 {
28492 targetm.sched.dfa_post_advance_cycle
28493 = core2i7_dfa_post_advance_cycle;
28494 targetm.sched.first_cycle_multipass_init
28495 = core2i7_first_cycle_multipass_init;
28496 targetm.sched.first_cycle_multipass_begin
28497 = core2i7_first_cycle_multipass_begin;
28498 targetm.sched.first_cycle_multipass_issue
28499 = core2i7_first_cycle_multipass_issue;
28500 targetm.sched.first_cycle_multipass_backtrack
28501 = core2i7_first_cycle_multipass_backtrack;
28502 targetm.sched.first_cycle_multipass_end
28503 = core2i7_first_cycle_multipass_end;
28504 targetm.sched.first_cycle_multipass_fini
28505 = core2i7_first_cycle_multipass_fini;
28506
28507 /* Set decoder parameters. */
28508 core2i7_secondary_decoder_max_insn_size = 8;
28509 core2i7_ifetch_block_size = 16;
28510 core2i7_ifetch_block_max_insns = 6;
28511 break;
28512 }
28513 /* ... Fall through ... */
28514 default:
28515 targetm.sched.dfa_post_advance_cycle = NULL;
28516 targetm.sched.first_cycle_multipass_init = NULL;
28517 targetm.sched.first_cycle_multipass_begin = NULL;
28518 targetm.sched.first_cycle_multipass_issue = NULL;
28519 targetm.sched.first_cycle_multipass_backtrack = NULL;
28520 targetm.sched.first_cycle_multipass_end = NULL;
28521 targetm.sched.first_cycle_multipass_fini = NULL;
28522 break;
28523 }
28524 }
28525
28526 \f
28527 /* Compute the alignment given to a constant that is being placed in memory.
28528 EXP is the constant and ALIGN is the alignment that the object would
28529 ordinarily have.
28530 The value of this function is used instead of that alignment to align
28531 the object. */
28532
28533 int
28534 ix86_constant_alignment (tree exp, int align)
28535 {
28536 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
28537 || TREE_CODE (exp) == INTEGER_CST)
28538 {
28539 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
28540 return 64;
28541 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
28542 return 128;
28543 }
28544 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
28545 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
28546 return BITS_PER_WORD;
28547
28548 return align;
28549 }
28550
28551 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
28552 the data type, and ALIGN is the alignment that the object would
28553 ordinarily have. */
28554
28555 static int
28556 iamcu_alignment (tree type, int align)
28557 {
28558 enum machine_mode mode;
28559
28560 if (align < 32 || TYPE_USER_ALIGN (type))
28561 return align;
28562
28563 /* The Intel MCU psABI specifies that scalar types larger than 4 bytes are
28564 aligned to 4 bytes. */
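  /* For example, a long long or double global that would normally get
     64-bit alignment is limited to 32-bit alignment under -miamcu.  */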
28565 mode = TYPE_MODE (strip_array_types (type));
28566 switch (GET_MODE_CLASS (mode))
28567 {
28568 case MODE_INT:
28569 case MODE_COMPLEX_INT:
28570 case MODE_COMPLEX_FLOAT:
28571 case MODE_FLOAT:
28572 case MODE_DECIMAL_FLOAT:
28573 return 32;
28574 default:
28575 return align;
28576 }
28577 }
28578
28579 /* Compute the alignment for a static variable.
28580 TYPE is the data type, and ALIGN is the alignment that
28581 the object would ordinarily have. The value of this function is used
28582 instead of that alignment to align the object. */
28583
28584 int
28585 ix86_data_alignment (tree type, int align, bool opt)
28586 {
28587 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
28588 for symbols from other compilation units or symbols that don't need
28589 to bind locally. In order to preserve some ABI compatibility with
28590 those compilers, ensure we don't decrease alignment from what we
28591 used to assume. */
28592
28593 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
28594
28595 /* A data structure equal to or greater than the size of a cache line
28596 (64 bytes on the Pentium 4 and other recent Intel processors, including
28597 processors based on the Intel Core microarchitecture) should be aligned
28598 so that its base address is a multiple of the cache line size. */
28599
28600 int max_align
28601 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
28602
28603 if (max_align < BITS_PER_WORD)
28604 max_align = BITS_PER_WORD;
28605
28606 switch (ix86_align_data_type)
28607 {
28608 case ix86_align_data_type_abi: opt = false; break;
28609 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
28610 case ix86_align_data_type_cacheline: break;
28611 }
28612
28613 if (TARGET_IAMCU)
28614 align = iamcu_alignment (type, align);
28615
28616 if (opt
28617 && AGGREGATE_TYPE_P (type)
28618 && TYPE_SIZE (type)
28619 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
28620 {
28621 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
28622 && align < max_align_compat)
28623 align = max_align_compat;
28624 if (wi::geu_p (TYPE_SIZE (type), max_align)
28625 && align < max_align)
28626 align = max_align;
28627 }
28628
28629   /* The x86-64 ABI requires arrays of at least 16 bytes to be aligned
28630      to a 16-byte boundary.  */
28631 if (TARGET_64BIT)
28632 {
28633 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
28634 && TYPE_SIZE (type)
28635 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
28636 && wi::geu_p (TYPE_SIZE (type), 128)
28637 && align < 128)
28638 return 128;
28639 }
28640
28641 if (!opt)
28642 return align;
28643
28644 if (TREE_CODE (type) == ARRAY_TYPE)
28645 {
28646 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
28647 return 64;
28648 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
28649 return 128;
28650 }
28651 else if (TREE_CODE (type) == COMPLEX_TYPE)
28652 {
28653
28654 if (TYPE_MODE (type) == DCmode && align < 64)
28655 return 64;
28656 if ((TYPE_MODE (type) == XCmode
28657 || TYPE_MODE (type) == TCmode) && align < 128)
28658 return 128;
28659 }
28660 else if ((TREE_CODE (type) == RECORD_TYPE
28661 || TREE_CODE (type) == UNION_TYPE
28662 || TREE_CODE (type) == QUAL_UNION_TYPE)
28663 && TYPE_FIELDS (type))
28664 {
28665 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
28666 return 64;
28667 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
28668 return 128;
28669 }
28670 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
28671 || TREE_CODE (type) == INTEGER_TYPE)
28672 {
28673 if (TYPE_MODE (type) == DFmode && align < 64)
28674 return 64;
28675 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
28676 return 128;
28677 }
28678
28679 return align;
28680 }
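
/* Illustration (added for exposition; not part of the original sources,
   and assuming the usual 64-byte prefetch block): when optimizing, and
   subject to -malign-data, a static aggregate of at least 32 bytes is
   given at least 256-bit alignment for compatibility with GCC 4.8 and
   earlier, and one of at least a cache line is aligned to the cache
   line, e.g.

       static char table[128];       64-byte aligned when optimizing

   Independently of -O, on x86-64 an array of 16 bytes or more is aligned
   to 16 bytes as required by the psABI.  */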
28681
28682 /* Compute the alignment for a local variable or a stack slot. EXP is
28683 the data type or decl itself, MODE is the widest mode available and
28684 ALIGN is the alignment that the object would ordinarily have. The
28685 value of this macro is used instead of that alignment to align the
28686 object. */
28687
28688 unsigned int
28689 ix86_local_alignment (tree exp, machine_mode mode,
28690 unsigned int align)
28691 {
28692 tree type, decl;
28693
28694 if (exp && DECL_P (exp))
28695 {
28696 type = TREE_TYPE (exp);
28697 decl = exp;
28698 }
28699 else
28700 {
28701 type = exp;
28702 decl = NULL;
28703 }
28704
28705 /* Don't do dynamic stack realignment for long long objects with
28706 -mpreferred-stack-boundary=2. */
28707 if (!TARGET_64BIT
28708 && align == 64
28709 && ix86_preferred_stack_boundary < 64
28710 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
28711 && (!type || !TYPE_USER_ALIGN (type))
28712 && (!decl || !DECL_USER_ALIGN (decl)))
28713 align = 32;
28714
28715   /* If TYPE is NULL, we are allocating a stack slot for a caller-save
28716      register in MODE.  We will return the larger of the XFmode and
28717      DFmode alignments.  */
28718 if (!type)
28719 {
28720 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
28721 align = GET_MODE_ALIGNMENT (DFmode);
28722 return align;
28723 }
28724
28725   /* Don't increase alignment for the Intel MCU psABI.  */
28726 if (TARGET_IAMCU)
28727 return align;
28728
28729   /* The x86-64 ABI requires arrays of at least 16 bytes to be aligned
28730      to a 16-byte boundary.  The exact wording is:
28731
28732      An array uses the same alignment as its elements, except that a local or
28733      global array variable of length at least 16 bytes or
28734      a C99 variable-length array variable always has alignment of at least 16 bytes.
28735
28736      This was added to allow the use of aligned SSE instructions on arrays.  The
28737      rule is meant for static storage (where the compiler cannot do the analysis
28738      by itself).  We follow it for automatic variables only when convenient:
28739      we fully control everything in the function being compiled, and functions
28740      from other units cannot rely on the alignment.
28741
28742      Exclude the va_list type.  It is the common case of a local array where
28743      we cannot benefit from the alignment.
28744
28745      TODO: Probably one should optimize for size only when the variable does not escape.  */
28746 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
28747 && TARGET_SSE)
28748 {
28749 if (AGGREGATE_TYPE_P (type)
28750 && (va_list_type_node == NULL_TREE
28751 || (TYPE_MAIN_VARIANT (type)
28752 != TYPE_MAIN_VARIANT (va_list_type_node)))
28753 && TYPE_SIZE (type)
28754 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
28755 && wi::geu_p (TYPE_SIZE (type), 16)
28756 && align < 128)
28757 return 128;
28758 }
28759 if (TREE_CODE (type) == ARRAY_TYPE)
28760 {
28761 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
28762 return 64;
28763 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
28764 return 128;
28765 }
28766 else if (TREE_CODE (type) == COMPLEX_TYPE)
28767 {
28768 if (TYPE_MODE (type) == DCmode && align < 64)
28769 return 64;
28770 if ((TYPE_MODE (type) == XCmode
28771 || TYPE_MODE (type) == TCmode) && align < 128)
28772 return 128;
28773 }
28774 else if ((TREE_CODE (type) == RECORD_TYPE
28775 || TREE_CODE (type) == UNION_TYPE
28776 || TREE_CODE (type) == QUAL_UNION_TYPE)
28777 && TYPE_FIELDS (type))
28778 {
28779 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
28780 return 64;
28781 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
28782 return 128;
28783 }
28784 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
28785 || TREE_CODE (type) == INTEGER_TYPE)
28786 {
28787
28788 if (TYPE_MODE (type) == DFmode && align < 64)
28789 return 64;
28790 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
28791 return 128;
28792 }
28793 return align;
28794 }
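
/* Illustration (added for exposition; not part of the original sources):
   on x86-64, when optimizing for speed with SSE enabled, a local
   aggregate of 16 bytes or more is given 128-bit alignment so that
   vectorized accesses can use aligned SSE instructions, e.g.

       void
       f (void)
       {
         char buf[32];             16-byte aligned on the stack
       }

   va_list objects are excluded, since they rarely benefit from the
   extra alignment.  */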
28795
28796 /* Compute the minimum required alignment for dynamic stack realignment
28797 purposes for a local variable, parameter or a stack slot. EXP is
28798 the data type or decl itself, MODE is its mode and ALIGN is the
28799 alignment that the object would ordinarily have. */
28800
28801 unsigned int
28802 ix86_minimum_alignment (tree exp, machine_mode mode,
28803 unsigned int align)
28804 {
28805 tree type, decl;
28806
28807 if (exp && DECL_P (exp))
28808 {
28809 type = TREE_TYPE (exp);
28810 decl = exp;
28811 }
28812 else
28813 {
28814 type = exp;
28815 decl = NULL;
28816 }
28817
28818 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
28819 return align;
28820
28821 /* Don't do dynamic stack realignment for long long objects with
28822 -mpreferred-stack-boundary=2. */
28823 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
28824 && (!type || !TYPE_USER_ALIGN (type))
28825 && (!decl || !DECL_USER_ALIGN (decl)))
28826 return 32;
28827
28828 return align;
28829 }
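
/* Illustration (added for exposition; not part of the original sources):
   with -m32 -mpreferred-stack-boundary=2, a plain

       long long x;

   local only requires 32-bit alignment, so its presence alone does not
   force dynamic stack realignment; objects with a user-specified
   alignment are not affected by this exception.  */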
28830 \f
28831 /* Find a location for the static chain incoming to a nested function.
28832 This is a register, unless all free registers are used by arguments. */
28833
28834 static rtx
28835 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
28836 {
28837 unsigned regno;
28838
28839 /* While this function won't be called by the middle-end when a static
28840 chain isn't needed, it's also used throughout the backend so it's
28841 easiest to keep this check centralized. */
28842 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
28843 return NULL;
28844
28845 if (TARGET_64BIT)
28846 {
28847 /* We always use R10 in 64-bit mode. */
28848 regno = R10_REG;
28849 }
28850 else
28851 {
28852 const_tree fntype, fndecl;
28853 unsigned int ccvt;
28854
28855 /* By default in 32-bit mode we use ECX to pass the static chain. */
28856 regno = CX_REG;
28857
28858 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
28859 {
28860 fntype = TREE_TYPE (fndecl_or_type);
28861 fndecl = fndecl_or_type;
28862 }
28863 else
28864 {
28865 fntype = fndecl_or_type;
28866 fndecl = NULL;
28867 }
28868
28869 ccvt = ix86_get_callcvt (fntype);
28870 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
28871 {
28872 /* Fastcall functions use ecx/edx for arguments, which leaves
28873 us with EAX for the static chain.
28874 Thiscall functions use ecx for arguments, which also
28875 leaves us with EAX for the static chain. */
28876 regno = AX_REG;
28877 }
28878 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
28879 {
28880 	  /* Thiscall functions use ecx for arguments, which leaves
28881 	     us with EAX and EDX for the static chain.
28882 	     For ABI compatibility we use EAX.  */
28883 regno = AX_REG;
28884 }
28885 else if (ix86_function_regparm (fntype, fndecl) == 3)
28886 {
28887 /* For regparm 3, we have no free call-clobbered registers in
28888 which to store the static chain. In order to implement this,
28889 we have the trampoline push the static chain to the stack.
28890 However, we can't push a value below the return address when
28891 we call the nested function directly, so we have to use an
28892 alternate entry point. For this we use ESI, and have the
28893 alternate entry point push ESI, so that things appear the
28894 same once we're executing the nested function. */
28895 if (incoming_p)
28896 {
28897 if (fndecl == current_function_decl)
28898 ix86_static_chain_on_stack = true;
28899 return gen_frame_mem (SImode,
28900 plus_constant (Pmode,
28901 arg_pointer_rtx, -8));
28902 }
28903 regno = SI_REG;
28904 }
28905 }
28906
28907 return gen_rtx_REG (Pmode, regno);
28908 }
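
/* Summary (added for exposition; not part of the original sources) of
   where the static chain ends up:

       64-bit                          %r10
       32-bit, default                 %ecx
       32-bit, fastcall or thiscall    %eax
       32-bit, regparm(3)              on the stack; the nested function
                                       gets an alternate entry point that
                                       pushes %esi, as described above.  */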
28909
28910 /* Emit RTL insns to initialize the variable parts of a trampoline.
28911 FNDECL is the decl of the target address; M_TRAMP is a MEM for
28912 the trampoline, and CHAIN_VALUE is an RTX for the static chain
28913 to be passed to the target function. */
28914
28915 static void
28916 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
28917 {
28918 rtx mem, fnaddr;
28919 int opcode;
28920 int offset = 0;
28921
28922 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28923
28924 if (TARGET_64BIT)
28925 {
28926 int size;
28927
28928       /* Load the function address into r11.  Try to load the address
28929 	 using the shorter movl instead of movabs.  We may want to support
28930 	 movq for kernel mode, but the kernel does not use trampolines at
28931 	 the moment.  FNADDR is a 32-bit address and may not be in
28932 	 DImode when ptr_mode == SImode.  Always use movl in this
28933 	 case.  */
28934 if (ptr_mode == SImode
28935 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
28936 {
28937 fnaddr = copy_addr_to_reg (fnaddr);
28938
28939 mem = adjust_address (m_tramp, HImode, offset);
28940 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
28941
28942 mem = adjust_address (m_tramp, SImode, offset + 2);
28943 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
28944 offset += 6;
28945 }
28946 else
28947 {
28948 mem = adjust_address (m_tramp, HImode, offset);
28949 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
28950
28951 mem = adjust_address (m_tramp, DImode, offset + 2);
28952 emit_move_insn (mem, fnaddr);
28953 offset += 10;
28954 }
28955
28956       /* Load the static chain into r10 using movabs.  Use the shorter
28957 	 movl instead of movabs when ptr_mode == SImode.  */
28958 if (ptr_mode == SImode)
28959 {
28960 opcode = 0xba41;
28961 size = 6;
28962 }
28963 else
28964 {
28965 opcode = 0xba49;
28966 size = 10;
28967 }
28968
28969 mem = adjust_address (m_tramp, HImode, offset);
28970 emit_move_insn (mem, gen_int_mode (opcode, HImode));
28971
28972 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
28973 emit_move_insn (mem, chain_value);
28974 offset += size;
28975
28976 /* Jump to r11; the last (unused) byte is a nop, only there to
28977 pad the write out to a single 32-bit store. */
28978 mem = adjust_address (m_tramp, SImode, offset);
28979 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
28980 offset += 4;
28981 }
28982 else
28983 {
28984 rtx disp, chain;
28985
28986 /* Depending on the static chain location, either load a register
28987 with a constant, or push the constant to the stack. All of the
28988 instructions are the same size. */
28989 chain = ix86_static_chain (fndecl, true);
28990 if (REG_P (chain))
28991 {
28992 switch (REGNO (chain))
28993 {
28994 case AX_REG:
28995 opcode = 0xb8; break;
28996 case CX_REG:
28997 opcode = 0xb9; break;
28998 default:
28999 gcc_unreachable ();
29000 }
29001 }
29002 else
29003 opcode = 0x68;
29004
29005 mem = adjust_address (m_tramp, QImode, offset);
29006 emit_move_insn (mem, gen_int_mode (opcode, QImode));
29007
29008 mem = adjust_address (m_tramp, SImode, offset + 1);
29009 emit_move_insn (mem, chain_value);
29010 offset += 5;
29011
29012 mem = adjust_address (m_tramp, QImode, offset);
29013 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
29014
29015 mem = adjust_address (m_tramp, SImode, offset + 1);
29016
29017       /* Compute the displacement from the end of the jmp to the target
29018 	 function.  When the trampoline passes the static chain on the
29019 	 stack, we must skip the target's first insn, which pushes the
29020 	 (call-saved) register used for the static chain; this push is 1 byte.  */
29021 offset += 5;
29022 disp = expand_binop (SImode, sub_optab, fnaddr,
29023 plus_constant (Pmode, XEXP (m_tramp, 0),
29024 offset - (MEM_P (chain) ? 1 : 0)),
29025 NULL_RTX, 1, OPTAB_DIRECT);
29026 emit_move_insn (mem, disp);
29027 }
29028
29029 gcc_assert (offset <= TRAMPOLINE_SIZE);
29030
29031 #ifdef HAVE_ENABLE_EXECUTE_STACK
29032 #ifdef CHECK_EXECUTE_STACK_ENABLED
29033 if (CHECK_EXECUTE_STACK_ENABLED)
29034 #endif
29035 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
29036 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
29037 #endif
29038 }
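
/* Byte-level sketch (added for exposition; not part of the original
   sources) of the trampolines emitted above.

   64-bit, target address and static chain as 64-bit immediates:

       49 bb imm64        movabs $fnaddr, %r11
       49 ba imm64        movabs $chain,  %r10
       49 ff e3 90        jmp *%r11; nop      (the nop pads the final store)

   (41 bb / 41 ba with 32-bit immediates are used instead when the values
   fit in 32 bits or ptr_mode == SImode.)

   32-bit, static chain in a register (b8 = %eax, b9 = %ecx) or, for
   regparm(3) functions, pushed on the stack (68):

       b9 imm32           movl $chain, %ecx
       e9 rel32           jmp fnaddr

   In the pushed case the jmp displacement targets fnaddr + 1, skipping
   the one-byte push at the nested function's alternate entry point.  */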
29039 \f
29040 /* The following file contains several enumerations and data structures
29041 built from the definitions in i386-builtin-types.def. */
29042
29043 #include "i386-builtin-types.inc"
29044
29045 /* Table for the ix86 builtin non-function types. */
29046 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
29047
29048 /* Retrieve an element from the above table, building some of
29049 the types lazily. */
29050
29051 static tree
29052 ix86_get_builtin_type (enum ix86_builtin_type tcode)
29053 {
29054 unsigned int index;
29055 tree type, itype;
29056
29057 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
29058
29059 type = ix86_builtin_type_tab[(int) tcode];
29060 if (type != NULL)
29061 return type;
29062
29063 gcc_assert (tcode > IX86_BT_LAST_PRIM);
29064 if (tcode <= IX86_BT_LAST_VECT)
29065 {
29066 machine_mode mode;
29067
29068 index = tcode - IX86_BT_LAST_PRIM - 1;
29069 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
29070 mode = ix86_builtin_type_vect_mode[index];
29071
29072 type = build_vector_type_for_mode (itype, mode);
29073 }
29074 else
29075 {
29076 int quals;
29077
29078 index = tcode - IX86_BT_LAST_VECT - 1;
29079 if (tcode <= IX86_BT_LAST_PTR)
29080 quals = TYPE_UNQUALIFIED;
29081 else
29082 quals = TYPE_QUAL_CONST;
29083
29084 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
29085 if (quals != TYPE_UNQUALIFIED)
29086 itype = build_qualified_type (itype, quals);
29087
29088 type = build_pointer_type (itype);
29089 }
29090
29091 ix86_builtin_type_tab[(int) tcode] = type;
29092 return type;
29093 }
29094
29095 /* Table for the ix86 builtin function types. */
29096 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
29097
29098 /* Retrieve an element from the above table, building some of
29099 the types lazily. */
29100
29101 static tree
29102 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
29103 {
29104 tree type;
29105
29106 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
29107
29108 type = ix86_builtin_func_type_tab[(int) tcode];
29109 if (type != NULL)
29110 return type;
29111
29112 if (tcode <= IX86_BT_LAST_FUNC)
29113 {
29114 unsigned start = ix86_builtin_func_start[(int) tcode];
29115 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
29116 tree rtype, atype, args = void_list_node;
29117 unsigned i;
29118
29119 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
29120 for (i = after - 1; i > start; --i)
29121 {
29122 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
29123 args = tree_cons (NULL, atype, args);
29124 }
29125
29126 type = build_function_type (rtype, args);
29127 }
29128 else
29129 {
29130 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
29131 enum ix86_builtin_func_type icode;
29132
29133 icode = ix86_builtin_func_alias_base[index];
29134 type = ix86_get_builtin_func_type (icode);
29135 }
29136
29137 ix86_builtin_func_type_tab[(int) tcode] = type;
29138 return type;
29139 }
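
/* Note (added for exposition; not part of the original sources): for a
   non-alias code whose entry in i386-builtin-types.def lists, say,
   (RET, A1, A2), the reverse walk above builds the same type as

       build_function_type (RET, tree_cons (NULL_TREE, A1,
				  tree_cons (NULL_TREE, A2, void_list_node)));

   and the result is memoized in ix86_builtin_func_type_tab, just as
   ix86_get_builtin_type memoizes the non-function types.  */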
29140
29141
29142 /* Codes for all the SSE/MMX builtins. */
29143 enum ix86_builtins
29144 {
29145 IX86_BUILTIN_ADDPS,
29146 IX86_BUILTIN_ADDSS,
29147 IX86_BUILTIN_DIVPS,
29148 IX86_BUILTIN_DIVSS,
29149 IX86_BUILTIN_MULPS,
29150 IX86_BUILTIN_MULSS,
29151 IX86_BUILTIN_SUBPS,
29152 IX86_BUILTIN_SUBSS,
29153
29154 IX86_BUILTIN_CMPEQPS,
29155 IX86_BUILTIN_CMPLTPS,
29156 IX86_BUILTIN_CMPLEPS,
29157 IX86_BUILTIN_CMPGTPS,
29158 IX86_BUILTIN_CMPGEPS,
29159 IX86_BUILTIN_CMPNEQPS,
29160 IX86_BUILTIN_CMPNLTPS,
29161 IX86_BUILTIN_CMPNLEPS,
29162 IX86_BUILTIN_CMPNGTPS,
29163 IX86_BUILTIN_CMPNGEPS,
29164 IX86_BUILTIN_CMPORDPS,
29165 IX86_BUILTIN_CMPUNORDPS,
29166 IX86_BUILTIN_CMPEQSS,
29167 IX86_BUILTIN_CMPLTSS,
29168 IX86_BUILTIN_CMPLESS,
29169 IX86_BUILTIN_CMPNEQSS,
29170 IX86_BUILTIN_CMPNLTSS,
29171 IX86_BUILTIN_CMPNLESS,
29172 IX86_BUILTIN_CMPORDSS,
29173 IX86_BUILTIN_CMPUNORDSS,
29174
29175 IX86_BUILTIN_COMIEQSS,
29176 IX86_BUILTIN_COMILTSS,
29177 IX86_BUILTIN_COMILESS,
29178 IX86_BUILTIN_COMIGTSS,
29179 IX86_BUILTIN_COMIGESS,
29180 IX86_BUILTIN_COMINEQSS,
29181 IX86_BUILTIN_UCOMIEQSS,
29182 IX86_BUILTIN_UCOMILTSS,
29183 IX86_BUILTIN_UCOMILESS,
29184 IX86_BUILTIN_UCOMIGTSS,
29185 IX86_BUILTIN_UCOMIGESS,
29186 IX86_BUILTIN_UCOMINEQSS,
29187
29188 IX86_BUILTIN_CVTPI2PS,
29189 IX86_BUILTIN_CVTPS2PI,
29190 IX86_BUILTIN_CVTSI2SS,
29191 IX86_BUILTIN_CVTSI642SS,
29192 IX86_BUILTIN_CVTSS2SI,
29193 IX86_BUILTIN_CVTSS2SI64,
29194 IX86_BUILTIN_CVTTPS2PI,
29195 IX86_BUILTIN_CVTTSS2SI,
29196 IX86_BUILTIN_CVTTSS2SI64,
29197
29198 IX86_BUILTIN_MAXPS,
29199 IX86_BUILTIN_MAXSS,
29200 IX86_BUILTIN_MINPS,
29201 IX86_BUILTIN_MINSS,
29202
29203 IX86_BUILTIN_LOADUPS,
29204 IX86_BUILTIN_STOREUPS,
29205 IX86_BUILTIN_MOVSS,
29206
29207 IX86_BUILTIN_MOVHLPS,
29208 IX86_BUILTIN_MOVLHPS,
29209 IX86_BUILTIN_LOADHPS,
29210 IX86_BUILTIN_LOADLPS,
29211 IX86_BUILTIN_STOREHPS,
29212 IX86_BUILTIN_STORELPS,
29213
29214 IX86_BUILTIN_MASKMOVQ,
29215 IX86_BUILTIN_MOVMSKPS,
29216 IX86_BUILTIN_PMOVMSKB,
29217
29218 IX86_BUILTIN_MOVNTPS,
29219 IX86_BUILTIN_MOVNTQ,
29220
29221 IX86_BUILTIN_LOADDQU,
29222 IX86_BUILTIN_STOREDQU,
29223
29224 IX86_BUILTIN_PACKSSWB,
29225 IX86_BUILTIN_PACKSSDW,
29226 IX86_BUILTIN_PACKUSWB,
29227
29228 IX86_BUILTIN_PADDB,
29229 IX86_BUILTIN_PADDW,
29230 IX86_BUILTIN_PADDD,
29231 IX86_BUILTIN_PADDQ,
29232 IX86_BUILTIN_PADDSB,
29233 IX86_BUILTIN_PADDSW,
29234 IX86_BUILTIN_PADDUSB,
29235 IX86_BUILTIN_PADDUSW,
29236 IX86_BUILTIN_PSUBB,
29237 IX86_BUILTIN_PSUBW,
29238 IX86_BUILTIN_PSUBD,
29239 IX86_BUILTIN_PSUBQ,
29240 IX86_BUILTIN_PSUBSB,
29241 IX86_BUILTIN_PSUBSW,
29242 IX86_BUILTIN_PSUBUSB,
29243 IX86_BUILTIN_PSUBUSW,
29244
29245 IX86_BUILTIN_PAND,
29246 IX86_BUILTIN_PANDN,
29247 IX86_BUILTIN_POR,
29248 IX86_BUILTIN_PXOR,
29249
29250 IX86_BUILTIN_PAVGB,
29251 IX86_BUILTIN_PAVGW,
29252
29253 IX86_BUILTIN_PCMPEQB,
29254 IX86_BUILTIN_PCMPEQW,
29255 IX86_BUILTIN_PCMPEQD,
29256 IX86_BUILTIN_PCMPGTB,
29257 IX86_BUILTIN_PCMPGTW,
29258 IX86_BUILTIN_PCMPGTD,
29259
29260 IX86_BUILTIN_PMADDWD,
29261
29262 IX86_BUILTIN_PMAXSW,
29263 IX86_BUILTIN_PMAXUB,
29264 IX86_BUILTIN_PMINSW,
29265 IX86_BUILTIN_PMINUB,
29266
29267 IX86_BUILTIN_PMULHUW,
29268 IX86_BUILTIN_PMULHW,
29269 IX86_BUILTIN_PMULLW,
29270
29271 IX86_BUILTIN_PSADBW,
29272 IX86_BUILTIN_PSHUFW,
29273
29274 IX86_BUILTIN_PSLLW,
29275 IX86_BUILTIN_PSLLD,
29276 IX86_BUILTIN_PSLLQ,
29277 IX86_BUILTIN_PSRAW,
29278 IX86_BUILTIN_PSRAD,
29279 IX86_BUILTIN_PSRLW,
29280 IX86_BUILTIN_PSRLD,
29281 IX86_BUILTIN_PSRLQ,
29282 IX86_BUILTIN_PSLLWI,
29283 IX86_BUILTIN_PSLLDI,
29284 IX86_BUILTIN_PSLLQI,
29285 IX86_BUILTIN_PSRAWI,
29286 IX86_BUILTIN_PSRADI,
29287 IX86_BUILTIN_PSRLWI,
29288 IX86_BUILTIN_PSRLDI,
29289 IX86_BUILTIN_PSRLQI,
29290
29291 IX86_BUILTIN_PUNPCKHBW,
29292 IX86_BUILTIN_PUNPCKHWD,
29293 IX86_BUILTIN_PUNPCKHDQ,
29294 IX86_BUILTIN_PUNPCKLBW,
29295 IX86_BUILTIN_PUNPCKLWD,
29296 IX86_BUILTIN_PUNPCKLDQ,
29297
29298 IX86_BUILTIN_SHUFPS,
29299
29300 IX86_BUILTIN_RCPPS,
29301 IX86_BUILTIN_RCPSS,
29302 IX86_BUILTIN_RSQRTPS,
29303 IX86_BUILTIN_RSQRTPS_NR,
29304 IX86_BUILTIN_RSQRTSS,
29305 IX86_BUILTIN_RSQRTF,
29306 IX86_BUILTIN_SQRTPS,
29307 IX86_BUILTIN_SQRTPS_NR,
29308 IX86_BUILTIN_SQRTSS,
29309
29310 IX86_BUILTIN_UNPCKHPS,
29311 IX86_BUILTIN_UNPCKLPS,
29312
29313 IX86_BUILTIN_ANDPS,
29314 IX86_BUILTIN_ANDNPS,
29315 IX86_BUILTIN_ORPS,
29316 IX86_BUILTIN_XORPS,
29317
29318 IX86_BUILTIN_EMMS,
29319 IX86_BUILTIN_LDMXCSR,
29320 IX86_BUILTIN_STMXCSR,
29321 IX86_BUILTIN_SFENCE,
29322
29323 IX86_BUILTIN_FXSAVE,
29324 IX86_BUILTIN_FXRSTOR,
29325 IX86_BUILTIN_FXSAVE64,
29326 IX86_BUILTIN_FXRSTOR64,
29327
29328 IX86_BUILTIN_XSAVE,
29329 IX86_BUILTIN_XRSTOR,
29330 IX86_BUILTIN_XSAVE64,
29331 IX86_BUILTIN_XRSTOR64,
29332
29333 IX86_BUILTIN_XSAVEOPT,
29334 IX86_BUILTIN_XSAVEOPT64,
29335
29336 IX86_BUILTIN_XSAVEC,
29337 IX86_BUILTIN_XSAVEC64,
29338
29339 IX86_BUILTIN_XSAVES,
29340 IX86_BUILTIN_XRSTORS,
29341 IX86_BUILTIN_XSAVES64,
29342 IX86_BUILTIN_XRSTORS64,
29343
29344 /* 3DNow! Original */
29345 IX86_BUILTIN_FEMMS,
29346 IX86_BUILTIN_PAVGUSB,
29347 IX86_BUILTIN_PF2ID,
29348 IX86_BUILTIN_PFACC,
29349 IX86_BUILTIN_PFADD,
29350 IX86_BUILTIN_PFCMPEQ,
29351 IX86_BUILTIN_PFCMPGE,
29352 IX86_BUILTIN_PFCMPGT,
29353 IX86_BUILTIN_PFMAX,
29354 IX86_BUILTIN_PFMIN,
29355 IX86_BUILTIN_PFMUL,
29356 IX86_BUILTIN_PFRCP,
29357 IX86_BUILTIN_PFRCPIT1,
29358 IX86_BUILTIN_PFRCPIT2,
29359 IX86_BUILTIN_PFRSQIT1,
29360 IX86_BUILTIN_PFRSQRT,
29361 IX86_BUILTIN_PFSUB,
29362 IX86_BUILTIN_PFSUBR,
29363 IX86_BUILTIN_PI2FD,
29364 IX86_BUILTIN_PMULHRW,
29365
29366 /* 3DNow! Athlon Extensions */
29367 IX86_BUILTIN_PF2IW,
29368 IX86_BUILTIN_PFNACC,
29369 IX86_BUILTIN_PFPNACC,
29370 IX86_BUILTIN_PI2FW,
29371 IX86_BUILTIN_PSWAPDSI,
29372 IX86_BUILTIN_PSWAPDSF,
29373
29374 /* SSE2 */
29375 IX86_BUILTIN_ADDPD,
29376 IX86_BUILTIN_ADDSD,
29377 IX86_BUILTIN_DIVPD,
29378 IX86_BUILTIN_DIVSD,
29379 IX86_BUILTIN_MULPD,
29380 IX86_BUILTIN_MULSD,
29381 IX86_BUILTIN_SUBPD,
29382 IX86_BUILTIN_SUBSD,
29383
29384 IX86_BUILTIN_CMPEQPD,
29385 IX86_BUILTIN_CMPLTPD,
29386 IX86_BUILTIN_CMPLEPD,
29387 IX86_BUILTIN_CMPGTPD,
29388 IX86_BUILTIN_CMPGEPD,
29389 IX86_BUILTIN_CMPNEQPD,
29390 IX86_BUILTIN_CMPNLTPD,
29391 IX86_BUILTIN_CMPNLEPD,
29392 IX86_BUILTIN_CMPNGTPD,
29393 IX86_BUILTIN_CMPNGEPD,
29394 IX86_BUILTIN_CMPORDPD,
29395 IX86_BUILTIN_CMPUNORDPD,
29396 IX86_BUILTIN_CMPEQSD,
29397 IX86_BUILTIN_CMPLTSD,
29398 IX86_BUILTIN_CMPLESD,
29399 IX86_BUILTIN_CMPNEQSD,
29400 IX86_BUILTIN_CMPNLTSD,
29401 IX86_BUILTIN_CMPNLESD,
29402 IX86_BUILTIN_CMPORDSD,
29403 IX86_BUILTIN_CMPUNORDSD,
29404
29405 IX86_BUILTIN_COMIEQSD,
29406 IX86_BUILTIN_COMILTSD,
29407 IX86_BUILTIN_COMILESD,
29408 IX86_BUILTIN_COMIGTSD,
29409 IX86_BUILTIN_COMIGESD,
29410 IX86_BUILTIN_COMINEQSD,
29411 IX86_BUILTIN_UCOMIEQSD,
29412 IX86_BUILTIN_UCOMILTSD,
29413 IX86_BUILTIN_UCOMILESD,
29414 IX86_BUILTIN_UCOMIGTSD,
29415 IX86_BUILTIN_UCOMIGESD,
29416 IX86_BUILTIN_UCOMINEQSD,
29417
29418 IX86_BUILTIN_MAXPD,
29419 IX86_BUILTIN_MAXSD,
29420 IX86_BUILTIN_MINPD,
29421 IX86_BUILTIN_MINSD,
29422
29423 IX86_BUILTIN_ANDPD,
29424 IX86_BUILTIN_ANDNPD,
29425 IX86_BUILTIN_ORPD,
29426 IX86_BUILTIN_XORPD,
29427
29428 IX86_BUILTIN_SQRTPD,
29429 IX86_BUILTIN_SQRTSD,
29430
29431 IX86_BUILTIN_UNPCKHPD,
29432 IX86_BUILTIN_UNPCKLPD,
29433
29434 IX86_BUILTIN_SHUFPD,
29435
29436 IX86_BUILTIN_LOADUPD,
29437 IX86_BUILTIN_STOREUPD,
29438 IX86_BUILTIN_MOVSD,
29439
29440 IX86_BUILTIN_LOADHPD,
29441 IX86_BUILTIN_LOADLPD,
29442
29443 IX86_BUILTIN_CVTDQ2PD,
29444 IX86_BUILTIN_CVTDQ2PS,
29445
29446 IX86_BUILTIN_CVTPD2DQ,
29447 IX86_BUILTIN_CVTPD2PI,
29448 IX86_BUILTIN_CVTPD2PS,
29449 IX86_BUILTIN_CVTTPD2DQ,
29450 IX86_BUILTIN_CVTTPD2PI,
29451
29452 IX86_BUILTIN_CVTPI2PD,
29453 IX86_BUILTIN_CVTSI2SD,
29454 IX86_BUILTIN_CVTSI642SD,
29455
29456 IX86_BUILTIN_CVTSD2SI,
29457 IX86_BUILTIN_CVTSD2SI64,
29458 IX86_BUILTIN_CVTSD2SS,
29459 IX86_BUILTIN_CVTSS2SD,
29460 IX86_BUILTIN_CVTTSD2SI,
29461 IX86_BUILTIN_CVTTSD2SI64,
29462
29463 IX86_BUILTIN_CVTPS2DQ,
29464 IX86_BUILTIN_CVTPS2PD,
29465 IX86_BUILTIN_CVTTPS2DQ,
29466
29467 IX86_BUILTIN_MOVNTI,
29468 IX86_BUILTIN_MOVNTI64,
29469 IX86_BUILTIN_MOVNTPD,
29470 IX86_BUILTIN_MOVNTDQ,
29471
29472 IX86_BUILTIN_MOVQ128,
29473
29474 /* SSE2 MMX */
29475 IX86_BUILTIN_MASKMOVDQU,
29476 IX86_BUILTIN_MOVMSKPD,
29477 IX86_BUILTIN_PMOVMSKB128,
29478
29479 IX86_BUILTIN_PACKSSWB128,
29480 IX86_BUILTIN_PACKSSDW128,
29481 IX86_BUILTIN_PACKUSWB128,
29482
29483 IX86_BUILTIN_PADDB128,
29484 IX86_BUILTIN_PADDW128,
29485 IX86_BUILTIN_PADDD128,
29486 IX86_BUILTIN_PADDQ128,
29487 IX86_BUILTIN_PADDSB128,
29488 IX86_BUILTIN_PADDSW128,
29489 IX86_BUILTIN_PADDUSB128,
29490 IX86_BUILTIN_PADDUSW128,
29491 IX86_BUILTIN_PSUBB128,
29492 IX86_BUILTIN_PSUBW128,
29493 IX86_BUILTIN_PSUBD128,
29494 IX86_BUILTIN_PSUBQ128,
29495 IX86_BUILTIN_PSUBSB128,
29496 IX86_BUILTIN_PSUBSW128,
29497 IX86_BUILTIN_PSUBUSB128,
29498 IX86_BUILTIN_PSUBUSW128,
29499
29500 IX86_BUILTIN_PAND128,
29501 IX86_BUILTIN_PANDN128,
29502 IX86_BUILTIN_POR128,
29503 IX86_BUILTIN_PXOR128,
29504
29505 IX86_BUILTIN_PAVGB128,
29506 IX86_BUILTIN_PAVGW128,
29507
29508 IX86_BUILTIN_PCMPEQB128,
29509 IX86_BUILTIN_PCMPEQW128,
29510 IX86_BUILTIN_PCMPEQD128,
29511 IX86_BUILTIN_PCMPGTB128,
29512 IX86_BUILTIN_PCMPGTW128,
29513 IX86_BUILTIN_PCMPGTD128,
29514
29515 IX86_BUILTIN_PMADDWD128,
29516
29517 IX86_BUILTIN_PMAXSW128,
29518 IX86_BUILTIN_PMAXUB128,
29519 IX86_BUILTIN_PMINSW128,
29520 IX86_BUILTIN_PMINUB128,
29521
29522 IX86_BUILTIN_PMULUDQ,
29523 IX86_BUILTIN_PMULUDQ128,
29524 IX86_BUILTIN_PMULHUW128,
29525 IX86_BUILTIN_PMULHW128,
29526 IX86_BUILTIN_PMULLW128,
29527
29528 IX86_BUILTIN_PSADBW128,
29529 IX86_BUILTIN_PSHUFHW,
29530 IX86_BUILTIN_PSHUFLW,
29531 IX86_BUILTIN_PSHUFD,
29532
29533 IX86_BUILTIN_PSLLDQI128,
29534 IX86_BUILTIN_PSLLWI128,
29535 IX86_BUILTIN_PSLLDI128,
29536 IX86_BUILTIN_PSLLQI128,
29537 IX86_BUILTIN_PSRAWI128,
29538 IX86_BUILTIN_PSRADI128,
29539 IX86_BUILTIN_PSRLDQI128,
29540 IX86_BUILTIN_PSRLWI128,
29541 IX86_BUILTIN_PSRLDI128,
29542 IX86_BUILTIN_PSRLQI128,
29543
29544 IX86_BUILTIN_PSLLDQ128,
29545 IX86_BUILTIN_PSLLW128,
29546 IX86_BUILTIN_PSLLD128,
29547 IX86_BUILTIN_PSLLQ128,
29548 IX86_BUILTIN_PSRAW128,
29549 IX86_BUILTIN_PSRAD128,
29550 IX86_BUILTIN_PSRLW128,
29551 IX86_BUILTIN_PSRLD128,
29552 IX86_BUILTIN_PSRLQ128,
29553
29554 IX86_BUILTIN_PUNPCKHBW128,
29555 IX86_BUILTIN_PUNPCKHWD128,
29556 IX86_BUILTIN_PUNPCKHDQ128,
29557 IX86_BUILTIN_PUNPCKHQDQ128,
29558 IX86_BUILTIN_PUNPCKLBW128,
29559 IX86_BUILTIN_PUNPCKLWD128,
29560 IX86_BUILTIN_PUNPCKLDQ128,
29561 IX86_BUILTIN_PUNPCKLQDQ128,
29562
29563 IX86_BUILTIN_CLFLUSH,
29564 IX86_BUILTIN_MFENCE,
29565 IX86_BUILTIN_LFENCE,
29566 IX86_BUILTIN_PAUSE,
29567
29568 IX86_BUILTIN_FNSTENV,
29569 IX86_BUILTIN_FLDENV,
29570 IX86_BUILTIN_FNSTSW,
29571 IX86_BUILTIN_FNCLEX,
29572
29573 IX86_BUILTIN_BSRSI,
29574 IX86_BUILTIN_BSRDI,
29575 IX86_BUILTIN_RDPMC,
29576 IX86_BUILTIN_RDTSC,
29577 IX86_BUILTIN_RDTSCP,
29578 IX86_BUILTIN_ROLQI,
29579 IX86_BUILTIN_ROLHI,
29580 IX86_BUILTIN_RORQI,
29581 IX86_BUILTIN_RORHI,
29582
29583 /* SSE3. */
29584 IX86_BUILTIN_ADDSUBPS,
29585 IX86_BUILTIN_HADDPS,
29586 IX86_BUILTIN_HSUBPS,
29587 IX86_BUILTIN_MOVSHDUP,
29588 IX86_BUILTIN_MOVSLDUP,
29589 IX86_BUILTIN_ADDSUBPD,
29590 IX86_BUILTIN_HADDPD,
29591 IX86_BUILTIN_HSUBPD,
29592 IX86_BUILTIN_LDDQU,
29593
29594 IX86_BUILTIN_MONITOR,
29595 IX86_BUILTIN_MWAIT,
29596
29597 /* SSSE3. */
29598 IX86_BUILTIN_PHADDW,
29599 IX86_BUILTIN_PHADDD,
29600 IX86_BUILTIN_PHADDSW,
29601 IX86_BUILTIN_PHSUBW,
29602 IX86_BUILTIN_PHSUBD,
29603 IX86_BUILTIN_PHSUBSW,
29604 IX86_BUILTIN_PMADDUBSW,
29605 IX86_BUILTIN_PMULHRSW,
29606 IX86_BUILTIN_PSHUFB,
29607 IX86_BUILTIN_PSIGNB,
29608 IX86_BUILTIN_PSIGNW,
29609 IX86_BUILTIN_PSIGND,
29610 IX86_BUILTIN_PALIGNR,
29611 IX86_BUILTIN_PABSB,
29612 IX86_BUILTIN_PABSW,
29613 IX86_BUILTIN_PABSD,
29614
29615 IX86_BUILTIN_PHADDW128,
29616 IX86_BUILTIN_PHADDD128,
29617 IX86_BUILTIN_PHADDSW128,
29618 IX86_BUILTIN_PHSUBW128,
29619 IX86_BUILTIN_PHSUBD128,
29620 IX86_BUILTIN_PHSUBSW128,
29621 IX86_BUILTIN_PMADDUBSW128,
29622 IX86_BUILTIN_PMULHRSW128,
29623 IX86_BUILTIN_PSHUFB128,
29624 IX86_BUILTIN_PSIGNB128,
29625 IX86_BUILTIN_PSIGNW128,
29626 IX86_BUILTIN_PSIGND128,
29627 IX86_BUILTIN_PALIGNR128,
29628 IX86_BUILTIN_PABSB128,
29629 IX86_BUILTIN_PABSW128,
29630 IX86_BUILTIN_PABSD128,
29631
29632 /* AMDFAM10 - SSE4A New Instructions. */
29633 IX86_BUILTIN_MOVNTSD,
29634 IX86_BUILTIN_MOVNTSS,
29635 IX86_BUILTIN_EXTRQI,
29636 IX86_BUILTIN_EXTRQ,
29637 IX86_BUILTIN_INSERTQI,
29638 IX86_BUILTIN_INSERTQ,
29639
29640 /* SSE4.1. */
29641 IX86_BUILTIN_BLENDPD,
29642 IX86_BUILTIN_BLENDPS,
29643 IX86_BUILTIN_BLENDVPD,
29644 IX86_BUILTIN_BLENDVPS,
29645 IX86_BUILTIN_PBLENDVB128,
29646 IX86_BUILTIN_PBLENDW128,
29647
29648 IX86_BUILTIN_DPPD,
29649 IX86_BUILTIN_DPPS,
29650
29651 IX86_BUILTIN_INSERTPS128,
29652
29653 IX86_BUILTIN_MOVNTDQA,
29654 IX86_BUILTIN_MPSADBW128,
29655 IX86_BUILTIN_PACKUSDW128,
29656 IX86_BUILTIN_PCMPEQQ,
29657 IX86_BUILTIN_PHMINPOSUW128,
29658
29659 IX86_BUILTIN_PMAXSB128,
29660 IX86_BUILTIN_PMAXSD128,
29661 IX86_BUILTIN_PMAXUD128,
29662 IX86_BUILTIN_PMAXUW128,
29663
29664 IX86_BUILTIN_PMINSB128,
29665 IX86_BUILTIN_PMINSD128,
29666 IX86_BUILTIN_PMINUD128,
29667 IX86_BUILTIN_PMINUW128,
29668
29669 IX86_BUILTIN_PMOVSXBW128,
29670 IX86_BUILTIN_PMOVSXBD128,
29671 IX86_BUILTIN_PMOVSXBQ128,
29672 IX86_BUILTIN_PMOVSXWD128,
29673 IX86_BUILTIN_PMOVSXWQ128,
29674 IX86_BUILTIN_PMOVSXDQ128,
29675
29676 IX86_BUILTIN_PMOVZXBW128,
29677 IX86_BUILTIN_PMOVZXBD128,
29678 IX86_BUILTIN_PMOVZXBQ128,
29679 IX86_BUILTIN_PMOVZXWD128,
29680 IX86_BUILTIN_PMOVZXWQ128,
29681 IX86_BUILTIN_PMOVZXDQ128,
29682
29683 IX86_BUILTIN_PMULDQ128,
29684 IX86_BUILTIN_PMULLD128,
29685
29686 IX86_BUILTIN_ROUNDSD,
29687 IX86_BUILTIN_ROUNDSS,
29688
29689 IX86_BUILTIN_ROUNDPD,
29690 IX86_BUILTIN_ROUNDPS,
29691
29692 IX86_BUILTIN_FLOORPD,
29693 IX86_BUILTIN_CEILPD,
29694 IX86_BUILTIN_TRUNCPD,
29695 IX86_BUILTIN_RINTPD,
29696 IX86_BUILTIN_ROUNDPD_AZ,
29697
29698 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
29699 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
29700 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
29701
29702 IX86_BUILTIN_FLOORPS,
29703 IX86_BUILTIN_CEILPS,
29704 IX86_BUILTIN_TRUNCPS,
29705 IX86_BUILTIN_RINTPS,
29706 IX86_BUILTIN_ROUNDPS_AZ,
29707
29708 IX86_BUILTIN_FLOORPS_SFIX,
29709 IX86_BUILTIN_CEILPS_SFIX,
29710 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
29711
29712 IX86_BUILTIN_PTESTZ,
29713 IX86_BUILTIN_PTESTC,
29714 IX86_BUILTIN_PTESTNZC,
29715
29716 IX86_BUILTIN_VEC_INIT_V2SI,
29717 IX86_BUILTIN_VEC_INIT_V4HI,
29718 IX86_BUILTIN_VEC_INIT_V8QI,
29719 IX86_BUILTIN_VEC_EXT_V2DF,
29720 IX86_BUILTIN_VEC_EXT_V2DI,
29721 IX86_BUILTIN_VEC_EXT_V4SF,
29722 IX86_BUILTIN_VEC_EXT_V4SI,
29723 IX86_BUILTIN_VEC_EXT_V8HI,
29724 IX86_BUILTIN_VEC_EXT_V2SI,
29725 IX86_BUILTIN_VEC_EXT_V4HI,
29726 IX86_BUILTIN_VEC_EXT_V16QI,
29727 IX86_BUILTIN_VEC_SET_V2DI,
29728 IX86_BUILTIN_VEC_SET_V4SF,
29729 IX86_BUILTIN_VEC_SET_V4SI,
29730 IX86_BUILTIN_VEC_SET_V8HI,
29731 IX86_BUILTIN_VEC_SET_V4HI,
29732 IX86_BUILTIN_VEC_SET_V16QI,
29733
29734 IX86_BUILTIN_VEC_PACK_SFIX,
29735 IX86_BUILTIN_VEC_PACK_SFIX256,
29736
29737 /* SSE4.2. */
29738 IX86_BUILTIN_CRC32QI,
29739 IX86_BUILTIN_CRC32HI,
29740 IX86_BUILTIN_CRC32SI,
29741 IX86_BUILTIN_CRC32DI,
29742
29743 IX86_BUILTIN_PCMPESTRI128,
29744 IX86_BUILTIN_PCMPESTRM128,
29745 IX86_BUILTIN_PCMPESTRA128,
29746 IX86_BUILTIN_PCMPESTRC128,
29747 IX86_BUILTIN_PCMPESTRO128,
29748 IX86_BUILTIN_PCMPESTRS128,
29749 IX86_BUILTIN_PCMPESTRZ128,
29750 IX86_BUILTIN_PCMPISTRI128,
29751 IX86_BUILTIN_PCMPISTRM128,
29752 IX86_BUILTIN_PCMPISTRA128,
29753 IX86_BUILTIN_PCMPISTRC128,
29754 IX86_BUILTIN_PCMPISTRO128,
29755 IX86_BUILTIN_PCMPISTRS128,
29756 IX86_BUILTIN_PCMPISTRZ128,
29757
29758 IX86_BUILTIN_PCMPGTQ,
29759
29760 /* AES instructions */
29761 IX86_BUILTIN_AESENC128,
29762 IX86_BUILTIN_AESENCLAST128,
29763 IX86_BUILTIN_AESDEC128,
29764 IX86_BUILTIN_AESDECLAST128,
29765 IX86_BUILTIN_AESIMC128,
29766 IX86_BUILTIN_AESKEYGENASSIST128,
29767
29768 /* PCLMUL instruction */
29769 IX86_BUILTIN_PCLMULQDQ128,
29770
29771 /* AVX */
29772 IX86_BUILTIN_ADDPD256,
29773 IX86_BUILTIN_ADDPS256,
29774 IX86_BUILTIN_ADDSUBPD256,
29775 IX86_BUILTIN_ADDSUBPS256,
29776 IX86_BUILTIN_ANDPD256,
29777 IX86_BUILTIN_ANDPS256,
29778 IX86_BUILTIN_ANDNPD256,
29779 IX86_BUILTIN_ANDNPS256,
29780 IX86_BUILTIN_BLENDPD256,
29781 IX86_BUILTIN_BLENDPS256,
29782 IX86_BUILTIN_BLENDVPD256,
29783 IX86_BUILTIN_BLENDVPS256,
29784 IX86_BUILTIN_DIVPD256,
29785 IX86_BUILTIN_DIVPS256,
29786 IX86_BUILTIN_DPPS256,
29787 IX86_BUILTIN_HADDPD256,
29788 IX86_BUILTIN_HADDPS256,
29789 IX86_BUILTIN_HSUBPD256,
29790 IX86_BUILTIN_HSUBPS256,
29791 IX86_BUILTIN_MAXPD256,
29792 IX86_BUILTIN_MAXPS256,
29793 IX86_BUILTIN_MINPD256,
29794 IX86_BUILTIN_MINPS256,
29795 IX86_BUILTIN_MULPD256,
29796 IX86_BUILTIN_MULPS256,
29797 IX86_BUILTIN_ORPD256,
29798 IX86_BUILTIN_ORPS256,
29799 IX86_BUILTIN_SHUFPD256,
29800 IX86_BUILTIN_SHUFPS256,
29801 IX86_BUILTIN_SUBPD256,
29802 IX86_BUILTIN_SUBPS256,
29803 IX86_BUILTIN_XORPD256,
29804 IX86_BUILTIN_XORPS256,
29805 IX86_BUILTIN_CMPSD,
29806 IX86_BUILTIN_CMPSS,
29807 IX86_BUILTIN_CMPPD,
29808 IX86_BUILTIN_CMPPS,
29809 IX86_BUILTIN_CMPPD256,
29810 IX86_BUILTIN_CMPPS256,
29811 IX86_BUILTIN_CVTDQ2PD256,
29812 IX86_BUILTIN_CVTDQ2PS256,
29813 IX86_BUILTIN_CVTPD2PS256,
29814 IX86_BUILTIN_CVTPS2DQ256,
29815 IX86_BUILTIN_CVTPS2PD256,
29816 IX86_BUILTIN_CVTTPD2DQ256,
29817 IX86_BUILTIN_CVTPD2DQ256,
29818 IX86_BUILTIN_CVTTPS2DQ256,
29819 IX86_BUILTIN_EXTRACTF128PD256,
29820 IX86_BUILTIN_EXTRACTF128PS256,
29821 IX86_BUILTIN_EXTRACTF128SI256,
29822 IX86_BUILTIN_VZEROALL,
29823 IX86_BUILTIN_VZEROUPPER,
29824 IX86_BUILTIN_VPERMILVARPD,
29825 IX86_BUILTIN_VPERMILVARPS,
29826 IX86_BUILTIN_VPERMILVARPD256,
29827 IX86_BUILTIN_VPERMILVARPS256,
29828 IX86_BUILTIN_VPERMILPD,
29829 IX86_BUILTIN_VPERMILPS,
29830 IX86_BUILTIN_VPERMILPD256,
29831 IX86_BUILTIN_VPERMILPS256,
29832 IX86_BUILTIN_VPERMIL2PD,
29833 IX86_BUILTIN_VPERMIL2PS,
29834 IX86_BUILTIN_VPERMIL2PD256,
29835 IX86_BUILTIN_VPERMIL2PS256,
29836 IX86_BUILTIN_VPERM2F128PD256,
29837 IX86_BUILTIN_VPERM2F128PS256,
29838 IX86_BUILTIN_VPERM2F128SI256,
29839 IX86_BUILTIN_VBROADCASTSS,
29840 IX86_BUILTIN_VBROADCASTSD256,
29841 IX86_BUILTIN_VBROADCASTSS256,
29842 IX86_BUILTIN_VBROADCASTPD256,
29843 IX86_BUILTIN_VBROADCASTPS256,
29844 IX86_BUILTIN_VINSERTF128PD256,
29845 IX86_BUILTIN_VINSERTF128PS256,
29846 IX86_BUILTIN_VINSERTF128SI256,
29847 IX86_BUILTIN_LOADUPD256,
29848 IX86_BUILTIN_LOADUPS256,
29849 IX86_BUILTIN_STOREUPD256,
29850 IX86_BUILTIN_STOREUPS256,
29851 IX86_BUILTIN_LDDQU256,
29852 IX86_BUILTIN_MOVNTDQ256,
29853 IX86_BUILTIN_MOVNTPD256,
29854 IX86_BUILTIN_MOVNTPS256,
29855 IX86_BUILTIN_LOADDQU256,
29856 IX86_BUILTIN_STOREDQU256,
29857 IX86_BUILTIN_MASKLOADPD,
29858 IX86_BUILTIN_MASKLOADPS,
29859 IX86_BUILTIN_MASKSTOREPD,
29860 IX86_BUILTIN_MASKSTOREPS,
29861 IX86_BUILTIN_MASKLOADPD256,
29862 IX86_BUILTIN_MASKLOADPS256,
29863 IX86_BUILTIN_MASKSTOREPD256,
29864 IX86_BUILTIN_MASKSTOREPS256,
29865 IX86_BUILTIN_MOVSHDUP256,
29866 IX86_BUILTIN_MOVSLDUP256,
29867 IX86_BUILTIN_MOVDDUP256,
29868
29869 IX86_BUILTIN_SQRTPD256,
29870 IX86_BUILTIN_SQRTPS256,
29871 IX86_BUILTIN_SQRTPS_NR256,
29872 IX86_BUILTIN_RSQRTPS256,
29873 IX86_BUILTIN_RSQRTPS_NR256,
29874
29875 IX86_BUILTIN_RCPPS256,
29876
29877 IX86_BUILTIN_ROUNDPD256,
29878 IX86_BUILTIN_ROUNDPS256,
29879
29880 IX86_BUILTIN_FLOORPD256,
29881 IX86_BUILTIN_CEILPD256,
29882 IX86_BUILTIN_TRUNCPD256,
29883 IX86_BUILTIN_RINTPD256,
29884 IX86_BUILTIN_ROUNDPD_AZ256,
29885
29886 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
29887 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
29888 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
29889
29890 IX86_BUILTIN_FLOORPS256,
29891 IX86_BUILTIN_CEILPS256,
29892 IX86_BUILTIN_TRUNCPS256,
29893 IX86_BUILTIN_RINTPS256,
29894 IX86_BUILTIN_ROUNDPS_AZ256,
29895
29896 IX86_BUILTIN_FLOORPS_SFIX256,
29897 IX86_BUILTIN_CEILPS_SFIX256,
29898 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
29899
29900 IX86_BUILTIN_UNPCKHPD256,
29901 IX86_BUILTIN_UNPCKLPD256,
29902 IX86_BUILTIN_UNPCKHPS256,
29903 IX86_BUILTIN_UNPCKLPS256,
29904
29905 IX86_BUILTIN_SI256_SI,
29906 IX86_BUILTIN_PS256_PS,
29907 IX86_BUILTIN_PD256_PD,
29908 IX86_BUILTIN_SI_SI256,
29909 IX86_BUILTIN_PS_PS256,
29910 IX86_BUILTIN_PD_PD256,
29911
29912 IX86_BUILTIN_VTESTZPD,
29913 IX86_BUILTIN_VTESTCPD,
29914 IX86_BUILTIN_VTESTNZCPD,
29915 IX86_BUILTIN_VTESTZPS,
29916 IX86_BUILTIN_VTESTCPS,
29917 IX86_BUILTIN_VTESTNZCPS,
29918 IX86_BUILTIN_VTESTZPD256,
29919 IX86_BUILTIN_VTESTCPD256,
29920 IX86_BUILTIN_VTESTNZCPD256,
29921 IX86_BUILTIN_VTESTZPS256,
29922 IX86_BUILTIN_VTESTCPS256,
29923 IX86_BUILTIN_VTESTNZCPS256,
29924 IX86_BUILTIN_PTESTZ256,
29925 IX86_BUILTIN_PTESTC256,
29926 IX86_BUILTIN_PTESTNZC256,
29927
29928 IX86_BUILTIN_MOVMSKPD256,
29929 IX86_BUILTIN_MOVMSKPS256,
29930
29931 /* AVX2 */
29932 IX86_BUILTIN_MPSADBW256,
29933 IX86_BUILTIN_PABSB256,
29934 IX86_BUILTIN_PABSW256,
29935 IX86_BUILTIN_PABSD256,
29936 IX86_BUILTIN_PACKSSDW256,
29937 IX86_BUILTIN_PACKSSWB256,
29938 IX86_BUILTIN_PACKUSDW256,
29939 IX86_BUILTIN_PACKUSWB256,
29940 IX86_BUILTIN_PADDB256,
29941 IX86_BUILTIN_PADDW256,
29942 IX86_BUILTIN_PADDD256,
29943 IX86_BUILTIN_PADDQ256,
29944 IX86_BUILTIN_PADDSB256,
29945 IX86_BUILTIN_PADDSW256,
29946 IX86_BUILTIN_PADDUSB256,
29947 IX86_BUILTIN_PADDUSW256,
29948 IX86_BUILTIN_PALIGNR256,
29949 IX86_BUILTIN_AND256I,
29950 IX86_BUILTIN_ANDNOT256I,
29951 IX86_BUILTIN_PAVGB256,
29952 IX86_BUILTIN_PAVGW256,
29953 IX86_BUILTIN_PBLENDVB256,
29954 IX86_BUILTIN_PBLENDVW256,
29955 IX86_BUILTIN_PCMPEQB256,
29956 IX86_BUILTIN_PCMPEQW256,
29957 IX86_BUILTIN_PCMPEQD256,
29958 IX86_BUILTIN_PCMPEQQ256,
29959 IX86_BUILTIN_PCMPGTB256,
29960 IX86_BUILTIN_PCMPGTW256,
29961 IX86_BUILTIN_PCMPGTD256,
29962 IX86_BUILTIN_PCMPGTQ256,
29963 IX86_BUILTIN_PHADDW256,
29964 IX86_BUILTIN_PHADDD256,
29965 IX86_BUILTIN_PHADDSW256,
29966 IX86_BUILTIN_PHSUBW256,
29967 IX86_BUILTIN_PHSUBD256,
29968 IX86_BUILTIN_PHSUBSW256,
29969 IX86_BUILTIN_PMADDUBSW256,
29970 IX86_BUILTIN_PMADDWD256,
29971 IX86_BUILTIN_PMAXSB256,
29972 IX86_BUILTIN_PMAXSW256,
29973 IX86_BUILTIN_PMAXSD256,
29974 IX86_BUILTIN_PMAXUB256,
29975 IX86_BUILTIN_PMAXUW256,
29976 IX86_BUILTIN_PMAXUD256,
29977 IX86_BUILTIN_PMINSB256,
29978 IX86_BUILTIN_PMINSW256,
29979 IX86_BUILTIN_PMINSD256,
29980 IX86_BUILTIN_PMINUB256,
29981 IX86_BUILTIN_PMINUW256,
29982 IX86_BUILTIN_PMINUD256,
29983 IX86_BUILTIN_PMOVMSKB256,
29984 IX86_BUILTIN_PMOVSXBW256,
29985 IX86_BUILTIN_PMOVSXBD256,
29986 IX86_BUILTIN_PMOVSXBQ256,
29987 IX86_BUILTIN_PMOVSXWD256,
29988 IX86_BUILTIN_PMOVSXWQ256,
29989 IX86_BUILTIN_PMOVSXDQ256,
29990 IX86_BUILTIN_PMOVZXBW256,
29991 IX86_BUILTIN_PMOVZXBD256,
29992 IX86_BUILTIN_PMOVZXBQ256,
29993 IX86_BUILTIN_PMOVZXWD256,
29994 IX86_BUILTIN_PMOVZXWQ256,
29995 IX86_BUILTIN_PMOVZXDQ256,
29996 IX86_BUILTIN_PMULDQ256,
29997 IX86_BUILTIN_PMULHRSW256,
29998 IX86_BUILTIN_PMULHUW256,
29999 IX86_BUILTIN_PMULHW256,
30000 IX86_BUILTIN_PMULLW256,
30001 IX86_BUILTIN_PMULLD256,
30002 IX86_BUILTIN_PMULUDQ256,
30003 IX86_BUILTIN_POR256,
30004 IX86_BUILTIN_PSADBW256,
30005 IX86_BUILTIN_PSHUFB256,
30006 IX86_BUILTIN_PSHUFD256,
30007 IX86_BUILTIN_PSHUFHW256,
30008 IX86_BUILTIN_PSHUFLW256,
30009 IX86_BUILTIN_PSIGNB256,
30010 IX86_BUILTIN_PSIGNW256,
30011 IX86_BUILTIN_PSIGND256,
30012 IX86_BUILTIN_PSLLDQI256,
30013 IX86_BUILTIN_PSLLWI256,
30014 IX86_BUILTIN_PSLLW256,
30015 IX86_BUILTIN_PSLLDI256,
30016 IX86_BUILTIN_PSLLD256,
30017 IX86_BUILTIN_PSLLQI256,
30018 IX86_BUILTIN_PSLLQ256,
30019 IX86_BUILTIN_PSRAWI256,
30020 IX86_BUILTIN_PSRAW256,
30021 IX86_BUILTIN_PSRADI256,
30022 IX86_BUILTIN_PSRAD256,
30023 IX86_BUILTIN_PSRLDQI256,
30024 IX86_BUILTIN_PSRLWI256,
30025 IX86_BUILTIN_PSRLW256,
30026 IX86_BUILTIN_PSRLDI256,
30027 IX86_BUILTIN_PSRLD256,
30028 IX86_BUILTIN_PSRLQI256,
30029 IX86_BUILTIN_PSRLQ256,
30030 IX86_BUILTIN_PSUBB256,
30031 IX86_BUILTIN_PSUBW256,
30032 IX86_BUILTIN_PSUBD256,
30033 IX86_BUILTIN_PSUBQ256,
30034 IX86_BUILTIN_PSUBSB256,
30035 IX86_BUILTIN_PSUBSW256,
30036 IX86_BUILTIN_PSUBUSB256,
30037 IX86_BUILTIN_PSUBUSW256,
30038 IX86_BUILTIN_PUNPCKHBW256,
30039 IX86_BUILTIN_PUNPCKHWD256,
30040 IX86_BUILTIN_PUNPCKHDQ256,
30041 IX86_BUILTIN_PUNPCKHQDQ256,
30042 IX86_BUILTIN_PUNPCKLBW256,
30043 IX86_BUILTIN_PUNPCKLWD256,
30044 IX86_BUILTIN_PUNPCKLDQ256,
30045 IX86_BUILTIN_PUNPCKLQDQ256,
30046 IX86_BUILTIN_PXOR256,
30047 IX86_BUILTIN_MOVNTDQA256,
30048 IX86_BUILTIN_VBROADCASTSS_PS,
30049 IX86_BUILTIN_VBROADCASTSS_PS256,
30050 IX86_BUILTIN_VBROADCASTSD_PD256,
30051 IX86_BUILTIN_VBROADCASTSI256,
30052 IX86_BUILTIN_PBLENDD256,
30053 IX86_BUILTIN_PBLENDD128,
30054 IX86_BUILTIN_PBROADCASTB256,
30055 IX86_BUILTIN_PBROADCASTW256,
30056 IX86_BUILTIN_PBROADCASTD256,
30057 IX86_BUILTIN_PBROADCASTQ256,
30058 IX86_BUILTIN_PBROADCASTB128,
30059 IX86_BUILTIN_PBROADCASTW128,
30060 IX86_BUILTIN_PBROADCASTD128,
30061 IX86_BUILTIN_PBROADCASTQ128,
30062 IX86_BUILTIN_VPERMVARSI256,
30063 IX86_BUILTIN_VPERMDF256,
30064 IX86_BUILTIN_VPERMVARSF256,
30065 IX86_BUILTIN_VPERMDI256,
30066 IX86_BUILTIN_VPERMTI256,
30067 IX86_BUILTIN_VEXTRACT128I256,
30068 IX86_BUILTIN_VINSERT128I256,
30069 IX86_BUILTIN_MASKLOADD,
30070 IX86_BUILTIN_MASKLOADQ,
30071 IX86_BUILTIN_MASKLOADD256,
30072 IX86_BUILTIN_MASKLOADQ256,
30073 IX86_BUILTIN_MASKSTORED,
30074 IX86_BUILTIN_MASKSTOREQ,
30075 IX86_BUILTIN_MASKSTORED256,
30076 IX86_BUILTIN_MASKSTOREQ256,
30077 IX86_BUILTIN_PSLLVV4DI,
30078 IX86_BUILTIN_PSLLVV2DI,
30079 IX86_BUILTIN_PSLLVV8SI,
30080 IX86_BUILTIN_PSLLVV4SI,
30081 IX86_BUILTIN_PSRAVV8SI,
30082 IX86_BUILTIN_PSRAVV4SI,
30083 IX86_BUILTIN_PSRLVV4DI,
30084 IX86_BUILTIN_PSRLVV2DI,
30085 IX86_BUILTIN_PSRLVV8SI,
30086 IX86_BUILTIN_PSRLVV4SI,
30087
30088 IX86_BUILTIN_GATHERSIV2DF,
30089 IX86_BUILTIN_GATHERSIV4DF,
30090 IX86_BUILTIN_GATHERDIV2DF,
30091 IX86_BUILTIN_GATHERDIV4DF,
30092 IX86_BUILTIN_GATHERSIV4SF,
30093 IX86_BUILTIN_GATHERSIV8SF,
30094 IX86_BUILTIN_GATHERDIV4SF,
30095 IX86_BUILTIN_GATHERDIV8SF,
30096 IX86_BUILTIN_GATHERSIV2DI,
30097 IX86_BUILTIN_GATHERSIV4DI,
30098 IX86_BUILTIN_GATHERDIV2DI,
30099 IX86_BUILTIN_GATHERDIV4DI,
30100 IX86_BUILTIN_GATHERSIV4SI,
30101 IX86_BUILTIN_GATHERSIV8SI,
30102 IX86_BUILTIN_GATHERDIV4SI,
30103 IX86_BUILTIN_GATHERDIV8SI,
30104
30105 /* AVX512F */
30106 IX86_BUILTIN_SI512_SI256,
30107 IX86_BUILTIN_PD512_PD256,
30108 IX86_BUILTIN_PS512_PS256,
30109 IX86_BUILTIN_SI512_SI,
30110 IX86_BUILTIN_PD512_PD,
30111 IX86_BUILTIN_PS512_PS,
30112 IX86_BUILTIN_ADDPD512,
30113 IX86_BUILTIN_ADDPS512,
30114 IX86_BUILTIN_ADDSD_ROUND,
30115 IX86_BUILTIN_ADDSS_ROUND,
30116 IX86_BUILTIN_ALIGND512,
30117 IX86_BUILTIN_ALIGNQ512,
30118 IX86_BUILTIN_BLENDMD512,
30119 IX86_BUILTIN_BLENDMPD512,
30120 IX86_BUILTIN_BLENDMPS512,
30121 IX86_BUILTIN_BLENDMQ512,
30122 IX86_BUILTIN_BROADCASTF32X4_512,
30123 IX86_BUILTIN_BROADCASTF64X4_512,
30124 IX86_BUILTIN_BROADCASTI32X4_512,
30125 IX86_BUILTIN_BROADCASTI64X4_512,
30126 IX86_BUILTIN_BROADCASTSD512,
30127 IX86_BUILTIN_BROADCASTSS512,
30128 IX86_BUILTIN_CMPD512,
30129 IX86_BUILTIN_CMPPD512,
30130 IX86_BUILTIN_CMPPS512,
30131 IX86_BUILTIN_CMPQ512,
30132 IX86_BUILTIN_CMPSD_MASK,
30133 IX86_BUILTIN_CMPSS_MASK,
30134 IX86_BUILTIN_COMIDF,
30135 IX86_BUILTIN_COMISF,
30136 IX86_BUILTIN_COMPRESSPD512,
30137 IX86_BUILTIN_COMPRESSPDSTORE512,
30138 IX86_BUILTIN_COMPRESSPS512,
30139 IX86_BUILTIN_COMPRESSPSSTORE512,
30140 IX86_BUILTIN_CVTDQ2PD512,
30141 IX86_BUILTIN_CVTDQ2PS512,
30142 IX86_BUILTIN_CVTPD2DQ512,
30143 IX86_BUILTIN_CVTPD2PS512,
30144 IX86_BUILTIN_CVTPD2UDQ512,
30145 IX86_BUILTIN_CVTPH2PS512,
30146 IX86_BUILTIN_CVTPS2DQ512,
30147 IX86_BUILTIN_CVTPS2PD512,
30148 IX86_BUILTIN_CVTPS2PH512,
30149 IX86_BUILTIN_CVTPS2UDQ512,
30150 IX86_BUILTIN_CVTSD2SS_ROUND,
30151 IX86_BUILTIN_CVTSI2SD64,
30152 IX86_BUILTIN_CVTSI2SS32,
30153 IX86_BUILTIN_CVTSI2SS64,
30154 IX86_BUILTIN_CVTSS2SD_ROUND,
30155 IX86_BUILTIN_CVTTPD2DQ512,
30156 IX86_BUILTIN_CVTTPD2UDQ512,
30157 IX86_BUILTIN_CVTTPS2DQ512,
30158 IX86_BUILTIN_CVTTPS2UDQ512,
30159 IX86_BUILTIN_CVTUDQ2PD512,
30160 IX86_BUILTIN_CVTUDQ2PS512,
30161 IX86_BUILTIN_CVTUSI2SD32,
30162 IX86_BUILTIN_CVTUSI2SD64,
30163 IX86_BUILTIN_CVTUSI2SS32,
30164 IX86_BUILTIN_CVTUSI2SS64,
30165 IX86_BUILTIN_DIVPD512,
30166 IX86_BUILTIN_DIVPS512,
30167 IX86_BUILTIN_DIVSD_ROUND,
30168 IX86_BUILTIN_DIVSS_ROUND,
30169 IX86_BUILTIN_EXPANDPD512,
30170 IX86_BUILTIN_EXPANDPD512Z,
30171 IX86_BUILTIN_EXPANDPDLOAD512,
30172 IX86_BUILTIN_EXPANDPDLOAD512Z,
30173 IX86_BUILTIN_EXPANDPS512,
30174 IX86_BUILTIN_EXPANDPS512Z,
30175 IX86_BUILTIN_EXPANDPSLOAD512,
30176 IX86_BUILTIN_EXPANDPSLOAD512Z,
30177 IX86_BUILTIN_EXTRACTF32X4,
30178 IX86_BUILTIN_EXTRACTF64X4,
30179 IX86_BUILTIN_EXTRACTI32X4,
30180 IX86_BUILTIN_EXTRACTI64X4,
30181 IX86_BUILTIN_FIXUPIMMPD512_MASK,
30182 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
30183 IX86_BUILTIN_FIXUPIMMPS512_MASK,
30184 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
30185 IX86_BUILTIN_FIXUPIMMSD128_MASK,
30186 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
30187 IX86_BUILTIN_FIXUPIMMSS128_MASK,
30188 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
30189 IX86_BUILTIN_GETEXPPD512,
30190 IX86_BUILTIN_GETEXPPS512,
30191 IX86_BUILTIN_GETEXPSD128,
30192 IX86_BUILTIN_GETEXPSS128,
30193 IX86_BUILTIN_GETMANTPD512,
30194 IX86_BUILTIN_GETMANTPS512,
30195 IX86_BUILTIN_GETMANTSD128,
30196 IX86_BUILTIN_GETMANTSS128,
30197 IX86_BUILTIN_INSERTF32X4,
30198 IX86_BUILTIN_INSERTF64X4,
30199 IX86_BUILTIN_INSERTI32X4,
30200 IX86_BUILTIN_INSERTI64X4,
30201 IX86_BUILTIN_LOADAPD512,
30202 IX86_BUILTIN_LOADAPS512,
30203 IX86_BUILTIN_LOADDQUDI512,
30204 IX86_BUILTIN_LOADDQUSI512,
30205 IX86_BUILTIN_LOADUPD512,
30206 IX86_BUILTIN_LOADUPS512,
30207 IX86_BUILTIN_MAXPD512,
30208 IX86_BUILTIN_MAXPS512,
30209 IX86_BUILTIN_MAXSD_ROUND,
30210 IX86_BUILTIN_MAXSS_ROUND,
30211 IX86_BUILTIN_MINPD512,
30212 IX86_BUILTIN_MINPS512,
30213 IX86_BUILTIN_MINSD_ROUND,
30214 IX86_BUILTIN_MINSS_ROUND,
30215 IX86_BUILTIN_MOVAPD512,
30216 IX86_BUILTIN_MOVAPS512,
30217 IX86_BUILTIN_MOVDDUP512,
30218 IX86_BUILTIN_MOVDQA32LOAD512,
30219 IX86_BUILTIN_MOVDQA32STORE512,
30220 IX86_BUILTIN_MOVDQA32_512,
30221 IX86_BUILTIN_MOVDQA64LOAD512,
30222 IX86_BUILTIN_MOVDQA64STORE512,
30223 IX86_BUILTIN_MOVDQA64_512,
30224 IX86_BUILTIN_MOVNTDQ512,
30225 IX86_BUILTIN_MOVNTDQA512,
30226 IX86_BUILTIN_MOVNTPD512,
30227 IX86_BUILTIN_MOVNTPS512,
30228 IX86_BUILTIN_MOVSHDUP512,
30229 IX86_BUILTIN_MOVSLDUP512,
30230 IX86_BUILTIN_MULPD512,
30231 IX86_BUILTIN_MULPS512,
30232 IX86_BUILTIN_MULSD_ROUND,
30233 IX86_BUILTIN_MULSS_ROUND,
30234 IX86_BUILTIN_PABSD512,
30235 IX86_BUILTIN_PABSQ512,
30236 IX86_BUILTIN_PADDD512,
30237 IX86_BUILTIN_PADDQ512,
30238 IX86_BUILTIN_PANDD512,
30239 IX86_BUILTIN_PANDND512,
30240 IX86_BUILTIN_PANDNQ512,
30241 IX86_BUILTIN_PANDQ512,
30242 IX86_BUILTIN_PBROADCASTD512,
30243 IX86_BUILTIN_PBROADCASTD512_GPR,
30244 IX86_BUILTIN_PBROADCASTMB512,
30245 IX86_BUILTIN_PBROADCASTMW512,
30246 IX86_BUILTIN_PBROADCASTQ512,
30247 IX86_BUILTIN_PBROADCASTQ512_GPR,
30248 IX86_BUILTIN_PCMPEQD512_MASK,
30249 IX86_BUILTIN_PCMPEQQ512_MASK,
30250 IX86_BUILTIN_PCMPGTD512_MASK,
30251 IX86_BUILTIN_PCMPGTQ512_MASK,
30252 IX86_BUILTIN_PCOMPRESSD512,
30253 IX86_BUILTIN_PCOMPRESSDSTORE512,
30254 IX86_BUILTIN_PCOMPRESSQ512,
30255 IX86_BUILTIN_PCOMPRESSQSTORE512,
30256 IX86_BUILTIN_PEXPANDD512,
30257 IX86_BUILTIN_PEXPANDD512Z,
30258 IX86_BUILTIN_PEXPANDDLOAD512,
30259 IX86_BUILTIN_PEXPANDDLOAD512Z,
30260 IX86_BUILTIN_PEXPANDQ512,
30261 IX86_BUILTIN_PEXPANDQ512Z,
30262 IX86_BUILTIN_PEXPANDQLOAD512,
30263 IX86_BUILTIN_PEXPANDQLOAD512Z,
30264 IX86_BUILTIN_PMAXSD512,
30265 IX86_BUILTIN_PMAXSQ512,
30266 IX86_BUILTIN_PMAXUD512,
30267 IX86_BUILTIN_PMAXUQ512,
30268 IX86_BUILTIN_PMINSD512,
30269 IX86_BUILTIN_PMINSQ512,
30270 IX86_BUILTIN_PMINUD512,
30271 IX86_BUILTIN_PMINUQ512,
30272 IX86_BUILTIN_PMOVDB512,
30273 IX86_BUILTIN_PMOVDB512_MEM,
30274 IX86_BUILTIN_PMOVDW512,
30275 IX86_BUILTIN_PMOVDW512_MEM,
30276 IX86_BUILTIN_PMOVQB512,
30277 IX86_BUILTIN_PMOVQB512_MEM,
30278 IX86_BUILTIN_PMOVQD512,
30279 IX86_BUILTIN_PMOVQD512_MEM,
30280 IX86_BUILTIN_PMOVQW512,
30281 IX86_BUILTIN_PMOVQW512_MEM,
30282 IX86_BUILTIN_PMOVSDB512,
30283 IX86_BUILTIN_PMOVSDB512_MEM,
30284 IX86_BUILTIN_PMOVSDW512,
30285 IX86_BUILTIN_PMOVSDW512_MEM,
30286 IX86_BUILTIN_PMOVSQB512,
30287 IX86_BUILTIN_PMOVSQB512_MEM,
30288 IX86_BUILTIN_PMOVSQD512,
30289 IX86_BUILTIN_PMOVSQD512_MEM,
30290 IX86_BUILTIN_PMOVSQW512,
30291 IX86_BUILTIN_PMOVSQW512_MEM,
30292 IX86_BUILTIN_PMOVSXBD512,
30293 IX86_BUILTIN_PMOVSXBQ512,
30294 IX86_BUILTIN_PMOVSXDQ512,
30295 IX86_BUILTIN_PMOVSXWD512,
30296 IX86_BUILTIN_PMOVSXWQ512,
30297 IX86_BUILTIN_PMOVUSDB512,
30298 IX86_BUILTIN_PMOVUSDB512_MEM,
30299 IX86_BUILTIN_PMOVUSDW512,
30300 IX86_BUILTIN_PMOVUSDW512_MEM,
30301 IX86_BUILTIN_PMOVUSQB512,
30302 IX86_BUILTIN_PMOVUSQB512_MEM,
30303 IX86_BUILTIN_PMOVUSQD512,
30304 IX86_BUILTIN_PMOVUSQD512_MEM,
30305 IX86_BUILTIN_PMOVUSQW512,
30306 IX86_BUILTIN_PMOVUSQW512_MEM,
30307 IX86_BUILTIN_PMOVZXBD512,
30308 IX86_BUILTIN_PMOVZXBQ512,
30309 IX86_BUILTIN_PMOVZXDQ512,
30310 IX86_BUILTIN_PMOVZXWD512,
30311 IX86_BUILTIN_PMOVZXWQ512,
30312 IX86_BUILTIN_PMULDQ512,
30313 IX86_BUILTIN_PMULLD512,
30314 IX86_BUILTIN_PMULUDQ512,
30315 IX86_BUILTIN_PORD512,
30316 IX86_BUILTIN_PORQ512,
30317 IX86_BUILTIN_PROLD512,
30318 IX86_BUILTIN_PROLQ512,
30319 IX86_BUILTIN_PROLVD512,
30320 IX86_BUILTIN_PROLVQ512,
30321 IX86_BUILTIN_PRORD512,
30322 IX86_BUILTIN_PRORQ512,
30323 IX86_BUILTIN_PRORVD512,
30324 IX86_BUILTIN_PRORVQ512,
30325 IX86_BUILTIN_PSHUFD512,
30326 IX86_BUILTIN_PSLLD512,
30327 IX86_BUILTIN_PSLLDI512,
30328 IX86_BUILTIN_PSLLQ512,
30329 IX86_BUILTIN_PSLLQI512,
30330 IX86_BUILTIN_PSLLVV16SI,
30331 IX86_BUILTIN_PSLLVV8DI,
30332 IX86_BUILTIN_PSRAD512,
30333 IX86_BUILTIN_PSRADI512,
30334 IX86_BUILTIN_PSRAQ512,
30335 IX86_BUILTIN_PSRAQI512,
30336 IX86_BUILTIN_PSRAVV16SI,
30337 IX86_BUILTIN_PSRAVV8DI,
30338 IX86_BUILTIN_PSRLD512,
30339 IX86_BUILTIN_PSRLDI512,
30340 IX86_BUILTIN_PSRLQ512,
30341 IX86_BUILTIN_PSRLQI512,
30342 IX86_BUILTIN_PSRLVV16SI,
30343 IX86_BUILTIN_PSRLVV8DI,
30344 IX86_BUILTIN_PSUBD512,
30345 IX86_BUILTIN_PSUBQ512,
30346 IX86_BUILTIN_PTESTMD512,
30347 IX86_BUILTIN_PTESTMQ512,
30348 IX86_BUILTIN_PTESTNMD512,
30349 IX86_BUILTIN_PTESTNMQ512,
30350 IX86_BUILTIN_PUNPCKHDQ512,
30351 IX86_BUILTIN_PUNPCKHQDQ512,
30352 IX86_BUILTIN_PUNPCKLDQ512,
30353 IX86_BUILTIN_PUNPCKLQDQ512,
30354 IX86_BUILTIN_PXORD512,
30355 IX86_BUILTIN_PXORQ512,
30356 IX86_BUILTIN_RCP14PD512,
30357 IX86_BUILTIN_RCP14PS512,
30358 IX86_BUILTIN_RCP14SD,
30359 IX86_BUILTIN_RCP14SS,
30360 IX86_BUILTIN_RNDSCALEPD,
30361 IX86_BUILTIN_RNDSCALEPS,
30362 IX86_BUILTIN_RNDSCALESD,
30363 IX86_BUILTIN_RNDSCALESS,
30364 IX86_BUILTIN_RSQRT14PD512,
30365 IX86_BUILTIN_RSQRT14PS512,
30366 IX86_BUILTIN_RSQRT14SD,
30367 IX86_BUILTIN_RSQRT14SS,
30368 IX86_BUILTIN_SCALEFPD512,
30369 IX86_BUILTIN_SCALEFPS512,
30370 IX86_BUILTIN_SCALEFSD,
30371 IX86_BUILTIN_SCALEFSS,
30372 IX86_BUILTIN_SHUFPD512,
30373 IX86_BUILTIN_SHUFPS512,
30374 IX86_BUILTIN_SHUF_F32x4,
30375 IX86_BUILTIN_SHUF_F64x2,
30376 IX86_BUILTIN_SHUF_I32x4,
30377 IX86_BUILTIN_SHUF_I64x2,
30378 IX86_BUILTIN_SQRTPD512,
30379 IX86_BUILTIN_SQRTPD512_MASK,
30380 IX86_BUILTIN_SQRTPS512_MASK,
30381 IX86_BUILTIN_SQRTPS_NR512,
30382 IX86_BUILTIN_SQRTSD_ROUND,
30383 IX86_BUILTIN_SQRTSS_ROUND,
30384 IX86_BUILTIN_STOREAPD512,
30385 IX86_BUILTIN_STOREAPS512,
30386 IX86_BUILTIN_STOREDQUDI512,
30387 IX86_BUILTIN_STOREDQUSI512,
30388 IX86_BUILTIN_STOREUPD512,
30389 IX86_BUILTIN_STOREUPS512,
30390 IX86_BUILTIN_SUBPD512,
30391 IX86_BUILTIN_SUBPS512,
30392 IX86_BUILTIN_SUBSD_ROUND,
30393 IX86_BUILTIN_SUBSS_ROUND,
30394 IX86_BUILTIN_UCMPD512,
30395 IX86_BUILTIN_UCMPQ512,
30396 IX86_BUILTIN_UNPCKHPD512,
30397 IX86_BUILTIN_UNPCKHPS512,
30398 IX86_BUILTIN_UNPCKLPD512,
30399 IX86_BUILTIN_UNPCKLPS512,
30400 IX86_BUILTIN_VCVTSD2SI32,
30401 IX86_BUILTIN_VCVTSD2SI64,
30402 IX86_BUILTIN_VCVTSD2USI32,
30403 IX86_BUILTIN_VCVTSD2USI64,
30404 IX86_BUILTIN_VCVTSS2SI32,
30405 IX86_BUILTIN_VCVTSS2SI64,
30406 IX86_BUILTIN_VCVTSS2USI32,
30407 IX86_BUILTIN_VCVTSS2USI64,
30408 IX86_BUILTIN_VCVTTSD2SI32,
30409 IX86_BUILTIN_VCVTTSD2SI64,
30410 IX86_BUILTIN_VCVTTSD2USI32,
30411 IX86_BUILTIN_VCVTTSD2USI64,
30412 IX86_BUILTIN_VCVTTSS2SI32,
30413 IX86_BUILTIN_VCVTTSS2SI64,
30414 IX86_BUILTIN_VCVTTSS2USI32,
30415 IX86_BUILTIN_VCVTTSS2USI64,
30416 IX86_BUILTIN_VFMADDPD512_MASK,
30417 IX86_BUILTIN_VFMADDPD512_MASK3,
30418 IX86_BUILTIN_VFMADDPD512_MASKZ,
30419 IX86_BUILTIN_VFMADDPS512_MASK,
30420 IX86_BUILTIN_VFMADDPS512_MASK3,
30421 IX86_BUILTIN_VFMADDPS512_MASKZ,
30422 IX86_BUILTIN_VFMADDSD3_ROUND,
30423 IX86_BUILTIN_VFMADDSS3_ROUND,
30424 IX86_BUILTIN_VFMADDSUBPD512_MASK,
30425 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
30426 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
30427 IX86_BUILTIN_VFMADDSUBPS512_MASK,
30428 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
30429 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
30430 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
30431 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
30432 IX86_BUILTIN_VFMSUBPD512_MASK3,
30433 IX86_BUILTIN_VFMSUBPS512_MASK3,
30434 IX86_BUILTIN_VFMSUBSD3_MASK3,
30435 IX86_BUILTIN_VFMSUBSS3_MASK3,
30436 IX86_BUILTIN_VFNMADDPD512_MASK,
30437 IX86_BUILTIN_VFNMADDPS512_MASK,
30438 IX86_BUILTIN_VFNMSUBPD512_MASK,
30439 IX86_BUILTIN_VFNMSUBPD512_MASK3,
30440 IX86_BUILTIN_VFNMSUBPS512_MASK,
30441 IX86_BUILTIN_VFNMSUBPS512_MASK3,
30442 IX86_BUILTIN_VPCLZCNTD512,
30443 IX86_BUILTIN_VPCLZCNTQ512,
30444 IX86_BUILTIN_VPCONFLICTD512,
30445 IX86_BUILTIN_VPCONFLICTQ512,
30446 IX86_BUILTIN_VPERMDF512,
30447 IX86_BUILTIN_VPERMDI512,
30448 IX86_BUILTIN_VPERMI2VARD512,
30449 IX86_BUILTIN_VPERMI2VARPD512,
30450 IX86_BUILTIN_VPERMI2VARPS512,
30451 IX86_BUILTIN_VPERMI2VARQ512,
30452 IX86_BUILTIN_VPERMILPD512,
30453 IX86_BUILTIN_VPERMILPS512,
30454 IX86_BUILTIN_VPERMILVARPD512,
30455 IX86_BUILTIN_VPERMILVARPS512,
30456 IX86_BUILTIN_VPERMT2VARD512,
30457 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
30458 IX86_BUILTIN_VPERMT2VARPD512,
30459 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
30460 IX86_BUILTIN_VPERMT2VARPS512,
30461 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
30462 IX86_BUILTIN_VPERMT2VARQ512,
30463 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
30464 IX86_BUILTIN_VPERMVARDF512,
30465 IX86_BUILTIN_VPERMVARDI512,
30466 IX86_BUILTIN_VPERMVARSF512,
30467 IX86_BUILTIN_VPERMVARSI512,
30468 IX86_BUILTIN_VTERNLOGD512_MASK,
30469 IX86_BUILTIN_VTERNLOGD512_MASKZ,
30470 IX86_BUILTIN_VTERNLOGQ512_MASK,
30471 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
30472
30473 /* Mask arithmetic operations */
30474 IX86_BUILTIN_KAND16,
30475 IX86_BUILTIN_KANDN16,
30476 IX86_BUILTIN_KNOT16,
30477 IX86_BUILTIN_KOR16,
30478 IX86_BUILTIN_KORTESTC16,
30479 IX86_BUILTIN_KORTESTZ16,
30480 IX86_BUILTIN_KUNPCKBW,
30481 IX86_BUILTIN_KXNOR16,
30482 IX86_BUILTIN_KXOR16,
30483 IX86_BUILTIN_KMOV16,
30484
30485 /* AVX512VL. */
30486 IX86_BUILTIN_PMOVUSQD256_MEM,
30487 IX86_BUILTIN_PMOVUSQD128_MEM,
30488 IX86_BUILTIN_PMOVSQD256_MEM,
30489 IX86_BUILTIN_PMOVSQD128_MEM,
30490 IX86_BUILTIN_PMOVQD256_MEM,
30491 IX86_BUILTIN_PMOVQD128_MEM,
30492 IX86_BUILTIN_PMOVUSQW256_MEM,
30493 IX86_BUILTIN_PMOVUSQW128_MEM,
30494 IX86_BUILTIN_PMOVSQW256_MEM,
30495 IX86_BUILTIN_PMOVSQW128_MEM,
30496 IX86_BUILTIN_PMOVQW256_MEM,
30497 IX86_BUILTIN_PMOVQW128_MEM,
30498 IX86_BUILTIN_PMOVUSQB256_MEM,
30499 IX86_BUILTIN_PMOVUSQB128_MEM,
30500 IX86_BUILTIN_PMOVSQB256_MEM,
30501 IX86_BUILTIN_PMOVSQB128_MEM,
30502 IX86_BUILTIN_PMOVQB256_MEM,
30503 IX86_BUILTIN_PMOVQB128_MEM,
30504 IX86_BUILTIN_PMOVUSDW256_MEM,
30505 IX86_BUILTIN_PMOVUSDW128_MEM,
30506 IX86_BUILTIN_PMOVSDW256_MEM,
30507 IX86_BUILTIN_PMOVSDW128_MEM,
30508 IX86_BUILTIN_PMOVDW256_MEM,
30509 IX86_BUILTIN_PMOVDW128_MEM,
30510 IX86_BUILTIN_PMOVUSDB256_MEM,
30511 IX86_BUILTIN_PMOVUSDB128_MEM,
30512 IX86_BUILTIN_PMOVSDB256_MEM,
30513 IX86_BUILTIN_PMOVSDB128_MEM,
30514 IX86_BUILTIN_PMOVDB256_MEM,
30515 IX86_BUILTIN_PMOVDB128_MEM,
30516 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
30517 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
30518 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
30519 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
30520 IX86_BUILTIN_MOVDQA64STORE256_MASK,
30521 IX86_BUILTIN_MOVDQA64STORE128_MASK,
30522 IX86_BUILTIN_MOVDQA32STORE256_MASK,
30523 IX86_BUILTIN_MOVDQA32STORE128_MASK,
30524 IX86_BUILTIN_LOADAPD256_MASK,
30525 IX86_BUILTIN_LOADAPD128_MASK,
30526 IX86_BUILTIN_LOADAPS256_MASK,
30527 IX86_BUILTIN_LOADAPS128_MASK,
30528 IX86_BUILTIN_STOREAPD256_MASK,
30529 IX86_BUILTIN_STOREAPD128_MASK,
30530 IX86_BUILTIN_STOREAPS256_MASK,
30531 IX86_BUILTIN_STOREAPS128_MASK,
30532 IX86_BUILTIN_LOADUPD256_MASK,
30533 IX86_BUILTIN_LOADUPD128_MASK,
30534 IX86_BUILTIN_LOADUPS256_MASK,
30535 IX86_BUILTIN_LOADUPS128_MASK,
30536 IX86_BUILTIN_STOREUPD256_MASK,
30537 IX86_BUILTIN_STOREUPD128_MASK,
30538 IX86_BUILTIN_STOREUPS256_MASK,
30539 IX86_BUILTIN_STOREUPS128_MASK,
30540 IX86_BUILTIN_LOADDQUDI256_MASK,
30541 IX86_BUILTIN_LOADDQUDI128_MASK,
30542 IX86_BUILTIN_LOADDQUSI256_MASK,
30543 IX86_BUILTIN_LOADDQUSI128_MASK,
30544 IX86_BUILTIN_LOADDQUHI256_MASK,
30545 IX86_BUILTIN_LOADDQUHI128_MASK,
30546 IX86_BUILTIN_LOADDQUQI256_MASK,
30547 IX86_BUILTIN_LOADDQUQI128_MASK,
30548 IX86_BUILTIN_STOREDQUDI256_MASK,
30549 IX86_BUILTIN_STOREDQUDI128_MASK,
30550 IX86_BUILTIN_STOREDQUSI256_MASK,
30551 IX86_BUILTIN_STOREDQUSI128_MASK,
30552 IX86_BUILTIN_STOREDQUHI256_MASK,
30553 IX86_BUILTIN_STOREDQUHI128_MASK,
30554 IX86_BUILTIN_STOREDQUQI256_MASK,
30555 IX86_BUILTIN_STOREDQUQI128_MASK,
30556 IX86_BUILTIN_COMPRESSPDSTORE256,
30557 IX86_BUILTIN_COMPRESSPDSTORE128,
30558 IX86_BUILTIN_COMPRESSPSSTORE256,
30559 IX86_BUILTIN_COMPRESSPSSTORE128,
30560 IX86_BUILTIN_PCOMPRESSQSTORE256,
30561 IX86_BUILTIN_PCOMPRESSQSTORE128,
30562 IX86_BUILTIN_PCOMPRESSDSTORE256,
30563 IX86_BUILTIN_PCOMPRESSDSTORE128,
30564 IX86_BUILTIN_EXPANDPDLOAD256,
30565 IX86_BUILTIN_EXPANDPDLOAD128,
30566 IX86_BUILTIN_EXPANDPSLOAD256,
30567 IX86_BUILTIN_EXPANDPSLOAD128,
30568 IX86_BUILTIN_PEXPANDQLOAD256,
30569 IX86_BUILTIN_PEXPANDQLOAD128,
30570 IX86_BUILTIN_PEXPANDDLOAD256,
30571 IX86_BUILTIN_PEXPANDDLOAD128,
30572 IX86_BUILTIN_EXPANDPDLOAD256Z,
30573 IX86_BUILTIN_EXPANDPDLOAD128Z,
30574 IX86_BUILTIN_EXPANDPSLOAD256Z,
30575 IX86_BUILTIN_EXPANDPSLOAD128Z,
30576 IX86_BUILTIN_PEXPANDQLOAD256Z,
30577 IX86_BUILTIN_PEXPANDQLOAD128Z,
30578 IX86_BUILTIN_PEXPANDDLOAD256Z,
30579 IX86_BUILTIN_PEXPANDDLOAD128Z,
30580 IX86_BUILTIN_PALIGNR256_MASK,
30581 IX86_BUILTIN_PALIGNR128_MASK,
30582 IX86_BUILTIN_MOVDQA64_256_MASK,
30583 IX86_BUILTIN_MOVDQA64_128_MASK,
30584 IX86_BUILTIN_MOVDQA32_256_MASK,
30585 IX86_BUILTIN_MOVDQA32_128_MASK,
30586 IX86_BUILTIN_MOVAPD256_MASK,
30587 IX86_BUILTIN_MOVAPD128_MASK,
30588 IX86_BUILTIN_MOVAPS256_MASK,
30589 IX86_BUILTIN_MOVAPS128_MASK,
30590 IX86_BUILTIN_MOVDQUHI256_MASK,
30591 IX86_BUILTIN_MOVDQUHI128_MASK,
30592 IX86_BUILTIN_MOVDQUQI256_MASK,
30593 IX86_BUILTIN_MOVDQUQI128_MASK,
30594 IX86_BUILTIN_MINPS128_MASK,
30595 IX86_BUILTIN_MAXPS128_MASK,
30596 IX86_BUILTIN_MINPD128_MASK,
30597 IX86_BUILTIN_MAXPD128_MASK,
30598 IX86_BUILTIN_MAXPD256_MASK,
30599 IX86_BUILTIN_MAXPS256_MASK,
30600 IX86_BUILTIN_MINPD256_MASK,
30601 IX86_BUILTIN_MINPS256_MASK,
30602 IX86_BUILTIN_MULPS128_MASK,
30603 IX86_BUILTIN_DIVPS128_MASK,
30604 IX86_BUILTIN_MULPD128_MASK,
30605 IX86_BUILTIN_DIVPD128_MASK,
30606 IX86_BUILTIN_DIVPD256_MASK,
30607 IX86_BUILTIN_DIVPS256_MASK,
30608 IX86_BUILTIN_MULPD256_MASK,
30609 IX86_BUILTIN_MULPS256_MASK,
30610 IX86_BUILTIN_ADDPD128_MASK,
30611 IX86_BUILTIN_ADDPD256_MASK,
30612 IX86_BUILTIN_ADDPS128_MASK,
30613 IX86_BUILTIN_ADDPS256_MASK,
30614 IX86_BUILTIN_SUBPD128_MASK,
30615 IX86_BUILTIN_SUBPD256_MASK,
30616 IX86_BUILTIN_SUBPS128_MASK,
30617 IX86_BUILTIN_SUBPS256_MASK,
30618 IX86_BUILTIN_XORPD256_MASK,
30619 IX86_BUILTIN_XORPD128_MASK,
30620 IX86_BUILTIN_XORPS256_MASK,
30621 IX86_BUILTIN_XORPS128_MASK,
30622 IX86_BUILTIN_ORPD256_MASK,
30623 IX86_BUILTIN_ORPD128_MASK,
30624 IX86_BUILTIN_ORPS256_MASK,
30625 IX86_BUILTIN_ORPS128_MASK,
30626 IX86_BUILTIN_BROADCASTF32x2_256,
30627 IX86_BUILTIN_BROADCASTI32x2_256,
30628 IX86_BUILTIN_BROADCASTI32x2_128,
30629 IX86_BUILTIN_BROADCASTF64X2_256,
30630 IX86_BUILTIN_BROADCASTI64X2_256,
30631 IX86_BUILTIN_BROADCASTF32X4_256,
30632 IX86_BUILTIN_BROADCASTI32X4_256,
30633 IX86_BUILTIN_EXTRACTF32X4_256,
30634 IX86_BUILTIN_EXTRACTI32X4_256,
30635 IX86_BUILTIN_DBPSADBW256,
30636 IX86_BUILTIN_DBPSADBW128,
30637 IX86_BUILTIN_CVTTPD2QQ256,
30638 IX86_BUILTIN_CVTTPD2QQ128,
30639 IX86_BUILTIN_CVTTPD2UQQ256,
30640 IX86_BUILTIN_CVTTPD2UQQ128,
30641 IX86_BUILTIN_CVTPD2QQ256,
30642 IX86_BUILTIN_CVTPD2QQ128,
30643 IX86_BUILTIN_CVTPD2UQQ256,
30644 IX86_BUILTIN_CVTPD2UQQ128,
30645 IX86_BUILTIN_CVTPD2UDQ256_MASK,
30646 IX86_BUILTIN_CVTPD2UDQ128_MASK,
30647 IX86_BUILTIN_CVTTPS2QQ256,
30648 IX86_BUILTIN_CVTTPS2QQ128,
30649 IX86_BUILTIN_CVTTPS2UQQ256,
30650 IX86_BUILTIN_CVTTPS2UQQ128,
30651 IX86_BUILTIN_CVTTPS2DQ256_MASK,
30652 IX86_BUILTIN_CVTTPS2DQ128_MASK,
30653 IX86_BUILTIN_CVTTPS2UDQ256,
30654 IX86_BUILTIN_CVTTPS2UDQ128,
30655 IX86_BUILTIN_CVTTPD2DQ256_MASK,
30656 IX86_BUILTIN_CVTTPD2DQ128_MASK,
30657 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
30658 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
30659 IX86_BUILTIN_CVTPD2DQ256_MASK,
30660 IX86_BUILTIN_CVTPD2DQ128_MASK,
30661 IX86_BUILTIN_CVTDQ2PD256_MASK,
30662 IX86_BUILTIN_CVTDQ2PD128_MASK,
30663 IX86_BUILTIN_CVTUDQ2PD256_MASK,
30664 IX86_BUILTIN_CVTUDQ2PD128_MASK,
30665 IX86_BUILTIN_CVTDQ2PS256_MASK,
30666 IX86_BUILTIN_CVTDQ2PS128_MASK,
30667 IX86_BUILTIN_CVTUDQ2PS256_MASK,
30668 IX86_BUILTIN_CVTUDQ2PS128_MASK,
30669 IX86_BUILTIN_CVTPS2PD256_MASK,
30670 IX86_BUILTIN_CVTPS2PD128_MASK,
30671 IX86_BUILTIN_PBROADCASTB256_MASK,
30672 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
30673 IX86_BUILTIN_PBROADCASTB128_MASK,
30674 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
30675 IX86_BUILTIN_PBROADCASTW256_MASK,
30676 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
30677 IX86_BUILTIN_PBROADCASTW128_MASK,
30678 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
30679 IX86_BUILTIN_PBROADCASTD256_MASK,
30680 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
30681 IX86_BUILTIN_PBROADCASTD128_MASK,
30682 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
30683 IX86_BUILTIN_PBROADCASTQ256_MASK,
30684 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
30685 IX86_BUILTIN_PBROADCASTQ128_MASK,
30686 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
30687 IX86_BUILTIN_BROADCASTSS256,
30688 IX86_BUILTIN_BROADCASTSS128,
30689 IX86_BUILTIN_BROADCASTSD256,
30690 IX86_BUILTIN_EXTRACTF64X2_256,
30691 IX86_BUILTIN_EXTRACTI64X2_256,
30692 IX86_BUILTIN_INSERTF32X4_256,
30693 IX86_BUILTIN_INSERTI32X4_256,
30694 IX86_BUILTIN_PMOVSXBW256_MASK,
30695 IX86_BUILTIN_PMOVSXBW128_MASK,
30696 IX86_BUILTIN_PMOVSXBD256_MASK,
30697 IX86_BUILTIN_PMOVSXBD128_MASK,
30698 IX86_BUILTIN_PMOVSXBQ256_MASK,
30699 IX86_BUILTIN_PMOVSXBQ128_MASK,
30700 IX86_BUILTIN_PMOVSXWD256_MASK,
30701 IX86_BUILTIN_PMOVSXWD128_MASK,
30702 IX86_BUILTIN_PMOVSXWQ256_MASK,
30703 IX86_BUILTIN_PMOVSXWQ128_MASK,
30704 IX86_BUILTIN_PMOVSXDQ256_MASK,
30705 IX86_BUILTIN_PMOVSXDQ128_MASK,
30706 IX86_BUILTIN_PMOVZXBW256_MASK,
30707 IX86_BUILTIN_PMOVZXBW128_MASK,
30708 IX86_BUILTIN_PMOVZXBD256_MASK,
30709 IX86_BUILTIN_PMOVZXBD128_MASK,
30710 IX86_BUILTIN_PMOVZXBQ256_MASK,
30711 IX86_BUILTIN_PMOVZXBQ128_MASK,
30712 IX86_BUILTIN_PMOVZXWD256_MASK,
30713 IX86_BUILTIN_PMOVZXWD128_MASK,
30714 IX86_BUILTIN_PMOVZXWQ256_MASK,
30715 IX86_BUILTIN_PMOVZXWQ128_MASK,
30716 IX86_BUILTIN_PMOVZXDQ256_MASK,
30717 IX86_BUILTIN_PMOVZXDQ128_MASK,
30718 IX86_BUILTIN_REDUCEPD256_MASK,
30719 IX86_BUILTIN_REDUCEPD128_MASK,
30720 IX86_BUILTIN_REDUCEPS256_MASK,
30721 IX86_BUILTIN_REDUCEPS128_MASK,
30722 IX86_BUILTIN_REDUCESD_MASK,
30723 IX86_BUILTIN_REDUCESS_MASK,
30724 IX86_BUILTIN_VPERMVARHI256_MASK,
30725 IX86_BUILTIN_VPERMVARHI128_MASK,
30726 IX86_BUILTIN_VPERMT2VARHI256,
30727 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
30728 IX86_BUILTIN_VPERMT2VARHI128,
30729 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
30730 IX86_BUILTIN_VPERMI2VARHI256,
30731 IX86_BUILTIN_VPERMI2VARHI128,
30732 IX86_BUILTIN_RCP14PD256,
30733 IX86_BUILTIN_RCP14PD128,
30734 IX86_BUILTIN_RCP14PS256,
30735 IX86_BUILTIN_RCP14PS128,
30736 IX86_BUILTIN_RSQRT14PD256_MASK,
30737 IX86_BUILTIN_RSQRT14PD128_MASK,
30738 IX86_BUILTIN_RSQRT14PS256_MASK,
30739 IX86_BUILTIN_RSQRT14PS128_MASK,
30740 IX86_BUILTIN_SQRTPD256_MASK,
30741 IX86_BUILTIN_SQRTPD128_MASK,
30742 IX86_BUILTIN_SQRTPS256_MASK,
30743 IX86_BUILTIN_SQRTPS128_MASK,
30744 IX86_BUILTIN_PADDB128_MASK,
30745 IX86_BUILTIN_PADDW128_MASK,
30746 IX86_BUILTIN_PADDD128_MASK,
30747 IX86_BUILTIN_PADDQ128_MASK,
30748 IX86_BUILTIN_PSUBB128_MASK,
30749 IX86_BUILTIN_PSUBW128_MASK,
30750 IX86_BUILTIN_PSUBD128_MASK,
30751 IX86_BUILTIN_PSUBQ128_MASK,
30752 IX86_BUILTIN_PADDSB128_MASK,
30753 IX86_BUILTIN_PADDSW128_MASK,
30754 IX86_BUILTIN_PSUBSB128_MASK,
30755 IX86_BUILTIN_PSUBSW128_MASK,
30756 IX86_BUILTIN_PADDUSB128_MASK,
30757 IX86_BUILTIN_PADDUSW128_MASK,
30758 IX86_BUILTIN_PSUBUSB128_MASK,
30759 IX86_BUILTIN_PSUBUSW128_MASK,
30760 IX86_BUILTIN_PADDB256_MASK,
30761 IX86_BUILTIN_PADDW256_MASK,
30762 IX86_BUILTIN_PADDD256_MASK,
30763 IX86_BUILTIN_PADDQ256_MASK,
30764 IX86_BUILTIN_PADDSB256_MASK,
30765 IX86_BUILTIN_PADDSW256_MASK,
30766 IX86_BUILTIN_PADDUSB256_MASK,
30767 IX86_BUILTIN_PADDUSW256_MASK,
30768 IX86_BUILTIN_PSUBB256_MASK,
30769 IX86_BUILTIN_PSUBW256_MASK,
30770 IX86_BUILTIN_PSUBD256_MASK,
30771 IX86_BUILTIN_PSUBQ256_MASK,
30772 IX86_BUILTIN_PSUBSB256_MASK,
30773 IX86_BUILTIN_PSUBSW256_MASK,
30774 IX86_BUILTIN_PSUBUSB256_MASK,
30775 IX86_BUILTIN_PSUBUSW256_MASK,
30776 IX86_BUILTIN_SHUF_F64x2_256,
30777 IX86_BUILTIN_SHUF_I64x2_256,
30778 IX86_BUILTIN_SHUF_I32x4_256,
30779 IX86_BUILTIN_SHUF_F32x4_256,
30780 IX86_BUILTIN_PMOVWB128,
30781 IX86_BUILTIN_PMOVWB256,
30782 IX86_BUILTIN_PMOVSWB128,
30783 IX86_BUILTIN_PMOVSWB256,
30784 IX86_BUILTIN_PMOVUSWB128,
30785 IX86_BUILTIN_PMOVUSWB256,
30786 IX86_BUILTIN_PMOVDB128,
30787 IX86_BUILTIN_PMOVDB256,
30788 IX86_BUILTIN_PMOVSDB128,
30789 IX86_BUILTIN_PMOVSDB256,
30790 IX86_BUILTIN_PMOVUSDB128,
30791 IX86_BUILTIN_PMOVUSDB256,
30792 IX86_BUILTIN_PMOVDW128,
30793 IX86_BUILTIN_PMOVDW256,
30794 IX86_BUILTIN_PMOVSDW128,
30795 IX86_BUILTIN_PMOVSDW256,
30796 IX86_BUILTIN_PMOVUSDW128,
30797 IX86_BUILTIN_PMOVUSDW256,
30798 IX86_BUILTIN_PMOVQB128,
30799 IX86_BUILTIN_PMOVQB256,
30800 IX86_BUILTIN_PMOVSQB128,
30801 IX86_BUILTIN_PMOVSQB256,
30802 IX86_BUILTIN_PMOVUSQB128,
30803 IX86_BUILTIN_PMOVUSQB256,
30804 IX86_BUILTIN_PMOVQW128,
30805 IX86_BUILTIN_PMOVQW256,
30806 IX86_BUILTIN_PMOVSQW128,
30807 IX86_BUILTIN_PMOVSQW256,
30808 IX86_BUILTIN_PMOVUSQW128,
30809 IX86_BUILTIN_PMOVUSQW256,
30810 IX86_BUILTIN_PMOVQD128,
30811 IX86_BUILTIN_PMOVQD256,
30812 IX86_BUILTIN_PMOVSQD128,
30813 IX86_BUILTIN_PMOVSQD256,
30814 IX86_BUILTIN_PMOVUSQD128,
30815 IX86_BUILTIN_PMOVUSQD256,
30816 IX86_BUILTIN_RANGEPD256,
30817 IX86_BUILTIN_RANGEPD128,
30818 IX86_BUILTIN_RANGEPS256,
30819 IX86_BUILTIN_RANGEPS128,
30820 IX86_BUILTIN_GETEXPPS256,
30821 IX86_BUILTIN_GETEXPPD256,
30822 IX86_BUILTIN_GETEXPPS128,
30823 IX86_BUILTIN_GETEXPPD128,
30824 IX86_BUILTIN_FIXUPIMMPD256_MASK,
30825 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
30826 IX86_BUILTIN_FIXUPIMMPS256_MASK,
30827 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
30828 IX86_BUILTIN_FIXUPIMMPD128_MASK,
30829 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
30830 IX86_BUILTIN_FIXUPIMMPS128_MASK,
30831 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
30832 IX86_BUILTIN_PABSQ256,
30833 IX86_BUILTIN_PABSQ128,
30834 IX86_BUILTIN_PABSD256_MASK,
30835 IX86_BUILTIN_PABSD128_MASK,
30836 IX86_BUILTIN_PMULHRSW256_MASK,
30837 IX86_BUILTIN_PMULHRSW128_MASK,
30838 IX86_BUILTIN_PMULHUW128_MASK,
30839 IX86_BUILTIN_PMULHUW256_MASK,
30840 IX86_BUILTIN_PMULHW256_MASK,
30841 IX86_BUILTIN_PMULHW128_MASK,
30842 IX86_BUILTIN_PMULLW256_MASK,
30843 IX86_BUILTIN_PMULLW128_MASK,
30844 IX86_BUILTIN_PMULLQ256,
30845 IX86_BUILTIN_PMULLQ128,
30846 IX86_BUILTIN_ANDPD256_MASK,
30847 IX86_BUILTIN_ANDPD128_MASK,
30848 IX86_BUILTIN_ANDPS256_MASK,
30849 IX86_BUILTIN_ANDPS128_MASK,
30850 IX86_BUILTIN_ANDNPD256_MASK,
30851 IX86_BUILTIN_ANDNPD128_MASK,
30852 IX86_BUILTIN_ANDNPS256_MASK,
30853 IX86_BUILTIN_ANDNPS128_MASK,
30854 IX86_BUILTIN_PSLLWI128_MASK,
30855 IX86_BUILTIN_PSLLDI128_MASK,
30856 IX86_BUILTIN_PSLLQI128_MASK,
30857 IX86_BUILTIN_PSLLW128_MASK,
30858 IX86_BUILTIN_PSLLD128_MASK,
30859 IX86_BUILTIN_PSLLQ128_MASK,
30860 	IX86_BUILTIN_PSLLWI256_MASK,
30861 IX86_BUILTIN_PSLLW256_MASK,
30862 IX86_BUILTIN_PSLLDI256_MASK,
30863 IX86_BUILTIN_PSLLD256_MASK,
30864 IX86_BUILTIN_PSLLQI256_MASK,
30865 IX86_BUILTIN_PSLLQ256_MASK,
30866 IX86_BUILTIN_PSRADI128_MASK,
30867 IX86_BUILTIN_PSRAD128_MASK,
30868 IX86_BUILTIN_PSRADI256_MASK,
30869 IX86_BUILTIN_PSRAD256_MASK,
30870 IX86_BUILTIN_PSRAQI128_MASK,
30871 IX86_BUILTIN_PSRAQ128_MASK,
30872 IX86_BUILTIN_PSRAQI256_MASK,
30873 IX86_BUILTIN_PSRAQ256_MASK,
30874 IX86_BUILTIN_PANDD256,
30875 IX86_BUILTIN_PANDD128,
30876 IX86_BUILTIN_PSRLDI128_MASK,
30877 IX86_BUILTIN_PSRLD128_MASK,
30878 IX86_BUILTIN_PSRLDI256_MASK,
30879 IX86_BUILTIN_PSRLD256_MASK,
30880 IX86_BUILTIN_PSRLQI128_MASK,
30881 IX86_BUILTIN_PSRLQ128_MASK,
30882 IX86_BUILTIN_PSRLQI256_MASK,
30883 IX86_BUILTIN_PSRLQ256_MASK,
30884 IX86_BUILTIN_PANDQ256,
30885 IX86_BUILTIN_PANDQ128,
30886 IX86_BUILTIN_PANDND256,
30887 IX86_BUILTIN_PANDND128,
30888 IX86_BUILTIN_PANDNQ256,
30889 IX86_BUILTIN_PANDNQ128,
30890 IX86_BUILTIN_PORD256,
30891 IX86_BUILTIN_PORD128,
30892 IX86_BUILTIN_PORQ256,
30893 IX86_BUILTIN_PORQ128,
30894 IX86_BUILTIN_PXORD256,
30895 IX86_BUILTIN_PXORD128,
30896 IX86_BUILTIN_PXORQ256,
30897 IX86_BUILTIN_PXORQ128,
30898 IX86_BUILTIN_PACKSSWB256_MASK,
30899 IX86_BUILTIN_PACKSSWB128_MASK,
30900 IX86_BUILTIN_PACKUSWB256_MASK,
30901 IX86_BUILTIN_PACKUSWB128_MASK,
30902 IX86_BUILTIN_RNDSCALEPS256,
30903 IX86_BUILTIN_RNDSCALEPD256,
30904 IX86_BUILTIN_RNDSCALEPS128,
30905 IX86_BUILTIN_RNDSCALEPD128,
30906 IX86_BUILTIN_VTERNLOGQ256_MASK,
30907 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
30908 IX86_BUILTIN_VTERNLOGD256_MASK,
30909 IX86_BUILTIN_VTERNLOGD256_MASKZ,
30910 IX86_BUILTIN_VTERNLOGQ128_MASK,
30911 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
30912 IX86_BUILTIN_VTERNLOGD128_MASK,
30913 IX86_BUILTIN_VTERNLOGD128_MASKZ,
30914 IX86_BUILTIN_SCALEFPD256,
30915 IX86_BUILTIN_SCALEFPS256,
30916 IX86_BUILTIN_SCALEFPD128,
30917 IX86_BUILTIN_SCALEFPS128,
30918 IX86_BUILTIN_VFMADDPD256_MASK,
30919 IX86_BUILTIN_VFMADDPD256_MASK3,
30920 IX86_BUILTIN_VFMADDPD256_MASKZ,
30921 IX86_BUILTIN_VFMADDPD128_MASK,
30922 IX86_BUILTIN_VFMADDPD128_MASK3,
30923 IX86_BUILTIN_VFMADDPD128_MASKZ,
30924 IX86_BUILTIN_VFMADDPS256_MASK,
30925 IX86_BUILTIN_VFMADDPS256_MASK3,
30926 IX86_BUILTIN_VFMADDPS256_MASKZ,
30927 IX86_BUILTIN_VFMADDPS128_MASK,
30928 IX86_BUILTIN_VFMADDPS128_MASK3,
30929 IX86_BUILTIN_VFMADDPS128_MASKZ,
30930 IX86_BUILTIN_VFMSUBPD256_MASK3,
30931 IX86_BUILTIN_VFMSUBPD128_MASK3,
30932 IX86_BUILTIN_VFMSUBPS256_MASK3,
30933 IX86_BUILTIN_VFMSUBPS128_MASK3,
30934 IX86_BUILTIN_VFNMADDPD256_MASK,
30935 IX86_BUILTIN_VFNMADDPD128_MASK,
30936 IX86_BUILTIN_VFNMADDPS256_MASK,
30937 IX86_BUILTIN_VFNMADDPS128_MASK,
30938 IX86_BUILTIN_VFNMSUBPD256_MASK,
30939 IX86_BUILTIN_VFNMSUBPD256_MASK3,
30940 IX86_BUILTIN_VFNMSUBPD128_MASK,
30941 IX86_BUILTIN_VFNMSUBPD128_MASK3,
30942 IX86_BUILTIN_VFNMSUBPS256_MASK,
30943 IX86_BUILTIN_VFNMSUBPS256_MASK3,
30944 IX86_BUILTIN_VFNMSUBPS128_MASK,
30945 IX86_BUILTIN_VFNMSUBPS128_MASK3,
30946 IX86_BUILTIN_VFMADDSUBPD256_MASK,
30947 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
30948 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
30949 IX86_BUILTIN_VFMADDSUBPD128_MASK,
30950 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
30951 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
30952 IX86_BUILTIN_VFMADDSUBPS256_MASK,
30953 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
30954 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
30955 IX86_BUILTIN_VFMADDSUBPS128_MASK,
30956 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
30957 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
30958 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
30959 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
30960 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
30961 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
30962 IX86_BUILTIN_INSERTF64X2_256,
30963 IX86_BUILTIN_INSERTI64X2_256,
30964 IX86_BUILTIN_PSRAVV16HI,
30965 IX86_BUILTIN_PSRAVV8HI,
30966 IX86_BUILTIN_PMADDUBSW256_MASK,
30967 IX86_BUILTIN_PMADDUBSW128_MASK,
30968 IX86_BUILTIN_PMADDWD256_MASK,
30969 IX86_BUILTIN_PMADDWD128_MASK,
30970 IX86_BUILTIN_PSRLVV16HI,
30971 IX86_BUILTIN_PSRLVV8HI,
30972 IX86_BUILTIN_CVTPS2DQ256_MASK,
30973 IX86_BUILTIN_CVTPS2DQ128_MASK,
30974 IX86_BUILTIN_CVTPS2UDQ256,
30975 IX86_BUILTIN_CVTPS2UDQ128,
30976 IX86_BUILTIN_CVTPS2QQ256,
30977 IX86_BUILTIN_CVTPS2QQ128,
30978 IX86_BUILTIN_CVTPS2UQQ256,
30979 IX86_BUILTIN_CVTPS2UQQ128,
30980 IX86_BUILTIN_GETMANTPS256,
30981 IX86_BUILTIN_GETMANTPS128,
30982 IX86_BUILTIN_GETMANTPD256,
30983 IX86_BUILTIN_GETMANTPD128,
30984 IX86_BUILTIN_MOVDDUP256_MASK,
30985 IX86_BUILTIN_MOVDDUP128_MASK,
30986 IX86_BUILTIN_MOVSHDUP256_MASK,
30987 IX86_BUILTIN_MOVSHDUP128_MASK,
30988 IX86_BUILTIN_MOVSLDUP256_MASK,
30989 IX86_BUILTIN_MOVSLDUP128_MASK,
30990 IX86_BUILTIN_CVTQQ2PS256,
30991 IX86_BUILTIN_CVTQQ2PS128,
30992 IX86_BUILTIN_CVTUQQ2PS256,
30993 IX86_BUILTIN_CVTUQQ2PS128,
30994 IX86_BUILTIN_CVTQQ2PD256,
30995 IX86_BUILTIN_CVTQQ2PD128,
30996 IX86_BUILTIN_CVTUQQ2PD256,
30997 IX86_BUILTIN_CVTUQQ2PD128,
30998 IX86_BUILTIN_VPERMT2VARQ256,
30999 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
31000 IX86_BUILTIN_VPERMT2VARD256,
31001 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
31002 IX86_BUILTIN_VPERMI2VARQ256,
31003 IX86_BUILTIN_VPERMI2VARD256,
31004 IX86_BUILTIN_VPERMT2VARPD256,
31005 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
31006 IX86_BUILTIN_VPERMT2VARPS256,
31007 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
31008 IX86_BUILTIN_VPERMI2VARPD256,
31009 IX86_BUILTIN_VPERMI2VARPS256,
31010 IX86_BUILTIN_VPERMT2VARQ128,
31011 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
31012 IX86_BUILTIN_VPERMT2VARD128,
31013 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
31014 IX86_BUILTIN_VPERMI2VARQ128,
31015 IX86_BUILTIN_VPERMI2VARD128,
31016 IX86_BUILTIN_VPERMT2VARPD128,
31017 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
31018 IX86_BUILTIN_VPERMT2VARPS128,
31019 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
31020 IX86_BUILTIN_VPERMI2VARPD128,
31021 IX86_BUILTIN_VPERMI2VARPS128,
31022 IX86_BUILTIN_PSHUFB256_MASK,
31023 IX86_BUILTIN_PSHUFB128_MASK,
31024 IX86_BUILTIN_PSHUFHW256_MASK,
31025 IX86_BUILTIN_PSHUFHW128_MASK,
31026 IX86_BUILTIN_PSHUFLW256_MASK,
31027 IX86_BUILTIN_PSHUFLW128_MASK,
31028 IX86_BUILTIN_PSHUFD256_MASK,
31029 IX86_BUILTIN_PSHUFD128_MASK,
31030 IX86_BUILTIN_SHUFPD256_MASK,
31031 IX86_BUILTIN_SHUFPD128_MASK,
31032 IX86_BUILTIN_SHUFPS256_MASK,
31033 IX86_BUILTIN_SHUFPS128_MASK,
31034 IX86_BUILTIN_PROLVQ256,
31035 IX86_BUILTIN_PROLVQ128,
31036 IX86_BUILTIN_PROLQ256,
31037 IX86_BUILTIN_PROLQ128,
31038 IX86_BUILTIN_PRORVQ256,
31039 IX86_BUILTIN_PRORVQ128,
31040 IX86_BUILTIN_PRORQ256,
31041 IX86_BUILTIN_PRORQ128,
31042 IX86_BUILTIN_PSRAVQ128,
31043 IX86_BUILTIN_PSRAVQ256,
31044 IX86_BUILTIN_PSLLVV4DI_MASK,
31045 IX86_BUILTIN_PSLLVV2DI_MASK,
31046 IX86_BUILTIN_PSLLVV8SI_MASK,
31047 IX86_BUILTIN_PSLLVV4SI_MASK,
31048 IX86_BUILTIN_PSRAVV8SI_MASK,
31049 IX86_BUILTIN_PSRAVV4SI_MASK,
31050 IX86_BUILTIN_PSRLVV4DI_MASK,
31051 IX86_BUILTIN_PSRLVV2DI_MASK,
31052 IX86_BUILTIN_PSRLVV8SI_MASK,
31053 IX86_BUILTIN_PSRLVV4SI_MASK,
31054 IX86_BUILTIN_PSRAWI256_MASK,
31055 IX86_BUILTIN_PSRAW256_MASK,
31056 IX86_BUILTIN_PSRAWI128_MASK,
31057 IX86_BUILTIN_PSRAW128_MASK,
31058 IX86_BUILTIN_PSRLWI256_MASK,
31059 IX86_BUILTIN_PSRLW256_MASK,
31060 IX86_BUILTIN_PSRLWI128_MASK,
31061 IX86_BUILTIN_PSRLW128_MASK,
31062 IX86_BUILTIN_PRORVD256,
31063 IX86_BUILTIN_PROLVD256,
31064 IX86_BUILTIN_PRORD256,
31065 IX86_BUILTIN_PROLD256,
31066 IX86_BUILTIN_PRORVD128,
31067 IX86_BUILTIN_PROLVD128,
31068 IX86_BUILTIN_PRORD128,
31069 IX86_BUILTIN_PROLD128,
31070 IX86_BUILTIN_FPCLASSPD256,
31071 IX86_BUILTIN_FPCLASSPD128,
31072 IX86_BUILTIN_FPCLASSSD,
31073 IX86_BUILTIN_FPCLASSPS256,
31074 IX86_BUILTIN_FPCLASSPS128,
31075 IX86_BUILTIN_FPCLASSSS,
31076 IX86_BUILTIN_CVTB2MASK128,
31077 IX86_BUILTIN_CVTB2MASK256,
31078 IX86_BUILTIN_CVTW2MASK128,
31079 IX86_BUILTIN_CVTW2MASK256,
31080 IX86_BUILTIN_CVTD2MASK128,
31081 IX86_BUILTIN_CVTD2MASK256,
31082 IX86_BUILTIN_CVTQ2MASK128,
31083 IX86_BUILTIN_CVTQ2MASK256,
31084 IX86_BUILTIN_CVTMASK2B128,
31085 IX86_BUILTIN_CVTMASK2B256,
31086 IX86_BUILTIN_CVTMASK2W128,
31087 IX86_BUILTIN_CVTMASK2W256,
31088 IX86_BUILTIN_CVTMASK2D128,
31089 IX86_BUILTIN_CVTMASK2D256,
31090 IX86_BUILTIN_CVTMASK2Q128,
31091 IX86_BUILTIN_CVTMASK2Q256,
31092 IX86_BUILTIN_PCMPEQB128_MASK,
31093 IX86_BUILTIN_PCMPEQB256_MASK,
31094 IX86_BUILTIN_PCMPEQW128_MASK,
31095 IX86_BUILTIN_PCMPEQW256_MASK,
31096 IX86_BUILTIN_PCMPEQD128_MASK,
31097 IX86_BUILTIN_PCMPEQD256_MASK,
31098 IX86_BUILTIN_PCMPEQQ128_MASK,
31099 IX86_BUILTIN_PCMPEQQ256_MASK,
31100 IX86_BUILTIN_PCMPGTB128_MASK,
31101 IX86_BUILTIN_PCMPGTB256_MASK,
31102 IX86_BUILTIN_PCMPGTW128_MASK,
31103 IX86_BUILTIN_PCMPGTW256_MASK,
31104 IX86_BUILTIN_PCMPGTD128_MASK,
31105 IX86_BUILTIN_PCMPGTD256_MASK,
31106 IX86_BUILTIN_PCMPGTQ128_MASK,
31107 IX86_BUILTIN_PCMPGTQ256_MASK,
31108 IX86_BUILTIN_PTESTMB128,
31109 IX86_BUILTIN_PTESTMB256,
31110 IX86_BUILTIN_PTESTMW128,
31111 IX86_BUILTIN_PTESTMW256,
31112 IX86_BUILTIN_PTESTMD128,
31113 IX86_BUILTIN_PTESTMD256,
31114 IX86_BUILTIN_PTESTMQ128,
31115 IX86_BUILTIN_PTESTMQ256,
31116 IX86_BUILTIN_PTESTNMB128,
31117 IX86_BUILTIN_PTESTNMB256,
31118 IX86_BUILTIN_PTESTNMW128,
31119 IX86_BUILTIN_PTESTNMW256,
31120 IX86_BUILTIN_PTESTNMD128,
31121 IX86_BUILTIN_PTESTNMD256,
31122 IX86_BUILTIN_PTESTNMQ128,
31123 IX86_BUILTIN_PTESTNMQ256,
31124 IX86_BUILTIN_PBROADCASTMB128,
31125 IX86_BUILTIN_PBROADCASTMB256,
31126 IX86_BUILTIN_PBROADCASTMW128,
31127 IX86_BUILTIN_PBROADCASTMW256,
31128 IX86_BUILTIN_COMPRESSPD256,
31129 IX86_BUILTIN_COMPRESSPD128,
31130 IX86_BUILTIN_COMPRESSPS256,
31131 IX86_BUILTIN_COMPRESSPS128,
31132 IX86_BUILTIN_PCOMPRESSQ256,
31133 IX86_BUILTIN_PCOMPRESSQ128,
31134 IX86_BUILTIN_PCOMPRESSD256,
31135 IX86_BUILTIN_PCOMPRESSD128,
31136 IX86_BUILTIN_EXPANDPD256,
31137 IX86_BUILTIN_EXPANDPD128,
31138 IX86_BUILTIN_EXPANDPS256,
31139 IX86_BUILTIN_EXPANDPS128,
31140 IX86_BUILTIN_PEXPANDQ256,
31141 IX86_BUILTIN_PEXPANDQ128,
31142 IX86_BUILTIN_PEXPANDD256,
31143 IX86_BUILTIN_PEXPANDD128,
31144 IX86_BUILTIN_EXPANDPD256Z,
31145 IX86_BUILTIN_EXPANDPD128Z,
31146 IX86_BUILTIN_EXPANDPS256Z,
31147 IX86_BUILTIN_EXPANDPS128Z,
31148 IX86_BUILTIN_PEXPANDQ256Z,
31149 IX86_BUILTIN_PEXPANDQ128Z,
31150 IX86_BUILTIN_PEXPANDD256Z,
31151 IX86_BUILTIN_PEXPANDD128Z,
31152 IX86_BUILTIN_PMAXSD256_MASK,
31153 IX86_BUILTIN_PMINSD256_MASK,
31154 IX86_BUILTIN_PMAXUD256_MASK,
31155 IX86_BUILTIN_PMINUD256_MASK,
31156 IX86_BUILTIN_PMAXSD128_MASK,
31157 IX86_BUILTIN_PMINSD128_MASK,
31158 IX86_BUILTIN_PMAXUD128_MASK,
31159 IX86_BUILTIN_PMINUD128_MASK,
31160 IX86_BUILTIN_PMAXSQ256_MASK,
31161 IX86_BUILTIN_PMINSQ256_MASK,
31162 IX86_BUILTIN_PMAXUQ256_MASK,
31163 IX86_BUILTIN_PMINUQ256_MASK,
31164 IX86_BUILTIN_PMAXSQ128_MASK,
31165 IX86_BUILTIN_PMINSQ128_MASK,
31166 IX86_BUILTIN_PMAXUQ128_MASK,
31167 IX86_BUILTIN_PMINUQ128_MASK,
31168 IX86_BUILTIN_PMINSB256_MASK,
31169 IX86_BUILTIN_PMINUB256_MASK,
31170 IX86_BUILTIN_PMAXSB256_MASK,
31171 IX86_BUILTIN_PMAXUB256_MASK,
31172 IX86_BUILTIN_PMINSB128_MASK,
31173 IX86_BUILTIN_PMINUB128_MASK,
31174 IX86_BUILTIN_PMAXSB128_MASK,
31175 IX86_BUILTIN_PMAXUB128_MASK,
31176 IX86_BUILTIN_PMINSW256_MASK,
31177 IX86_BUILTIN_PMINUW256_MASK,
31178 IX86_BUILTIN_PMAXSW256_MASK,
31179 IX86_BUILTIN_PMAXUW256_MASK,
31180 IX86_BUILTIN_PMINSW128_MASK,
31181 IX86_BUILTIN_PMINUW128_MASK,
31182 IX86_BUILTIN_PMAXSW128_MASK,
31183 IX86_BUILTIN_PMAXUW128_MASK,
31184 IX86_BUILTIN_VPCONFLICTQ256,
31185 IX86_BUILTIN_VPCONFLICTD256,
31186 IX86_BUILTIN_VPCLZCNTQ256,
31187 IX86_BUILTIN_VPCLZCNTD256,
31188 IX86_BUILTIN_UNPCKHPD256_MASK,
31189 IX86_BUILTIN_UNPCKHPD128_MASK,
31190 IX86_BUILTIN_UNPCKHPS256_MASK,
31191 IX86_BUILTIN_UNPCKHPS128_MASK,
31192 IX86_BUILTIN_UNPCKLPD256_MASK,
31193 IX86_BUILTIN_UNPCKLPD128_MASK,
31194 IX86_BUILTIN_UNPCKLPS256_MASK,
31195 IX86_BUILTIN_VPCONFLICTQ128,
31196 IX86_BUILTIN_VPCONFLICTD128,
31197 IX86_BUILTIN_VPCLZCNTQ128,
31198 IX86_BUILTIN_VPCLZCNTD128,
31199 IX86_BUILTIN_UNPCKLPS128_MASK,
31200 IX86_BUILTIN_ALIGND256,
31201 IX86_BUILTIN_ALIGNQ256,
31202 IX86_BUILTIN_ALIGND128,
31203 IX86_BUILTIN_ALIGNQ128,
31204 IX86_BUILTIN_CVTPS2PH256_MASK,
31205 IX86_BUILTIN_CVTPS2PH_MASK,
31206 IX86_BUILTIN_CVTPH2PS_MASK,
31207 IX86_BUILTIN_CVTPH2PS256_MASK,
31208 IX86_BUILTIN_PUNPCKHDQ128_MASK,
31209 IX86_BUILTIN_PUNPCKHDQ256_MASK,
31210 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
31211 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
31212 IX86_BUILTIN_PUNPCKLDQ128_MASK,
31213 IX86_BUILTIN_PUNPCKLDQ256_MASK,
31214 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
31215 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
31216 IX86_BUILTIN_PUNPCKHBW128_MASK,
31217 IX86_BUILTIN_PUNPCKHBW256_MASK,
31218 IX86_BUILTIN_PUNPCKHWD128_MASK,
31219 IX86_BUILTIN_PUNPCKHWD256_MASK,
31220 IX86_BUILTIN_PUNPCKLBW128_MASK,
31221 IX86_BUILTIN_PUNPCKLBW256_MASK,
31222 IX86_BUILTIN_PUNPCKLWD128_MASK,
31223 IX86_BUILTIN_PUNPCKLWD256_MASK,
31224 IX86_BUILTIN_PSLLVV16HI,
31225 IX86_BUILTIN_PSLLVV8HI,
31226 IX86_BUILTIN_PACKSSDW256_MASK,
31227 IX86_BUILTIN_PACKSSDW128_MASK,
31228 IX86_BUILTIN_PACKUSDW256_MASK,
31229 IX86_BUILTIN_PACKUSDW128_MASK,
31230 IX86_BUILTIN_PAVGB256_MASK,
31231 IX86_BUILTIN_PAVGW256_MASK,
31232 IX86_BUILTIN_PAVGB128_MASK,
31233 IX86_BUILTIN_PAVGW128_MASK,
31234 IX86_BUILTIN_VPERMVARSF256_MASK,
31235 IX86_BUILTIN_VPERMVARDF256_MASK,
31236 IX86_BUILTIN_VPERMDF256_MASK,
31237 IX86_BUILTIN_PABSB256_MASK,
31238 IX86_BUILTIN_PABSB128_MASK,
31239 IX86_BUILTIN_PABSW256_MASK,
31240 IX86_BUILTIN_PABSW128_MASK,
31241 IX86_BUILTIN_VPERMILVARPD_MASK,
31242 IX86_BUILTIN_VPERMILVARPS_MASK,
31243 IX86_BUILTIN_VPERMILVARPD256_MASK,
31244 IX86_BUILTIN_VPERMILVARPS256_MASK,
31245 IX86_BUILTIN_VPERMILPD_MASK,
31246 IX86_BUILTIN_VPERMILPS_MASK,
31247 IX86_BUILTIN_VPERMILPD256_MASK,
31248 IX86_BUILTIN_VPERMILPS256_MASK,
31249 IX86_BUILTIN_BLENDMQ256,
31250 IX86_BUILTIN_BLENDMD256,
31251 IX86_BUILTIN_BLENDMPD256,
31252 IX86_BUILTIN_BLENDMPS256,
31253 IX86_BUILTIN_BLENDMQ128,
31254 IX86_BUILTIN_BLENDMD128,
31255 IX86_BUILTIN_BLENDMPD128,
31256 IX86_BUILTIN_BLENDMPS128,
31257 IX86_BUILTIN_BLENDMW256,
31258 IX86_BUILTIN_BLENDMB256,
31259 IX86_BUILTIN_BLENDMW128,
31260 IX86_BUILTIN_BLENDMB128,
31261 IX86_BUILTIN_PMULLD256_MASK,
31262 IX86_BUILTIN_PMULLD128_MASK,
31263 IX86_BUILTIN_PMULUDQ256_MASK,
31264 IX86_BUILTIN_PMULDQ256_MASK,
31265 IX86_BUILTIN_PMULDQ128_MASK,
31266 IX86_BUILTIN_PMULUDQ128_MASK,
31267 IX86_BUILTIN_CVTPD2PS256_MASK,
31268 IX86_BUILTIN_CVTPD2PS_MASK,
31269 IX86_BUILTIN_VPERMVARSI256_MASK,
31270 IX86_BUILTIN_VPERMVARDI256_MASK,
31271 IX86_BUILTIN_VPERMDI256_MASK,
31272 IX86_BUILTIN_CMPQ256,
31273 IX86_BUILTIN_CMPD256,
31274 IX86_BUILTIN_UCMPQ256,
31275 IX86_BUILTIN_UCMPD256,
31276 IX86_BUILTIN_CMPB256,
31277 IX86_BUILTIN_CMPW256,
31278 IX86_BUILTIN_UCMPB256,
31279 IX86_BUILTIN_UCMPW256,
31280 IX86_BUILTIN_CMPPD256_MASK,
31281 IX86_BUILTIN_CMPPS256_MASK,
31282 IX86_BUILTIN_CMPQ128,
31283 IX86_BUILTIN_CMPD128,
31284 IX86_BUILTIN_UCMPQ128,
31285 IX86_BUILTIN_UCMPD128,
31286 IX86_BUILTIN_CMPB128,
31287 IX86_BUILTIN_CMPW128,
31288 IX86_BUILTIN_UCMPB128,
31289 IX86_BUILTIN_UCMPW128,
31290 IX86_BUILTIN_CMPPD128_MASK,
31291 IX86_BUILTIN_CMPPS128_MASK,
31292
31293 IX86_BUILTIN_GATHER3SIV8SF,
31294 IX86_BUILTIN_GATHER3SIV4SF,
31295 IX86_BUILTIN_GATHER3SIV4DF,
31296 IX86_BUILTIN_GATHER3SIV2DF,
31297 IX86_BUILTIN_GATHER3DIV8SF,
31298 IX86_BUILTIN_GATHER3DIV4SF,
31299 IX86_BUILTIN_GATHER3DIV4DF,
31300 IX86_BUILTIN_GATHER3DIV2DF,
31301 IX86_BUILTIN_GATHER3SIV8SI,
31302 IX86_BUILTIN_GATHER3SIV4SI,
31303 IX86_BUILTIN_GATHER3SIV4DI,
31304 IX86_BUILTIN_GATHER3SIV2DI,
31305 IX86_BUILTIN_GATHER3DIV8SI,
31306 IX86_BUILTIN_GATHER3DIV4SI,
31307 IX86_BUILTIN_GATHER3DIV4DI,
31308 IX86_BUILTIN_GATHER3DIV2DI,
31309 IX86_BUILTIN_SCATTERSIV8SF,
31310 IX86_BUILTIN_SCATTERSIV4SF,
31311 IX86_BUILTIN_SCATTERSIV4DF,
31312 IX86_BUILTIN_SCATTERSIV2DF,
31313 IX86_BUILTIN_SCATTERDIV8SF,
31314 IX86_BUILTIN_SCATTERDIV4SF,
31315 IX86_BUILTIN_SCATTERDIV4DF,
31316 IX86_BUILTIN_SCATTERDIV2DF,
31317 IX86_BUILTIN_SCATTERSIV8SI,
31318 IX86_BUILTIN_SCATTERSIV4SI,
31319 IX86_BUILTIN_SCATTERSIV4DI,
31320 IX86_BUILTIN_SCATTERSIV2DI,
31321 IX86_BUILTIN_SCATTERDIV8SI,
31322 IX86_BUILTIN_SCATTERDIV4SI,
31323 IX86_BUILTIN_SCATTERDIV4DI,
31324 IX86_BUILTIN_SCATTERDIV2DI,
31325
31326 /* AVX512DQ. */
31327 IX86_BUILTIN_RANGESD128,
31328 IX86_BUILTIN_RANGESS128,
31329 IX86_BUILTIN_KUNPCKWD,
31330 IX86_BUILTIN_KUNPCKDQ,
31331 IX86_BUILTIN_BROADCASTF32x2_512,
31332 IX86_BUILTIN_BROADCASTI32x2_512,
31333 IX86_BUILTIN_BROADCASTF64X2_512,
31334 IX86_BUILTIN_BROADCASTI64X2_512,
31335 IX86_BUILTIN_BROADCASTF32X8_512,
31336 IX86_BUILTIN_BROADCASTI32X8_512,
31337 IX86_BUILTIN_EXTRACTF64X2_512,
31338 IX86_BUILTIN_EXTRACTF32X8,
31339 IX86_BUILTIN_EXTRACTI64X2_512,
31340 IX86_BUILTIN_EXTRACTI32X8,
31341 IX86_BUILTIN_REDUCEPD512_MASK,
31342 IX86_BUILTIN_REDUCEPS512_MASK,
31343 IX86_BUILTIN_PMULLQ512,
31344 IX86_BUILTIN_XORPD512,
31345 IX86_BUILTIN_XORPS512,
31346 IX86_BUILTIN_ORPD512,
31347 IX86_BUILTIN_ORPS512,
31348 IX86_BUILTIN_ANDPD512,
31349 IX86_BUILTIN_ANDPS512,
31350 IX86_BUILTIN_ANDNPD512,
31351 IX86_BUILTIN_ANDNPS512,
31352 IX86_BUILTIN_INSERTF32X8,
31353 IX86_BUILTIN_INSERTI32X8,
31354 IX86_BUILTIN_INSERTF64X2_512,
31355 IX86_BUILTIN_INSERTI64X2_512,
31356 IX86_BUILTIN_FPCLASSPD512,
31357 IX86_BUILTIN_FPCLASSPS512,
31358 IX86_BUILTIN_CVTD2MASK512,
31359 IX86_BUILTIN_CVTQ2MASK512,
31360 IX86_BUILTIN_CVTMASK2D512,
31361 IX86_BUILTIN_CVTMASK2Q512,
31362 IX86_BUILTIN_CVTPD2QQ512,
31363 IX86_BUILTIN_CVTPS2QQ512,
31364 IX86_BUILTIN_CVTPD2UQQ512,
31365 IX86_BUILTIN_CVTPS2UQQ512,
31366 IX86_BUILTIN_CVTQQ2PS512,
31367 IX86_BUILTIN_CVTUQQ2PS512,
31368 IX86_BUILTIN_CVTQQ2PD512,
31369 IX86_BUILTIN_CVTUQQ2PD512,
31370 IX86_BUILTIN_CVTTPS2QQ512,
31371 IX86_BUILTIN_CVTTPS2UQQ512,
31372 IX86_BUILTIN_CVTTPD2QQ512,
31373 IX86_BUILTIN_CVTTPD2UQQ512,
31374 IX86_BUILTIN_RANGEPS512,
31375 IX86_BUILTIN_RANGEPD512,
31376
31377 /* AVX512BW. */
31378 IX86_BUILTIN_PACKUSDW512,
31379 IX86_BUILTIN_PACKSSDW512,
31380 IX86_BUILTIN_LOADDQUHI512_MASK,
31381 IX86_BUILTIN_LOADDQUQI512_MASK,
31382 IX86_BUILTIN_PSLLDQ512,
31383 IX86_BUILTIN_PSRLDQ512,
31384 IX86_BUILTIN_STOREDQUHI512_MASK,
31385 IX86_BUILTIN_STOREDQUQI512_MASK,
31386 IX86_BUILTIN_PALIGNR512,
31387 IX86_BUILTIN_PALIGNR512_MASK,
31388 IX86_BUILTIN_MOVDQUHI512_MASK,
31389 IX86_BUILTIN_MOVDQUQI512_MASK,
31390 IX86_BUILTIN_PSADBW512,
31391 IX86_BUILTIN_DBPSADBW512,
31392 IX86_BUILTIN_PBROADCASTB512,
31393 IX86_BUILTIN_PBROADCASTB512_GPR,
31394 IX86_BUILTIN_PBROADCASTW512,
31395 IX86_BUILTIN_PBROADCASTW512_GPR,
31396 IX86_BUILTIN_PMOVSXBW512_MASK,
31397 IX86_BUILTIN_PMOVZXBW512_MASK,
31398 IX86_BUILTIN_VPERMVARHI512_MASK,
31399 IX86_BUILTIN_VPERMT2VARHI512,
31400 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
31401 IX86_BUILTIN_VPERMI2VARHI512,
31402 IX86_BUILTIN_PAVGB512,
31403 IX86_BUILTIN_PAVGW512,
31404 IX86_BUILTIN_PADDB512,
31405 IX86_BUILTIN_PSUBB512,
31406 IX86_BUILTIN_PSUBSB512,
31407 IX86_BUILTIN_PADDSB512,
31408 IX86_BUILTIN_PSUBUSB512,
31409 IX86_BUILTIN_PADDUSB512,
31410 IX86_BUILTIN_PSUBW512,
31411 IX86_BUILTIN_PADDW512,
31412 IX86_BUILTIN_PSUBSW512,
31413 IX86_BUILTIN_PADDSW512,
31414 IX86_BUILTIN_PSUBUSW512,
31415 IX86_BUILTIN_PADDUSW512,
31416 IX86_BUILTIN_PMAXUW512,
31417 IX86_BUILTIN_PMAXSW512,
31418 IX86_BUILTIN_PMINUW512,
31419 IX86_BUILTIN_PMINSW512,
31420 IX86_BUILTIN_PMAXUB512,
31421 IX86_BUILTIN_PMAXSB512,
31422 IX86_BUILTIN_PMINUB512,
31423 IX86_BUILTIN_PMINSB512,
31424 IX86_BUILTIN_PMOVWB512,
31425 IX86_BUILTIN_PMOVSWB512,
31426 IX86_BUILTIN_PMOVUSWB512,
31427 IX86_BUILTIN_PMULHRSW512_MASK,
31428 IX86_BUILTIN_PMULHUW512_MASK,
31429 IX86_BUILTIN_PMULHW512_MASK,
31430 IX86_BUILTIN_PMULLW512_MASK,
31431 IX86_BUILTIN_PSLLWI512_MASK,
31432 IX86_BUILTIN_PSLLW512_MASK,
31433 IX86_BUILTIN_PACKSSWB512,
31434 IX86_BUILTIN_PACKUSWB512,
31435 IX86_BUILTIN_PSRAVV32HI,
31436 IX86_BUILTIN_PMADDUBSW512_MASK,
31437 IX86_BUILTIN_PMADDWD512_MASK,
31438 IX86_BUILTIN_PSRLVV32HI,
31439 IX86_BUILTIN_PUNPCKHBW512,
31440 IX86_BUILTIN_PUNPCKHWD512,
31441 IX86_BUILTIN_PUNPCKLBW512,
31442 IX86_BUILTIN_PUNPCKLWD512,
31443 IX86_BUILTIN_PSHUFB512,
31444 IX86_BUILTIN_PSHUFHW512,
31445 IX86_BUILTIN_PSHUFLW512,
31446 IX86_BUILTIN_PSRAWI512,
31447 IX86_BUILTIN_PSRAW512,
31448 IX86_BUILTIN_PSRLWI512,
31449 IX86_BUILTIN_PSRLW512,
31450 IX86_BUILTIN_CVTB2MASK512,
31451 IX86_BUILTIN_CVTW2MASK512,
31452 IX86_BUILTIN_CVTMASK2B512,
31453 IX86_BUILTIN_CVTMASK2W512,
31454 IX86_BUILTIN_PCMPEQB512_MASK,
31455 IX86_BUILTIN_PCMPEQW512_MASK,
31456 IX86_BUILTIN_PCMPGTB512_MASK,
31457 IX86_BUILTIN_PCMPGTW512_MASK,
31458 IX86_BUILTIN_PTESTMB512,
31459 IX86_BUILTIN_PTESTMW512,
31460 IX86_BUILTIN_PTESTNMB512,
31461 IX86_BUILTIN_PTESTNMW512,
31462 IX86_BUILTIN_PSLLVV32HI,
31463 IX86_BUILTIN_PABSB512,
31464 IX86_BUILTIN_PABSW512,
31465 IX86_BUILTIN_BLENDMW512,
31466 IX86_BUILTIN_BLENDMB512,
31467 IX86_BUILTIN_CMPB512,
31468 IX86_BUILTIN_CMPW512,
31469 IX86_BUILTIN_UCMPB512,
31470 IX86_BUILTIN_UCMPW512,
31471
31472   /* Alternate 4- and 8-element gather/scatter for the vectorizer,
31473      where all operands are 32-byte or 64-byte wide respectively. */
31474 IX86_BUILTIN_GATHERALTSIV4DF,
31475 IX86_BUILTIN_GATHERALTDIV8SF,
31476 IX86_BUILTIN_GATHERALTSIV4DI,
31477 IX86_BUILTIN_GATHERALTDIV8SI,
31478 IX86_BUILTIN_GATHER3ALTDIV16SF,
31479 IX86_BUILTIN_GATHER3ALTDIV16SI,
31480 IX86_BUILTIN_GATHER3ALTSIV4DF,
31481 IX86_BUILTIN_GATHER3ALTDIV8SF,
31482 IX86_BUILTIN_GATHER3ALTSIV4DI,
31483 IX86_BUILTIN_GATHER3ALTDIV8SI,
31484 IX86_BUILTIN_GATHER3ALTSIV8DF,
31485 IX86_BUILTIN_GATHER3ALTSIV8DI,
31486 IX86_BUILTIN_GATHER3DIV16SF,
31487 IX86_BUILTIN_GATHER3DIV16SI,
31488 IX86_BUILTIN_GATHER3DIV8DF,
31489 IX86_BUILTIN_GATHER3DIV8DI,
31490 IX86_BUILTIN_GATHER3SIV16SF,
31491 IX86_BUILTIN_GATHER3SIV16SI,
31492 IX86_BUILTIN_GATHER3SIV8DF,
31493 IX86_BUILTIN_GATHER3SIV8DI,
31494 IX86_BUILTIN_SCATTERALTSIV8DF,
31495 IX86_BUILTIN_SCATTERALTDIV16SF,
31496 IX86_BUILTIN_SCATTERALTSIV8DI,
31497 IX86_BUILTIN_SCATTERALTDIV16SI,
31498 IX86_BUILTIN_SCATTERDIV16SF,
31499 IX86_BUILTIN_SCATTERDIV16SI,
31500 IX86_BUILTIN_SCATTERDIV8DF,
31501 IX86_BUILTIN_SCATTERDIV8DI,
31502 IX86_BUILTIN_SCATTERSIV16SF,
31503 IX86_BUILTIN_SCATTERSIV16SI,
31504 IX86_BUILTIN_SCATTERSIV8DF,
31505 IX86_BUILTIN_SCATTERSIV8DI,
31506
31507 /* AVX512PF */
31508 IX86_BUILTIN_GATHERPFQPD,
31509 IX86_BUILTIN_GATHERPFDPS,
31510 IX86_BUILTIN_GATHERPFDPD,
31511 IX86_BUILTIN_GATHERPFQPS,
31512 IX86_BUILTIN_SCATTERPFDPD,
31513 IX86_BUILTIN_SCATTERPFDPS,
31514 IX86_BUILTIN_SCATTERPFQPD,
31515 IX86_BUILTIN_SCATTERPFQPS,
31516
31517 /* AVX-512ER */
31518 IX86_BUILTIN_EXP2PD_MASK,
31519 IX86_BUILTIN_EXP2PS_MASK,
31520 IX86_BUILTIN_EXP2PS,
31521 IX86_BUILTIN_RCP28PD,
31522 IX86_BUILTIN_RCP28PS,
31523 IX86_BUILTIN_RCP28SD,
31524 IX86_BUILTIN_RCP28SS,
31525 IX86_BUILTIN_RSQRT28PD,
31526 IX86_BUILTIN_RSQRT28PS,
31527 IX86_BUILTIN_RSQRT28SD,
31528 IX86_BUILTIN_RSQRT28SS,
31529
31530 /* AVX-512IFMA */
31531 IX86_BUILTIN_VPMADD52LUQ512,
31532 IX86_BUILTIN_VPMADD52HUQ512,
31533 IX86_BUILTIN_VPMADD52LUQ256,
31534 IX86_BUILTIN_VPMADD52HUQ256,
31535 IX86_BUILTIN_VPMADD52LUQ128,
31536 IX86_BUILTIN_VPMADD52HUQ128,
31537 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
31538 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
31539 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
31540 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
31541 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
31542 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
31543
31544 /* AVX-512VBMI */
31545 IX86_BUILTIN_VPMULTISHIFTQB512,
31546 IX86_BUILTIN_VPMULTISHIFTQB256,
31547 IX86_BUILTIN_VPMULTISHIFTQB128,
31548 IX86_BUILTIN_VPERMVARQI512_MASK,
31549 IX86_BUILTIN_VPERMT2VARQI512,
31550 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
31551 IX86_BUILTIN_VPERMI2VARQI512,
31552 IX86_BUILTIN_VPERMVARQI256_MASK,
31553 IX86_BUILTIN_VPERMVARQI128_MASK,
31554 IX86_BUILTIN_VPERMT2VARQI256,
31555 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
31556 IX86_BUILTIN_VPERMT2VARQI128,
31557 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
31558 IX86_BUILTIN_VPERMI2VARQI256,
31559 IX86_BUILTIN_VPERMI2VARQI128,
31560
31561 /* SHA builtins. */
31562 IX86_BUILTIN_SHA1MSG1,
31563 IX86_BUILTIN_SHA1MSG2,
31564 IX86_BUILTIN_SHA1NEXTE,
31565 IX86_BUILTIN_SHA1RNDS4,
31566 IX86_BUILTIN_SHA256MSG1,
31567 IX86_BUILTIN_SHA256MSG2,
31568 IX86_BUILTIN_SHA256RNDS2,
31569
31570 /* CLWB instructions. */
31571 IX86_BUILTIN_CLWB,
31572
31573 /* PCOMMIT instructions. */
31574 IX86_BUILTIN_PCOMMIT,
31575
31576 /* CLFLUSHOPT instructions. */
31577 IX86_BUILTIN_CLFLUSHOPT,
31578
31579 /* TFmode support builtins. */
31580 IX86_BUILTIN_INFQ,
31581 IX86_BUILTIN_HUGE_VALQ,
31582 IX86_BUILTIN_FABSQ,
31583 IX86_BUILTIN_COPYSIGNQ,
31584
31585 /* Vectorizer support builtins. */
31586 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
31587 IX86_BUILTIN_CPYSGNPS,
31588 IX86_BUILTIN_CPYSGNPD,
31589 IX86_BUILTIN_CPYSGNPS256,
31590 IX86_BUILTIN_CPYSGNPS512,
31591 IX86_BUILTIN_CPYSGNPD256,
31592 IX86_BUILTIN_CPYSGNPD512,
31593 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
31594 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
31595
31597 /* FMA4 instructions. */
31598 IX86_BUILTIN_VFMADDSS,
31599 IX86_BUILTIN_VFMADDSD,
31600 IX86_BUILTIN_VFMADDPS,
31601 IX86_BUILTIN_VFMADDPD,
31602 IX86_BUILTIN_VFMADDPS256,
31603 IX86_BUILTIN_VFMADDPD256,
31604 IX86_BUILTIN_VFMADDSUBPS,
31605 IX86_BUILTIN_VFMADDSUBPD,
31606 IX86_BUILTIN_VFMADDSUBPS256,
31607 IX86_BUILTIN_VFMADDSUBPD256,
31608
31609 /* FMA3 instructions. */
31610 IX86_BUILTIN_VFMADDSS3,
31611 IX86_BUILTIN_VFMADDSD3,
31612
31613 /* XOP instructions. */
31614 IX86_BUILTIN_VPCMOV,
31615 IX86_BUILTIN_VPCMOV_V2DI,
31616 IX86_BUILTIN_VPCMOV_V4SI,
31617 IX86_BUILTIN_VPCMOV_V8HI,
31618 IX86_BUILTIN_VPCMOV_V16QI,
31619 IX86_BUILTIN_VPCMOV_V4SF,
31620 IX86_BUILTIN_VPCMOV_V2DF,
31621 IX86_BUILTIN_VPCMOV256,
31622 IX86_BUILTIN_VPCMOV_V4DI256,
31623 IX86_BUILTIN_VPCMOV_V8SI256,
31624 IX86_BUILTIN_VPCMOV_V16HI256,
31625 IX86_BUILTIN_VPCMOV_V32QI256,
31626 IX86_BUILTIN_VPCMOV_V8SF256,
31627 IX86_BUILTIN_VPCMOV_V4DF256,
31628
31629 IX86_BUILTIN_VPPERM,
31630
31631 IX86_BUILTIN_VPMACSSWW,
31632 IX86_BUILTIN_VPMACSWW,
31633 IX86_BUILTIN_VPMACSSWD,
31634 IX86_BUILTIN_VPMACSWD,
31635 IX86_BUILTIN_VPMACSSDD,
31636 IX86_BUILTIN_VPMACSDD,
31637 IX86_BUILTIN_VPMACSSDQL,
31638 IX86_BUILTIN_VPMACSSDQH,
31639 IX86_BUILTIN_VPMACSDQL,
31640 IX86_BUILTIN_VPMACSDQH,
31641 IX86_BUILTIN_VPMADCSSWD,
31642 IX86_BUILTIN_VPMADCSWD,
31643
31644 IX86_BUILTIN_VPHADDBW,
31645 IX86_BUILTIN_VPHADDBD,
31646 IX86_BUILTIN_VPHADDBQ,
31647 IX86_BUILTIN_VPHADDWD,
31648 IX86_BUILTIN_VPHADDWQ,
31649 IX86_BUILTIN_VPHADDDQ,
31650 IX86_BUILTIN_VPHADDUBW,
31651 IX86_BUILTIN_VPHADDUBD,
31652 IX86_BUILTIN_VPHADDUBQ,
31653 IX86_BUILTIN_VPHADDUWD,
31654 IX86_BUILTIN_VPHADDUWQ,
31655 IX86_BUILTIN_VPHADDUDQ,
31656 IX86_BUILTIN_VPHSUBBW,
31657 IX86_BUILTIN_VPHSUBWD,
31658 IX86_BUILTIN_VPHSUBDQ,
31659
31660 IX86_BUILTIN_VPROTB,
31661 IX86_BUILTIN_VPROTW,
31662 IX86_BUILTIN_VPROTD,
31663 IX86_BUILTIN_VPROTQ,
31664 IX86_BUILTIN_VPROTB_IMM,
31665 IX86_BUILTIN_VPROTW_IMM,
31666 IX86_BUILTIN_VPROTD_IMM,
31667 IX86_BUILTIN_VPROTQ_IMM,
31668
31669 IX86_BUILTIN_VPSHLB,
31670 IX86_BUILTIN_VPSHLW,
31671 IX86_BUILTIN_VPSHLD,
31672 IX86_BUILTIN_VPSHLQ,
31673 IX86_BUILTIN_VPSHAB,
31674 IX86_BUILTIN_VPSHAW,
31675 IX86_BUILTIN_VPSHAD,
31676 IX86_BUILTIN_VPSHAQ,
31677
31678 IX86_BUILTIN_VFRCZSS,
31679 IX86_BUILTIN_VFRCZSD,
31680 IX86_BUILTIN_VFRCZPS,
31681 IX86_BUILTIN_VFRCZPD,
31682 IX86_BUILTIN_VFRCZPS256,
31683 IX86_BUILTIN_VFRCZPD256,
31684
31685 IX86_BUILTIN_VPCOMEQUB,
31686 IX86_BUILTIN_VPCOMNEUB,
31687 IX86_BUILTIN_VPCOMLTUB,
31688 IX86_BUILTIN_VPCOMLEUB,
31689 IX86_BUILTIN_VPCOMGTUB,
31690 IX86_BUILTIN_VPCOMGEUB,
31691 IX86_BUILTIN_VPCOMFALSEUB,
31692 IX86_BUILTIN_VPCOMTRUEUB,
31693
31694 IX86_BUILTIN_VPCOMEQUW,
31695 IX86_BUILTIN_VPCOMNEUW,
31696 IX86_BUILTIN_VPCOMLTUW,
31697 IX86_BUILTIN_VPCOMLEUW,
31698 IX86_BUILTIN_VPCOMGTUW,
31699 IX86_BUILTIN_VPCOMGEUW,
31700 IX86_BUILTIN_VPCOMFALSEUW,
31701 IX86_BUILTIN_VPCOMTRUEUW,
31702
31703 IX86_BUILTIN_VPCOMEQUD,
31704 IX86_BUILTIN_VPCOMNEUD,
31705 IX86_BUILTIN_VPCOMLTUD,
31706 IX86_BUILTIN_VPCOMLEUD,
31707 IX86_BUILTIN_VPCOMGTUD,
31708 IX86_BUILTIN_VPCOMGEUD,
31709 IX86_BUILTIN_VPCOMFALSEUD,
31710 IX86_BUILTIN_VPCOMTRUEUD,
31711
31712 IX86_BUILTIN_VPCOMEQUQ,
31713 IX86_BUILTIN_VPCOMNEUQ,
31714 IX86_BUILTIN_VPCOMLTUQ,
31715 IX86_BUILTIN_VPCOMLEUQ,
31716 IX86_BUILTIN_VPCOMGTUQ,
31717 IX86_BUILTIN_VPCOMGEUQ,
31718 IX86_BUILTIN_VPCOMFALSEUQ,
31719 IX86_BUILTIN_VPCOMTRUEUQ,
31720
31721 IX86_BUILTIN_VPCOMEQB,
31722 IX86_BUILTIN_VPCOMNEB,
31723 IX86_BUILTIN_VPCOMLTB,
31724 IX86_BUILTIN_VPCOMLEB,
31725 IX86_BUILTIN_VPCOMGTB,
31726 IX86_BUILTIN_VPCOMGEB,
31727 IX86_BUILTIN_VPCOMFALSEB,
31728 IX86_BUILTIN_VPCOMTRUEB,
31729
31730 IX86_BUILTIN_VPCOMEQW,
31731 IX86_BUILTIN_VPCOMNEW,
31732 IX86_BUILTIN_VPCOMLTW,
31733 IX86_BUILTIN_VPCOMLEW,
31734 IX86_BUILTIN_VPCOMGTW,
31735 IX86_BUILTIN_VPCOMGEW,
31736 IX86_BUILTIN_VPCOMFALSEW,
31737 IX86_BUILTIN_VPCOMTRUEW,
31738
31739 IX86_BUILTIN_VPCOMEQD,
31740 IX86_BUILTIN_VPCOMNED,
31741 IX86_BUILTIN_VPCOMLTD,
31742 IX86_BUILTIN_VPCOMLED,
31743 IX86_BUILTIN_VPCOMGTD,
31744 IX86_BUILTIN_VPCOMGED,
31745 IX86_BUILTIN_VPCOMFALSED,
31746 IX86_BUILTIN_VPCOMTRUED,
31747
31748 IX86_BUILTIN_VPCOMEQQ,
31749 IX86_BUILTIN_VPCOMNEQ,
31750 IX86_BUILTIN_VPCOMLTQ,
31751 IX86_BUILTIN_VPCOMLEQ,
31752 IX86_BUILTIN_VPCOMGTQ,
31753 IX86_BUILTIN_VPCOMGEQ,
31754 IX86_BUILTIN_VPCOMFALSEQ,
31755 IX86_BUILTIN_VPCOMTRUEQ,
31756
31757 /* LWP instructions. */
31758 IX86_BUILTIN_LLWPCB,
31759 IX86_BUILTIN_SLWPCB,
31760 IX86_BUILTIN_LWPVAL32,
31761 IX86_BUILTIN_LWPVAL64,
31762 IX86_BUILTIN_LWPINS32,
31763 IX86_BUILTIN_LWPINS64,
31764
31765 IX86_BUILTIN_CLZS,
31766
31767 /* RTM */
31768 IX86_BUILTIN_XBEGIN,
31769 IX86_BUILTIN_XEND,
31770 IX86_BUILTIN_XABORT,
31771 IX86_BUILTIN_XTEST,
31772
31773 /* MPX */
31774 IX86_BUILTIN_BNDMK,
31775 IX86_BUILTIN_BNDSTX,
31776 IX86_BUILTIN_BNDLDX,
31777 IX86_BUILTIN_BNDCL,
31778 IX86_BUILTIN_BNDCU,
31779 IX86_BUILTIN_BNDRET,
31780 IX86_BUILTIN_BNDNARROW,
31781 IX86_BUILTIN_BNDINT,
31782 IX86_BUILTIN_SIZEOF,
31783 IX86_BUILTIN_BNDLOWER,
31784 IX86_BUILTIN_BNDUPPER,
31785
31786 /* BMI instructions. */
31787 IX86_BUILTIN_BEXTR32,
31788 IX86_BUILTIN_BEXTR64,
31789 IX86_BUILTIN_CTZS,
31790
31791 /* TBM instructions. */
31792 IX86_BUILTIN_BEXTRI32,
31793 IX86_BUILTIN_BEXTRI64,
31794
31795 /* BMI2 instructions. */
31796 IX86_BUILTIN_BZHI32,
31797 IX86_BUILTIN_BZHI64,
31798 IX86_BUILTIN_PDEP32,
31799 IX86_BUILTIN_PDEP64,
31800 IX86_BUILTIN_PEXT32,
31801 IX86_BUILTIN_PEXT64,
31802
31803 /* ADX instructions. */
31804 IX86_BUILTIN_ADDCARRYX32,
31805 IX86_BUILTIN_ADDCARRYX64,
31806
31807 /* SBB instructions. */
31808 IX86_BUILTIN_SBB32,
31809 IX86_BUILTIN_SBB64,
31810
31811 /* FSGSBASE instructions. */
31812 IX86_BUILTIN_RDFSBASE32,
31813 IX86_BUILTIN_RDFSBASE64,
31814 IX86_BUILTIN_RDGSBASE32,
31815 IX86_BUILTIN_RDGSBASE64,
31816 IX86_BUILTIN_WRFSBASE32,
31817 IX86_BUILTIN_WRFSBASE64,
31818 IX86_BUILTIN_WRGSBASE32,
31819 IX86_BUILTIN_WRGSBASE64,
31820
31821 /* RDRND instructions. */
31822 IX86_BUILTIN_RDRAND16_STEP,
31823 IX86_BUILTIN_RDRAND32_STEP,
31824 IX86_BUILTIN_RDRAND64_STEP,
31825
31826 /* RDSEED instructions. */
31827 IX86_BUILTIN_RDSEED16_STEP,
31828 IX86_BUILTIN_RDSEED32_STEP,
31829 IX86_BUILTIN_RDSEED64_STEP,
31830
31831 /* F16C instructions. */
31832 IX86_BUILTIN_CVTPH2PS,
31833 IX86_BUILTIN_CVTPH2PS256,
31834 IX86_BUILTIN_CVTPS2PH,
31835 IX86_BUILTIN_CVTPS2PH256,
31836
31837   /* MONITORX and MWAITX instructions. */
31838 IX86_BUILTIN_MONITORX,
31839 IX86_BUILTIN_MWAITX,
31840
31841   /* CFString built-in for Darwin. */
31842 IX86_BUILTIN_CFSTRING,
31843
31844 /* Builtins to get CPU type and supported features. */
31845 IX86_BUILTIN_CPU_INIT,
31846 IX86_BUILTIN_CPU_IS,
31847 IX86_BUILTIN_CPU_SUPPORTS,
31848
31849 /* Read/write FLAGS register built-ins. */
31850 IX86_BUILTIN_READ_FLAGS,
31851 IX86_BUILTIN_WRITE_FLAGS,
31852
31853 IX86_BUILTIN_MAX
31854 };
31855
31856 /* Table for the ix86 builtin decls. */
31857 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
31858
31859 /* Table of all of the builtin functions that are possible with different ISAs
31860 but are waiting to be built until a function is declared to use that
31861 ISA. */
31862 struct builtin_isa {
31863 const char *name; /* function name */
31864 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
31865 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
31866 bool const_p; /* true if the declaration is constant */
31867 bool leaf_p; /* true if the declaration has leaf attribute */
31868 bool nothrow_p; /* true if the declaration has nothrow attribute */
31869 bool set_and_not_built_p;
31870 };
31871
31872 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
31873
31874 /* Bits that can still enable any inclusion of a builtin. */
31875 static HOST_WIDE_INT deferred_isa_values = 0;
31876
31877 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
31878 of which isa_flags to use in the ix86_builtins_isa array. Stores the
31879    function decl in the ix86_builtins array. Returns the function decl,
31880    or NULL_TREE if the builtin was not added.
31881
31882 If the front end has a special hook for builtin functions, delay adding
31883 builtin functions that aren't in the current ISA until the ISA is changed
31884    with function-specific optimization. Doing so can save about 300K for the
31885 default compiler. When the builtin is expanded, check at that time whether
31886 it is valid.
31887
31888    If the front end doesn't have a special hook, record all builtins, even
31889    those whose instruction set isn't in the current ISA, in case the user uses
31890    function-specific options for a different ISA, so that we don't get scope
31891    errors if a builtin is added in the middle of a function scope. */
31892
31893 static inline tree
31894 def_builtin (HOST_WIDE_INT mask, const char *name,
31895 enum ix86_builtin_func_type tcode,
31896 enum ix86_builtins code)
31897 {
31898 tree decl = NULL_TREE;
31899
31900 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
31901 {
31902 ix86_builtins_isa[(int) code].isa = mask;
31903
31904 mask &= ~OPTION_MASK_ISA_64BIT;
31905 if (mask == 0
31906 || (mask & ix86_isa_flags) != 0
31907 || (lang_hooks.builtin_function
31908 == lang_hooks.builtin_function_ext_scope))
31910 	{
31911 tree type = ix86_get_builtin_func_type (tcode);
31912 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
31913 NULL, NULL_TREE);
31914 ix86_builtins[(int) code] = decl;
31915 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
31916 }
31917 else
31918 {
31919 	  /* Only a MASK whose entry has set_and_not_built_p == true can
31920 	     potentially enable a builtin later.  */
31921 deferred_isa_values |= mask;
31922 ix86_builtins[(int) code] = NULL_TREE;
31923 ix86_builtins_isa[(int) code].tcode = tcode;
31924 ix86_builtins_isa[(int) code].name = name;
31925 ix86_builtins_isa[(int) code].leaf_p = false;
31926 ix86_builtins_isa[(int) code].nothrow_p = false;
31927 ix86_builtins_isa[(int) code].const_p = false;
31928 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
31929 }
31930 }
31931
31932 return decl;
31933 }
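
/* Editorial sketch, not part of the original file: the snippet below is kept
   under "#if 0" so it is never compiled and only illustrates how the
   registration path documented above behaves.  The builtin name, function
   type and enumerator used here are hypothetical placeholders.  */
#if 0
{
  /* If ix86_isa_flags already contains the AVX2 bit (or the mask is 0),
     the decl is created immediately; otherwise only the mask, name and
     type are recorded and the bits are OR'ed into deferred_isa_values.  */
  tree decl = def_builtin (OPTION_MASK_ISA_AVX2,
			   "__builtin_ia32_example",	/* hypothetical name */
			   INT_FTYPE_INT,		/* hypothetical type */
			   IX86_BUILTIN_EXAMPLE);	/* hypothetical code */
  if (decl == NULL_TREE)
    {
      /* Either the builtin is 64-bit only on a 32-bit target, or it was
	 deferred and will be built later by ix86_add_new_builtins.  */
    }
}
#endif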
31934
31935 /* Like def_builtin, but also marks the function decl "const". */
31936
31937 static inline tree
31938 def_builtin_const (HOST_WIDE_INT mask, const char *name,
31939 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
31940 {
31941 tree decl = def_builtin (mask, name, tcode, code);
31942 if (decl)
31943 TREE_READONLY (decl) = 1;
31944 else
31945 ix86_builtins_isa[(int) code].const_p = true;
31946
31947 return decl;
31948 }
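
/* Editorial note, not part of the original file: def_builtin_const is the
   variant used for builtins that are pure value computations, so the
   resulting decls can participate in CSE.  The hedged sketch below uses
   hypothetical names only.  */
#if 0
def_builtin_const (OPTION_MASK_ISA_SSE,
		   "__builtin_ia32_example_const",	/* hypothetical name */
		   FLOAT_FTYPE_FLOAT,			/* hypothetical type */
		   IX86_BUILTIN_EXAMPLE);		/* hypothetical code */
#endif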
31949
31950 /* Add any new builtin functions for a given ISA that may not have been
31951 declared. This saves a bit of space compared to adding all of the
31952 declarations to the tree, even if we didn't use them. */
31953
31954 static void
31955 ix86_add_new_builtins (HOST_WIDE_INT isa)
31956 {
31957 if ((isa & deferred_isa_values) == 0)
31958 return;
31959
31960   /* The bits in ISA are handled now; remove them from the deferred isa values.  */
31961 deferred_isa_values &= ~isa;
31962
31963 int i;
31964 tree saved_current_target_pragma = current_target_pragma;
31965 current_target_pragma = NULL_TREE;
31966
31967 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
31968 {
31969 if ((ix86_builtins_isa[i].isa & isa) != 0
31970 && ix86_builtins_isa[i].set_and_not_built_p)
31971 {
31972 tree decl, type;
31973
31974 /* Don't define the builtin again. */
31975 ix86_builtins_isa[i].set_and_not_built_p = false;
31976
31977 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
31978 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
31979 type, i, BUILT_IN_MD, NULL,
31980 NULL_TREE);
31981
31982 ix86_builtins[i] = decl;
31983 if (ix86_builtins_isa[i].const_p)
31984 TREE_READONLY (decl) = 1;
31985 if (ix86_builtins_isa[i].leaf_p)
31986 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
31987 NULL_TREE);
31988 if (ix86_builtins_isa[i].nothrow_p)
31989 TREE_NOTHROW (decl) = 1;
31990 }
31991 }
31992
31993 current_target_pragma = saved_current_target_pragma;
31994 }
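
/* Editorial sketch, not part of the original file: user-level code showing
   the deferral scheme in effect.  The assumption that a per-function ISA
   switch (such as a "target" attribute) is what ends up calling
   ix86_add_new_builtins with the newly enabled isa flags is an editorial
   reading of the comments above; the intrinsic is just a representative
   AVX-512F example.  */
#if 0
#include <immintrin.h>

/* Compiled without -mavx512f, the AVX-512 builtins are only recorded
   (deferred) by def_builtin; enabling the ISA for this one function makes
   the deferred decls get built, and the intrinsic expands normally.  */
__attribute__ ((target ("avx512f")))
static __m512d
use_deferred_builtin (__m512d x, __m512d y)
{
  return _mm512_add_pd (x, y);
}
#endif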
31995
31996 /* Bits for builtin_description.flag. */
31997
31998 /* Set when we don't support the comparison natively, and should
31999 swap_comparison in order to support it. */
32000 #define BUILTIN_DESC_SWAP_OPERANDS 1
32001
32002 struct builtin_description
32003 {
32004 const HOST_WIDE_INT mask;
32005 const enum insn_code icode;
32006 const char *const name;
32007 const enum ix86_builtins code;
32008 const enum rtx_code comparison;
32009 const int flag;
32010 };
32011
32012 static const struct builtin_description bdesc_comi[] =
32013 {
32014 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
32015 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
32016 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
32017 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
32018 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
32019 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
32020 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
32021 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
32022 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
32023 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
32024 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
32025 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
32026 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
32027 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
32028 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
32029 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
32030 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
32031 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
32032 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
32033 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
32034 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
32035 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
32036 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
32037 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
32038 };
32039
32040 static const struct builtin_description bdesc_pcmpestr[] =
32041 {
32042 /* SSE4.2 */
32043 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
32044 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
32045 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
32046 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
32047 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
32048 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
32049 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
32050 };
32051
32052 static const struct builtin_description bdesc_pcmpistr[] =
32053 {
32054 /* SSE4.2 */
32055 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
32056 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
32057 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
32058 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
32059 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
32060 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
32061 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
32062 };
32063
32064 /* Special builtins with a variable number of arguments. */
32065 static const struct builtin_description bdesc_special_args[] =
32066 {
32067 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
32068 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
32069 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
32070
32071 /* 80387 (for use internally for atomic compound assignment). */
32072 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
32073 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
32074 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
32075 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
32076
32077 /* MMX */
32078 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
32079
32080 /* 3DNow! */
32081 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
32082
32083 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
32084 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
32085 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
32086 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32087 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32088 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32089 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32090 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32091 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32092
32093 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
32094 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
32095 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32096 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32097 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32098 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32099 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32100 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32101
32102 /* SSE */
32103 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
32104 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
32105 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
32106
32107 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
32108 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
32109 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
32110 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
32111
32112 /* SSE or 3DNow!A */
32113 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
32114 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
32115
32116 /* SSE2 */
32117 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
32118 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
32119 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
32120 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
32121 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
32122 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
32123 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
32124 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
32125 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
32126 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
32127
32128 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
32129 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
32130
32131 /* SSE3 */
32132 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
32133
32134 /* SSE4.1 */
32135 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
32136
32137 /* SSE4A */
32138 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
32139 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
32140
32141 /* AVX */
32142 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
32143 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
32144
32145 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
32146 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
32147 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
32148 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
32149 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
32150
32151 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
32152 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
32153 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
32154 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
32155 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
32156 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
32157 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
32158
32159 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
32160 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
32161 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
32162
32163 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
32164 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
32165 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
32166 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
32167 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
32168 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
32169 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
32170 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
32171
32172 /* AVX2 */
32173 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
32174 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
32175 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
32176 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
32177 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
32178 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
32179 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
32180 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
32181 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
32182
32183 /* AVX512F */
32184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
32185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
32186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
32187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
32188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
32205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
32206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
32207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
32208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
32209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
32210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
32211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
32212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
32213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
32214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
32215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
32216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
32217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
32218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
32219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
32220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
32221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
32222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
32223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
32224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
32225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
32226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
32227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
32228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
32229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
32230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
32231
32232 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
32233 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
32234 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
32235 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
32236 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
32237 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
32238
32239 /* FSGSBASE */
32240 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32241 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
32242 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32243 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
32244 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
32245 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
32246 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
32247 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
32248
32249 /* RTM */
32250 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32251 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
32252 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
32253
32254 /* AVX512BW */
32255 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI },
32256 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI },
32257 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_USI },
32258 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_UDI },
32259
32260 /* AVX512VL */
32261 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_UHI },
32262 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_UQI },
32263 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI },
32264 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_UHI },
32265 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32266 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32267 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32268 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32269 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
32270 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
32271 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
32272 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
32273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32275 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32276 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32277 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
32278 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
32279 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
32280 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
32281 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32282 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32283 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32284 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32285 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
32286 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
32287 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
32288 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
32289 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32290 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32291 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32292 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32293 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
32294 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
32295 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
32296 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
32297 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_UHI },
32298 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_UQI },
32299 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_USI },
32300 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_UHI },
32301 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
32302 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
32303 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
32304 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
32305 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
32306 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
32307 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
32308 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
32309 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32310 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32311 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32312 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32313 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32314 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32315 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32316 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32317 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32318 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32319 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32320 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32321 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32322 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32323 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32324 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32325 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
32326 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
32327 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
32328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
32329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
32330 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
32331 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
32332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
32333 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
32334 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
32335 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
32336 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
32337 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
32338 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
32339 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
32340 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
32341 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
32342 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
32343 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
32344 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
32345 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
32346 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
32347 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
32348 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
32349 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
32350 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
32351 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
32352 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
32353 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
32354 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
32355
32356 /* PCOMMIT. */
32357 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
32358 };
32359
32360 /* Builtins with variable number of arguments. */
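/* Unlike the special-args table above, many entries here make real use of
   the rtx comparison field: the SSE/SSE2 compare builtins share a single
   maskcmp pattern and differ only in the code passed here (EQ, LT, LE,
   UNORDERED, ...), while prototypes carrying a _SWAP suffix additionally
   ask the expander to exchange the two vector operands, which is how
   cmpgtps/cmpgeps are implemented in terms of LT/LE.  */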
32361 static const struct builtin_description bdesc_args[] =
32362 {
32363 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
32364 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
32365 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
32366 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
32367 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
32368 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
32369 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
32370
32371 /* MMX */
32372 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32373 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32374 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32375 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32376 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32377 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32378
32379 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32380 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32381 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32382 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32383 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32384 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32385 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32386 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32387
32388 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32389 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32390
32391 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32392 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32393 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32394 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32395
32396 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32397 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32398 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32399 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32400 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32401 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32402
32403 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32404 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32405 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32406 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32407 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32408 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32409
32410 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
32411 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
32412 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
32413
32414 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
32415
32416 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
32417 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
32418 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
32419 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
32420 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
32421 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
32422
32423 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
32424 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
32425 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
32426 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
32427 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
32428 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
32429
32430 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
32431 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
32432 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
32433 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
32434
32435 /* 3DNow! */
32436 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
32437 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
32438 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
32439 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
32440
32441 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32442 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32443 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32444 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
32445 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
32446 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
32447 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32448 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32449 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32450 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32451 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32452 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32453 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32454 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32455 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32456
32457 /* 3DNow!A */
32458 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
32459 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
32460 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
32461 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
32462 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32463 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32464
32465 /* SSE */
32466 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
32467 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32468 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32469 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32470 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32471 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32472 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
32473 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
32474 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
32475 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
32476 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
32477 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
32478
32479 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32480
32481 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32482 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32483 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32484 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32485 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32486 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32487 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32488 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32489
32490 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
32491 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
32492 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
32493 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
32494 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
32495 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
32496 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
32497 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
32498 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
32499 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
32500 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
32501 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
32502 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
32503 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
32504 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
32505 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
32506 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
32507 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
32508 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
32509 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
32510
32511 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32512 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32513 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32514 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32515
32516 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32517 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32518 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32519 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32520
32521 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32522
32523 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32524 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32525 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32526 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32527 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32528
32529 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
32530 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
32531 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
32532
32533 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
32534
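/* The _VEC_MERGE prototypes presumably mark scalar operations that act on
   element 0 only and copy the remaining elements from the first operand,
   matching the sqrtss/rsqrtss/rcpss semantics.  */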
32535 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
32536 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
32537 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
32538
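/* Entries with a null name are not registered by the generic loop over
   this table; the corresponding functions (__builtin_fabsq and
   __builtin_copysignq here) are presumably declared separately elsewhere
   in this file.  */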
32539 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
32540 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
32541
32542 /* SSE MMX or 3DNow!A */
32543 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32544 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32545 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32546
32547 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32548 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32549 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32550 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32551
32552 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
32553 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
32554
32555 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
32556
32557 /* SSE2 */
32558 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32559
32560 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
32561 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
32562 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
32563 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
32564 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
32565
32566 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
32567 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
32568 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
32569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
32570 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
32571
32572 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
32573
32574 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
32575 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
32576 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
32577 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
32578
32579 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
32580 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
32581 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
32582
32583 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32584 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32585 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32586 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32587 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32588 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32589 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32590 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32591
32592 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
32593 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
32594 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
32595 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
32596 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
32597 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
32598 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
32599 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
32600 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
32601 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
32602 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
32603 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
32604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
32605 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
32606 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
32607 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
32608 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
32609 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
32610 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
32611 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
32612
32613 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32614 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32615 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32616 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32617
32618 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32619 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32620 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32621 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32622
32623 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32624
32625 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32626 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32627 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32628
32629 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
32630
32631 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
32632 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
32633 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32634 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32635 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
32636 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
32637 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32638 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32639
32640 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
32641 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
32642 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
32643 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
32644 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
32645 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
32646 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
32647 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
32648
32649 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
32650 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
32651
32652 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32653 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32654 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32655 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32656
32657 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
32658 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
32659
32660 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
32661 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
32662 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32663 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
32664 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
32665 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32666
32667 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
32668 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
32669 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
32670 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
32671
32672 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
32673 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
32674 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32675 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32676 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
32677 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
32678 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32679 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32680
32681 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
32682 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
32683 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
32684
32685 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
32686 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
32687
32688 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
32689 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
32690
32691 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
32692
32693 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
32694 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
32695 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
32696 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
32697
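/* For the shift builtins, _INT_CONVERT appears to mean that the builtin's
   vector type (V2DI) differs from the insn's mode (V1TI for the
   whole-register byte shifts) and must be converted, while _COUNT marks
   the final shift-count operand, supplied either as a scalar or as a
   vector.  */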
32698 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
32699 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
32700 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
32701 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
32702 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
32703 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
32704 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
32705
32706 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
32707 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
32708 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
32709 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
32710 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
32711 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
32712 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
32713
32714 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
32715 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
32716 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
32717 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
32718
32719 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
32720 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
32721 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
32722
32723 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
32724
32725 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
32726
32727 /* SSE2 MMX */
32728 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
32729 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
32730
32731 /* SSE3 */
32732 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32733 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32734
32735 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32736 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32737 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32738 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32739 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32740 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32741
32742 /* SSSE3 */
32743 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
32744 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
32745 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
32746 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
32747 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
32748 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
32749
32750 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
32751 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32752 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32753 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32754 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
32755 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32756 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
32757 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32758 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32759 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32760 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
32761 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32762 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
32763 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
32764 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
32765 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32766 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
32767 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32768 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
32769 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32770 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
32771 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32772 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32773 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32774
32775 /* SSSE3. */
32776 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
32777 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
32778
32779 /* SSE4.1 */
32780 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32781 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32782 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
32783 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
32784 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32785 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32786 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32787 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
32788 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
32789 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
32790
32791 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
32792 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
32793 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
32794 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
32795 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
32796 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
32797 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
32798 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
32799 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
32800 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
32801 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
32802 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
32803 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
32804
32805 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
32806 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32807 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
32808 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32809 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32810 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
32811 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
32812 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32813 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32814 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
32815 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
32816 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32817
32818 /* SSE4.1 */
32819 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
32820 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
32821 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32822 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32823
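/* The floor/ceil/trunc/rint variants below reuse the comparison field to
   carry the ROUND_* rounding-mode immediate (cast to rtx_code); it is not
   a real comparison code for these entries.  */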
32824 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
32825 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
32826 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
32827 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
32828
32829 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
32830 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
32831
32832 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
32833 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
32834
32835 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
32836 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
32837 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
32838 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
32839
32840 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
32841 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
32842
32843 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32844 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
32845
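/* The PTEST builtins all expand the same insn; the rtx code selects which
   flag the result tests: EQ for ptestz (ZF), LTU for ptestc (CF) and GTU
   for ptestnzc (neither flag set).  */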
32846 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
32847 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
32848 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
32849
32850 /* SSE4.2 */
32851 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32852 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
32853 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
32854 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32855 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32856
32857 /* SSE4A */
32858 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
32859 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
32860 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
32861 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32862
32863 /* AES */
32864 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
32865 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
32866
32867 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32868 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32869 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32870 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32871
32872 /* PCLMUL */
32873 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
32874
32875 /* AVX */
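/* The AVX intrinsics in GCC's avxintrin.h are thin wrappers around these
   builtins.  Roughly (inline attributes omitted):

     extern __inline __m256d
     _mm256_add_pd (__m256d __A, __m256d __B)
     {
       return (__m256d) __builtin_ia32_addpd256 ((__v4df) __A, (__v4df) __B);
     }
*/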
32876 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
32877 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
32878 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
32879 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
32880 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
32881 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
32882 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
32883 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
32884 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
32885 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
32886 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
32887 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
32888 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
32889 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
32890 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
32891 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
32892 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
32893 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
32894 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
32895 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
32896 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
32897 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
32898 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
32899 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
32900 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
32901 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
32902
32903 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
32904 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
32905 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
32906 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
32907
32908 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
32909 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
32910 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
32911 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
32912 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
32913 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
32914 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
32915 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32916 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32917 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32918 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32919 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
32920 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
32921 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
32922 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
32923 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
32924 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
32925 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
32926 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
32927 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
32928 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
32929 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
32930 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
32931 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
32932 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
32933 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
32934 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
32935 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
32936 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
32937 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
32938 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
32939 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
32940 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
32941 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
32942
32943 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
32944 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
32945 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
32946
32947 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
32948 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
32949 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
32950 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
32951 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
32952
32953 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
32954
32955 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
32956 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
32957
32958 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
32959 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
32960 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
32961 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
32962
32963 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
32964 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
32965
32966 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
32967 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
32968
32969 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
32970 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
32971 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
32972 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
32973
32974 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
32975 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
32976
32977 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
32978 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
32979
32980 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
32981 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
32982 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
32983 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
32984
32985 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
32986 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
32987 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
32988 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
32989 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
32990 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
32991
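/* For the vtest/ptest entries the comparison code selects which flag the
   expander tests: EQ reads ZF (testz), LTU reads CF (testc), and GTU
   checks that neither is set (testnzc).  */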
32992 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
32993 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
32994 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
32995 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
32996 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
32997 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
32998 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
32999 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
33000 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
33001 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
33002 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
33003 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
33004 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
33005 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
33006 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
33007
33008 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
33009 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
33010
33011 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33012 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33013
33014 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
33015
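/* Each entry in the groups below supplies the ISA option mask, the insn
   pattern (CODE_FOR_*), the builtin name, its IX86_BUILTIN_* code, an
   rtx comparison code (UNKNOWN when unused) and the prototype
   enumerator cast to int.  */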
33016 /* AVX2 */
33017 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
33018 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
33019 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
33020 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
33021 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
33022 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
33023 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
33024 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
33025 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33026 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33027 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33028 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33029 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33030 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33031 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33032 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33033 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
33034 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33035 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33036 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33037 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33038 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
33039 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
33040 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33041 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33042 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33043 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33044 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33045 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33046 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33047 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33048 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33049 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33050 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33051 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33052 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33053 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33054 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
33055 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
33056 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33057 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33058 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33059 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33060 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33061 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33062 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33063 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33064 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33065 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33066 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33067 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33068 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
33069 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
33070 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
33071 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
33072 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
33073 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
33074 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
33075 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
33076 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
33077 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
33078 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
33079 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
33080 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
33081 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
33082 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33083 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33084 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33085 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33086 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33087 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
33088 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33089 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
33090 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33091 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
33092 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
33093 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
33094 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33095 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33096 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
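/* Shift entries: an ..._COUNT prototype marks the last operand as a
   shift count (a scalar count for the immediate forms, a vector count
   otherwise), while ..._CONVERT marks builtins whose insn pattern works
   in a different mode (V2TImode for pslldqi256/psrldqi256), so the
   operands are converted to that mode first.  */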
33097 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
33098 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
33099 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
33100 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
33101 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
33102 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
33103 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
33104 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
33105 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
33106 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
33107 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
33108 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
33109 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
33110 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
33111 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
33112 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
33113 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
33114 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
33115 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33116 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33117 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33118 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33119 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33120 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33121 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33122 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33123 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33124 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33125 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33126 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33127 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33128 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33129 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33130 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33131 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33132 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33133 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
33134 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
33135 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
33136 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
33137 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
33138 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
33139 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
33140 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
33141 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
33142 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
33143 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
33144 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
33145 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
33146 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33147 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
33148 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
33149 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
33150 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
33151 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
33152 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
33153 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33154 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33155 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33156 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33157 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33158 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33159 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33160 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33161 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33162 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33163
33164 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
33165
33166 /* BMI */
33167 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33168 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33169 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
33170
33171 /* TBM */
33172 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33173 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33174
33175 /* F16C */
33176 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
33177 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
33178 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
33179 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
33180
33181 /* BMI2 */
33182 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33183 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33184 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33185 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33186 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33187 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33188
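/* The AVX-512 *_mask builtins below append two trailing operands to the
   plain operation: the merge (pass-through) source vector and the
   UQI/UHI write mask; the *_maskz variants zero masked elements instead
   of merging.  */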
33189 /* AVX512F */
33190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
33191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
33192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
33193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
33194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
33195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
33196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI },
33197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI },
33198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
33203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_UQI },
33204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
33205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_UQI },
33206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI },
33207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
33208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI },
33209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI },
33210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI },
33213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_UHI },
33214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI },
33215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
33216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_UQI },
33221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_UQI },
33222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_UQI },
33223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_UQI },
33224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI },
33225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI },
33226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI },
33227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI },
33228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
33244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_UHI },
33245 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_UQI },
33246 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_UHI },
33247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI },
33248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_UQI },
33249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
33268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
33269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
33270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
33271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
33272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
33273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
33274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
33275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
33276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
33277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI },
33278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI },
33279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI },
33280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI },
33281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI },
33282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
33283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
33284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
33285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
33286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
33287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI },
33288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI },
33289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI },
33290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI },
33291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI },
33292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI },
33293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI },
33295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
33307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
33309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
33313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
33315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33316 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33318 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
33319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33320 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
33321 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33322 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33323 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33324 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33325 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33326 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33327 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33328 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33329 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33330 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33331 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33333 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33334 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33335 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33336 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33337 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33338 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33339 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33341 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33342 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33344 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI },
33345 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI },
33346 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI },
33347 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI },
33348 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI },
33349 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI },
33350 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI },
33351 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI },
33352 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
33353 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
33354 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
33355 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
33356 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33357 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33358 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33359 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33360 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
33361 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33362 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33363 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
33364 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
33365 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33366 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
33367 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI },
33368 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
33369 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
33370 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33371 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33372 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI },
33373 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI },
33374 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI },
33375 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI },
33376 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33377 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33378 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
33379 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33380 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
33381 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33382 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI },
33383 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI },
33384 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI },
33385 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI },
33386
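/* Each row in this table bundles an ISA option mask, the insn pattern
   (CODE_FOR_*), the builtin's external name, its IX86_BUILTIN_* code, an
   optional comparison/rounding sub-code, and the prototype enum used when
   marshalling operands.  A rough sketch of the registration walk, using the
   names this file uses elsewhere (shown only as a sketch, not verbatim):

     for (i = 0, d = bdesc_args; i < ARRAY_SIZE (bdesc_args); i++, d++)
       {
	 if (d->name == 0)
	   continue;
	 def_builtin_const (d->mask, d->name,
			    (enum ix86_builtin_func_type) d->flag, d->code);
       }
*/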
33387 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
33388 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
33389 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
33390 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
33391 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
33392 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
33393 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
33394 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
33395
33396 /* Mask arithmetic operations */
33397 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33398 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33399 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) UHI_FTYPE_UHI },
33400 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33401 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33402 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33403 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33404 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33405 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33406 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) UHI_FTYPE_UHI },
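/* The 16-bit mask builtins above back the __mmask16 operations exposed by
   avx512fintrin.h.  A minimal illustration, assuming the usual wrappers in
   that header:

     __mmask16 m = _mm512_kand (a, b);   expands to __builtin_ia32_kandhi
     __mmask16 n = _mm512_knot (m);      expands to __builtin_ia32_knothi
     __mmask16 o = _mm512_kor (m, n);    expands to __builtin_ia32_korhi
*/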
33407
33408 /* SHA */
33409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
33413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
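/* The SHA rows carry a zero name slot; the user-facing entry points are the
   shaintrin.h intrinsics, which reach the IX86_BUILTIN_SHA* codes above.
   A sketch of typical use, assuming the shaintrin.h wrappers:

     __m128i w = _mm_sha1msg1_epu32 (msg0, msg1);
     __m128i e = _mm_sha1rnds4_epu32 (abcd, e0, 0);
*/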
33416
33417 /* AVX512VL. */
33418 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT },
33419 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT },
33420 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
33421 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
33422 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
33423 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
33424 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
33425 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
33426 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
33427 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
33428 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
33429 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
33430 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
33431 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
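/* For the *_mask rows that follow, the trailing prototype operands are the
   merge source and the write mask: e.g. V4SF_FTYPE_V4SF_V4SF_V4SF_UQI
   describes dst = per-element select (k, a OP b, src).  A hedged sketch of
   how the avx512vlintrin.h wrappers are expected to call in:

     __m128 r = _mm_mask_min_ps (src, k, a, b);
     (assumed to expand to __builtin_ia32_minps_mask (a, b, src, k))
*/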
33432 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33433 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33434 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33435 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33436 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33437 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33439 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33440 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33441 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33442 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33443 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33447 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33448 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33449 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33450 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33451 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33452 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33453 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33454 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33455 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33456 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33457 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33458 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33459 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33460 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33461 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33462 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33463 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33464 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
33465 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
33466 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
33467 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_UQI },
33468 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_UQI },
33469 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
33470 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
33471 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_UQI },
33472 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_UQI },
33473 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI },
33474 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI },
33475 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
33476 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
33477 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
33478 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
33479 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
33480 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
33481 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
33482 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
33483 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
33484 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
33485 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
33486 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
33487 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
33488 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
33489 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
33490 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
33491 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
33492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
33493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
33494 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
33495 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
33496 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
33497 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
33498 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
33499 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_UQI },
33500 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_UQI },
33501 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_UQI },
33502 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_UQI },
33503 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_UQI },
33504 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_UQI },
33505 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_UQI },
33506 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_UQI },
33507 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_UQI },
33508 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_UQI },
33509 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_USI },
33510 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_USI },
33511 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
33512 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_UHI },
33513 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_UHI },
33514 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_UHI },
33515 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
33516 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_UQI },
33517 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
33518 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_UQI },
33519 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
33520 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_UQI },
33521 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_UQI },
33522 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_UQI },
33523 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
33524 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_UQI },
33525 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
33526 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
33527 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_UQI },
33528 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_UQI },
33529 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_UQI },
33530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI },
33531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI },
33532 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_UHI },
33533 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_UQI },
33534 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_UQI },
33535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_UQI },
33536 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_UQI },
33537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_UQI },
33538 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_UQI },
33539 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_UQI },
33540 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_UQI },
33541 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_UQI },
33542 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_UQI },
33543 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_UQI },
33544 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_UHI },
33545 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_UQI },
33546 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_UQI },
33547 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_UQI },
33548 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_UQI },
33549 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_UQI },
33550 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_UQI },
33551 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_UQI },
33552 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_UQI },
33553 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_UQI },
33554 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_UQI },
33555 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_UQI },
33556 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
33557 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
33558 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
33559 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
33560 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33561 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33562 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
33563 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
33564 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
33565 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
33566 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
33567 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
33568 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
33569 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
33570 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
33571 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
33572 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
33573 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
33574 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
33575 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
33576 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
33577 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
33578 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
33579 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
33580 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
33581 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
33582 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
33583 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
33584 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
33585 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
33586 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
33587 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
33588 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
33589 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
33590 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
33591 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
33592 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
33593 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
33594 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
33595 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
33596 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
33597 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
33598 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
33599 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
33600 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
33601 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
33602 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
33603 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
33604 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
33605 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
33606 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
33607 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
33608 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
33609 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
33610 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
33611 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
33612 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
33613 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
33614 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
33615 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI },
33616 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI },
33617 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
33618 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
33619 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
33620 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
33621 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
33622 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
33623 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
33624 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
33625 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
33626 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
33627 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
33628 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
33629 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
33630 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
33631 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
33632 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
33633 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
33634 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
33635 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
33636 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
33637 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
33638 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
33639 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
33640 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
33641 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
33642 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
33643 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
33644 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
33645 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
33646 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
33647 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
33648 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
33649 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
33650 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
33651 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
33652 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
33653 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
33654 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
33655 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI },
33656 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
33657 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI },
33658 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
33659 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
33660 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
33661 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
33662 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI },
33663 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI },
33664 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI },
33665 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI },
33666 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI },
33667 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI },
33668 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI },
33669 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI },
33670 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
33671 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
33672 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
33673 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
33674 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
33675 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
33676 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
33677 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
33678 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
33679 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
33680 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
33681 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
33682 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
33683 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
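/* AVX512DQ masked ANDPD/ANDPS/ANDNPD/ANDNPS builtins.  */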
33684 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33685 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33686 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33687 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33688 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33689 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33690 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33691 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
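/* Masked shift and bitwise logic builtins.  */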
33692 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
33693 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
33694 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
33695 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
33696 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
33697 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
33698 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
33699 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
33700 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
33701 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
33702 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
33703 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
33704 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
33705 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
33706 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
33707 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
33708 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
33709 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
33710 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
33711 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
33712 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
33713 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
33714 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
33715 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
33716 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
33717 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
33718 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
33719 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
33720 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
33721 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
33722 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
33723 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
33724 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
33725 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
33726 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
33727 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
33728 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
33729 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
33730 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
33731 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
33732 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
33733 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
33734 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
33735 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
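/* PACKSSWB/PACKUSWB builtins.  */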
33736 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_USI },
33737 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_UHI },
33738 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_USI },
33739 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_UHI },
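/* VRNDSCALEPS/VRNDSCALEPD builtins.  */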
33740 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
33741 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
33742 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
33743 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
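/* VPTERNLOGQ/VPTERNLOGD builtins.  */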
33744 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI },
33745 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI },
33746 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI },
33747 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI },
33748 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI },
33749 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI },
33750 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI },
33751 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI },
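/* VSCALEFPD/VSCALEFPS builtins.  */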
33752 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33753 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33754 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33755 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
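/* Masked FMA builtins: vfmadd, vfmsub, vfnmadd, vfnmsub, vfmaddsub and vfmsubadd variants.  */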
33756 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33757 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33758 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33759 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33760 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33761 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33762 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33763 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33764 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33765 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33766 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33767 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33768 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33769 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33770 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33771 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33772 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33773 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33774 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33775 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33776 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33777 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33778 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33779 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33780 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33781 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33782 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33783 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33784 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33785 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33786 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33787 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33788 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33789 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33790 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33791 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33792 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33793 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33794 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33795 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33796 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33797 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33798 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33799 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
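/* VINSERTF64X2/VINSERTI64X2 builtins.  */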
33800 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI },
33801 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI },
33802 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
33803 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
33804 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_UHI },
33805 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_UQI },
33806 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_UQI },
33807 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_UQI },
33808 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
33809 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
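/* CVTPS2DQ/CVTPS2UDQ/CVTPS2QQ/CVTPS2UQQ conversion builtins.  */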
33810 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
33811 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
33812 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
33813 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
33814 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
33815 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
33816 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
33817 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
33818 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
33819 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
33820 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
33821 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
33822 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
33823 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
33824 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
33825 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
33826 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
33827 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
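/* CVTQQ2PS/CVTUQQ2PS/CVTQQ2PD/CVTUQQ2PD conversion builtins.  */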
33828 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_UQI },
33829 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_UQI },
33830 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_UQI },
33831 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_UQI },
33832 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_UQI },
33833 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_UQI },
33834 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_UQI },
33835 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_UQI },
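/* VPERMT2/VPERMI2 two-source permute builtins.  */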
33836 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
33837 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
33838 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
33839 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
33840 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
33841 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
33842 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_UQI },
33843 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_UQI },
33844 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_UQI },
33845 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_UQI },
33846 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
33847 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
33848 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
33849 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
33850 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
33851 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
33852 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
33853 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
33854 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_UQI },
33855 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_UQI },
33856 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_UQI },
33857 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_UQI },
33858 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_UQI },
33859 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_UQI },
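/* Shuffle builtins: PSHUFB, PSHUFHW, PSHUFLW, PSHUFD, SHUFPD and SHUFPS.  */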
33860 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
33861 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
33862 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
33863 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
33864 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
33865 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
33866 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
33867 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
33868 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
33869 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI },
33870 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
33871 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI },
33872 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
33873 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
33874 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
33875 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
33876 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
33877 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
33878 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
33879 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
33880 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
33881 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
33882 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
33883 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
33884 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
33885 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
33886 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
33887 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
33888 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
33889 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
33890 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
33891 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
33892 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
33893 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
33894 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
33895 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
33896 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
33897 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
33898 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
33899 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
33900 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
33901 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
33902 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
33903 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
33904 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
33905 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
33906 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
33907 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
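/* VFPCLASSPD/VFPCLASSPS/VFPCLASSSD/VFPCLASSSS builtins.  */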
33908 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_UQI },
33909 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_UQI },
33910 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
33911 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_UQI },
33912 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_UQI },
33913 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
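/* Vector to mask and mask to vector conversion builtins.  */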
33914 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) UHI_FTYPE_V16QI },
33915 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) USI_FTYPE_V32QI },
33916 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) UQI_FTYPE_V8HI },
33917 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) UHI_FTYPE_V16HI },
33918 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) UQI_FTYPE_V4SI },
33919 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) UQI_FTYPE_V8SI },
33920 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) UQI_FTYPE_V2DI },
33921 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) UQI_FTYPE_V4DI },
33922 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_UHI },
33923 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_USI },
33924 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_UQI },
33925 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_UHI },
33926 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_UQI },
33927 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_UQI },
33928 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_UQI },
33929 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_UQI },
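/* Masked PCMPEQ/PCMPGT comparison builtins.  */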
33930 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
33931 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
33932 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
33933 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
33934 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
33935 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
33936 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
33937 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
33938 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
33939 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
33940 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
33941 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
33942 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
33943 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
33944 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
33945 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
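/* VPTESTM/VPTESTNM builtins.  */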
33946 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
33947 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
33948 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
33949 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
33950 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
33951 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
33952 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
33953 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
33954 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
33955 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
33956 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
33957 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
33958 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
33959 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
33960 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
33961 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
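/* VPBROADCASTMB2Q/VPBROADCASTMW2D builtins.  */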
33962 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_UQI },
33963 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_UQI },
33964 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_UHI },
33965 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_UHI },
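/* Masked compress and expand builtins.  */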
33966 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
33967 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
33968 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
33969 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
33970 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
33971 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
33972 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
33973 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
33974 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
33975 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
33976 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
33977 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
33978 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
33979 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
33980 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
33981 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
33982 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
33983 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
33984 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
33985 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
33986 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
33987 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
33988 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
33989 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
33990 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
33991 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
33992 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
33993 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
33994 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
33995 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
33996 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
33997 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
33998 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
33999 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34000 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34001 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34002 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34003 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34004 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34005 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34006 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34007 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34008 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34009 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34010 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34011 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34012 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34013 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34014 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34015 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34016 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34017 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34018 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34019 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34020 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34021 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34022 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34023 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34024 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34025 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34026 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34027 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34028 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34029 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34030 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34031 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34032 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34033 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34034 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34035 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34036 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34037 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34038 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI },
34039 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI },
34040 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI },
34041 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI },
34042 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_UQI },
34043 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_UQI },
34044 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_UQI },
34045 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_UQI },
34046 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34047 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34048 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34049 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34050 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34051 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34052 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34053 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34054 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34055 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34056 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34057 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34058 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34059 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34060 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34061 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34062 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34063 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34064 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_UHI },
34065 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_UQI },
34066 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_UHI },
34067 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_UQI },
34068 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34069 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34070 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34071 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34072 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
34073 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
34074 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34075 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
34076 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
34077 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
34078 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
34079 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_UQI },
34080 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_UQI },
34081 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
34082 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
34083 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
34084 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
34085 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34086 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
34087 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34088 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34089 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34090 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34091 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34092 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34093 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34094 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34095 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
34096 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
34097 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
34098 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
34099 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34100 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34101 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_UQI },
34102 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_UQI },
34103 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_UQI },
34104 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_UQI },
34105 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_UQI },
34106 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_UQI },
34107 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34108 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34109 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34110 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_INT_UQI },
34111 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_INT_UQI },
34112 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_INT_UQI },
34113 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_INT_UQI },
34114 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_INT_USI },
34115 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_INT_UHI },
34116 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_INT_USI },
34117 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_INT_UHI },
34118 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_UQI },
34119 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_UQI },
34120 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_INT_UQI },
34121 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_INT_UQI },
34122 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_INT_UQI },
34123 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_INT_UQI },
34124 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_INT_UHI },
34125 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_INT_UQI },
34126 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_INT_UHI },
34127 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_INT_UQI },
34128 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI },
34129 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI },
34130
34131 /* AVX512DQ. */
34132 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
34133 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
34134 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI },
34135 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI },
34136 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_UHI },
34137 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_UHI },
34138 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_UQI },
34139 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_UQI },
34140 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_UQI },
34141 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_UQI },
34142 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
34143 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI },
34144 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34145 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
34146 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34147 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
34148 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34149 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
34150 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34151 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
34152 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34153 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI },
34154 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI },
34155 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI },
34156 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI },
34157 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_UQI },
34158 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_UHI },
34159 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) UHI_FTYPE_V16SI },
34160 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) UQI_FTYPE_V8DI },
34161 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_UHI },
34162 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_UQI },
34163
34164 /* AVX512BW. */
34165 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) USI_FTYPE_USI_USI },
34166 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) UDI_FTYPE_UDI_UDI },
34167 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI },
34168 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
34169 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
34170 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI },
34171 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
34172 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT },
34173 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
34174 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
34175 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
34176 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI },
34177 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_UDI },
34178 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_UDI },
34179 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_USI },
34180 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_USI },
34181 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI },
34182 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI },
34183 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34184 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34185 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34186 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34187 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34188 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34189 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34190 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34191 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34192 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34193 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34194 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34195 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34196 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34197 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34198 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34199 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34200 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34201 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34202 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34203 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34204 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34205 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34206 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34207 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34208 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34209 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
34210 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
34211 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
34212 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34213 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34214 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34215 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34216 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34217 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
34218 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI },
34219 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI },
34220 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34221 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_USI },
34222 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_UHI },
34223 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34224 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34225 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34226 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34227 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34228 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34229 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34230 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34231 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34232 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
34233 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34234 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
34235 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) UDI_FTYPE_V64QI },
34236 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) USI_FTYPE_V32HI },
34237 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_UDI },
34238 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_USI },
34239 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34240 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34241 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34242 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34243 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34244 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34245 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34246 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34247 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34248 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
34249 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
34250 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
34251 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
34252 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI },
34253 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI },
34254 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI },
34255 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI },
34256
34257 /* AVX512IFMA */
34258 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34259 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34260 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34261 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34262 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34263 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34264 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34265 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34266 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34267 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34268 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34269 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34270
34271 /* AVX512VBMI */
34272 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34273 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34274 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34275 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34276 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34277 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34278 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34279 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34280 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34281 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34282 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34283 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34284 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34285 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34286 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34287 };
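
/* Each entry in the bdesc_args table above pairs the ISA option mask
   required for the builtin with the insn code used to expand it, the
   builtin's user-visible name, its IX86_BUILTIN_* enumerator, an rtx
   comparison code (UNKNOWN where none applies) and, in the flag field,
   the function-type descriptor for its signature.  The builtin
   initialization code (ix86_init_mmx_sse_builtins) walks this table and
   registers one builtin per entry.  */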
34288
34289 /* Builtins with rounding support.  The trailing INT operand in each
	 function type carries the embedded rounding-mode / SAE immediate.  */
34290 static const struct builtin_description bdesc_round_args[] =
34291 {
34292 /* AVX512F */
34293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) UQI_FTYPE_V8DF_V8DF_INT_UQI_INT },
34298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) UHI_FTYPE_V16SF_V16SF_INT_UHI_INT },
34299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI_INT },
34300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI_INT },
34301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
34302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
34303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
34304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
34306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
34308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
34310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
34312 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
34313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
34314 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
34315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
34316 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34318 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34320 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
34321 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
34322 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
34323 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
34324 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34325 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34326 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34327 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34328 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
34329 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
34330 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
34331 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
34332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
34333 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
34334 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
34335 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
34336 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34337 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34338 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34339 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
34341 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
34342 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
34343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
34344 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34345 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34346 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34347 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34348 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34349 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34350 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34351 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34352 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34353 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34354 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34355 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34356 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
34357 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
34358 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
34359 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
34360 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34361 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34362 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34363 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34364 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34365 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34366 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34367 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34368 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34369 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34370 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34371 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34372 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
34373 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
34374 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
34375 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
34376 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
34377 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
34378 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
34379 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
34380 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
34381 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
34382 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
34383 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
34384 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
34385 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
34386 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
34387 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
34388 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34389 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34390 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34391 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34392 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34393 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34394 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
34395 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
34396 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34397 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34398 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34399 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34400 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34401 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34402 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34403 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34404 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34405 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34406 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34407 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34408 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34409 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34410 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34411 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34412
34413 /* AVX512ER */
34414 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34415 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34416 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34417 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34418 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34419 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34420 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34421 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34422 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34423 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34424
34425 /* AVX512DQ. */
34426 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
34427 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
34428 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34429 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34430 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34431 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34432 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
34433 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
34434 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
34435 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
34436 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34437 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34438 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34439 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34440 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
34441 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
34442 };
34443
34444 /* Builtins for MPX.  */
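/* Note (added for clarity): these entries carry an insn code of 0 because
   the MPX builtins have no single named insn pattern; they are presumably
   expanded specially during builtin expansion (see ix86_expand_builtin)
   rather than through the generic table-driven path.  */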
34445 static const struct builtin_description bdesc_mpx[] =
34446 {
34447 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
34448 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
34449 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
34450 };
34451
34452 /* Const builtins for MPX. */
34453 static const struct builtin_description bdesc_mpx_const[] =
34454 {
34455 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
34456 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
34457 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
34458 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
34459 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
34460 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
34461 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
34462 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
34463 };
34464
34465 /* FMA4 and XOP. */
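/* A reading aid for the aliases below: MULTI_ARG_<N>_<MODE> names the
   function prototype for an FMA4/XOP builtin with <N> vector arguments of
   element mode <MODE>, e.g. MULTI_ARG_3_SF is V4SF_FTYPE_V4SF_V4SF_V4SF
   (three V4SF operands).  The _CMP, _TF and _IMM suffixes mark an extra
   comparison, trueness or immediate operand.  */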
34466 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
34467 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
34468 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
34469 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
34470 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
34471 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
34472 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
34473 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
34474 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
34475 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
34476 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
34477 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
34478 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
34479 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
34480 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
34481 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
34482 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
34483 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
34484 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
34485 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
34486 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
34487 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
34488 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
34489 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
34490 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
34491 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
34492 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
34493 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
34494 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
34495 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
34496 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
34497 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
34498 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
34499 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
34500 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
34501 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
34502 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
34503 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
34504 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
34505 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
34506 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
34507 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
34508 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
34509 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
34510 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
34511 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
34512 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
34513 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
34514 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
34515 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
34516 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
34517 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
34518
34519 static const struct builtin_description bdesc_multi_arg[] =
34520 {
34521 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
34522 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
34523 UNKNOWN, (int)MULTI_ARG_3_SF },
34524 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
34525 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
34526 UNKNOWN, (int)MULTI_ARG_3_DF },
34527
34528 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
34529 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
34530 UNKNOWN, (int)MULTI_ARG_3_SF },
34531 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
34532 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
34533 UNKNOWN, (int)MULTI_ARG_3_DF },
34534
34535 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
34536 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
34537 UNKNOWN, (int)MULTI_ARG_3_SF },
34538 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
34539 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
34540 UNKNOWN, (int)MULTI_ARG_3_DF },
34541 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
34542 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
34543 UNKNOWN, (int)MULTI_ARG_3_SF2 },
34544 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
34545 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
34546 UNKNOWN, (int)MULTI_ARG_3_DF2 },
34547
34548 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
34549 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
34550 UNKNOWN, (int)MULTI_ARG_3_SF },
34551 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
34552 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
34553 UNKNOWN, (int)MULTI_ARG_3_DF },
34554 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
34555 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
34556 UNKNOWN, (int)MULTI_ARG_3_SF2 },
34557 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
34558 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
34559 UNKNOWN, (int)MULTI_ARG_3_DF2 },
34560
34561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
34562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
34563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
34564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
34565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
34566 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
34567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
34568
34569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
34570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
34571 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
34572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
34573 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
34574 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
34575 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
34576
34577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
34578
34579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
34580 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
34581 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
34582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
34583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
34584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
34585 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
34586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
34587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
34588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
34589 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
34590 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
34591
34592 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
34593 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
34594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
34595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
34596 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
34597 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
34598 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
34599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
34600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
34601 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
34602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
34603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
34604 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
34605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
34606 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
34607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
34608
34609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
34610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
34611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
34612 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
34613 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
34614 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
34615
34616 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
34617 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
34618 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
34619 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
34620 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
34621 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
34622 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
34623 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
34624 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
34625 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
34626 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
34627 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
34628 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
34629 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
34630 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
34631
34632 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
34633 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
34634 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
34635 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
34636 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
34637 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
34638 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
34639
34640 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
34641 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
34642 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
34643 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
34644 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
34645 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
34646 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
34647
34648 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
34649 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
34650 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
34651 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
34652 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
34653 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
34654 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
34655
34656 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
34657 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
34658 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
34659 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
34660 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
34661 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
34662 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
34663
34664 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
34665 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
34666 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
34667 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
34668 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
34669 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
34670 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
34671
34672 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
34673 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
34674 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
34675 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
34676 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
34677 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
34678 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
34679
34680 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
34681 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
34682 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
34683 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
34684 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
34685 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
34686 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
34687
34688 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
34689 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
34690 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
34691 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
34692 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
34693 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
34694 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
34695
34696 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
34697 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
34698 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
34699 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
34700 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
34701 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
34702 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
34703 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
34704
34705 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
34706 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
34707 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
34708 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
34709 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
34710 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
34711 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
34712 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
34713
34714 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
34715 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
34716 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
34717 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
34718
34719 };
34720 \f
34721 /* TM vector builtins. */
34722
34723 /* Reuse the existing x86-specific `struct builtin_description' because
34724    we're lazy.  Add casts to make them fit.  */
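/* Each entry below maps one of the __builtin__ITM_* vector entry points onto
   the generic BUILT_IN_TM_{LOAD,STORE,LOG}_M{64,128,256} code, gated on the
   ISA (MMX, SSE or AVX) that provides a vector of that width.  */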
34725 static const struct builtin_description bdesc_tm[] =
34726 {
34727 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
34728 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
34729 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
34730 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
34731 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
34732 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
34733 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
34734
34735 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
34736 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
34737 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
34738 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
34739 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
34740 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
34741 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
34742
34743 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
34744 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
34745 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
34746 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
34747 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
34748 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
34749 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
34750
34751 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
34752 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
34753 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
34754 };
34755
34756 /* TM callbacks. */
34757
34758 /* Return the builtin decl needed to load a vector of TYPE. */
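/* For example, a 128-bit vector type such as V4SF maps to
   BUILT_IN_TM_LOAD_M128.  */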
34759
34760 static tree
34761 ix86_builtin_tm_load (tree type)
34762 {
34763 if (TREE_CODE (type) == VECTOR_TYPE)
34764 {
34765 switch (tree_to_uhwi (TYPE_SIZE (type)))
34766 {
34767 case 64:
34768 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
34769 case 128:
34770 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
34771 case 256:
34772 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
34773 }
34774 }
34775 return NULL_TREE;
34776 }
34777
34778 /* Return the builtin decl needed to store a vector of TYPE. */
34779
34780 static tree
34781 ix86_builtin_tm_store (tree type)
34782 {
34783 if (TREE_CODE (type) == VECTOR_TYPE)
34784 {
34785 switch (tree_to_uhwi (TYPE_SIZE (type)))
34786 {
34787 case 64:
34788 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
34789 case 128:
34790 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
34791 case 256:
34792 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
34793 }
34794 }
34795 return NULL_TREE;
34796 }
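
/* Worked example of the mapping above (illustrative only): for a 128-bit
   vector type such as

     typedef float v4sf __attribute__ ((vector_size (16)));

   tree_to_uhwi (TYPE_SIZE (type)) is 128, so ix86_builtin_tm_load returns
   the decl registered in bdesc_tm as "__builtin__ITM_RM128"
   (V4SF_FTYPE_PCV4SF) and ix86_builtin_tm_store returns
   "__builtin__ITM_WM128" (VOID_FTYPE_PV4SF_V4SF).  64-bit and 256-bit
   vectors map to the M64 and M256 variants in the same way.  */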
34797 \f
34798 /* Initialize the transactional memory vector load/store builtins. */
34799
34800 static void
34801 ix86_init_tm_builtins (void)
34802 {
34803 enum ix86_builtin_func_type ftype;
34804 const struct builtin_description *d;
34805 size_t i;
34806 tree decl;
34807 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
34808 tree attrs_log, attrs_type_log;
34809
34810 if (!flag_tm)
34811 return;
34812
34813 /* If there are no builtins defined, we must be compiling in a
34814 language without trans-mem support. */
34815 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
34816 return;
34817
34818 /* Use whatever attributes a normal TM load has. */
34819 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
34820 attrs_load = DECL_ATTRIBUTES (decl);
34821 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
34822 /* Use whatever attributes a normal TM store has. */
34823 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
34824 attrs_store = DECL_ATTRIBUTES (decl);
34825 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
34826 /* Use whatever attributes a normal TM log has. */
34827 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
34828 attrs_log = DECL_ATTRIBUTES (decl);
34829 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
34830
34831 for (i = 0, d = bdesc_tm;
34832 i < ARRAY_SIZE (bdesc_tm);
34833 i++, d++)
34834 {
34835 if ((d->mask & ix86_isa_flags) != 0
34836 || (lang_hooks.builtin_function
34837 == lang_hooks.builtin_function_ext_scope))
34838 {
34839 tree type, attrs, attrs_type;
34840 enum built_in_function code = (enum built_in_function) d->code;
34841
34842 ftype = (enum ix86_builtin_func_type) d->flag;
34843 type = ix86_get_builtin_func_type (ftype);
34844
34845 if (BUILTIN_TM_LOAD_P (code))
34846 {
34847 attrs = attrs_load;
34848 attrs_type = attrs_type_load;
34849 }
34850 else if (BUILTIN_TM_STORE_P (code))
34851 {
34852 attrs = attrs_store;
34853 attrs_type = attrs_type_store;
34854 }
34855 else
34856 {
34857 attrs = attrs_log;
34858 attrs_type = attrs_type_log;
34859 }
34860 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
34861 /* The builtin without the prefix for
34862 calling it directly. */
34863 d->name + strlen ("__builtin_"),
34864 attrs);
34865 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
34866 set the TYPE_ATTRIBUTES. */
34867 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
34868
34869 set_builtin_decl (code, decl, false);
34870 }
34871 }
34872 }
34873
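/* Illustrative sketch of the naming scheme used above: each bdesc_tm
   entry is registered under its "__builtin_"-prefixed name, and the
   library name passed to add_builtin_function drops that prefix
   (d->name + strlen ("__builtin_")), so the M64 store, for example, can
   be reached both ways from code compiled with -fgnu-tm and MMX enabled:

     __builtin__ITM_WM64 (dst, val);   // builtin name
     _ITM_WM64 (dst, val);             // direct libitm entry point   */
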
34874 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
34875 not in the current target ISA, so that the user can compile particular
34876 modules with target-specific options that differ from the command-line
34877 options. */
34878 static void
34879 ix86_init_mmx_sse_builtins (void)
34880 {
34881 const struct builtin_description * d;
34882 enum ix86_builtin_func_type ftype;
34883 size_t i;
34884
34885 /* Add all special builtins with variable number of operands. */
34886 for (i = 0, d = bdesc_special_args;
34887 i < ARRAY_SIZE (bdesc_special_args);
34888 i++, d++)
34889 {
34890 if (d->name == 0)
34891 continue;
34892
34893 ftype = (enum ix86_builtin_func_type) d->flag;
34894 def_builtin (d->mask, d->name, ftype, d->code);
34895 }
34896
34897 /* Add all builtins with variable number of operands. */
34898 for (i = 0, d = bdesc_args;
34899 i < ARRAY_SIZE (bdesc_args);
34900 i++, d++)
34901 {
34902 if (d->name == 0)
34903 continue;
34904
34905 ftype = (enum ix86_builtin_func_type) d->flag;
34906 def_builtin_const (d->mask, d->name, ftype, d->code);
34907 }
34908
34909 /* Add all builtins with rounding. */
34910 for (i = 0, d = bdesc_round_args;
34911 i < ARRAY_SIZE (bdesc_round_args);
34912 i++, d++)
34913 {
34914 if (d->name == 0)
34915 continue;
34916
34917 ftype = (enum ix86_builtin_func_type) d->flag;
34918 def_builtin_const (d->mask, d->name, ftype, d->code);
34919 }
34920
34921 /* pcmpestr[im] insns. */
34922 for (i = 0, d = bdesc_pcmpestr;
34923 i < ARRAY_SIZE (bdesc_pcmpestr);
34924 i++, d++)
34925 {
34926 if (d->code == IX86_BUILTIN_PCMPESTRM128)
34927 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
34928 else
34929 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
34930 def_builtin_const (d->mask, d->name, ftype, d->code);
34931 }
34932
34933 /* pcmpistr[im] insns. */
34934 for (i = 0, d = bdesc_pcmpistr;
34935 i < ARRAY_SIZE (bdesc_pcmpistr);
34936 i++, d++)
34937 {
34938 if (d->code == IX86_BUILTIN_PCMPISTRM128)
34939 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
34940 else
34941 ftype = INT_FTYPE_V16QI_V16QI_INT;
34942 def_builtin_const (d->mask, d->name, ftype, d->code);
34943 }
34944
34945 /* comi/ucomi insns. */
34946 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
34947 {
34948 if (d->mask == OPTION_MASK_ISA_SSE2)
34949 ftype = INT_FTYPE_V2DF_V2DF;
34950 else
34951 ftype = INT_FTYPE_V4SF_V4SF;
34952 def_builtin_const (d->mask, d->name, ftype, d->code);
34953 }
34954
34955 /* SSE */
34956 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
34957 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
34958 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
34959 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
34960
34961 /* SSE or 3DNow!A */
34962 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34963 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
34964 IX86_BUILTIN_MASKMOVQ);
34965
34966 /* SSE2 */
34967 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
34968 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
34969
34970 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
34971 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
34972 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
34973 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
34974
34975 /* SSE3. */
34976 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
34977 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
34978 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
34979 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
34980
34981 /* AES */
34982 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
34983 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
34984 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
34985 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
34986 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
34987 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
34988 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
34989 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
34990 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
34991 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
34992 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
34993 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
34994
34995 /* PCLMUL */
34996 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
34997 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
34998
34999 /* RDRND */
35000 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
35001 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
35002 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
35003 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
35004 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
35005 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
35006 IX86_BUILTIN_RDRAND64_STEP);
35007
35008 /* AVX2 */
35009 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
35010 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
35011 IX86_BUILTIN_GATHERSIV2DF);
35012
35013 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
35014 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
35015 IX86_BUILTIN_GATHERSIV4DF);
35016
35017 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
35018 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
35019 IX86_BUILTIN_GATHERDIV2DF);
35020
35021 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
35022 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
35023 IX86_BUILTIN_GATHERDIV4DF);
35024
35025 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
35026 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
35027 IX86_BUILTIN_GATHERSIV4SF);
35028
35029 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
35030 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
35031 IX86_BUILTIN_GATHERSIV8SF);
35032
35033 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
35034 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
35035 IX86_BUILTIN_GATHERDIV4SF);
35036
35037 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
35038 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
35039 IX86_BUILTIN_GATHERDIV8SF);
35040
35041 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
35042 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
35043 IX86_BUILTIN_GATHERSIV2DI);
35044
35045 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
35046 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
35047 IX86_BUILTIN_GATHERSIV4DI);
35048
35049 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
35050 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
35051 IX86_BUILTIN_GATHERDIV2DI);
35052
35053 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
35054 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
35055 IX86_BUILTIN_GATHERDIV4DI);
35056
35057 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
35058 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
35059 IX86_BUILTIN_GATHERSIV4SI);
35060
35061 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
35062 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
35063 IX86_BUILTIN_GATHERSIV8SI);
35064
35065 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
35066 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
35067 IX86_BUILTIN_GATHERDIV4SI);
35068
35069 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
35070 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
35071 IX86_BUILTIN_GATHERDIV8SI);
35072
35073 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
35074 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
35075 IX86_BUILTIN_GATHERALTSIV4DF);
35076
35077 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
35078 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
35079 IX86_BUILTIN_GATHERALTDIV8SF);
35080
35081 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
35082 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
35083 IX86_BUILTIN_GATHERALTSIV4DI);
35084
35085 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
35086 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
35087 IX86_BUILTIN_GATHERALTDIV8SI);
35088
35089 /* AVX512F */
35090 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
35091 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
35092 IX86_BUILTIN_GATHER3SIV16SF);
35093
35094 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
35095 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
35096 IX86_BUILTIN_GATHER3SIV8DF);
35097
35098 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
35099 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
35100 IX86_BUILTIN_GATHER3DIV16SF);
35101
35102 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
35103 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
35104 IX86_BUILTIN_GATHER3DIV8DF);
35105
35106 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
35107 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
35108 IX86_BUILTIN_GATHER3SIV16SI);
35109
35110 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
35111 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
35112 IX86_BUILTIN_GATHER3SIV8DI);
35113
35114 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
35115 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
35116 IX86_BUILTIN_GATHER3DIV16SI);
35117
35118 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
35119 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
35120 IX86_BUILTIN_GATHER3DIV8DI);
35121
35122 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
35123 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
35124 IX86_BUILTIN_GATHER3ALTSIV8DF);
35125
35126 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
35127 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
35128 IX86_BUILTIN_GATHER3ALTDIV16SF);
35129
35130 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
35131 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
35132 IX86_BUILTIN_GATHER3ALTSIV8DI);
35133
35134 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
35135 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
35136 IX86_BUILTIN_GATHER3ALTDIV16SI);
35137
35138 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
35139 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
35140 IX86_BUILTIN_SCATTERSIV16SF);
35141
35142 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
35143 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
35144 IX86_BUILTIN_SCATTERSIV8DF);
35145
35146 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
35147 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
35148 IX86_BUILTIN_SCATTERDIV16SF);
35149
35150 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
35151 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
35152 IX86_BUILTIN_SCATTERDIV8DF);
35153
35154 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
35155 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
35156 IX86_BUILTIN_SCATTERSIV16SI);
35157
35158 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
35159 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
35160 IX86_BUILTIN_SCATTERSIV8DI);
35161
35162 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
35163 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
35164 IX86_BUILTIN_SCATTERDIV16SI);
35165
35166 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
35167 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
35168 IX86_BUILTIN_SCATTERDIV8DI);
35169
35170 /* AVX512VL */
35171 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
35172 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
35173 IX86_BUILTIN_GATHER3SIV2DF);
35174
35175 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
35176 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
35177 IX86_BUILTIN_GATHER3SIV4DF);
35178
35179 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
35180 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
35181 IX86_BUILTIN_GATHER3DIV2DF);
35182
35183 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
35184 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
35185 IX86_BUILTIN_GATHER3DIV4DF);
35186
35187 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
35188 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
35189 IX86_BUILTIN_GATHER3SIV4SF);
35190
35191 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
35192 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
35193 IX86_BUILTIN_GATHER3SIV8SF);
35194
35195 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
35196 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
35197 IX86_BUILTIN_GATHER3DIV4SF);
35198
35199 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
35200 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
35201 IX86_BUILTIN_GATHER3DIV8SF);
35202
35203 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
35204 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
35205 IX86_BUILTIN_GATHER3SIV2DI);
35206
35207 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
35208 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
35209 IX86_BUILTIN_GATHER3SIV4DI);
35210
35211 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
35212 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
35213 IX86_BUILTIN_GATHER3DIV2DI);
35214
35215 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
35216 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
35217 IX86_BUILTIN_GATHER3DIV4DI);
35218
35219 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
35220 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
35221 IX86_BUILTIN_GATHER3SIV4SI);
35222
35223 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
35224 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
35225 IX86_BUILTIN_GATHER3SIV8SI);
35226
35227 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
35228 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
35229 IX86_BUILTIN_GATHER3DIV4SI);
35230
35231 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
35232 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
35233 IX86_BUILTIN_GATHER3DIV8SI);
35234
35235 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
35236 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
35237 IX86_BUILTIN_GATHER3ALTSIV4DF);
35238
35239 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
35240 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
35241 IX86_BUILTIN_GATHER3ALTDIV8SF);
35242
35243 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
35244 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
35245 IX86_BUILTIN_GATHER3ALTSIV4DI);
35246
35247 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
35248 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
35249 IX86_BUILTIN_GATHER3ALTDIV8SI);
35250
35251 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
35252 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
35253 IX86_BUILTIN_SCATTERSIV8SF);
35254
35255 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
35256 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
35257 IX86_BUILTIN_SCATTERSIV4SF);
35258
35259 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
35260 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
35261 IX86_BUILTIN_SCATTERSIV4DF);
35262
35263 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
35264 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
35265 IX86_BUILTIN_SCATTERSIV2DF);
35266
35267 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
35268 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
35269 IX86_BUILTIN_SCATTERDIV8SF);
35270
35271 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
35272 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
35273 IX86_BUILTIN_SCATTERDIV4SF);
35274
35275 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
35276 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
35277 IX86_BUILTIN_SCATTERDIV4DF);
35278
35279 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
35280 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
35281 IX86_BUILTIN_SCATTERDIV2DF);
35282
35283 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
35284 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
35285 IX86_BUILTIN_SCATTERSIV8SI);
35286
35287 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
35288 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
35289 IX86_BUILTIN_SCATTERSIV4SI);
35290
35291 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
35292 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
35293 IX86_BUILTIN_SCATTERSIV4DI);
35294
35295 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
35296 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
35297 IX86_BUILTIN_SCATTERSIV2DI);
35298
35299 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
35300 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
35301 IX86_BUILTIN_SCATTERDIV8SI);
35302
35303 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
35304 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
35305 IX86_BUILTIN_SCATTERDIV4SI);
35306
35307 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
35308 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
35309 IX86_BUILTIN_SCATTERDIV4DI);
35310
35311 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
35312 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
35313 IX86_BUILTIN_SCATTERDIV2DI);
35314 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8df ",
35315 VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT,
35316 IX86_BUILTIN_SCATTERALTSIV8DF);
35317
35318 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8sf ",
35319 VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT,
35320 IX86_BUILTIN_SCATTERALTDIV16SF);
35321
35322 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8di ",
35323 VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT,
35324 IX86_BUILTIN_SCATTERALTSIV8DI);
35325
35326 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8si ",
35327 VOID_FTYPE_PINT_HI_V8DI_V16SI_INT,
35328 IX86_BUILTIN_SCATTERALTDIV16SI);
35329
35330 /* AVX512PF */
35331 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
35332 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
35333 IX86_BUILTIN_GATHERPFDPD);
35334 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
35335 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
35336 IX86_BUILTIN_GATHERPFDPS);
35337 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
35338 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
35339 IX86_BUILTIN_GATHERPFQPD);
35340 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
35341 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
35342 IX86_BUILTIN_GATHERPFQPS);
35343 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
35344 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
35345 IX86_BUILTIN_SCATTERPFDPD);
35346 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
35347 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
35348 IX86_BUILTIN_SCATTERPFDPS);
35349 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
35350 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
35351 IX86_BUILTIN_SCATTERPFQPD);
35352 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
35353 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
35354 IX86_BUILTIN_SCATTERPFQPS);
35355
35356 /* SHA */
35357 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
35358 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
35359 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
35360 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
35361 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
35362 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
35363 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
35364 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
35365 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
35366 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
35367 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
35368 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
35369 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
35370 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
35371
35372 /* RTM. */
35373 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
35374 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
35375
35376 /* MMX access to the vec_init patterns. */
35377 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
35378 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
35379
35380 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
35381 V4HI_FTYPE_HI_HI_HI_HI,
35382 IX86_BUILTIN_VEC_INIT_V4HI);
35383
35384 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
35385 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
35386 IX86_BUILTIN_VEC_INIT_V8QI);
35387
35388 /* Access to the vec_extract patterns. */
35389 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
35390 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
35391 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
35392 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
35393 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
35394 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
35395 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
35396 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
35397 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
35398 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
35399
35400 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
35401 "__builtin_ia32_vec_ext_v4hi",
35402 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
35403
35404 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
35405 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
35406
35407 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
35408 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
35409
35410 /* Access to the vec_set patterns. */
35411 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
35412 "__builtin_ia32_vec_set_v2di",
35413 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
35414
35415 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
35416 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
35417
35418 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
35419 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
35420
35421 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
35422 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
35423
35424 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
35425 "__builtin_ia32_vec_set_v4hi",
35426 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
35427
35428 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
35429 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
35430
35431 /* RDSEED */
35432 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
35433 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
35434 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
35435 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
35436 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
35437 "__builtin_ia32_rdseed_di_step",
35438 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
35439
35440 /* ADCX */
35441 def_builtin (0, "__builtin_ia32_addcarryx_u32",
35442 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
35443 def_builtin (OPTION_MASK_ISA_64BIT,
35444 "__builtin_ia32_addcarryx_u64",
35445 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
35446 IX86_BUILTIN_ADDCARRYX64);
35447
35448 /* SBB */
35449 def_builtin (0, "__builtin_ia32_sbb_u32",
35450 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
35451 def_builtin (OPTION_MASK_ISA_64BIT,
35452 "__builtin_ia32_sbb_u64",
35453 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
35454 IX86_BUILTIN_SBB64);
35455
35456 /* Read/write FLAGS. */
35457 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
35458 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
35459 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
35460 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
35461 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
35462 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
35463 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
35464 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
35465
35466 /* CLFLUSHOPT. */
35467 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
35468 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
35469
35470 /* CLWB. */
35471 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
35472 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
35473
35474 /* MONITORX and MWAITX. */
35475 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
35476 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
35477 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
35478 VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
35479
35480 /* Add FMA4 multi-arg argument instructions */
35481 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
35482 {
35483 if (d->name == 0)
35484 continue;
35485
35486 ftype = (enum ix86_builtin_func_type) d->flag;
35487 def_builtin_const (d->mask, d->name, ftype, d->code);
35488 }
35489 }
35490
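/* Example (not part of the compiler) of one of the builtins registered
   above: with -mrdrnd, the INT_FTYPE_PUNSIGNED entry makes this valid
   user code:

     unsigned int val;
     int ok = __builtin_ia32_rdrand32_step (&val);

   A nonzero OK means VAL received a hardware random value; the
   _rdrand32_step intrinsic in <immintrin.h> is a thin wrapper around
   this builtin.  */
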
35491 static void
35492 ix86_init_mpx_builtins ()
35493 {
35494 const struct builtin_description * d;
35495 enum ix86_builtin_func_type ftype;
35496 tree decl;
35497 size_t i;
35498
35499 for (i = 0, d = bdesc_mpx;
35500 i < ARRAY_SIZE (bdesc_mpx);
35501 i++, d++)
35502 {
35503 if (d->name == 0)
35504 continue;
35505
35506 ftype = (enum ix86_builtin_func_type) d->flag;
35507 decl = def_builtin (d->mask, d->name, ftype, d->code);
35508
35509 /* Without the leaf and nothrow flags, abnormal edges may
35510 follow calls to MPX builtins when setjmp is present in the
35511 function.  Since we may have a lot of MPX builtin calls,
35512 this causes lots of useless edges and enormous PHI nodes.
35513 To avoid this we mark MPX builtins as leaf and nothrow,
35514 either on the decl or via ix86_builtins_isa below. */
35515 if (decl)
35516 {
35517 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
35518 NULL_TREE);
35519 TREE_NOTHROW (decl) = 1;
35520 }
35521 else
35522 {
35523 ix86_builtins_isa[(int)d->code].leaf_p = true;
35524 ix86_builtins_isa[(int)d->code].nothrow_p = true;
35525 }
35526 }
35527
35528 for (i = 0, d = bdesc_mpx_const;
35529 i < ARRAY_SIZE (bdesc_mpx_const);
35530 i++, d++)
35531 {
35532 if (d->name == 0)
35533 continue;
35534
35535 ftype = (enum ix86_builtin_func_type) d->flag;
35536 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
35537
35538 if (decl)
35539 {
35540 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
35541 NULL_TREE);
35542 TREE_NOTHROW (decl) = 1;
35543 }
35544 else
35545 {
35546 ix86_builtins_isa[(int)d->code].leaf_p = true;
35547 ix86_builtins_isa[(int)d->code].nothrow_p = true;
35548 }
35549 }
35550 }
35551
35552 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
35553 to return a pointer to VERSION_DECL if the outcome of the expression
35554 formed by PREDICATE_CHAIN is true. This function will be called during
35555 version dispatch to decide which function version to execute. It returns
35556 the basic block at the end, to which more conditions can be added. */
35557
35558 static basic_block
35559 add_condition_to_bb (tree function_decl, tree version_decl,
35560 tree predicate_chain, basic_block new_bb)
35561 {
35562 gimple *return_stmt;
35563 tree convert_expr, result_var;
35564 gimple *convert_stmt;
35565 gimple *call_cond_stmt;
35566 gimple *if_else_stmt;
35567
35568 basic_block bb1, bb2, bb3;
35569 edge e12, e23;
35570
35571 tree cond_var, and_expr_var = NULL_TREE;
35572 gimple_seq gseq;
35573
35574 tree predicate_decl, predicate_arg;
35575
35576 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
35577
35578 gcc_assert (new_bb != NULL);
35579 gseq = bb_seq (new_bb);
35580
35581
35582 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
35583 build_fold_addr_expr (version_decl));
35584 result_var = create_tmp_var (ptr_type_node);
35585 convert_stmt = gimple_build_assign (result_var, convert_expr);
35586 return_stmt = gimple_build_return (result_var);
35587
35588 if (predicate_chain == NULL_TREE)
35589 {
35590 gimple_seq_add_stmt (&gseq, convert_stmt);
35591 gimple_seq_add_stmt (&gseq, return_stmt);
35592 set_bb_seq (new_bb, gseq);
35593 gimple_set_bb (convert_stmt, new_bb);
35594 gimple_set_bb (return_stmt, new_bb);
35595 pop_cfun ();
35596 return new_bb;
35597 }
35598
35599 while (predicate_chain != NULL)
35600 {
35601 cond_var = create_tmp_var (integer_type_node);
35602 predicate_decl = TREE_PURPOSE (predicate_chain);
35603 predicate_arg = TREE_VALUE (predicate_chain);
35604 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
35605 gimple_call_set_lhs (call_cond_stmt, cond_var);
35606
35607 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
35608 gimple_set_bb (call_cond_stmt, new_bb);
35609 gimple_seq_add_stmt (&gseq, call_cond_stmt);
35610
35611 predicate_chain = TREE_CHAIN (predicate_chain);
35612
35613 if (and_expr_var == NULL)
35614 and_expr_var = cond_var;
35615 else
35616 {
35617 gimple *assign_stmt;
35618 /* Use MIN_EXPR to check whether any of the conditions is zero:
35619 and_expr_var = MIN_EXPR <cond_var, and_expr_var>.  */
35620 assign_stmt = gimple_build_assign (and_expr_var,
35621 build2 (MIN_EXPR, integer_type_node,
35622 cond_var, and_expr_var));
35623
35624 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
35625 gimple_set_bb (assign_stmt, new_bb);
35626 gimple_seq_add_stmt (&gseq, assign_stmt);
35627 }
35628 }
35629
35630 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
35631 integer_zero_node,
35632 NULL_TREE, NULL_TREE);
35633 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
35634 gimple_set_bb (if_else_stmt, new_bb);
35635 gimple_seq_add_stmt (&gseq, if_else_stmt);
35636
35637 gimple_seq_add_stmt (&gseq, convert_stmt);
35638 gimple_seq_add_stmt (&gseq, return_stmt);
35639 set_bb_seq (new_bb, gseq);
35640
35641 bb1 = new_bb;
35642 e12 = split_block (bb1, if_else_stmt);
35643 bb2 = e12->dest;
35644 e12->flags &= ~EDGE_FALLTHRU;
35645 e12->flags |= EDGE_TRUE_VALUE;
35646
35647 e23 = split_block (bb2, return_stmt);
35648
35649 gimple_set_bb (convert_stmt, bb2);
35650 gimple_set_bb (return_stmt, bb2);
35651
35652 bb3 = e23->dest;
35653 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
35654
35655 remove_edge (e23);
35656 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
35657
35658 pop_cfun ();
35659
35660 return bb3;
35661 }
35662
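/* Rough pseudo-GIMPLE sketch of what add_condition_to_bb emits for a
   version guarded by, say, arch=corei7 plus avx (names illustrative):

     cond1 = __builtin_cpu_is ("corei7");
     cond2 = __builtin_cpu_supports ("avx");
     cond1 = MIN_EXPR <cond2, cond1>;
     if (cond1 > 0)
       return (void *) &foo.arch_corei7_avx;
     // otherwise fall through to the returned block, where the next
     // version's test is appended by a later call.

   MIN_EXPR makes the combined condition zero as soon as any predicate
   returns zero.  */
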
35663 /* This parses the attribute arguments to target in DECL and determines
35664 the right builtin to use to match the platform specification.
35665 It returns the priority value for this version decl. If PREDICATE_LIST
35666 is not NULL, it stores the list of cpu features that need to be checked
35667 before dispatching this function. */
35668
35669 static unsigned int
35670 get_builtin_code_for_version (tree decl, tree *predicate_list)
35671 {
35672 tree attrs;
35673 struct cl_target_option cur_target;
35674 tree target_node;
35675 struct cl_target_option *new_target;
35676 const char *arg_str = NULL;
35677 const char *attrs_str = NULL;
35678 char *tok_str = NULL;
35679 char *token;
35680
35681 /* Priority of i386 features, greater value is higher priority. This is
35682 used to decide the order in which function dispatch must happen. For
35683 instance, a version specialized for SSE4.2 should be checked for dispatch
35684 before a version for SSE3, as SSE4.2 implies SSE3. */
35685 enum feature_priority
35686 {
35687 P_ZERO = 0,
35688 P_MMX,
35689 P_SSE,
35690 P_SSE2,
35691 P_SSE3,
35692 P_SSSE3,
35693 P_PROC_SSSE3,
35694 P_SSE4_A,
35695 P_PROC_SSE4_A,
35696 P_SSE4_1,
35697 P_SSE4_2,
35698 P_PROC_SSE4_2,
35699 P_POPCNT,
35700 P_AES,
35701 P_PCLMUL,
35702 P_AVX,
35703 P_PROC_AVX,
35704 P_BMI,
35705 P_PROC_BMI,
35706 P_FMA4,
35707 P_XOP,
35708 P_PROC_XOP,
35709 P_FMA,
35710 P_PROC_FMA,
35711 P_BMI2,
35712 P_AVX2,
35713 P_PROC_AVX2,
35714 P_AVX512F,
35715 P_PROC_AVX512F
35716 };
35717
35718 enum feature_priority priority = P_ZERO;
35719
35720 /* These are the target attribute strings for which a dispatcher is
35721 available, from fold_builtin_cpu. */
35722
35723 static struct _feature_list
35724 {
35725 const char *const name;
35726 const enum feature_priority priority;
35727 }
35728 const feature_list[] =
35729 {
35730 {"mmx", P_MMX},
35731 {"sse", P_SSE},
35732 {"sse2", P_SSE2},
35733 {"sse3", P_SSE3},
35734 {"sse4a", P_SSE4_A},
35735 {"ssse3", P_SSSE3},
35736 {"sse4.1", P_SSE4_1},
35737 {"sse4.2", P_SSE4_2},
35738 {"popcnt", P_POPCNT},
35739 {"aes", P_AES},
35740 {"pclmul", P_PCLMUL},
35741 {"avx", P_AVX},
35742 {"bmi", P_BMI},
35743 {"fma4", P_FMA4},
35744 {"xop", P_XOP},
35745 {"fma", P_FMA},
35746 {"bmi2", P_BMI2},
35747 {"avx2", P_AVX2},
35748 {"avx512f", P_AVX512F}
35749 };
35750
35751
35752 static unsigned int NUM_FEATURES
35753 = sizeof (feature_list) / sizeof (struct _feature_list);
35754
35755 unsigned int i;
35756
35757 tree predicate_chain = NULL_TREE;
35758 tree predicate_decl, predicate_arg;
35759
35760 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
35761 gcc_assert (attrs != NULL);
35762
35763 attrs = TREE_VALUE (TREE_VALUE (attrs));
35764
35765 gcc_assert (TREE_CODE (attrs) == STRING_CST);
35766 attrs_str = TREE_STRING_POINTER (attrs);
35767
35768 /* Return priority zero for default function. */
35769 if (strcmp (attrs_str, "default") == 0)
35770 return 0;
35771
35772 /* Handle arch= if specified. For priority, set it to be 1 more than
35773 the best instruction set the processor can handle. For instance, if
35774 there is a version for atom and a version for ssse3 (the highest ISA
35775 priority for atom), the atom version must be checked for dispatch
35776 before the ssse3 version. */
35777 if (strstr (attrs_str, "arch=") != NULL)
35778 {
35779 cl_target_option_save (&cur_target, &global_options);
35780 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
35781 &global_options_set);
35782
35783 gcc_assert (target_node);
35784 new_target = TREE_TARGET_OPTION (target_node);
35785 gcc_assert (new_target);
35786
35787 if (new_target->arch_specified && new_target->arch > 0)
35788 {
35789 switch (new_target->arch)
35790 {
35791 case PROCESSOR_CORE2:
35792 arg_str = "core2";
35793 priority = P_PROC_SSSE3;
35794 break;
35795 case PROCESSOR_NEHALEM:
35796 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
35797 arg_str = "westmere";
35798 else
35799 /* We translate "arch=corei7" and "arch=nehalem" to
35800 "corei7" so that it will be mapped to M_INTEL_COREI7
35801 as cpu type to cover all M_INTEL_COREI7_XXXs. */
35802 arg_str = "corei7";
35803 priority = P_PROC_SSE4_2;
35804 break;
35805 case PROCESSOR_SANDYBRIDGE:
35806 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
35807 arg_str = "ivybridge";
35808 else
35809 arg_str = "sandybridge";
35810 priority = P_PROC_AVX;
35811 break;
35812 case PROCESSOR_HASWELL:
35813 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
35814 arg_str = "broadwell";
35815 else
35816 arg_str = "haswell";
35817 priority = P_PROC_AVX2;
35818 break;
35819 case PROCESSOR_BONNELL:
35820 arg_str = "bonnell";
35821 priority = P_PROC_SSSE3;
35822 break;
35823 case PROCESSOR_KNL:
35824 arg_str = "knl";
35825 priority = P_PROC_AVX512F;
35826 break;
35827 case PROCESSOR_SILVERMONT:
35828 arg_str = "silvermont";
35829 priority = P_PROC_SSE4_2;
35830 break;
35831 case PROCESSOR_AMDFAM10:
35832 arg_str = "amdfam10h";
35833 priority = P_PROC_SSE4_A;
35834 break;
35835 case PROCESSOR_BTVER1:
35836 arg_str = "btver1";
35837 priority = P_PROC_SSE4_A;
35838 break;
35839 case PROCESSOR_BTVER2:
35840 arg_str = "btver2";
35841 priority = P_PROC_BMI;
35842 break;
35843 case PROCESSOR_BDVER1:
35844 arg_str = "bdver1";
35845 priority = P_PROC_XOP;
35846 break;
35847 case PROCESSOR_BDVER2:
35848 arg_str = "bdver2";
35849 priority = P_PROC_FMA;
35850 break;
35851 case PROCESSOR_BDVER3:
35852 arg_str = "bdver3";
35853 priority = P_PROC_FMA;
35854 break;
35855 case PROCESSOR_BDVER4:
35856 arg_str = "bdver4";
35857 priority = P_PROC_AVX2;
35858 break;
35859 }
35860 }
35861
35862 cl_target_option_restore (&global_options, &cur_target);
35863
35864 if (predicate_list && arg_str == NULL)
35865 {
35866 error_at (DECL_SOURCE_LOCATION (decl),
35867 "No dispatcher found for the versioning attributes");
35868 return 0;
35869 }
35870
35871 if (predicate_list)
35872 {
35873 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
35874 /* For a C string literal the length includes the trailing NULL. */
35875 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
35876 predicate_chain = tree_cons (predicate_decl, predicate_arg,
35877 predicate_chain);
35878 }
35879 }
35880
35881 /* Process feature name. */
35882 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
35883 strcpy (tok_str, attrs_str);
35884 token = strtok (tok_str, ",");
35885 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
35886
35887 while (token != NULL)
35888 {
35889 /* Do not process "arch=" */
35890 if (strncmp (token, "arch=", 5) == 0)
35891 {
35892 token = strtok (NULL, ",");
35893 continue;
35894 }
35895 for (i = 0; i < NUM_FEATURES; ++i)
35896 {
35897 if (strcmp (token, feature_list[i].name) == 0)
35898 {
35899 if (predicate_list)
35900 {
35901 predicate_arg = build_string_literal (
35902 strlen (feature_list[i].name) + 1,
35903 feature_list[i].name);
35904 predicate_chain = tree_cons (predicate_decl, predicate_arg,
35905 predicate_chain);
35906 }
35907 /* Find the maximum priority feature. */
35908 if (feature_list[i].priority > priority)
35909 priority = feature_list[i].priority;
35910
35911 break;
35912 }
35913 }
35914 if (predicate_list && i == NUM_FEATURES)
35915 {
35916 error_at (DECL_SOURCE_LOCATION (decl),
35917 "No dispatcher found for %s", token);
35918 return 0;
35919 }
35920 token = strtok (NULL, ",");
35921 }
35922 free (tok_str);
35923
35924 if (predicate_list && predicate_chain == NULL_TREE)
35925 {
35926 error_at (DECL_SOURCE_LOCATION (decl),
35927 "No dispatcher found for the versioning attributes : %s",
35928 attrs_str);
35929 return 0;
35930 }
35931 else if (predicate_list)
35932 {
35933 predicate_chain = nreverse (predicate_chain);
35934 *predicate_list = predicate_chain;
35935 }
35936
35937 return priority;
35938 }
35939
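/* Worked example of the priority computation above: a version declared
   with __attribute__ ((target ("arch=corei7"))) produces arg_str
   "corei7" and priority P_PROC_SSE4_2, while target ("avx2") produces
   P_AVX2; since P_AVX2 is the higher value, the avx2 version is tested
   for dispatch before the corei7 one.  */
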
35940 /* This compares the priority of target features in function DECL1
35941 and DECL2. It returns positive value if DECL1 is higher priority,
35942 negative value if DECL2 is higher priority and 0 if they are the
35943 same. */
35944
35945 static int
35946 ix86_compare_version_priority (tree decl1, tree decl2)
35947 {
35948 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
35949 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
35950
35951 return (int)priority1 - (int)priority2;
35952 }
35953
35954 /* V1 and V2 point to function versions with different priorities
35955 based on the target ISA. This function compares their priorities. */
35956
35957 static int
35958 feature_compare (const void *v1, const void *v2)
35959 {
35960 typedef struct _function_version_info
35961 {
35962 tree version_decl;
35963 tree predicate_chain;
35964 unsigned int dispatch_priority;
35965 } function_version_info;
35966
35967 const function_version_info c1 = *(const function_version_info *)v1;
35968 const function_version_info c2 = *(const function_version_info *)v2;
35969 return (c2.dispatch_priority - c1.dispatch_priority);
35970 }
35971
35972 /* This function generates the dispatch function for
35973 multi-versioned functions. DISPATCH_DECL is the function which will
35974 contain the dispatch logic. FNDECLS are the function choices for
35975 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
35976 in DISPATCH_DECL in which the dispatch code is generated. */
35977
35978 static int
35979 dispatch_function_versions (tree dispatch_decl,
35980 void *fndecls_p,
35981 basic_block *empty_bb)
35982 {
35983 tree default_decl;
35984 gimple *ifunc_cpu_init_stmt;
35985 gimple_seq gseq;
35986 int ix;
35987 tree ele;
35988 vec<tree> *fndecls;
35989 unsigned int num_versions = 0;
35990 unsigned int actual_versions = 0;
35991 unsigned int i;
35992
35993 struct _function_version_info
35994 {
35995 tree version_decl;
35996 tree predicate_chain;
35997 unsigned int dispatch_priority;
35998 }*function_version_info;
35999
36000 gcc_assert (dispatch_decl != NULL
36001 && fndecls_p != NULL
36002 && empty_bb != NULL);
36003
36004 /* fndecls_p is actually a vector.  */
36005 fndecls = static_cast<vec<tree> *> (fndecls_p);
36006
36007 /* At least one more version other than the default. */
36008 num_versions = fndecls->length ();
36009 gcc_assert (num_versions >= 2);
36010
36011 function_version_info = (struct _function_version_info *)
36012 XNEWVEC (struct _function_version_info, (num_versions - 1));
36013
36014 /* The first version in the vector is the default decl. */
36015 default_decl = (*fndecls)[0];
36016
36017 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
36018
36019 gseq = bb_seq (*empty_bb);
36020 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
36021 constructors, so explicitly call __builtin_cpu_init here. */
36022 ifunc_cpu_init_stmt = gimple_build_call_vec (
36023 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
36024 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
36025 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
36026 set_bb_seq (*empty_bb, gseq);
36027
36028 pop_cfun ();
36029
36030
36031 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
36032 {
36033 tree version_decl = ele;
36034 tree predicate_chain = NULL_TREE;
36035 unsigned int priority;
36036 /* Get attribute string, parse it and find the right predicate decl.
36037 The predicate function could be a lengthy combination of many
36038 features, like arch-type and various isa-variants. */
36039 priority = get_builtin_code_for_version (version_decl,
36040 &predicate_chain);
36041
36042 if (predicate_chain == NULL_TREE)
36043 continue;
36044
36045 function_version_info [actual_versions].version_decl = version_decl;
36046 function_version_info [actual_versions].predicate_chain
36047 = predicate_chain;
36048 function_version_info [actual_versions].dispatch_priority = priority;
36049 actual_versions++;
36050 }
36051
36052 /* Sort the versions according to descending order of dispatch priority. The
36053 priority is based on the ISA. This is not a perfect solution. There
36054 could still be ambiguity. If more than one function version is suitable
36055 to execute, which one should be dispatched? In future, allow the user
36056 to specify a dispatch priority next to the version. */
36057 qsort (function_version_info, actual_versions,
36058 sizeof (struct _function_version_info), feature_compare);
36059
36060 for (i = 0; i < actual_versions; ++i)
36061 *empty_bb = add_condition_to_bb (dispatch_decl,
36062 function_version_info[i].version_decl,
36063 function_version_info[i].predicate_chain,
36064 *empty_bb);
36065
36066 /* Dispatch the default version at the end. */
36067 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
36068 NULL, *empty_bb);
36069
36070 free (function_version_info);
36071 return 0;
36072 }
36073
36074 /* Comparator function to be used in qsort routine to sort attribute
36075 specification strings to "target". */
36076
36077 static int
36078 attr_strcmp (const void *v1, const void *v2)
36079 {
36080 const char *c1 = *(char *const*)v1;
36081 const char *c2 = *(char *const*)v2;
36082 return strcmp (c1, c2);
36083 }
36084
36085 /* ARGLIST is the argument to target attribute. This function tokenizes
36086 the comma separated arguments, sorts them and returns a string which
36087 is a unique identifier for the comma separated arguments. It also
36088 replaces non-identifier characters "=,-" with "_". */
36089
36090 static char *
36091 sorted_attr_string (tree arglist)
36092 {
36093 tree arg;
36094 size_t str_len_sum = 0;
36095 char **args = NULL;
36096 char *attr_str, *ret_str;
36097 char *attr = NULL;
36098 unsigned int argnum = 1;
36099 unsigned int i;
36100
36101 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
36102 {
36103 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
36104 size_t len = strlen (str);
36105 str_len_sum += len + 1;
36106 if (arg != arglist)
36107 argnum++;
36108 for (i = 0; i < strlen (str); i++)
36109 if (str[i] == ',')
36110 argnum++;
36111 }
36112
36113 attr_str = XNEWVEC (char, str_len_sum);
36114 str_len_sum = 0;
36115 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
36116 {
36117 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
36118 size_t len = strlen (str);
36119 memcpy (attr_str + str_len_sum, str, len);
36120 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
36121 str_len_sum += len + 1;
36122 }
36123
36124 /* Replace "=,-" with "_". */
36125 for (i = 0; i < strlen (attr_str); i++)
36126 if (attr_str[i] == '=' || attr_str[i]== '-')
36127 attr_str[i] = '_';
36128
36129 if (argnum == 1)
36130 return attr_str;
36131
36132 args = XNEWVEC (char *, argnum);
36133
36134 i = 0;
36135 attr = strtok (attr_str, ",");
36136 while (attr != NULL)
36137 {
36138 args[i] = attr;
36139 i++;
36140 attr = strtok (NULL, ",");
36141 }
36142
36143 qsort (args, argnum, sizeof (char *), attr_strcmp);
36144
36145 ret_str = XNEWVEC (char, str_len_sum);
36146 str_len_sum = 0;
36147 for (i = 0; i < argnum; i++)
36148 {
36149 size_t len = strlen (args[i]);
36150 memcpy (ret_str + str_len_sum, args[i], len);
36151 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
36152 str_len_sum += len + 1;
36153 }
36154
36155 XDELETEVEC (args);
36156 XDELETEVEC (attr_str);
36157 return ret_str;
36158 }
36159
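/* Worked example: for __attribute__ ((target ("avx,arch=core2"))) the
   arguments concatenate to "avx,arch=core2"; '=' and '-' are rewritten
   to '_', the comma-separated tokens are sorted, and the result is
   "arch_core2_avx", which is what the version mangling below appends to
   the assembler name.  */
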
36160 /* This function changes the assembler name for functions that are
36161 versions. If DECL is a function version and has a "target"
36162 attribute, it appends the attribute string to its assembler name. */
36163
36164 static tree
36165 ix86_mangle_function_version_assembler_name (tree decl, tree id)
36166 {
36167 tree version_attr;
36168 const char *orig_name, *version_string;
36169 char *attr_str, *assembler_name;
36170
36171 if (DECL_DECLARED_INLINE_P (decl)
36172 && lookup_attribute ("gnu_inline",
36173 DECL_ATTRIBUTES (decl)))
36174 error_at (DECL_SOURCE_LOCATION (decl),
36175 "Function versions cannot be marked as gnu_inline,"
36176 " bodies have to be generated");
36177
36178 if (DECL_VIRTUAL_P (decl)
36179 || DECL_VINDEX (decl))
36180 sorry ("Virtual function multiversioning not supported");
36181
36182 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
36183
36184 /* target attribute string cannot be NULL. */
36185 gcc_assert (version_attr != NULL_TREE);
36186
36187 orig_name = IDENTIFIER_POINTER (id);
36188 version_string
36189 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
36190
36191 if (strcmp (version_string, "default") == 0)
36192 return id;
36193
36194 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
36195 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
36196
36197 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
36198
36199 /* Allow assembler name to be modified if already set. */
36200 if (DECL_ASSEMBLER_NAME_SET_P (decl))
36201 SET_DECL_RTL (decl, NULL);
36202
36203 tree ret = get_identifier (assembler_name);
36204 XDELETEVEC (attr_str);
36205 XDELETEVEC (assembler_name);
36206 return ret;
36207 }
36208
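/* Example of the resulting assembler names: a version of "foo" declared
   with target ("arch=core2,avx") is emitted as "foo.arch_core2_avx", a
   target ("sse4.2") version as "foo.sse4.2", and the "default" version
   keeps its original name.  */
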
36209 /* This function returns true if FN1 and FN2 are versions of the same function,
36210 that is, the target strings of the function decls are different. This assumes
36211 that FN1 and FN2 have the same signature. */
36212
36213 static bool
36214 ix86_function_versions (tree fn1, tree fn2)
36215 {
36216 tree attr1, attr2;
36217 char *target1, *target2;
36218 bool result;
36219
36220 if (TREE_CODE (fn1) != FUNCTION_DECL
36221 || TREE_CODE (fn2) != FUNCTION_DECL)
36222 return false;
36223
36224 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
36225 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
36226
36227 /* At least one function decl should have the target attribute specified. */
36228 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
36229 return false;
36230
36231 /* Diagnose missing target attribute if one of the decls is already
36232 multi-versioned. */
36233 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
36234 {
36235 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
36236 {
36237 if (attr2 != NULL_TREE)
36238 {
36239 std::swap (fn1, fn2);
36240 attr1 = attr2;
36241 }
36242 error_at (DECL_SOURCE_LOCATION (fn2),
36243 "missing %<target%> attribute for multi-versioned %D",
36244 fn2);
36245 inform (DECL_SOURCE_LOCATION (fn1),
36246 "previous declaration of %D", fn1);
36247 /* Prevent diagnosing of the same error multiple times. */
36248 DECL_ATTRIBUTES (fn2)
36249 = tree_cons (get_identifier ("target"),
36250 copy_node (TREE_VALUE (attr1)),
36251 DECL_ATTRIBUTES (fn2));
36252 }
36253 return false;
36254 }
36255
36256 target1 = sorted_attr_string (TREE_VALUE (attr1));
36257 target2 = sorted_attr_string (TREE_VALUE (attr2));
36258
36259 /* The sorted target strings must be different for fn1 and fn2
36260 to be versions. */
36261 if (strcmp (target1, target2) == 0)
36262 result = false;
36263 else
36264 result = true;
36265
36266 XDELETEVEC (target1);
36267 XDELETEVEC (target2);
36268
36269 return result;
36270 }
36271
36272 static tree
36273 ix86_mangle_decl_assembler_name (tree decl, tree id)
36274 {
36275 /* For function version, add the target suffix to the assembler name. */
36276 if (TREE_CODE (decl) == FUNCTION_DECL
36277 && DECL_FUNCTION_VERSIONED (decl))
36278 id = ix86_mangle_function_version_assembler_name (decl, id);
36279 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
36280 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
36281 #endif
36282
36283 return id;
36284 }
36285
36286 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
36287 is true, append the full path name of the source file. */
36288
36289 static char *
36290 make_name (tree decl, const char *suffix, bool make_unique)
36291 {
36292 char *global_var_name;
36293 int name_len;
36294 const char *name;
36295 const char *unique_name = NULL;
36296
36297 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
36298
36299 /* Get a unique name that can be used globally without any chances
36300 of collision at link time. */
36301 if (make_unique)
36302 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
36303
36304 name_len = strlen (name) + strlen (suffix) + 2;
36305
36306 if (make_unique)
36307 name_len += strlen (unique_name) + 1;
36308 global_var_name = XNEWVEC (char, name_len);
36309
36310 /* Use '.' to concatenate names as it is demangler friendly. */
36311 if (make_unique)
36312 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
36313 suffix);
36314 else
36315 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
36316
36317 return global_var_name;
36318 }
36319
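/* Example of the names this produces: for a public function "foo" and
   suffix "ifunc" (see make_dispatcher_decl below) the result is
   "foo.ifunc"; for a function with internal linkage the unique name from
   get_file_function_name is spliced in, giving "foo.<unique>.ifunc".  */
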
36320 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
36321
36322 /* Make a dispatcher declaration for the multi-versioned function DECL.
36323 Calls to DECL function will be replaced with calls to the dispatcher
36324 by the front-end. Return the decl created. */
36325
36326 static tree
36327 make_dispatcher_decl (const tree decl)
36328 {
36329 tree func_decl;
36330 char *func_name;
36331 tree fn_type, func_type;
36332 bool is_uniq = false;
36333
36334 if (TREE_PUBLIC (decl) == 0)
36335 is_uniq = true;
36336
36337 func_name = make_name (decl, "ifunc", is_uniq);
36338
36339 fn_type = TREE_TYPE (decl);
36340 func_type = build_function_type (TREE_TYPE (fn_type),
36341 TYPE_ARG_TYPES (fn_type));
36342
36343 func_decl = build_fn_decl (func_name, func_type);
36344 XDELETEVEC (func_name);
36345 TREE_USED (func_decl) = 1;
36346 DECL_CONTEXT (func_decl) = NULL_TREE;
36347 DECL_INITIAL (func_decl) = error_mark_node;
36348 DECL_ARTIFICIAL (func_decl) = 1;
36349 /* Mark this func as external, the resolver will flip it again if
36350 it gets generated. */
36351 DECL_EXTERNAL (func_decl) = 1;
36352 /* This will be of type IFUNCs have to be externally visible. */
36353 TREE_PUBLIC (func_decl) = 1;
36354
36355 return func_decl;
36356 }
36357
36358 #endif
36359
36360 /* Return true if DECL is multi-versioned and is the default function,
36361 that is, it is not tagged with a target-specific optimization. */
36362
36363 static bool
36364 is_function_default_version (const tree decl)
36365 {
36366 if (TREE_CODE (decl) != FUNCTION_DECL
36367 || !DECL_FUNCTION_VERSIONED (decl))
36368 return false;
36369 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
36370 gcc_assert (attr);
36371 attr = TREE_VALUE (TREE_VALUE (attr));
36372 return (TREE_CODE (attr) == STRING_CST
36373 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
36374 }
36375
36376 /* Make a dispatcher declaration for the multi-versioned function DECL.
36377 Calls to DECL function will be replaced with calls to the dispatcher
36378 by the front-end. Returns the decl of the dispatcher function. */
36379
36380 static tree
36381 ix86_get_function_versions_dispatcher (void *decl)
36382 {
36383 tree fn = (tree) decl;
36384 struct cgraph_node *node = NULL;
36385 struct cgraph_node *default_node = NULL;
36386 struct cgraph_function_version_info *node_v = NULL;
36387 struct cgraph_function_version_info *first_v = NULL;
36388
36389 tree dispatch_decl = NULL;
36390
36391 struct cgraph_function_version_info *default_version_info = NULL;
36392
36393 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
36394
36395 node = cgraph_node::get (fn);
36396 gcc_assert (node != NULL);
36397
36398 node_v = node->function_version ();
36399 gcc_assert (node_v != NULL);
36400
36401 if (node_v->dispatcher_resolver != NULL)
36402 return node_v->dispatcher_resolver;
36403
36404 /* Find the default version and make it the first node. */
36405 first_v = node_v;
36406 /* Go to the beginning of the chain. */
36407 while (first_v->prev != NULL)
36408 first_v = first_v->prev;
36409 default_version_info = first_v;
36410 while (default_version_info != NULL)
36411 {
36412 if (is_function_default_version
36413 (default_version_info->this_node->decl))
36414 break;
36415 default_version_info = default_version_info->next;
36416 }
36417
36418 /* If there is no default node, just return NULL. */
36419 if (default_version_info == NULL)
36420 return NULL;
36421
36422 /* Make default info the first node. */
36423 if (first_v != default_version_info)
36424 {
36425 default_version_info->prev->next = default_version_info->next;
36426 if (default_version_info->next)
36427 default_version_info->next->prev = default_version_info->prev;
36428 first_v->prev = default_version_info;
36429 default_version_info->next = first_v;
36430 default_version_info->prev = NULL;
36431 }
36432
36433 default_node = default_version_info->this_node;
36434
36435 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
36436 if (targetm.has_ifunc_p ())
36437 {
36438 struct cgraph_function_version_info *it_v = NULL;
36439 struct cgraph_node *dispatcher_node = NULL;
36440 struct cgraph_function_version_info *dispatcher_version_info = NULL;
36441
36442 /* Right now, the dispatching is done via ifunc. */
36443 dispatch_decl = make_dispatcher_decl (default_node->decl);
36444
36445 dispatcher_node = cgraph_node::get_create (dispatch_decl);
36446 gcc_assert (dispatcher_node != NULL);
36447 dispatcher_node->dispatcher_function = 1;
36448 dispatcher_version_info
36449 = dispatcher_node->insert_new_function_version ();
36450 dispatcher_version_info->next = default_version_info;
36451 dispatcher_node->definition = 1;
36452
36453 /* Set the dispatcher for all the versions. */
36454 it_v = default_version_info;
36455 while (it_v != NULL)
36456 {
36457 it_v->dispatcher_resolver = dispatch_decl;
36458 it_v = it_v->next;
36459 }
36460 }
36461 else
36462 #endif
36463 {
36464 error_at (DECL_SOURCE_LOCATION (default_node->decl),
36465 "multiversioning needs ifunc which is not supported "
36466 "on this target");
36467 }
36468
36469 return dispatch_decl;
36470 }
36471
36472 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
36473 it to CHAIN. */
36474
36475 static tree
36476 make_attribute (const char *name, const char *arg_name, tree chain)
36477 {
36478 tree attr_name;
36479 tree attr_arg_name;
36480 tree attr_args;
36481 tree attr;
36482
36483 attr_name = get_identifier (name);
36484 attr_arg_name = build_string (strlen (arg_name), arg_name);
36485 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
36486 attr = tree_cons (attr_name, attr_args, chain);
36487 return attr;
36488 }
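/* Sketch of what make_attribute builds (the resolver name is a placeholder):
   make_attribute ("ifunc", "foo.resolver", NULL_TREE) is the tree form of
   writing

     __attribute__ ((ifunc ("foo.resolver")))

   on a declaration, chained in front of CHAIN.  */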
36489
36490 /* Make the resolver function decl to dispatch the versions of
36491 a multi-versioned function, DEFAULT_DECL, whose dispatcher is
36492 DISPATCH_DECL. Create an empty basic block in the resolver and
36493 store the pointer in EMPTY_BB. Return the decl of the resolver. */
36494
36495 static tree
36496 make_resolver_func (const tree default_decl,
36497 const tree dispatch_decl,
36498 basic_block *empty_bb)
36499 {
36500 char *resolver_name;
36501 tree decl, type, decl_name, t;
36502 bool is_uniq = false;
36503
36504 /* IFUNCs have to be globally visible. So, if the default_decl is
36505 not, then the name of the IFUNC should be made unique. */
36506 if (TREE_PUBLIC (default_decl) == 0)
36507 is_uniq = true;
36508
36509 /* Append the filename to the resolver function if the versions are
36510 not externally visible. This is because the resolver function has
36511 to be externally visible for the loader to find it. So, appending
36512 the filename will prevent conflicts with a resolver function from
36513 another module which is based on the same version name. */
36514 resolver_name = make_name (default_decl, "resolver", is_uniq);
36515
36516 /* The resolver function should return a (void *). */
36517 type = build_function_type_list (ptr_type_node, NULL_TREE);
36518
36519 decl = build_fn_decl (resolver_name, type);
36520 decl_name = get_identifier (resolver_name);
36521 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
36522
36523 DECL_NAME (decl) = decl_name;
36524 TREE_USED (decl) = 1;
36525 DECL_ARTIFICIAL (decl) = 1;
36526 DECL_IGNORED_P (decl) = 0;
36527 /* IFUNC resolvers have to be externally visible. */
36528 TREE_PUBLIC (decl) = 1;
36529 DECL_UNINLINABLE (decl) = 1;
36530
36531 /* Resolver is not external, body is generated. */
36532 DECL_EXTERNAL (decl) = 0;
36533 DECL_EXTERNAL (dispatch_decl) = 0;
36534
36535 DECL_CONTEXT (decl) = NULL_TREE;
36536 DECL_INITIAL (decl) = make_node (BLOCK);
36537 DECL_STATIC_CONSTRUCTOR (decl) = 0;
36538
36539 if (DECL_COMDAT_GROUP (default_decl)
36540 || TREE_PUBLIC (default_decl))
36541 {
36542 /* In this case, each translation unit with a call to this
36543 versioned function will put out a resolver. Ensure it
36544 is comdat to keep just one copy. */
36545 DECL_COMDAT (decl) = 1;
36546 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
36547 }
36548 /* Build result decl and add to function_decl. */
36549 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
36550 DECL_ARTIFICIAL (t) = 1;
36551 DECL_IGNORED_P (t) = 1;
36552 DECL_RESULT (decl) = t;
36553
36554 gimplify_function_tree (decl);
36555 push_cfun (DECL_STRUCT_FUNCTION (decl));
36556 *empty_bb = init_lowered_empty_function (decl, false, 0);
36557
36558 cgraph_node::add_new_function (decl, true);
36559 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
36560
36561 pop_cfun ();
36562
36563 gcc_assert (dispatch_decl != NULL);
36564 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
36565 DECL_ATTRIBUTES (dispatch_decl)
36566 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
36567
36568 /* Create the alias for dispatch to resolver here. */
36569 /*cgraph_create_function_alias (dispatch_decl, decl);*/
36570 cgraph_node::create_same_body_alias (dispatch_decl, decl);
36571 XDELETEVEC (resolver_name);
36572 return decl;
36573 }
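/* Conceptual sketch of the objects assembled above, written as GNU C
   (illustrative only; the dotted names are assembler-level names produced by
   make_name, not valid C identifiers):

     void *foo.resolver (void);                    // body filled in later
     int foo.ifunc (int) __attribute__ ((ifunc ("foo.resolver")));

   The dynamic loader calls the resolver once and binds the ifunc symbol to
   the function version it returns.  */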
36574
36575 /* Generate the dispatching code body to dispatch multi-versioned function
36576 DECL. The target hook is called to process the "target" attributes and
36577 provide the code to dispatch the right function at run-time. NODE points
36578 to the dispatcher decl whose body will be created. */
36579
36580 static tree
36581 ix86_generate_version_dispatcher_body (void *node_p)
36582 {
36583 tree resolver_decl;
36584 basic_block empty_bb;
36585 tree default_ver_decl;
36586 struct cgraph_node *versn;
36587 struct cgraph_node *node;
36588
36589 struct cgraph_function_version_info *node_version_info = NULL;
36590 struct cgraph_function_version_info *versn_info = NULL;
36591
36592 node = (cgraph_node *)node_p;
36593
36594 node_version_info = node->function_version ();
36595 gcc_assert (node->dispatcher_function
36596 && node_version_info != NULL);
36597
36598 if (node_version_info->dispatcher_resolver)
36599 return node_version_info->dispatcher_resolver;
36600
36601 /* The first version in the chain corresponds to the default version. */
36602 default_ver_decl = node_version_info->next->this_node->decl;
36603
36604 /* node is going to be an alias, so remove the finalized bit. */
36605 node->definition = false;
36606
36607 resolver_decl = make_resolver_func (default_ver_decl,
36608 node->decl, &empty_bb);
36609
36610 node_version_info->dispatcher_resolver = resolver_decl;
36611
36612 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
36613
36614 auto_vec<tree, 2> fn_ver_vec;
36615
36616 for (versn_info = node_version_info->next; versn_info;
36617 versn_info = versn_info->next)
36618 {
36619 versn = versn_info->this_node;
36620 /* Check for virtual functions here again, as by this time it should
36621 have been determined if this function needs a vtable index or
36622 not. This happens for methods in derived classes that override
36623 virtual methods in base classes but are not explicitly marked as
36624 virtual. */
36625 if (DECL_VINDEX (versn->decl))
36626 sorry ("Virtual function multiversioning not supported");
36627
36628 fn_ver_vec.safe_push (versn->decl);
36629 }
36630
36631 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
36632 cgraph_edge::rebuild_edges ();
36633 pop_cfun ();
36634 return resolver_decl;
36635 }
36636 /* This builds the processor_model struct type defined in
36637 libgcc/config/i386/cpuinfo.c. */
36638
36639 static tree
36640 build_processor_model_struct (void)
36641 {
36642 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
36643 "__cpu_features"};
36644 tree field = NULL_TREE, field_chain = NULL_TREE;
36645 int i;
36646 tree type = make_node (RECORD_TYPE);
36647
36648 /* The first 3 fields are unsigned int. */
36649 for (i = 0; i < 3; ++i)
36650 {
36651 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
36652 get_identifier (field_name[i]), unsigned_type_node);
36653 if (field_chain != NULL_TREE)
36654 DECL_CHAIN (field) = field_chain;
36655 field_chain = field;
36656 }
36657
36658 /* The last field is an array of unsigned integers of size one. */
36659 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
36660 get_identifier (field_name[3]),
36661 build_array_type (unsigned_type_node,
36662 build_index_type (size_one_node)));
36663 if (field_chain != NULL_TREE)
36664 DECL_CHAIN (field) = field_chain;
36665 field_chain = field;
36666
36667 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
36668 return type;
36669 }
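/* The record built above is intended to match the structure exported by
   libgcc; roughly (see libgcc/config/i386/cpuinfo.c for the authoritative
   definition):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */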
36670
36671 /* Return an extern, comdat VAR_DECL of type TYPE and name NAME. */
36672
36673 static tree
36674 make_var_decl (tree type, const char *name)
36675 {
36676 tree new_decl;
36677
36678 new_decl = build_decl (UNKNOWN_LOCATION,
36679 VAR_DECL,
36680 get_identifier(name),
36681 type);
36682
36683 DECL_EXTERNAL (new_decl) = 1;
36684 TREE_STATIC (new_decl) = 1;
36685 TREE_PUBLIC (new_decl) = 1;
36686 DECL_INITIAL (new_decl) = 0;
36687 DECL_ARTIFICIAL (new_decl) = 0;
36688 DECL_PRESERVE_P (new_decl) = 1;
36689
36690 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
36691 assemble_variable (new_decl, 0, 0, 0);
36692
36693 return new_decl;
36694 }
36695
36696 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
36697 into an integer defined in libgcc/config/i386/cpuinfo.c */
36698
36699 static tree
36700 fold_builtin_cpu (tree fndecl, tree *args)
36701 {
36702 unsigned int i;
36703 enum ix86_builtins fn_code = (enum ix86_builtins)
36704 DECL_FUNCTION_CODE (fndecl);
36705 tree param_string_cst = NULL;
36706
36707 /* This is the order of bit-fields in __processor_features in cpuinfo.c. */
36708 enum processor_features
36709 {
36710 F_CMOV = 0,
36711 F_MMX,
36712 F_POPCNT,
36713 F_SSE,
36714 F_SSE2,
36715 F_SSE3,
36716 F_SSSE3,
36717 F_SSE4_1,
36718 F_SSE4_2,
36719 F_AVX,
36720 F_AVX2,
36721 F_SSE4_A,
36722 F_FMA4,
36723 F_XOP,
36724 F_FMA,
36725 F_AVX512F,
36726 F_BMI,
36727 F_BMI2,
36728 F_AES,
36729 F_PCLMUL,
36730 F_AVX512VL,
36731 F_AVX512BW,
36732 F_AVX512DQ,
36733 F_AVX512CD,
36734 F_AVX512ER,
36735 F_AVX512PF,
36736 F_AVX512VBMI,
36737 F_AVX512IFMA,
36738 F_MAX
36739 };
36740
36741 /* These are the values for vendor types and cpu types and subtypes
36742 in cpuinfo.c. Cpu types and subtypes have the corresponding
36743 start value subtracted from them before they are stored. */
36744 enum processor_model
36745 {
36746 M_INTEL = 1,
36747 M_AMD,
36748 M_CPU_TYPE_START,
36749 M_INTEL_BONNELL,
36750 M_INTEL_CORE2,
36751 M_INTEL_COREI7,
36752 M_AMDFAM10H,
36753 M_AMDFAM15H,
36754 M_INTEL_SILVERMONT,
36755 M_INTEL_KNL,
36756 M_AMD_BTVER1,
36757 M_AMD_BTVER2,
36758 M_CPU_SUBTYPE_START,
36759 M_INTEL_COREI7_NEHALEM,
36760 M_INTEL_COREI7_WESTMERE,
36761 M_INTEL_COREI7_SANDYBRIDGE,
36762 M_AMDFAM10H_BARCELONA,
36763 M_AMDFAM10H_SHANGHAI,
36764 M_AMDFAM10H_ISTANBUL,
36765 M_AMDFAM15H_BDVER1,
36766 M_AMDFAM15H_BDVER2,
36767 M_AMDFAM15H_BDVER3,
36768 M_AMDFAM15H_BDVER4,
36769 M_INTEL_COREI7_IVYBRIDGE,
36770 M_INTEL_COREI7_HASWELL,
36771 M_INTEL_COREI7_BROADWELL,
36772 M_INTEL_COREI7_SKYLAKE,
36773 M_INTEL_COREI7_SKYLAKE_AVX512
36774 };
36775
36776 static struct _arch_names_table
36777 {
36778 const char *const name;
36779 const enum processor_model model;
36780 }
36781 const arch_names_table[] =
36782 {
36783 {"amd", M_AMD},
36784 {"intel", M_INTEL},
36785 {"atom", M_INTEL_BONNELL},
36786 {"slm", M_INTEL_SILVERMONT},
36787 {"core2", M_INTEL_CORE2},
36788 {"corei7", M_INTEL_COREI7},
36789 {"nehalem", M_INTEL_COREI7_NEHALEM},
36790 {"westmere", M_INTEL_COREI7_WESTMERE},
36791 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
36792 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
36793 {"haswell", M_INTEL_COREI7_HASWELL},
36794 {"broadwell", M_INTEL_COREI7_BROADWELL},
36795 {"skylake", M_INTEL_COREI7_SKYLAKE},
36796 {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512},
36797 {"bonnell", M_INTEL_BONNELL},
36798 {"silvermont", M_INTEL_SILVERMONT},
36799 {"knl", M_INTEL_KNL},
36800 {"amdfam10h", M_AMDFAM10H},
36801 {"barcelona", M_AMDFAM10H_BARCELONA},
36802 {"shanghai", M_AMDFAM10H_SHANGHAI},
36803 {"istanbul", M_AMDFAM10H_ISTANBUL},
36804 {"btver1", M_AMD_BTVER1},
36805 {"amdfam15h", M_AMDFAM15H},
36806 {"bdver1", M_AMDFAM15H_BDVER1},
36807 {"bdver2", M_AMDFAM15H_BDVER2},
36808 {"bdver3", M_AMDFAM15H_BDVER3},
36809 {"bdver4", M_AMDFAM15H_BDVER4},
36810 {"btver2", M_AMD_BTVER2},
36811 };
36812
36813 static struct _isa_names_table
36814 {
36815 const char *const name;
36816 const enum processor_features feature;
36817 }
36818 const isa_names_table[] =
36819 {
36820 {"cmov", F_CMOV},
36821 {"mmx", F_MMX},
36822 {"popcnt", F_POPCNT},
36823 {"sse", F_SSE},
36824 {"sse2", F_SSE2},
36825 {"sse3", F_SSE3},
36826 {"ssse3", F_SSSE3},
36827 {"sse4a", F_SSE4_A},
36828 {"sse4.1", F_SSE4_1},
36829 {"sse4.2", F_SSE4_2},
36830 {"avx", F_AVX},
36831 {"fma4", F_FMA4},
36832 {"xop", F_XOP},
36833 {"fma", F_FMA},
36834 {"avx2", F_AVX2},
36835 {"avx512f", F_AVX512F},
36836 {"bmi", F_BMI},
36837 {"bmi2", F_BMI2},
36838 {"aes", F_AES},
36839 {"pclmul", F_PCLMUL},
36840 {"avx512vl",F_AVX512VL},
36841 {"avx512bw",F_AVX512BW},
36842 {"avx512dq",F_AVX512DQ},
36843 {"avx512cd",F_AVX512CD},
36844 {"avx512er",F_AVX512ER},
36845 {"avx512pf",F_AVX512PF},
36846 {"avx512vbmi",F_AVX512VBMI},
36847 {"avx512ifma",F_AVX512IFMA},
36848 };
36849
36850 tree __processor_model_type = build_processor_model_struct ();
36851 tree __cpu_model_var = make_var_decl (__processor_model_type,
36852 "__cpu_model");
36853
36854
36855 varpool_node::add (__cpu_model_var);
36856
36857 gcc_assert ((args != NULL) && (*args != NULL));
36858
36859 param_string_cst = *args;
36860 while (param_string_cst
36861 && TREE_CODE (param_string_cst) != STRING_CST)
36862 {
36863 /* *args must be an expr that can contain other EXPRs leading to a
36864 STRING_CST. */
36865 if (!EXPR_P (param_string_cst))
36866 {
36867 error ("Parameter to builtin must be a string constant or literal");
36868 return integer_zero_node;
36869 }
36870 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
36871 }
36872
36873 gcc_assert (param_string_cst);
36874
36875 if (fn_code == IX86_BUILTIN_CPU_IS)
36876 {
36877 tree ref;
36878 tree field;
36879 tree final;
36880
36881 unsigned int field_val = 0;
36882 unsigned int NUM_ARCH_NAMES
36883 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
36884
36885 for (i = 0; i < NUM_ARCH_NAMES; i++)
36886 if (strcmp (arch_names_table[i].name,
36887 TREE_STRING_POINTER (param_string_cst)) == 0)
36888 break;
36889
36890 if (i == NUM_ARCH_NAMES)
36891 {
36892 error ("Parameter to builtin not valid: %s",
36893 TREE_STRING_POINTER (param_string_cst));
36894 return integer_zero_node;
36895 }
36896
36897 field = TYPE_FIELDS (__processor_model_type);
36898 field_val = arch_names_table[i].model;
36899
36900 /* CPU types are stored in the next field. */
36901 if (field_val > M_CPU_TYPE_START
36902 && field_val < M_CPU_SUBTYPE_START)
36903 {
36904 field = DECL_CHAIN (field);
36905 field_val -= M_CPU_TYPE_START;
36906 }
36907
36908 /* CPU subtypes are stored in the next field. */
36909 if (field_val > M_CPU_SUBTYPE_START)
36910 {
36911 field = DECL_CHAIN (DECL_CHAIN (field));
36912 field_val -= M_CPU_SUBTYPE_START;
36913 }
36914
36915 /* Get the appropriate field in __cpu_model. */
36916 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
36917 field, NULL_TREE);
36918
36919 /* Check the value. */
36920 final = build2 (EQ_EXPR, unsigned_type_node, ref,
36921 build_int_cstu (unsigned_type_node, field_val));
36922 return build1 (CONVERT_EXPR, integer_type_node, final);
36923 }
36924 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
36925 {
36926 tree ref;
36927 tree array_elt;
36928 tree field;
36929 tree final;
36930
36931 unsigned int field_val = 0;
36932 unsigned int NUM_ISA_NAMES
36933 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
36934
36935 for (i = 0; i < NUM_ISA_NAMES; i++)
36936 if (strcmp (isa_names_table[i].name,
36937 TREE_STRING_POINTER (param_string_cst)) == 0)
36938 break;
36939
36940 if (i == NUM_ISA_NAMES)
36941 {
36942 error ("Parameter to builtin not valid: %s",
36943 TREE_STRING_POINTER (param_string_cst));
36944 return integer_zero_node;
36945 }
36946
36947 field = TYPE_FIELDS (__processor_model_type);
36948 /* Get the last field, which is __cpu_features. */
36949 while (DECL_CHAIN (field))
36950 field = DECL_CHAIN (field);
36951
36952 /* Get the appropriate field: __cpu_model.__cpu_features */
36953 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
36954 field, NULL_TREE);
36955
36956 /* Access the 0th element of __cpu_features array. */
36957 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
36958 integer_zero_node, NULL_TREE, NULL_TREE);
36959
36960 field_val = (1 << isa_names_table[i].feature);
36961 /* Return __cpu_model.__cpu_features[0] & field_val */
36962 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
36963 build_int_cstu (unsigned_type_node, field_val));
36964 return build1 (CONVERT_EXPR, integer_type_node, final);
36965 }
36966 gcc_unreachable ();
36967 }
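/* Two worked examples of the folding above, using values from the local
   tables (shown for illustration only):

     __builtin_cpu_is ("amdfam10h")
       ->  __cpu_model.__cpu_type == (M_AMDFAM10H - M_CPU_TYPE_START)

     __builtin_cpu_supports ("avx2")
       ->  __cpu_model.__cpu_features[0] & (1 << F_AVX2)

   both results converted to int before being returned.  */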
36968
36969 static tree
36970 ix86_fold_builtin (tree fndecl, int n_args,
36971 tree *args, bool ignore ATTRIBUTE_UNUSED)
36972 {
36973 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
36974 {
36975 enum ix86_builtins fn_code = (enum ix86_builtins)
36976 DECL_FUNCTION_CODE (fndecl);
36977 if (fn_code == IX86_BUILTIN_CPU_IS
36978 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
36979 {
36980 gcc_assert (n_args == 1);
36981 return fold_builtin_cpu (fndecl, args);
36982 }
36983 }
36984
36985 #ifdef SUBTARGET_FOLD_BUILTIN
36986 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
36987 #endif
36988
36989 return NULL_TREE;
36990 }
36991
36992 /* Make builtins to detect cpu type and features supported. NAME is
36993 the builtin name, CODE is the builtin code, FTYPE is its function
36994 type, and IS_CONST says whether the builtin can be marked TREE_READONLY. */
36995
36996 static void
36997 make_cpu_type_builtin (const char* name, int code,
36998 enum ix86_builtin_func_type ftype, bool is_const)
36999 {
37000 tree decl;
37001 tree type;
37002
37003 type = ix86_get_builtin_func_type (ftype);
37004 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
37005 NULL, NULL_TREE);
37006 gcc_assert (decl != NULL_TREE);
37007 ix86_builtins[(int) code] = decl;
37008 TREE_READONLY (decl) = is_const;
37009 }
37010
37011 /* Make builtins to get CPU type and features supported. The created
37012 builtins are:
37013
37014 __builtin_cpu_init (), to detect cpu type and features,
37015 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
37016 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
37017 */
37018
37019 static void
37020 ix86_init_platform_type_builtins (void)
37021 {
37022 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
37023 INT_FTYPE_VOID, false);
37024 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
37025 INT_FTYPE_PCCHAR, true);
37026 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
37027 INT_FTYPE_PCCHAR, true);
37028 }
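/* A minimal sketch of how user code exercises the builtins registered above
   (pick_impl is a hypothetical function, not part of GCC):

     int
     pick_impl (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_supports ("avx2"))
         return 2;
       if (__builtin_cpu_is ("intel"))
         return 1;
       return 0;
     }
*/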
37029
37030 /* Internal method for ix86_init_builtins. */
37031
37032 static void
37033 ix86_init_builtins_va_builtins_abi (void)
37034 {
37035 tree ms_va_ref, sysv_va_ref;
37036 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
37037 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
37038 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
37039 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
37040
37041 if (!TARGET_64BIT)
37042 return;
37043 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
37044 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
37045 ms_va_ref = build_reference_type (ms_va_list_type_node);
37046 sysv_va_ref =
37047 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
37048
37049 fnvoid_va_end_ms =
37050 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
37051 fnvoid_va_start_ms =
37052 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
37053 fnvoid_va_end_sysv =
37054 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
37055 fnvoid_va_start_sysv =
37056 build_varargs_function_type_list (void_type_node, sysv_va_ref,
37057 NULL_TREE);
37058 fnvoid_va_copy_ms =
37059 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
37060 NULL_TREE);
37061 fnvoid_va_copy_sysv =
37062 build_function_type_list (void_type_node, sysv_va_ref,
37063 sysv_va_ref, NULL_TREE);
37064
37065 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
37066 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
37067 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
37068 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
37069 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
37070 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
37071 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
37072 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
37073 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
37074 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
37075 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
37076 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
37077 }
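/* A hedged usage sketch for the ABI-specific varargs builtins registered
   above (msabi_sum is hypothetical; only meaningful when TARGET_64BIT):

     __attribute__ ((ms_abi)) int
     msabi_sum (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;
       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }
*/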
37078
37079 static void
37080 ix86_init_builtin_types (void)
37081 {
37082 tree float128_type_node, float80_type_node;
37083
37084 /* The __float80 type. */
37085 float80_type_node = long_double_type_node;
37086 if (TYPE_MODE (float80_type_node) != XFmode)
37087 {
37088 /* The __float80 type. */
37089 float80_type_node = make_node (REAL_TYPE);
37090
37091 TYPE_PRECISION (float80_type_node) = 80;
37092 layout_type (float80_type_node);
37093 }
37094 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
37095
37096 /* The __float128 type. */
37097 float128_type_node = make_node (REAL_TYPE);
37098 TYPE_PRECISION (float128_type_node) = 128;
37099 layout_type (float128_type_node);
37100 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
37101
37102 /* This macro is built by i386-builtin-types.awk. */
37103 DEFINE_BUILTIN_PRIMITIVE_TYPES;
37104 }
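/* Illustrative use of the types registered above; the 'w' and 'q' literal
   suffixes are the GNU extensions GCC accepts for them on x86 (sketch only):

     __float80  ext  = 1.0w;
     __float128 quad = 1.0q;
*/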
37105
37106 static void
37107 ix86_init_builtins (void)
37108 {
37109 tree t;
37110
37111 ix86_init_builtin_types ();
37112
37113 /* Builtins to get CPU type and features. */
37114 ix86_init_platform_type_builtins ();
37115
37116 /* TFmode support builtins. */
37117 def_builtin_const (0, "__builtin_infq",
37118 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
37119 def_builtin_const (0, "__builtin_huge_valq",
37120 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
37121
37122 /* We will expand them to a normal call if SSE isn't available, since
37123 they are used by libgcc. */
37124 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
37125 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
37126 BUILT_IN_MD, "__fabstf2", NULL_TREE);
37127 TREE_READONLY (t) = 1;
37128 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
37129
37130 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
37131 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
37132 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
37133 TREE_READONLY (t) = 1;
37134 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
37135
37136 ix86_init_tm_builtins ();
37137 ix86_init_mmx_sse_builtins ();
37138 ix86_init_mpx_builtins ();
37139
37140 if (TARGET_LP64)
37141 ix86_init_builtins_va_builtins_abi ();
37142
37143 #ifdef SUBTARGET_INIT_BUILTINS
37144 SUBTARGET_INIT_BUILTINS;
37145 #endif
37146 }
37147
37148 /* Return the ix86 builtin for CODE. */
37149
37150 static tree
37151 ix86_builtin_decl (unsigned code, bool)
37152 {
37153 if (code >= IX86_BUILTIN_MAX)
37154 return error_mark_node;
37155
37156 return ix86_builtins[code];
37157 }
37158
37159 /* Errors in the source file can cause expand_expr to return const0_rtx
37160 where we expect a vector. To avoid crashing, use one of the vector
37161 clear instructions. */
37162 static rtx
37163 safe_vector_operand (rtx x, machine_mode mode)
37164 {
37165 if (x == const0_rtx)
37166 x = CONST0_RTX (mode);
37167 return x;
37168 }
37169
37170 /* Fix up modeless constants to fit the required mode. */
37171 static rtx
37172 fixup_modeless_constant (rtx x, machine_mode mode)
37173 {
37174 if (GET_MODE (x) == VOIDmode)
37175 x = convert_to_mode (mode, x, 1);
37176 return x;
37177 }
37178
37179 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
37180
37181 static rtx
37182 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
37183 {
37184 rtx pat;
37185 tree arg0 = CALL_EXPR_ARG (exp, 0);
37186 tree arg1 = CALL_EXPR_ARG (exp, 1);
37187 rtx op0 = expand_normal (arg0);
37188 rtx op1 = expand_normal (arg1);
37189 machine_mode tmode = insn_data[icode].operand[0].mode;
37190 machine_mode mode0 = insn_data[icode].operand[1].mode;
37191 machine_mode mode1 = insn_data[icode].operand[2].mode;
37192
37193 if (VECTOR_MODE_P (mode0))
37194 op0 = safe_vector_operand (op0, mode0);
37195 if (VECTOR_MODE_P (mode1))
37196 op1 = safe_vector_operand (op1, mode1);
37197
37198 if (optimize || !target
37199 || GET_MODE (target) != tmode
37200 || !insn_data[icode].operand[0].predicate (target, tmode))
37201 target = gen_reg_rtx (tmode);
37202
37203 if (GET_MODE (op1) == SImode && mode1 == TImode)
37204 {
37205 rtx x = gen_reg_rtx (V4SImode);
37206 emit_insn (gen_sse2_loadd (x, op1));
37207 op1 = gen_lowpart (TImode, x);
37208 }
37209
37210 if (!insn_data[icode].operand[1].predicate (op0, mode0))
37211 op0 = copy_to_mode_reg (mode0, op0);
37212 if (!insn_data[icode].operand[2].predicate (op1, mode1))
37213 op1 = copy_to_mode_reg (mode1, op1);
37214
37215 pat = GEN_FCN (icode) (target, op0, op1);
37216 if (! pat)
37217 return 0;
37218
37219 emit_insn (pat);
37220
37221 return target;
37222 }
37223
37224 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
37225
37226 static rtx
37227 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
37228 enum ix86_builtin_func_type m_type,
37229 enum rtx_code sub_code)
37230 {
37231 rtx pat;
37232 int i;
37233 int nargs;
37234 bool comparison_p = false;
37235 bool tf_p = false;
37236 bool last_arg_constant = false;
37237 int num_memory = 0;
37238 struct {
37239 rtx op;
37240 machine_mode mode;
37241 } args[4];
37242
37243 machine_mode tmode = insn_data[icode].operand[0].mode;
37244
37245 switch (m_type)
37246 {
37247 case MULTI_ARG_4_DF2_DI_I:
37248 case MULTI_ARG_4_DF2_DI_I1:
37249 case MULTI_ARG_4_SF2_SI_I:
37250 case MULTI_ARG_4_SF2_SI_I1:
37251 nargs = 4;
37252 last_arg_constant = true;
37253 break;
37254
37255 case MULTI_ARG_3_SF:
37256 case MULTI_ARG_3_DF:
37257 case MULTI_ARG_3_SF2:
37258 case MULTI_ARG_3_DF2:
37259 case MULTI_ARG_3_DI:
37260 case MULTI_ARG_3_SI:
37261 case MULTI_ARG_3_SI_DI:
37262 case MULTI_ARG_3_HI:
37263 case MULTI_ARG_3_HI_SI:
37264 case MULTI_ARG_3_QI:
37265 case MULTI_ARG_3_DI2:
37266 case MULTI_ARG_3_SI2:
37267 case MULTI_ARG_3_HI2:
37268 case MULTI_ARG_3_QI2:
37269 nargs = 3;
37270 break;
37271
37272 case MULTI_ARG_2_SF:
37273 case MULTI_ARG_2_DF:
37274 case MULTI_ARG_2_DI:
37275 case MULTI_ARG_2_SI:
37276 case MULTI_ARG_2_HI:
37277 case MULTI_ARG_2_QI:
37278 nargs = 2;
37279 break;
37280
37281 case MULTI_ARG_2_DI_IMM:
37282 case MULTI_ARG_2_SI_IMM:
37283 case MULTI_ARG_2_HI_IMM:
37284 case MULTI_ARG_2_QI_IMM:
37285 nargs = 2;
37286 last_arg_constant = true;
37287 break;
37288
37289 case MULTI_ARG_1_SF:
37290 case MULTI_ARG_1_DF:
37291 case MULTI_ARG_1_SF2:
37292 case MULTI_ARG_1_DF2:
37293 case MULTI_ARG_1_DI:
37294 case MULTI_ARG_1_SI:
37295 case MULTI_ARG_1_HI:
37296 case MULTI_ARG_1_QI:
37297 case MULTI_ARG_1_SI_DI:
37298 case MULTI_ARG_1_HI_DI:
37299 case MULTI_ARG_1_HI_SI:
37300 case MULTI_ARG_1_QI_DI:
37301 case MULTI_ARG_1_QI_SI:
37302 case MULTI_ARG_1_QI_HI:
37303 nargs = 1;
37304 break;
37305
37306 case MULTI_ARG_2_DI_CMP:
37307 case MULTI_ARG_2_SI_CMP:
37308 case MULTI_ARG_2_HI_CMP:
37309 case MULTI_ARG_2_QI_CMP:
37310 nargs = 2;
37311 comparison_p = true;
37312 break;
37313
37314 case MULTI_ARG_2_SF_TF:
37315 case MULTI_ARG_2_DF_TF:
37316 case MULTI_ARG_2_DI_TF:
37317 case MULTI_ARG_2_SI_TF:
37318 case MULTI_ARG_2_HI_TF:
37319 case MULTI_ARG_2_QI_TF:
37320 nargs = 2;
37321 tf_p = true;
37322 break;
37323
37324 default:
37325 gcc_unreachable ();
37326 }
37327
37328 if (optimize || !target
37329 || GET_MODE (target) != tmode
37330 || !insn_data[icode].operand[0].predicate (target, tmode))
37331 target = gen_reg_rtx (tmode);
37332
37333 gcc_assert (nargs <= 4);
37334
37335 for (i = 0; i < nargs; i++)
37336 {
37337 tree arg = CALL_EXPR_ARG (exp, i);
37338 rtx op = expand_normal (arg);
37339 int adjust = (comparison_p) ? 1 : 0;
37340 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
37341
37342 if (last_arg_constant && i == nargs - 1)
37343 {
37344 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
37345 {
37346 enum insn_code new_icode = icode;
37347 switch (icode)
37348 {
37349 case CODE_FOR_xop_vpermil2v2df3:
37350 case CODE_FOR_xop_vpermil2v4sf3:
37351 case CODE_FOR_xop_vpermil2v4df3:
37352 case CODE_FOR_xop_vpermil2v8sf3:
37353 error ("the last argument must be a 2-bit immediate");
37354 return gen_reg_rtx (tmode);
37355 case CODE_FOR_xop_rotlv2di3:
37356 new_icode = CODE_FOR_rotlv2di3;
37357 goto xop_rotl;
37358 case CODE_FOR_xop_rotlv4si3:
37359 new_icode = CODE_FOR_rotlv4si3;
37360 goto xop_rotl;
37361 case CODE_FOR_xop_rotlv8hi3:
37362 new_icode = CODE_FOR_rotlv8hi3;
37363 goto xop_rotl;
37364 case CODE_FOR_xop_rotlv16qi3:
37365 new_icode = CODE_FOR_rotlv16qi3;
37366 xop_rotl:
37367 if (CONST_INT_P (op))
37368 {
37369 int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1;
37370 op = GEN_INT (INTVAL (op) & mask);
37371 gcc_checking_assert
37372 (insn_data[icode].operand[i + 1].predicate (op, mode));
37373 }
37374 else
37375 {
37376 gcc_checking_assert
37377 (nargs == 2
37378 && insn_data[new_icode].operand[0].mode == tmode
37379 && insn_data[new_icode].operand[1].mode == tmode
37380 && insn_data[new_icode].operand[2].mode == mode
37381 && insn_data[new_icode].operand[0].predicate
37382 == insn_data[icode].operand[0].predicate
37383 && insn_data[new_icode].operand[1].predicate
37384 == insn_data[icode].operand[1].predicate);
37385 icode = new_icode;
37386 goto non_constant;
37387 }
37388 break;
37389 default:
37390 gcc_unreachable ();
37391 }
37392 }
37393 }
37394 else
37395 {
37396 non_constant:
37397 if (VECTOR_MODE_P (mode))
37398 op = safe_vector_operand (op, mode);
37399
37400 /* If we aren't optimizing, only allow one memory operand to be
37401 generated. */
37402 if (memory_operand (op, mode))
37403 num_memory++;
37404
37405 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
37406
37407 if (optimize
37408 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
37409 || num_memory > 1)
37410 op = force_reg (mode, op);
37411 }
37412
37413 args[i].op = op;
37414 args[i].mode = mode;
37415 }
37416
37417 switch (nargs)
37418 {
37419 case 1:
37420 pat = GEN_FCN (icode) (target, args[0].op);
37421 break;
37422
37423 case 2:
37424 if (tf_p)
37425 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37426 GEN_INT ((int)sub_code));
37427 else if (! comparison_p)
37428 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37429 else
37430 {
37431 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
37432 args[0].op,
37433 args[1].op);
37434
37435 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
37436 }
37437 break;
37438
37439 case 3:
37440 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
37441 break;
37442
37443 case 4:
37444 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
37445 break;
37446
37447 default:
37448 gcc_unreachable ();
37449 }
37450
37451 if (! pat)
37452 return 0;
37453
37454 emit_insn (pat);
37455 return target;
37456 }
37457
37458 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
37459 insns with vec_merge. */
37460
37461 static rtx
37462 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
37463 rtx target)
37464 {
37465 rtx pat;
37466 tree arg0 = CALL_EXPR_ARG (exp, 0);
37467 rtx op1, op0 = expand_normal (arg0);
37468 machine_mode tmode = insn_data[icode].operand[0].mode;
37469 machine_mode mode0 = insn_data[icode].operand[1].mode;
37470
37471 if (optimize || !target
37472 || GET_MODE (target) != tmode
37473 || !insn_data[icode].operand[0].predicate (target, tmode))
37474 target = gen_reg_rtx (tmode);
37475
37476 if (VECTOR_MODE_P (mode0))
37477 op0 = safe_vector_operand (op0, mode0);
37478
37479 if ((optimize && !register_operand (op0, mode0))
37480 || !insn_data[icode].operand[1].predicate (op0, mode0))
37481 op0 = copy_to_mode_reg (mode0, op0);
37482
37483 op1 = op0;
37484 if (!insn_data[icode].operand[2].predicate (op1, mode0))
37485 op1 = copy_to_mode_reg (mode0, op1);
37486
37487 pat = GEN_FCN (icode) (target, op0, op1);
37488 if (! pat)
37489 return 0;
37490 emit_insn (pat);
37491 return target;
37492 }
37493
37494 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
37495
37496 static rtx
37497 ix86_expand_sse_compare (const struct builtin_description *d,
37498 tree exp, rtx target, bool swap)
37499 {
37500 rtx pat;
37501 tree arg0 = CALL_EXPR_ARG (exp, 0);
37502 tree arg1 = CALL_EXPR_ARG (exp, 1);
37503 rtx op0 = expand_normal (arg0);
37504 rtx op1 = expand_normal (arg1);
37505 rtx op2;
37506 machine_mode tmode = insn_data[d->icode].operand[0].mode;
37507 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
37508 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
37509 enum rtx_code comparison = d->comparison;
37510
37511 if (VECTOR_MODE_P (mode0))
37512 op0 = safe_vector_operand (op0, mode0);
37513 if (VECTOR_MODE_P (mode1))
37514 op1 = safe_vector_operand (op1, mode1);
37515
37516 /* Swap operands if we have a comparison that isn't available in
37517 hardware. */
37518 if (swap)
37519 std::swap (op0, op1);
37520
37521 if (optimize || !target
37522 || GET_MODE (target) != tmode
37523 || !insn_data[d->icode].operand[0].predicate (target, tmode))
37524 target = gen_reg_rtx (tmode);
37525
37526 if ((optimize && !register_operand (op0, mode0))
37527 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
37528 op0 = copy_to_mode_reg (mode0, op0);
37529 if ((optimize && !register_operand (op1, mode1))
37530 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
37531 op1 = copy_to_mode_reg (mode1, op1);
37532
37533 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
37534 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
37535 if (! pat)
37536 return 0;
37537 emit_insn (pat);
37538 return target;
37539 }
37540
37541 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
37542
37543 static rtx
37544 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
37545 rtx target)
37546 {
37547 rtx pat;
37548 tree arg0 = CALL_EXPR_ARG (exp, 0);
37549 tree arg1 = CALL_EXPR_ARG (exp, 1);
37550 rtx op0 = expand_normal (arg0);
37551 rtx op1 = expand_normal (arg1);
37552 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
37553 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
37554 enum rtx_code comparison = d->comparison;
37555
37556 if (VECTOR_MODE_P (mode0))
37557 op0 = safe_vector_operand (op0, mode0);
37558 if (VECTOR_MODE_P (mode1))
37559 op1 = safe_vector_operand (op1, mode1);
37560
37561 /* Swap operands if we have a comparison that isn't available in
37562 hardware. */
37563 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
37564 std::swap (op0, op1);
37565
37566 target = gen_reg_rtx (SImode);
37567 emit_move_insn (target, const0_rtx);
37568 target = gen_rtx_SUBREG (QImode, target, 0);
37569
37570 if ((optimize && !register_operand (op0, mode0))
37571 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
37572 op0 = copy_to_mode_reg (mode0, op0);
37573 if ((optimize && !register_operand (op1, mode1))
37574 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
37575 op1 = copy_to_mode_reg (mode1, op1);
37576
37577 pat = GEN_FCN (d->icode) (op0, op1);
37578 if (! pat)
37579 return 0;
37580 emit_insn (pat);
37581 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37582 gen_rtx_fmt_ee (comparison, QImode,
37583 SET_DEST (pat),
37584 const0_rtx)));
37585
37586 return SUBREG_REG (target);
37587 }
37588
37589 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
37590
37591 static rtx
37592 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
37593 rtx target)
37594 {
37595 rtx pat;
37596 tree arg0 = CALL_EXPR_ARG (exp, 0);
37597 rtx op1, op0 = expand_normal (arg0);
37598 machine_mode tmode = insn_data[d->icode].operand[0].mode;
37599 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
37600
37601 if (optimize || target == 0
37602 || GET_MODE (target) != tmode
37603 || !insn_data[d->icode].operand[0].predicate (target, tmode))
37604 target = gen_reg_rtx (tmode);
37605
37606 if (VECTOR_MODE_P (mode0))
37607 op0 = safe_vector_operand (op0, mode0);
37608
37609 if ((optimize && !register_operand (op0, mode0))
37610 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
37611 op0 = copy_to_mode_reg (mode0, op0);
37612
37613 op1 = GEN_INT (d->comparison);
37614
37615 pat = GEN_FCN (d->icode) (target, op0, op1);
37616 if (! pat)
37617 return 0;
37618 emit_insn (pat);
37619 return target;
37620 }
37621
37622 static rtx
37623 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
37624 tree exp, rtx target)
37625 {
37626 rtx pat;
37627 tree arg0 = CALL_EXPR_ARG (exp, 0);
37628 tree arg1 = CALL_EXPR_ARG (exp, 1);
37629 rtx op0 = expand_normal (arg0);
37630 rtx op1 = expand_normal (arg1);
37631 rtx op2;
37632 machine_mode tmode = insn_data[d->icode].operand[0].mode;
37633 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
37634 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
37635
37636 if (optimize || target == 0
37637 || GET_MODE (target) != tmode
37638 || !insn_data[d->icode].operand[0].predicate (target, tmode))
37639 target = gen_reg_rtx (tmode);
37640
37641 op0 = safe_vector_operand (op0, mode0);
37642 op1 = safe_vector_operand (op1, mode1);
37643
37644 if ((optimize && !register_operand (op0, mode0))
37645 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
37646 op0 = copy_to_mode_reg (mode0, op0);
37647 if ((optimize && !register_operand (op1, mode1))
37648 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
37649 op1 = copy_to_mode_reg (mode1, op1);
37650
37651 op2 = GEN_INT (d->comparison);
37652
37653 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
37654 if (! pat)
37655 return 0;
37656 emit_insn (pat);
37657 return target;
37658 }
37659
37660 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
37661
37662 static rtx
37663 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
37664 rtx target)
37665 {
37666 rtx pat;
37667 tree arg0 = CALL_EXPR_ARG (exp, 0);
37668 tree arg1 = CALL_EXPR_ARG (exp, 1);
37669 rtx op0 = expand_normal (arg0);
37670 rtx op1 = expand_normal (arg1);
37671 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
37672 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
37673 enum rtx_code comparison = d->comparison;
37674
37675 if (VECTOR_MODE_P (mode0))
37676 op0 = safe_vector_operand (op0, mode0);
37677 if (VECTOR_MODE_P (mode1))
37678 op1 = safe_vector_operand (op1, mode1);
37679
37680 target = gen_reg_rtx (SImode);
37681 emit_move_insn (target, const0_rtx);
37682 target = gen_rtx_SUBREG (QImode, target, 0);
37683
37684 if ((optimize && !register_operand (op0, mode0))
37685 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
37686 op0 = copy_to_mode_reg (mode0, op0);
37687 if ((optimize && !register_operand (op1, mode1))
37688 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
37689 op1 = copy_to_mode_reg (mode1, op1);
37690
37691 pat = GEN_FCN (d->icode) (op0, op1);
37692 if (! pat)
37693 return 0;
37694 emit_insn (pat);
37695 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37696 gen_rtx_fmt_ee (comparison, QImode,
37697 SET_DEST (pat),
37698 const0_rtx)));
37699
37700 return SUBREG_REG (target);
37701 }
37702
37703 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
37704
37705 static rtx
37706 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
37707 tree exp, rtx target)
37708 {
37709 rtx pat;
37710 tree arg0 = CALL_EXPR_ARG (exp, 0);
37711 tree arg1 = CALL_EXPR_ARG (exp, 1);
37712 tree arg2 = CALL_EXPR_ARG (exp, 2);
37713 tree arg3 = CALL_EXPR_ARG (exp, 3);
37714 tree arg4 = CALL_EXPR_ARG (exp, 4);
37715 rtx scratch0, scratch1;
37716 rtx op0 = expand_normal (arg0);
37717 rtx op1 = expand_normal (arg1);
37718 rtx op2 = expand_normal (arg2);
37719 rtx op3 = expand_normal (arg3);
37720 rtx op4 = expand_normal (arg4);
37721 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
37722
37723 tmode0 = insn_data[d->icode].operand[0].mode;
37724 tmode1 = insn_data[d->icode].operand[1].mode;
37725 modev2 = insn_data[d->icode].operand[2].mode;
37726 modei3 = insn_data[d->icode].operand[3].mode;
37727 modev4 = insn_data[d->icode].operand[4].mode;
37728 modei5 = insn_data[d->icode].operand[5].mode;
37729 modeimm = insn_data[d->icode].operand[6].mode;
37730
37731 if (VECTOR_MODE_P (modev2))
37732 op0 = safe_vector_operand (op0, modev2);
37733 if (VECTOR_MODE_P (modev4))
37734 op2 = safe_vector_operand (op2, modev4);
37735
37736 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
37737 op0 = copy_to_mode_reg (modev2, op0);
37738 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
37739 op1 = copy_to_mode_reg (modei3, op1);
37740 if ((optimize && !register_operand (op2, modev4))
37741 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
37742 op2 = copy_to_mode_reg (modev4, op2);
37743 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
37744 op3 = copy_to_mode_reg (modei5, op3);
37745
37746 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
37747 {
37748 error ("the fifth argument must be an 8-bit immediate");
37749 return const0_rtx;
37750 }
37751
37752 if (d->code == IX86_BUILTIN_PCMPESTRI128)
37753 {
37754 if (optimize || !target
37755 || GET_MODE (target) != tmode0
37756 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
37757 target = gen_reg_rtx (tmode0);
37758
37759 scratch1 = gen_reg_rtx (tmode1);
37760
37761 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
37762 }
37763 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
37764 {
37765 if (optimize || !target
37766 || GET_MODE (target) != tmode1
37767 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
37768 target = gen_reg_rtx (tmode1);
37769
37770 scratch0 = gen_reg_rtx (tmode0);
37771
37772 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
37773 }
37774 else
37775 {
37776 gcc_assert (d->flag);
37777
37778 scratch0 = gen_reg_rtx (tmode0);
37779 scratch1 = gen_reg_rtx (tmode1);
37780
37781 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
37782 }
37783
37784 if (! pat)
37785 return 0;
37786
37787 emit_insn (pat);
37788
37789 if (d->flag)
37790 {
37791 target = gen_reg_rtx (SImode);
37792 emit_move_insn (target, const0_rtx);
37793 target = gen_rtx_SUBREG (QImode, target, 0);
37794
37795 emit_insn
37796 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37797 gen_rtx_fmt_ee (EQ, QImode,
37798 gen_rtx_REG ((machine_mode) d->flag,
37799 FLAGS_REG),
37800 const0_rtx)));
37801 return SUBREG_REG (target);
37802 }
37803 else
37804 return target;
37805 }
37806
37807
37808 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
37809
37810 static rtx
37811 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
37812 tree exp, rtx target)
37813 {
37814 rtx pat;
37815 tree arg0 = CALL_EXPR_ARG (exp, 0);
37816 tree arg1 = CALL_EXPR_ARG (exp, 1);
37817 tree arg2 = CALL_EXPR_ARG (exp, 2);
37818 rtx scratch0, scratch1;
37819 rtx op0 = expand_normal (arg0);
37820 rtx op1 = expand_normal (arg1);
37821 rtx op2 = expand_normal (arg2);
37822 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
37823
37824 tmode0 = insn_data[d->icode].operand[0].mode;
37825 tmode1 = insn_data[d->icode].operand[1].mode;
37826 modev2 = insn_data[d->icode].operand[2].mode;
37827 modev3 = insn_data[d->icode].operand[3].mode;
37828 modeimm = insn_data[d->icode].operand[4].mode;
37829
37830 if (VECTOR_MODE_P (modev2))
37831 op0 = safe_vector_operand (op0, modev2);
37832 if (VECTOR_MODE_P (modev3))
37833 op1 = safe_vector_operand (op1, modev3);
37834
37835 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
37836 op0 = copy_to_mode_reg (modev2, op0);
37837 if ((optimize && !register_operand (op1, modev3))
37838 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
37839 op1 = copy_to_mode_reg (modev3, op1);
37840
37841 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
37842 {
37843 error ("the third argument must be an 8-bit immediate");
37844 return const0_rtx;
37845 }
37846
37847 if (d->code == IX86_BUILTIN_PCMPISTRI128)
37848 {
37849 if (optimize || !target
37850 || GET_MODE (target) != tmode0
37851 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
37852 target = gen_reg_rtx (tmode0);
37853
37854 scratch1 = gen_reg_rtx (tmode1);
37855
37856 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
37857 }
37858 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
37859 {
37860 if (optimize || !target
37861 || GET_MODE (target) != tmode1
37862 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
37863 target = gen_reg_rtx (tmode1);
37864
37865 scratch0 = gen_reg_rtx (tmode0);
37866
37867 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
37868 }
37869 else
37870 {
37871 gcc_assert (d->flag);
37872
37873 scratch0 = gen_reg_rtx (tmode0);
37874 scratch1 = gen_reg_rtx (tmode1);
37875
37876 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
37877 }
37878
37879 if (! pat)
37880 return 0;
37881
37882 emit_insn (pat);
37883
37884 if (d->flag)
37885 {
37886 target = gen_reg_rtx (SImode);
37887 emit_move_insn (target, const0_rtx);
37888 target = gen_rtx_SUBREG (QImode, target, 0);
37889
37890 emit_insn
37891 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37892 gen_rtx_fmt_ee (EQ, QImode,
37893 gen_rtx_REG ((machine_mode) d->flag,
37894 FLAGS_REG),
37895 const0_rtx)));
37896 return SUBREG_REG (target);
37897 }
37898 else
37899 return target;
37900 }
37901
37902 /* Subroutine of ix86_expand_builtin to take care of insns with
37903 variable number of operands. */
37904
37905 static rtx
37906 ix86_expand_args_builtin (const struct builtin_description *d,
37907 tree exp, rtx target)
37908 {
37909 rtx pat, real_target;
37910 unsigned int i, nargs;
37911 unsigned int nargs_constant = 0;
37912 unsigned int mask_pos = 0;
37913 int num_memory = 0;
37914 struct
37915 {
37916 rtx op;
37917 machine_mode mode;
37918 } args[6];
37919 bool last_arg_count = false;
37920 enum insn_code icode = d->icode;
37921 const struct insn_data_d *insn_p = &insn_data[icode];
37922 machine_mode tmode = insn_p->operand[0].mode;
37923 machine_mode rmode = VOIDmode;
37924 bool swap = false;
37925 enum rtx_code comparison = d->comparison;
37926
37927 switch ((enum ix86_builtin_func_type) d->flag)
37928 {
37929 case V2DF_FTYPE_V2DF_ROUND:
37930 case V4DF_FTYPE_V4DF_ROUND:
37931 case V4SF_FTYPE_V4SF_ROUND:
37932 case V8SF_FTYPE_V8SF_ROUND:
37933 case V4SI_FTYPE_V4SF_ROUND:
37934 case V8SI_FTYPE_V8SF_ROUND:
37935 return ix86_expand_sse_round (d, exp, target);
37936 case V4SI_FTYPE_V2DF_V2DF_ROUND:
37937 case V8SI_FTYPE_V4DF_V4DF_ROUND:
37938 case V16SI_FTYPE_V8DF_V8DF_ROUND:
37939 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
37940 case INT_FTYPE_V8SF_V8SF_PTEST:
37941 case INT_FTYPE_V4DI_V4DI_PTEST:
37942 case INT_FTYPE_V4DF_V4DF_PTEST:
37943 case INT_FTYPE_V4SF_V4SF_PTEST:
37944 case INT_FTYPE_V2DI_V2DI_PTEST:
37945 case INT_FTYPE_V2DF_V2DF_PTEST:
37946 return ix86_expand_sse_ptest (d, exp, target);
37947 case FLOAT128_FTYPE_FLOAT128:
37948 case FLOAT_FTYPE_FLOAT:
37949 case INT_FTYPE_INT:
37950 case UINT64_FTYPE_INT:
37951 case UINT16_FTYPE_UINT16:
37952 case INT64_FTYPE_INT64:
37953 case INT64_FTYPE_V4SF:
37954 case INT64_FTYPE_V2DF:
37955 case INT_FTYPE_V16QI:
37956 case INT_FTYPE_V8QI:
37957 case INT_FTYPE_V8SF:
37958 case INT_FTYPE_V4DF:
37959 case INT_FTYPE_V4SF:
37960 case INT_FTYPE_V2DF:
37961 case INT_FTYPE_V32QI:
37962 case V16QI_FTYPE_V16QI:
37963 case V8SI_FTYPE_V8SF:
37964 case V8SI_FTYPE_V4SI:
37965 case V8HI_FTYPE_V8HI:
37966 case V8HI_FTYPE_V16QI:
37967 case V8QI_FTYPE_V8QI:
37968 case V8SF_FTYPE_V8SF:
37969 case V8SF_FTYPE_V8SI:
37970 case V8SF_FTYPE_V4SF:
37971 case V8SF_FTYPE_V8HI:
37972 case V4SI_FTYPE_V4SI:
37973 case V4SI_FTYPE_V16QI:
37974 case V4SI_FTYPE_V4SF:
37975 case V4SI_FTYPE_V8SI:
37976 case V4SI_FTYPE_V8HI:
37977 case V4SI_FTYPE_V4DF:
37978 case V4SI_FTYPE_V2DF:
37979 case V4HI_FTYPE_V4HI:
37980 case V4DF_FTYPE_V4DF:
37981 case V4DF_FTYPE_V4SI:
37982 case V4DF_FTYPE_V4SF:
37983 case V4DF_FTYPE_V2DF:
37984 case V4SF_FTYPE_V4SF:
37985 case V4SF_FTYPE_V4SI:
37986 case V4SF_FTYPE_V8SF:
37987 case V4SF_FTYPE_V4DF:
37988 case V4SF_FTYPE_V8HI:
37989 case V4SF_FTYPE_V2DF:
37990 case V2DI_FTYPE_V2DI:
37991 case V2DI_FTYPE_V16QI:
37992 case V2DI_FTYPE_V8HI:
37993 case V2DI_FTYPE_V4SI:
37994 case V2DF_FTYPE_V2DF:
37995 case V2DF_FTYPE_V4SI:
37996 case V2DF_FTYPE_V4DF:
37997 case V2DF_FTYPE_V4SF:
37998 case V2DF_FTYPE_V2SI:
37999 case V2SI_FTYPE_V2SI:
38000 case V2SI_FTYPE_V4SF:
38001 case V2SI_FTYPE_V2SF:
38002 case V2SI_FTYPE_V2DF:
38003 case V2SF_FTYPE_V2SF:
38004 case V2SF_FTYPE_V2SI:
38005 case V32QI_FTYPE_V32QI:
38006 case V32QI_FTYPE_V16QI:
38007 case V16HI_FTYPE_V16HI:
38008 case V16HI_FTYPE_V8HI:
38009 case V8SI_FTYPE_V8SI:
38010 case V16HI_FTYPE_V16QI:
38011 case V8SI_FTYPE_V16QI:
38012 case V4DI_FTYPE_V16QI:
38013 case V8SI_FTYPE_V8HI:
38014 case V4DI_FTYPE_V8HI:
38015 case V4DI_FTYPE_V4SI:
38016 case V4DI_FTYPE_V2DI:
38017 case UHI_FTYPE_UHI:
38018 case UHI_FTYPE_V16QI:
38019 case USI_FTYPE_V32QI:
38020 case UDI_FTYPE_V64QI:
38021 case V16QI_FTYPE_UHI:
38022 case V32QI_FTYPE_USI:
38023 case V64QI_FTYPE_UDI:
38024 case V8HI_FTYPE_UQI:
38025 case V16HI_FTYPE_UHI:
38026 case V32HI_FTYPE_USI:
38027 case V4SI_FTYPE_UQI:
38028 case V8SI_FTYPE_UQI:
38029 case V4SI_FTYPE_UHI:
38030 case V8SI_FTYPE_UHI:
38031 case UQI_FTYPE_V8HI:
38032 case UHI_FTYPE_V16HI:
38033 case USI_FTYPE_V32HI:
38034 case UQI_FTYPE_V4SI:
38035 case UQI_FTYPE_V8SI:
38036 case UHI_FTYPE_V16SI:
38037 case UQI_FTYPE_V2DI:
38038 case UQI_FTYPE_V4DI:
38039 case UQI_FTYPE_V8DI:
38040 case V16SI_FTYPE_UHI:
38041 case V2DI_FTYPE_UQI:
38042 case V4DI_FTYPE_UQI:
38043 case V16SI_FTYPE_INT:
38044 case V16SF_FTYPE_V8SF:
38045 case V16SI_FTYPE_V8SI:
38046 case V16SF_FTYPE_V4SF:
38047 case V16SI_FTYPE_V4SI:
38048 case V16SF_FTYPE_V16SF:
38049 case V8DI_FTYPE_UQI:
38050 case V8DF_FTYPE_V4DF:
38051 case V8DF_FTYPE_V2DF:
38052 case V8DF_FTYPE_V8DF:
38053 nargs = 1;
38054 break;
38055 case V4SF_FTYPE_V4SF_VEC_MERGE:
38056 case V2DF_FTYPE_V2DF_VEC_MERGE:
38057 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
38058 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
38059 case V16QI_FTYPE_V16QI_V16QI:
38060 case V16QI_FTYPE_V8HI_V8HI:
38061 case V16SF_FTYPE_V16SF_V16SF:
38062 case V8QI_FTYPE_V8QI_V8QI:
38063 case V8QI_FTYPE_V4HI_V4HI:
38064 case V8HI_FTYPE_V8HI_V8HI:
38065 case V8HI_FTYPE_V16QI_V16QI:
38066 case V8HI_FTYPE_V4SI_V4SI:
38067 case V8SF_FTYPE_V8SF_V8SF:
38068 case V8SF_FTYPE_V8SF_V8SI:
38069 case V8DF_FTYPE_V8DF_V8DF:
38070 case V4SI_FTYPE_V4SI_V4SI:
38071 case V4SI_FTYPE_V8HI_V8HI:
38072 case V4SI_FTYPE_V2DF_V2DF:
38073 case V4HI_FTYPE_V4HI_V4HI:
38074 case V4HI_FTYPE_V8QI_V8QI:
38075 case V4HI_FTYPE_V2SI_V2SI:
38076 case V4DF_FTYPE_V4DF_V4DF:
38077 case V4DF_FTYPE_V4DF_V4DI:
38078 case V4SF_FTYPE_V4SF_V4SF:
38079 case V4SF_FTYPE_V4SF_V4SI:
38080 case V4SF_FTYPE_V4SF_V2SI:
38081 case V4SF_FTYPE_V4SF_V2DF:
38082 case V4SF_FTYPE_V4SF_UINT:
38083 case V4SF_FTYPE_V4SF_DI:
38084 case V4SF_FTYPE_V4SF_SI:
38085 case V2DI_FTYPE_V2DI_V2DI:
38086 case V2DI_FTYPE_V16QI_V16QI:
38087 case V2DI_FTYPE_V4SI_V4SI:
38088 case V2DI_FTYPE_V2DI_V16QI:
38089 case V2SI_FTYPE_V2SI_V2SI:
38090 case V2SI_FTYPE_V4HI_V4HI:
38091 case V2SI_FTYPE_V2SF_V2SF:
38092 case V2DF_FTYPE_V2DF_V2DF:
38093 case V2DF_FTYPE_V2DF_V4SF:
38094 case V2DF_FTYPE_V2DF_V2DI:
38095 case V2DF_FTYPE_V2DF_DI:
38096 case V2DF_FTYPE_V2DF_SI:
38097 case V2DF_FTYPE_V2DF_UINT:
38098 case V2SF_FTYPE_V2SF_V2SF:
38099 case V1DI_FTYPE_V1DI_V1DI:
38100 case V1DI_FTYPE_V8QI_V8QI:
38101 case V1DI_FTYPE_V2SI_V2SI:
38102 case V32QI_FTYPE_V16HI_V16HI:
38103 case V16HI_FTYPE_V8SI_V8SI:
38104 case V32QI_FTYPE_V32QI_V32QI:
38105 case V16HI_FTYPE_V32QI_V32QI:
38106 case V16HI_FTYPE_V16HI_V16HI:
38107 case V8SI_FTYPE_V4DF_V4DF:
38108 case V8SI_FTYPE_V8SI_V8SI:
38109 case V8SI_FTYPE_V16HI_V16HI:
38110 case V4DI_FTYPE_V4DI_V4DI:
38111 case V4DI_FTYPE_V8SI_V8SI:
38112 case V8DI_FTYPE_V64QI_V64QI:
38113 if (comparison == UNKNOWN)
38114 return ix86_expand_binop_builtin (icode, exp, target);
38115 nargs = 2;
38116 break;
38117 case V4SF_FTYPE_V4SF_V4SF_SWAP:
38118 case V2DF_FTYPE_V2DF_V2DF_SWAP:
38119 gcc_assert (comparison != UNKNOWN);
38120 nargs = 2;
38121 swap = true;
38122 break;
38123 case V16HI_FTYPE_V16HI_V8HI_COUNT:
38124 case V16HI_FTYPE_V16HI_SI_COUNT:
38125 case V8SI_FTYPE_V8SI_V4SI_COUNT:
38126 case V8SI_FTYPE_V8SI_SI_COUNT:
38127 case V4DI_FTYPE_V4DI_V2DI_COUNT:
38128 case V4DI_FTYPE_V4DI_INT_COUNT:
38129 case V8HI_FTYPE_V8HI_V8HI_COUNT:
38130 case V8HI_FTYPE_V8HI_SI_COUNT:
38131 case V4SI_FTYPE_V4SI_V4SI_COUNT:
38132 case V4SI_FTYPE_V4SI_SI_COUNT:
38133 case V4HI_FTYPE_V4HI_V4HI_COUNT:
38134 case V4HI_FTYPE_V4HI_SI_COUNT:
38135 case V2DI_FTYPE_V2DI_V2DI_COUNT:
38136 case V2DI_FTYPE_V2DI_SI_COUNT:
38137 case V2SI_FTYPE_V2SI_V2SI_COUNT:
38138 case V2SI_FTYPE_V2SI_SI_COUNT:
38139 case V1DI_FTYPE_V1DI_V1DI_COUNT:
38140 case V1DI_FTYPE_V1DI_SI_COUNT:
38141 nargs = 2;
38142 last_arg_count = true;
38143 break;
38144 case UINT64_FTYPE_UINT64_UINT64:
38145 case UINT_FTYPE_UINT_UINT:
38146 case UINT_FTYPE_UINT_USHORT:
38147 case UINT_FTYPE_UINT_UCHAR:
38148 case UINT16_FTYPE_UINT16_INT:
38149 case UINT8_FTYPE_UINT8_INT:
38150 case UHI_FTYPE_UHI_UHI:
38151 case USI_FTYPE_USI_USI:
38152 case UDI_FTYPE_UDI_UDI:
38153 case V16SI_FTYPE_V8DF_V8DF:
38154 nargs = 2;
38155 break;
38156 case V2DI_FTYPE_V2DI_INT_CONVERT:
38157 nargs = 2;
38158 rmode = V1TImode;
38159 nargs_constant = 1;
38160 break;
38161 case V4DI_FTYPE_V4DI_INT_CONVERT:
38162 nargs = 2;
38163 rmode = V2TImode;
38164 nargs_constant = 1;
38165 break;
38166 case V8DI_FTYPE_V8DI_INT_CONVERT:
38167 nargs = 2;
38168 rmode = V4TImode;
38169 nargs_constant = 1;
38170 break;
38171 case V8HI_FTYPE_V8HI_INT:
38172 case V8HI_FTYPE_V8SF_INT:
38173 case V16HI_FTYPE_V16SF_INT:
38174 case V8HI_FTYPE_V4SF_INT:
38175 case V8SF_FTYPE_V8SF_INT:
38176 case V4SF_FTYPE_V16SF_INT:
38177 case V16SF_FTYPE_V16SF_INT:
38178 case V4SI_FTYPE_V4SI_INT:
38179 case V4SI_FTYPE_V8SI_INT:
38180 case V4HI_FTYPE_V4HI_INT:
38181 case V4DF_FTYPE_V4DF_INT:
38182 case V4DF_FTYPE_V8DF_INT:
38183 case V4SF_FTYPE_V4SF_INT:
38184 case V4SF_FTYPE_V8SF_INT:
38185 case V2DI_FTYPE_V2DI_INT:
38186 case V2DF_FTYPE_V2DF_INT:
38187 case V2DF_FTYPE_V4DF_INT:
38188 case V16HI_FTYPE_V16HI_INT:
38189 case V8SI_FTYPE_V8SI_INT:
38190 case V16SI_FTYPE_V16SI_INT:
38191 case V4SI_FTYPE_V16SI_INT:
38192 case V4DI_FTYPE_V4DI_INT:
38193 case V2DI_FTYPE_V4DI_INT:
38194 case V4DI_FTYPE_V8DI_INT:
38195 case QI_FTYPE_V4SF_INT:
38196 case QI_FTYPE_V2DF_INT:
38197 nargs = 2;
38198 nargs_constant = 1;
38199 break;
38200 case V16QI_FTYPE_V16QI_V16QI_V16QI:
38201 case V8SF_FTYPE_V8SF_V8SF_V8SF:
38202 case V4DF_FTYPE_V4DF_V4DF_V4DF:
38203 case V4SF_FTYPE_V4SF_V4SF_V4SF:
38204 case V2DF_FTYPE_V2DF_V2DF_V2DF:
38205 case V32QI_FTYPE_V32QI_V32QI_V32QI:
38206 case UHI_FTYPE_V16SI_V16SI_UHI:
38207 case UQI_FTYPE_V8DI_V8DI_UQI:
38208 case V16HI_FTYPE_V16SI_V16HI_UHI:
38209 case V16QI_FTYPE_V16SI_V16QI_UHI:
38210 case V16QI_FTYPE_V8DI_V16QI_UQI:
38211 case V16SF_FTYPE_V16SF_V16SF_UHI:
38212 case V16SF_FTYPE_V4SF_V16SF_UHI:
38213 case V16SI_FTYPE_SI_V16SI_UHI:
38214 case V16SI_FTYPE_V16HI_V16SI_UHI:
38215 case V16SI_FTYPE_V16QI_V16SI_UHI:
38216 case V8SF_FTYPE_V4SF_V8SF_UQI:
38217 case V4DF_FTYPE_V2DF_V4DF_UQI:
38218 case V8SI_FTYPE_V4SI_V8SI_UQI:
38219 case V8SI_FTYPE_SI_V8SI_UQI:
38220 case V4SI_FTYPE_V4SI_V4SI_UQI:
38221 case V4SI_FTYPE_SI_V4SI_UQI:
38222 case V4DI_FTYPE_V2DI_V4DI_UQI:
38223 case V4DI_FTYPE_DI_V4DI_UQI:
38224 case V2DI_FTYPE_V2DI_V2DI_UQI:
38225 case V2DI_FTYPE_DI_V2DI_UQI:
38226 case V64QI_FTYPE_V64QI_V64QI_UDI:
38227 case V64QI_FTYPE_V16QI_V64QI_UDI:
38228 case V64QI_FTYPE_QI_V64QI_UDI:
38229 case V32QI_FTYPE_V32QI_V32QI_USI:
38230 case V32QI_FTYPE_V16QI_V32QI_USI:
38231 case V32QI_FTYPE_QI_V32QI_USI:
38232 case V16QI_FTYPE_V16QI_V16QI_UHI:
38233 case V16QI_FTYPE_QI_V16QI_UHI:
38234 case V32HI_FTYPE_V8HI_V32HI_USI:
38235 case V32HI_FTYPE_HI_V32HI_USI:
38236 case V16HI_FTYPE_V8HI_V16HI_UHI:
38237 case V16HI_FTYPE_HI_V16HI_UHI:
38238 case V8HI_FTYPE_V8HI_V8HI_UQI:
38239 case V8HI_FTYPE_HI_V8HI_UQI:
38240 case V8SF_FTYPE_V8HI_V8SF_UQI:
38241 case V4SF_FTYPE_V8HI_V4SF_UQI:
38242 case V8SI_FTYPE_V8SF_V8SI_UQI:
38243 case V4SI_FTYPE_V4SF_V4SI_UQI:
38244 case V4DI_FTYPE_V4SF_V4DI_UQI:
38245 case V2DI_FTYPE_V4SF_V2DI_UQI:
38246 case V4SF_FTYPE_V4DI_V4SF_UQI:
38247 case V4SF_FTYPE_V2DI_V4SF_UQI:
38248 case V4DF_FTYPE_V4DI_V4DF_UQI:
38249 case V2DF_FTYPE_V2DI_V2DF_UQI:
38250 case V16QI_FTYPE_V8HI_V16QI_UQI:
38251 case V16QI_FTYPE_V16HI_V16QI_UHI:
38252 case V16QI_FTYPE_V4SI_V16QI_UQI:
38253 case V16QI_FTYPE_V8SI_V16QI_UQI:
38254 case V8HI_FTYPE_V4SI_V8HI_UQI:
38255 case V8HI_FTYPE_V8SI_V8HI_UQI:
38256 case V16QI_FTYPE_V2DI_V16QI_UQI:
38257 case V16QI_FTYPE_V4DI_V16QI_UQI:
38258 case V8HI_FTYPE_V2DI_V8HI_UQI:
38259 case V8HI_FTYPE_V4DI_V8HI_UQI:
38260 case V4SI_FTYPE_V2DI_V4SI_UQI:
38261 case V4SI_FTYPE_V4DI_V4SI_UQI:
38262 case V32QI_FTYPE_V32HI_V32QI_USI:
38263 case UHI_FTYPE_V16QI_V16QI_UHI:
38264 case USI_FTYPE_V32QI_V32QI_USI:
38265 case UDI_FTYPE_V64QI_V64QI_UDI:
38266 case UQI_FTYPE_V8HI_V8HI_UQI:
38267 case UHI_FTYPE_V16HI_V16HI_UHI:
38268 case USI_FTYPE_V32HI_V32HI_USI:
38269 case UQI_FTYPE_V4SI_V4SI_UQI:
38270 case UQI_FTYPE_V8SI_V8SI_UQI:
38271 case UQI_FTYPE_V2DI_V2DI_UQI:
38272 case UQI_FTYPE_V4DI_V4DI_UQI:
38273 case V4SF_FTYPE_V2DF_V4SF_UQI:
38274 case V4SF_FTYPE_V4DF_V4SF_UQI:
38275 case V16SI_FTYPE_V16SI_V16SI_UHI:
38276 case V16SI_FTYPE_V4SI_V16SI_UHI:
38277 case V2DI_FTYPE_V4SI_V2DI_UQI:
38278 case V2DI_FTYPE_V8HI_V2DI_UQI:
38279 case V2DI_FTYPE_V16QI_V2DI_UQI:
38280 case V4DI_FTYPE_V4DI_V4DI_UQI:
38281 case V4DI_FTYPE_V4SI_V4DI_UQI:
38282 case V4DI_FTYPE_V8HI_V4DI_UQI:
38283 case V4DI_FTYPE_V16QI_V4DI_UQI:
38284 case V4DI_FTYPE_V4DF_V4DI_UQI:
38285 case V2DI_FTYPE_V2DF_V2DI_UQI:
38286 case V4SI_FTYPE_V4DF_V4SI_UQI:
38287 case V4SI_FTYPE_V2DF_V4SI_UQI:
38288 case V4SI_FTYPE_V8HI_V4SI_UQI:
38289 case V4SI_FTYPE_V16QI_V4SI_UQI:
38290 case V4DI_FTYPE_V4DI_V4DI_V4DI:
38291 case V8DF_FTYPE_V2DF_V8DF_UQI:
38292 case V8DF_FTYPE_V4DF_V8DF_UQI:
38293 case V8DF_FTYPE_V8DF_V8DF_UQI:
38294 case V8SF_FTYPE_V8SF_V8SF_UQI:
38295 case V8SF_FTYPE_V8SI_V8SF_UQI:
38296 case V4DF_FTYPE_V4DF_V4DF_UQI:
38297 case V4SF_FTYPE_V4SF_V4SF_UQI:
38298 case V2DF_FTYPE_V2DF_V2DF_UQI:
38299 case V2DF_FTYPE_V4SF_V2DF_UQI:
38300 case V2DF_FTYPE_V4SI_V2DF_UQI:
38301 case V4SF_FTYPE_V4SI_V4SF_UQI:
38302 case V4DF_FTYPE_V4SF_V4DF_UQI:
38303 case V4DF_FTYPE_V4SI_V4DF_UQI:
38304 case V8SI_FTYPE_V8SI_V8SI_UQI:
38305 case V8SI_FTYPE_V8HI_V8SI_UQI:
38306 case V8SI_FTYPE_V16QI_V8SI_UQI:
38307 case V8DF_FTYPE_V8SI_V8DF_UQI:
38308 case V8DI_FTYPE_DI_V8DI_UQI:
38309 case V16SF_FTYPE_V8SF_V16SF_UHI:
38310 case V16SI_FTYPE_V8SI_V16SI_UHI:
38311 case V16HI_FTYPE_V16HI_V16HI_UHI:
38312 case V8HI_FTYPE_V16QI_V8HI_UQI:
38313 case V16HI_FTYPE_V16QI_V16HI_UHI:
38314 case V32HI_FTYPE_V32HI_V32HI_USI:
38315 case V32HI_FTYPE_V32QI_V32HI_USI:
38316 case V8DI_FTYPE_V16QI_V8DI_UQI:
38317 case V8DI_FTYPE_V2DI_V8DI_UQI:
38318 case V8DI_FTYPE_V4DI_V8DI_UQI:
38319 case V8DI_FTYPE_V8DI_V8DI_UQI:
38320 case V8DI_FTYPE_V8HI_V8DI_UQI:
38321 case V8DI_FTYPE_V8SI_V8DI_UQI:
38322 case V8HI_FTYPE_V8DI_V8HI_UQI:
38323 case V8SI_FTYPE_V8DI_V8SI_UQI:
38324 case V4SI_FTYPE_V4SI_V4SI_V4SI:
38325 nargs = 3;
38326 break;
38327 case V32QI_FTYPE_V32QI_V32QI_INT:
38328 case V16HI_FTYPE_V16HI_V16HI_INT:
38329 case V16QI_FTYPE_V16QI_V16QI_INT:
38330 case V4DI_FTYPE_V4DI_V4DI_INT:
38331 case V8HI_FTYPE_V8HI_V8HI_INT:
38332 case V8SI_FTYPE_V8SI_V8SI_INT:
38333 case V8SI_FTYPE_V8SI_V4SI_INT:
38334 case V8SF_FTYPE_V8SF_V8SF_INT:
38335 case V8SF_FTYPE_V8SF_V4SF_INT:
38336 case V4SI_FTYPE_V4SI_V4SI_INT:
38337 case V4DF_FTYPE_V4DF_V4DF_INT:
38338 case V16SF_FTYPE_V16SF_V16SF_INT:
38339 case V16SF_FTYPE_V16SF_V4SF_INT:
38340 case V16SI_FTYPE_V16SI_V4SI_INT:
38341 case V4DF_FTYPE_V4DF_V2DF_INT:
38342 case V4SF_FTYPE_V4SF_V4SF_INT:
38343 case V2DI_FTYPE_V2DI_V2DI_INT:
38344 case V4DI_FTYPE_V4DI_V2DI_INT:
38345 case V2DF_FTYPE_V2DF_V2DF_INT:
38346 case UQI_FTYPE_V8DI_V8UDI_INT:
38347 case UQI_FTYPE_V8DF_V8DF_INT:
38348 case UQI_FTYPE_V2DF_V2DF_INT:
38349 case UQI_FTYPE_V4SF_V4SF_INT:
38350 case UHI_FTYPE_V16SI_V16SI_INT:
38351 case UHI_FTYPE_V16SF_V16SF_INT:
38352 nargs = 3;
38353 nargs_constant = 1;
38354 break;
38355 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
38356 nargs = 3;
38357 rmode = V4DImode;
38358 nargs_constant = 1;
38359 break;
38360 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
38361 nargs = 3;
38362 rmode = V2DImode;
38363 nargs_constant = 1;
38364 break;
38365 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
38366 nargs = 3;
38367 rmode = DImode;
38368 nargs_constant = 1;
38369 break;
38370 case V2DI_FTYPE_V2DI_UINT_UINT:
38371 nargs = 3;
38372 nargs_constant = 2;
38373 break;
38374 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
38375 nargs = 3;
38376 rmode = V8DImode;
38377 nargs_constant = 1;
38378 break;
38379 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT:
38380 nargs = 5;
38381 rmode = V8DImode;
38382 mask_pos = 2;
38383 nargs_constant = 1;
38384 break;
38385 case QI_FTYPE_V8DF_INT_UQI:
38386 case QI_FTYPE_V4DF_INT_UQI:
38387 case QI_FTYPE_V2DF_INT_UQI:
38388 case HI_FTYPE_V16SF_INT_UHI:
38389 case QI_FTYPE_V8SF_INT_UQI:
38390 case QI_FTYPE_V4SF_INT_UQI:
38391 nargs = 3;
38392 mask_pos = 1;
38393 nargs_constant = 1;
38394 break;
38395 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT:
38396 nargs = 5;
38397 rmode = V4DImode;
38398 mask_pos = 2;
38399 nargs_constant = 1;
38400 break;
38401 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT:
38402 nargs = 5;
38403 rmode = V2DImode;
38404 mask_pos = 2;
38405 nargs_constant = 1;
38406 break;
38407 case V32QI_FTYPE_V32QI_V32QI_V32QI_USI:
38408 case V32HI_FTYPE_V32HI_V32HI_V32HI_USI:
38409 case V32HI_FTYPE_V64QI_V64QI_V32HI_USI:
38410 case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI:
38411 case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI:
38412 case V32HI_FTYPE_V32HI_V8HI_V32HI_USI:
38413 case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI:
38414 case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI:
38415 case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI:
38416 case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI:
38417 case V32QI_FTYPE_V16HI_V16HI_V32QI_USI:
38418 case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI:
38419 case V32HI_FTYPE_V16SI_V16SI_V32HI_USI:
38420 case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI:
38421 case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI:
38422 case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI:
38423 case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI:
38424 case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI:
38425 case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI:
38426 case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI:
38427 case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI:
38428 case V4DF_FTYPE_V4DI_V4DF_V4DF_UQI:
38429 case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI:
38430 case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI:
38431 case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI:
38432 case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI:
38433 case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI:
38434 case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI:
38435 case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI:
38436 case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI:
38437 case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI:
38438 case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI:
38439 case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI:
38440 case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI:
38441 case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI:
38442 case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI:
38443 case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI:
38444 case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI:
38445 case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI:
38446 case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI:
38447 case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI:
38448 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI:
38449 case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI:
38450 case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI:
38451 case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI:
38452 case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI:
38453 case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI:
38454 case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI:
38455 case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI:
38456 case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI:
38457 case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI:
38458 nargs = 4;
38459 break;
38460 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
38461 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
38462 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
38463 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
38464 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
38465 nargs = 4;
38466 nargs_constant = 1;
38467 break;
38468 case UQI_FTYPE_V4DI_V4DI_INT_UQI:
38469 case UQI_FTYPE_V8SI_V8SI_INT_UQI:
38470 case QI_FTYPE_V4DF_V4DF_INT_UQI:
38471 case QI_FTYPE_V8SF_V8SF_INT_UQI:
38472 case UQI_FTYPE_V2DI_V2DI_INT_UQI:
38473 case UQI_FTYPE_V4SI_V4SI_INT_UQI:
38474 case UQI_FTYPE_V2DF_V2DF_INT_UQI:
38475 case UQI_FTYPE_V4SF_V4SF_INT_UQI:
38476 case UDI_FTYPE_V64QI_V64QI_INT_UDI:
38477 case USI_FTYPE_V32QI_V32QI_INT_USI:
38478 case UHI_FTYPE_V16QI_V16QI_INT_UHI:
38479 case USI_FTYPE_V32HI_V32HI_INT_USI:
38480 case UHI_FTYPE_V16HI_V16HI_INT_UHI:
38481 case UQI_FTYPE_V8HI_V8HI_INT_UQI:
38482 nargs = 4;
38483 mask_pos = 1;
38484 nargs_constant = 1;
38485 break;
38486 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
38487 nargs = 4;
38488 nargs_constant = 2;
38489 break;
38490 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
38491 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
38492 nargs = 4;
38493 break;
38494 case UQI_FTYPE_V8DI_V8DI_INT_UQI:
38495 case UHI_FTYPE_V16SI_V16SI_INT_UHI:
38496 mask_pos = 1;
38497 nargs = 4;
38498 nargs_constant = 1;
38499 break;
38500 case V8SF_FTYPE_V8SF_INT_V8SF_UQI:
38501 case V4SF_FTYPE_V4SF_INT_V4SF_UQI:
38502 case V2DF_FTYPE_V4DF_INT_V2DF_UQI:
38503 case V2DI_FTYPE_V4DI_INT_V2DI_UQI:
38504 case V8SF_FTYPE_V16SF_INT_V8SF_UQI:
38505 case V8SI_FTYPE_V16SI_INT_V8SI_UQI:
38506 case V2DF_FTYPE_V8DF_INT_V2DF_UQI:
38507 case V2DI_FTYPE_V8DI_INT_V2DI_UQI:
38508 case V4SF_FTYPE_V8SF_INT_V4SF_UQI:
38509 case V4SI_FTYPE_V8SI_INT_V4SI_UQI:
38510 case V8HI_FTYPE_V8SF_INT_V8HI_UQI:
38511 case V8HI_FTYPE_V4SF_INT_V8HI_UQI:
38512 case V32HI_FTYPE_V32HI_INT_V32HI_USI:
38513 case V16HI_FTYPE_V16HI_INT_V16HI_UHI:
38514 case V8HI_FTYPE_V8HI_INT_V8HI_UQI:
38515 case V4DI_FTYPE_V4DI_INT_V4DI_UQI:
38516 case V2DI_FTYPE_V2DI_INT_V2DI_UQI:
38517 case V8SI_FTYPE_V8SI_INT_V8SI_UQI:
38518 case V4SI_FTYPE_V4SI_INT_V4SI_UQI:
38519 case V4DF_FTYPE_V4DF_INT_V4DF_UQI:
38520 case V2DF_FTYPE_V2DF_INT_V2DF_UQI:
38521 case V8DF_FTYPE_V8DF_INT_V8DF_UQI:
38522 case V16SF_FTYPE_V16SF_INT_V16SF_UHI:
38523 case V16HI_FTYPE_V16SF_INT_V16HI_UHI:
38524 case V16SI_FTYPE_V16SI_INT_V16SI_UHI:
38525 case V4SI_FTYPE_V16SI_INT_V4SI_UQI:
38526 case V4DI_FTYPE_V8DI_INT_V4DI_UQI:
38527 case V4DF_FTYPE_V8DF_INT_V4DF_UQI:
38528 case V4SF_FTYPE_V16SF_INT_V4SF_UQI:
38529 case V8DI_FTYPE_V8DI_INT_V8DI_UQI:
38530 nargs = 4;
38531 mask_pos = 2;
38532 nargs_constant = 1;
38533 break;
38534 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI:
38535 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI:
38536 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI:
38537 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI:
38538 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI:
38539 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI:
38540 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI:
38541 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI:
38542 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI:
38543 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI:
38544 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI:
38545 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI:
38546 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI:
38547 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI:
38548 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI:
38549 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI:
38550 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI:
38551 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI:
38552 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI:
38553 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI:
38554 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI:
38555 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI:
38556 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI:
38557 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI:
38558 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI:
38559 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI:
38560 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI:
38561 nargs = 5;
38562 mask_pos = 2;
38563 nargs_constant = 1;
38564 break;
38565 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI:
38566 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI:
38567 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI:
38568 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI:
38569 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI:
38570 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI:
38571 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI:
38572 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI:
38573 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI:
38574 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI:
38575 nargs = 5;
38577 mask_pos = 1;
38578 nargs_constant = 1;
38579 break;
38580
38581 default:
38582 gcc_unreachable ();
38583 }
38584
38585 gcc_assert (nargs <= ARRAY_SIZE (args));
38586
38587 if (comparison != UNKNOWN)
38588 {
38589 gcc_assert (nargs == 2);
38590 return ix86_expand_sse_compare (d, exp, target, swap);
38591 }
38592
38593 if (rmode == VOIDmode || rmode == tmode)
38594 {
38595 if (optimize
38596 || target == 0
38597 || GET_MODE (target) != tmode
38598 || !insn_p->operand[0].predicate (target, tmode))
38599 target = gen_reg_rtx (tmode);
38600 real_target = target;
38601 }
38602 else
38603 {
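/* The mode the caller expects (RMODE) differs from the insn's output
   mode (TMODE): let the insn write a fresh TMODE register and hand
   back an RMODE subreg view of it.  */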
38604 real_target = gen_reg_rtx (tmode);
38605 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
38606 }
38607
38608 for (i = 0; i < nargs; i++)
38609 {
38610 tree arg = CALL_EXPR_ARG (exp, i);
38611 rtx op = expand_normal (arg);
38612 machine_mode mode = insn_p->operand[i + 1].mode;
38613 bool match = insn_p->operand[i + 1].predicate (op, mode);
38614
38615 if (last_arg_count && (i + 1) == nargs)
38616 {
38617 /* SIMD shift insns take either an 8-bit immediate or a
38618 register as the count, but the builtin functions take an int.
38619 If the count operand doesn't match, put it in a register. */
38620 if (!match)
38621 {
38622 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
38623 if (!insn_p->operand[i + 1].predicate (op, mode))
38624 op = copy_to_reg (op);
38625 }
38626 }
38627 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
38628 (!mask_pos && (nargs - i) <= nargs_constant))
38629 {
38630 if (!match)
38631 switch (icode)
38632 {
38633 case CODE_FOR_avx_vinsertf128v4di:
38634 case CODE_FOR_avx_vextractf128v4di:
38635 error ("the last argument must be an 1-bit immediate");
38636 return const0_rtx;
38637
38638 case CODE_FOR_avx512f_cmpv8di3_mask:
38639 case CODE_FOR_avx512f_cmpv16si3_mask:
38640 case CODE_FOR_avx512f_ucmpv8di3_mask:
38641 case CODE_FOR_avx512f_ucmpv16si3_mask:
38642 case CODE_FOR_avx512vl_cmpv4di3_mask:
38643 case CODE_FOR_avx512vl_cmpv8si3_mask:
38644 case CODE_FOR_avx512vl_ucmpv4di3_mask:
38645 case CODE_FOR_avx512vl_ucmpv8si3_mask:
38646 case CODE_FOR_avx512vl_cmpv2di3_mask:
38647 case CODE_FOR_avx512vl_cmpv4si3_mask:
38648 case CODE_FOR_avx512vl_ucmpv2di3_mask:
38649 case CODE_FOR_avx512vl_ucmpv4si3_mask:
38650 error ("the last argument must be a 3-bit immediate");
38651 return const0_rtx;
38652
38653 case CODE_FOR_sse4_1_roundsd:
38654 case CODE_FOR_sse4_1_roundss:
38655
38656 case CODE_FOR_sse4_1_roundpd:
38657 case CODE_FOR_sse4_1_roundps:
38658 case CODE_FOR_avx_roundpd256:
38659 case CODE_FOR_avx_roundps256:
38660
38661 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
38662 case CODE_FOR_sse4_1_roundps_sfix:
38663 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
38664 case CODE_FOR_avx_roundps_sfix256:
38665
38666 case CODE_FOR_sse4_1_blendps:
38667 case CODE_FOR_avx_blendpd256:
38668 case CODE_FOR_avx_vpermilv4df:
38669 case CODE_FOR_avx_vpermilv4df_mask:
38670 case CODE_FOR_avx512f_getmantv8df_mask:
38671 case CODE_FOR_avx512f_getmantv16sf_mask:
38672 case CODE_FOR_avx512vl_getmantv8sf_mask:
38673 case CODE_FOR_avx512vl_getmantv4df_mask:
38674 case CODE_FOR_avx512vl_getmantv4sf_mask:
38675 case CODE_FOR_avx512vl_getmantv2df_mask:
38676 case CODE_FOR_avx512dq_rangepv8df_mask_round:
38677 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
38678 case CODE_FOR_avx512dq_rangepv4df_mask:
38679 case CODE_FOR_avx512dq_rangepv8sf_mask:
38680 case CODE_FOR_avx512dq_rangepv2df_mask:
38681 case CODE_FOR_avx512dq_rangepv4sf_mask:
38682 case CODE_FOR_avx_shufpd256_mask:
38683 error ("the last argument must be a 4-bit immediate");
38684 return const0_rtx;
38685
38686 case CODE_FOR_sha1rnds4:
38687 case CODE_FOR_sse4_1_blendpd:
38688 case CODE_FOR_avx_vpermilv2df:
38689 case CODE_FOR_avx_vpermilv2df_mask:
38690 case CODE_FOR_xop_vpermil2v2df3:
38691 case CODE_FOR_xop_vpermil2v4sf3:
38692 case CODE_FOR_xop_vpermil2v4df3:
38693 case CODE_FOR_xop_vpermil2v8sf3:
38694 case CODE_FOR_avx512f_vinsertf32x4_mask:
38695 case CODE_FOR_avx512f_vinserti32x4_mask:
38696 case CODE_FOR_avx512f_vextractf32x4_mask:
38697 case CODE_FOR_avx512f_vextracti32x4_mask:
38698 case CODE_FOR_sse2_shufpd:
38699 case CODE_FOR_sse2_shufpd_mask:
38700 case CODE_FOR_avx512dq_shuf_f64x2_mask:
38701 case CODE_FOR_avx512dq_shuf_i64x2_mask:
38702 case CODE_FOR_avx512vl_shuf_i32x4_mask:
38703 case CODE_FOR_avx512vl_shuf_f32x4_mask:
38704 error ("the last argument must be a 2-bit immediate");
38705 return const0_rtx;
38706
38707 case CODE_FOR_avx_vextractf128v4df:
38708 case CODE_FOR_avx_vextractf128v8sf:
38709 case CODE_FOR_avx_vextractf128v8si:
38710 case CODE_FOR_avx_vinsertf128v4df:
38711 case CODE_FOR_avx_vinsertf128v8sf:
38712 case CODE_FOR_avx_vinsertf128v8si:
38713 case CODE_FOR_avx512f_vinsertf64x4_mask:
38714 case CODE_FOR_avx512f_vinserti64x4_mask:
38715 case CODE_FOR_avx512f_vextractf64x4_mask:
38716 case CODE_FOR_avx512f_vextracti64x4_mask:
38717 case CODE_FOR_avx512dq_vinsertf32x8_mask:
38718 case CODE_FOR_avx512dq_vinserti32x8_mask:
38719 case CODE_FOR_avx512vl_vinsertv4df:
38720 case CODE_FOR_avx512vl_vinsertv4di:
38721 case CODE_FOR_avx512vl_vinsertv8sf:
38722 case CODE_FOR_avx512vl_vinsertv8si:
38723 error ("the last argument must be a 1-bit immediate");
38724 return const0_rtx;
38725
38726 case CODE_FOR_avx_vmcmpv2df3:
38727 case CODE_FOR_avx_vmcmpv4sf3:
38728 case CODE_FOR_avx_cmpv2df3:
38729 case CODE_FOR_avx_cmpv4sf3:
38730 case CODE_FOR_avx_cmpv4df3:
38731 case CODE_FOR_avx_cmpv8sf3:
38732 case CODE_FOR_avx512f_cmpv8df3_mask:
38733 case CODE_FOR_avx512f_cmpv16sf3_mask:
38734 case CODE_FOR_avx512f_vmcmpv2df3_mask:
38735 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
38736 error ("the last argument must be a 5-bit immediate");
38737 return const0_rtx;
38738
38739 default:
38740 switch (nargs_constant)
38741 {
38742 case 2:
38743 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
38744 (!mask_pos && (nargs - i) == nargs_constant))
38745 {
38746 error ("the next to last argument must be an 8-bit immediate");
38747 break;
38748 }
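/* FALLTHRU */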
38749 case 1:
38750 error ("the last argument must be an 8-bit immediate");
38751 break;
38752 default:
38753 gcc_unreachable ();
38754 }
38755 return const0_rtx;
38756 }
38757 }
38758 else
38759 {
38760 if (VECTOR_MODE_P (mode))
38761 op = safe_vector_operand (op, mode);
38762
38763 /* If we aren't optimizing, only allow one memory operand to
38764 be generated. */
38765 if (memory_operand (op, mode))
38766 num_memory++;
38767
38768 op = fixup_modeless_constant (op, mode);
38769
38770 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38771 {
38772 if (optimize || !match || num_memory > 1)
38773 op = copy_to_mode_reg (mode, op);
38774 }
38775 else
38776 {
38777 op = copy_to_reg (op);
38778 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38779 }
38780 }
38781
38782 args[i].op = op;
38783 args[i].mode = mode;
38784 }
38785
38786 switch (nargs)
38787 {
38788 case 1:
38789 pat = GEN_FCN (icode) (real_target, args[0].op);
38790 break;
38791 case 2:
38792 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
38793 break;
38794 case 3:
38795 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
38796 args[2].op);
38797 break;
38798 case 4:
38799 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
38800 args[2].op, args[3].op);
38801 break;
38802 case 5:
38803 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
38804 args[2].op, args[3].op, args[4].op);
break;
38805 case 6:
38806 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
38807 args[2].op, args[3].op, args[4].op,
38808 args[5].op);
38809 break;
38810 default:
38811 gcc_unreachable ();
38812 }
38813
38814 if (! pat)
38815 return 0;
38816
38817 emit_insn (pat);
38818 return target;
38819 }
38820
38821 /* Transform a pattern of the following layout:
38822 (parallel [
38823 (set (A B))
38824 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
38825 ])
38826 into:
38827 (set (A B))
38828
38829 Or:
38830 (parallel [ A B
38831 ...
38832 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
38833 ...
38834 ])
38835 into:
38836 (parallel [ A B ... ]) */
38837
38838 static rtx
38839 ix86_erase_embedded_rounding (rtx pat)
38840 {
38841 if (GET_CODE (pat) == INSN)
38842 pat = PATTERN (pat);
38843
38844 gcc_assert (GET_CODE (pat) == PARALLEL);
38845
38846 if (XVECLEN (pat, 0) == 2)
38847 {
38848 rtx p0 = XVECEXP (pat, 0, 0);
38849 rtx p1 = XVECEXP (pat, 0, 1);
38850
38851 gcc_assert (GET_CODE (p0) == SET
38852 && GET_CODE (p1) == UNSPEC
38853 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
38854
38855 return p0;
38856 }
38857 else
38858 {
38859 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
38860 int i = 0;
38861 int j = 0;
38862
38863 for (; i < XVECLEN (pat, 0); ++i)
38864 {
38865 rtx elem = XVECEXP (pat, 0, i);
38866 if (GET_CODE (elem) != UNSPEC
38867 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
38868 res [j++] = elem;
38869 }
38870
38871 /* No more than 1 occurrence was removed. */
38872 gcc_assert (j >= XVECLEN (pat, 0) - 1);
38873
38874 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
38875 }
38876 }
38877
38878 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
38879 with rounding. */
38880 static rtx
38881 ix86_expand_sse_comi_round (const struct builtin_description *d,
38882 tree exp, rtx target)
38883 {
38884 rtx pat, set_dst;
38885 tree arg0 = CALL_EXPR_ARG (exp, 0);
38886 tree arg1 = CALL_EXPR_ARG (exp, 1);
38887 tree arg2 = CALL_EXPR_ARG (exp, 2);
38888 tree arg3 = CALL_EXPR_ARG (exp, 3);
38889 rtx op0 = expand_normal (arg0);
38890 rtx op1 = expand_normal (arg1);
38891 rtx op2 = expand_normal (arg2);
38892 rtx op3 = expand_normal (arg3);
38893 enum insn_code icode = d->icode;
38894 const struct insn_data_d *insn_p = &insn_data[icode];
38895 machine_mode mode0 = insn_p->operand[0].mode;
38896 machine_mode mode1 = insn_p->operand[1].mode;
38897 enum rtx_code comparison = UNEQ;
38898 bool need_ucomi = false;
38899
38900 /* See avxintrin.h for values. */
38901 enum rtx_code comi_comparisons[32] =
38902 {
38903 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
38904 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
38905 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
38906 };
38907 bool need_ucomi_values[32] =
38908 {
38909 true, false, false, true, true, false, false, true,
38910 true, false, false, true, true, false, false, true,
38911 false, true, true, false, false, true, true, false,
38912 false, true, true, false, false, true, true, false
38913 };
38914
38915 if (!CONST_INT_P (op2))
38916 {
38917 error ("the third argument must be comparison constant");
38918 return const0_rtx;
38919 }
38920 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
38921 {
38922 error ("incorrect comparison mode");
38923 return const0_rtx;
38924 }
38925
38926 if (!insn_p->operand[2].predicate (op3, SImode))
38927 {
38928 error ("incorrect rounding operand");
38929 return const0_rtx;
38930 }
38931
38932 comparison = comi_comparisons[INTVAL (op2)];
38933 need_ucomi = need_ucomi_values[INTVAL (op2)];
38934
38935 if (VECTOR_MODE_P (mode0))
38936 op0 = safe_vector_operand (op0, mode0);
38937 if (VECTOR_MODE_P (mode1))
38938 op1 = safe_vector_operand (op1, mode1);
38939
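/* Build the result in a zeroed SImode pseudo; the comparison outcome
   is written into its low byte below through a QImode STRICT_LOW_PART,
   so the upper bits stay zero.  */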
38940 target = gen_reg_rtx (SImode);
38941 emit_move_insn (target, const0_rtx);
38942 target = gen_rtx_SUBREG (QImode, target, 0);
38943
38944 if ((optimize && !register_operand (op0, mode0))
38945 || !insn_p->operand[0].predicate (op0, mode0))
38946 op0 = copy_to_mode_reg (mode0, op0);
38947 if ((optimize && !register_operand (op1, mode1))
38948 || !insn_p->operand[1].predicate (op1, mode1))
38949 op1 = copy_to_mode_reg (mode1, op1);
38950
38951 if (need_ucomi)
38952 icode = icode == CODE_FOR_sse_comi_round
38953 ? CODE_FOR_sse_ucomi_round
38954 : CODE_FOR_sse2_ucomi_round;
38955
38956 pat = GEN_FCN (icode) (op0, op1, op3);
38957 if (! pat)
38958 return 0;
38959
38960 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
38961 if (INTVAL (op3) == NO_ROUND)
38962 {
38963 pat = ix86_erase_embedded_rounding (pat);
38964 if (! pat)
38965 return 0;
38966
38967 set_dst = SET_DEST (pat);
38968 }
38969 else
38970 {
38971 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
38972 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
38973 }
38974
38975 emit_insn (pat);
38976 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
38977 gen_rtx_fmt_ee (comparison, QImode,
38978 set_dst,
38979 const0_rtx)));
38980
38981 return SUBREG_REG (target);
38982 }
38983
38984 static rtx
38985 ix86_expand_round_builtin (const struct builtin_description *d,
38986 tree exp, rtx target)
38987 {
38988 rtx pat;
38989 unsigned int i, nargs;
38990 struct
38991 {
38992 rtx op;
38993 machine_mode mode;
38994 } args[6];
38995 enum insn_code icode = d->icode;
38996 const struct insn_data_d *insn_p = &insn_data[icode];
38997 machine_mode tmode = insn_p->operand[0].mode;
38998 unsigned int nargs_constant = 0;
38999 unsigned int redundant_embed_rnd = 0;
39000
39001 switch ((enum ix86_builtin_func_type) d->flag)
39002 {
39003 case UINT64_FTYPE_V2DF_INT:
39004 case UINT64_FTYPE_V4SF_INT:
39005 case UINT_FTYPE_V2DF_INT:
39006 case UINT_FTYPE_V4SF_INT:
39007 case INT64_FTYPE_V2DF_INT:
39008 case INT64_FTYPE_V4SF_INT:
39009 case INT_FTYPE_V2DF_INT:
39010 case INT_FTYPE_V4SF_INT:
39011 nargs = 2;
39012 break;
39013 case V4SF_FTYPE_V4SF_UINT_INT:
39014 case V4SF_FTYPE_V4SF_UINT64_INT:
39015 case V2DF_FTYPE_V2DF_UINT64_INT:
39016 case V4SF_FTYPE_V4SF_INT_INT:
39017 case V4SF_FTYPE_V4SF_INT64_INT:
39018 case V2DF_FTYPE_V2DF_INT64_INT:
39019 case V4SF_FTYPE_V4SF_V4SF_INT:
39020 case V2DF_FTYPE_V2DF_V2DF_INT:
39021 case V4SF_FTYPE_V4SF_V2DF_INT:
39022 case V2DF_FTYPE_V2DF_V4SF_INT:
39023 nargs = 3;
39024 break;
39025 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
39026 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
39027 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
39028 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
39029 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
39030 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
39031 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
39032 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
39033 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
39034 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
39035 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
39036 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
39037 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
39038 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
39039 nargs = 4;
39040 break;
39041 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
39042 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
39043 nargs_constant = 2;
39044 nargs = 4;
39045 break;
39046 case INT_FTYPE_V4SF_V4SF_INT_INT:
39047 case INT_FTYPE_V2DF_V2DF_INT_INT:
39048 return ix86_expand_sse_comi_round (d, exp, target);
39049 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT:
39050 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
39051 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
39052 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
39053 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
39054 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
39055 nargs = 5;
39056 break;
39057 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
39058 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
39059 nargs_constant = 4;
39060 nargs = 5;
39061 break;
39062 case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT:
39063 case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT:
39064 case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT:
39065 case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT:
39066 nargs_constant = 3;
39067 nargs = 5;
39068 break;
39069 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
39070 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
39071 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
39072 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
39073 nargs = 6;
39074 nargs_constant = 4;
39075 break;
39076 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
39077 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
39078 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
39079 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
39080 nargs = 6;
39081 nargs_constant = 3;
39082 break;
39083 default:
39084 gcc_unreachable ();
39085 }
39086 gcc_assert (nargs <= ARRAY_SIZE (args));
39087
39088 if (optimize
39089 || target == 0
39090 || GET_MODE (target) != tmode
39091 || !insn_p->operand[0].predicate (target, tmode))
39092 target = gen_reg_rtx (tmode);
39093
39094 for (i = 0; i < nargs; i++)
39095 {
39096 tree arg = CALL_EXPR_ARG (exp, i);
39097 rtx op = expand_normal (arg);
39098 machine_mode mode = insn_p->operand[i + 1].mode;
39099 bool match = insn_p->operand[i + 1].predicate (op, mode);
39100
39101 if (i == nargs - nargs_constant)
39102 {
39103 if (!match)
39104 {
39105 switch (icode)
39106 {
39107 case CODE_FOR_avx512f_getmantv8df_mask_round:
39108 case CODE_FOR_avx512f_getmantv16sf_mask_round:
39109 case CODE_FOR_avx512f_vgetmantv2df_round:
39110 case CODE_FOR_avx512f_vgetmantv4sf_round:
39111 error ("the immediate argument must be a 4-bit immediate");
39112 return const0_rtx;
39113 case CODE_FOR_avx512f_cmpv8df3_mask_round:
39114 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
39115 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
39116 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
39117 error ("the immediate argument must be a 5-bit immediate");
39118 return const0_rtx;
39119 default:
39120 error ("the immediate argument must be an 8-bit immediate");
39121 return const0_rtx;
39122 }
39123 }
39124 }
39125 else if (i == nargs-1)
39126 {
39127 if (!insn_p->operand[nargs].predicate (op, SImode))
39128 {
39129 error ("incorrect rounding operand");
39130 return const0_rtx;
39131 }
39132
39133 /* If there is no rounding, use the normal version of the pattern. */
39134 if (INTVAL (op) == NO_ROUND)
39135 redundant_embed_rnd = 1;
39136 }
39137 else
39138 {
39139 if (VECTOR_MODE_P (mode))
39140 op = safe_vector_operand (op, mode);
39141
39142 op = fixup_modeless_constant (op, mode);
39143
39144 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
39145 {
39146 if (optimize || !match)
39147 op = copy_to_mode_reg (mode, op);
39148 }
39149 else
39150 {
39151 op = copy_to_reg (op);
39152 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
39153 }
39154 }
39155
39156 args[i].op = op;
39157 args[i].mode = mode;
39158 }
39159
39160 switch (nargs)
39161 {
39162 case 1:
39163 pat = GEN_FCN (icode) (target, args[0].op);
39164 break;
39165 case 2:
39166 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
39167 break;
39168 case 3:
39169 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39170 args[2].op);
39171 break;
39172 case 4:
39173 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39174 args[2].op, args[3].op);
39175 break;
39176 case 5:
39177 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39178 args[2].op, args[3].op, args[4].op);
break;
39179 case 6:
39180 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39181 args[2].op, args[3].op, args[4].op,
39182 args[5].op);
39183 break;
39184 default:
39185 gcc_unreachable ();
39186 }
39187
39188 if (!pat)
39189 return 0;
39190
39191 if (redundant_embed_rnd)
39192 pat = ix86_erase_embedded_rounding (pat);
39193
39194 emit_insn (pat);
39195 return target;
39196 }
39197
39198 /* Subroutine of ix86_expand_builtin to take care of special insns
39199 with variable number of operands. */
39200
39201 static rtx
39202 ix86_expand_special_args_builtin (const struct builtin_description *d,
39203 tree exp, rtx target)
39204 {
39205 tree arg;
39206 rtx pat, op;
39207 unsigned int i, nargs, arg_adjust, memory;
39208 bool aligned_mem = false;
39209 struct
39210 {
39211 rtx op;
39212 machine_mode mode;
39213 } args[3];
39214 enum insn_code icode = d->icode;
39215 bool last_arg_constant = false;
39216 const struct insn_data_d *insn_p = &insn_data[icode];
39217 machine_mode tmode = insn_p->operand[0].mode;
39218 enum { load, store } klass;
39219
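/* A LOAD builtin returns its result in TARGET; a STORE builtin consumes
   its first argument as the destination (memory or register) and
   returns no value.  */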
39220 switch ((enum ix86_builtin_func_type) d->flag)
39221 {
39222 case VOID_FTYPE_VOID:
39223 emit_insn (GEN_FCN (icode) (target));
39224 return 0;
39225 case VOID_FTYPE_UINT64:
39226 case VOID_FTYPE_UNSIGNED:
39227 nargs = 0;
39228 klass = store;
39229 memory = 0;
39230 break;
39231
39232 case INT_FTYPE_VOID:
39233 case USHORT_FTYPE_VOID:
39234 case UINT64_FTYPE_VOID:
39235 case UNSIGNED_FTYPE_VOID:
39236 nargs = 0;
39237 klass = load;
39238 memory = 0;
39239 break;
39240 case UINT64_FTYPE_PUNSIGNED:
39241 case V2DI_FTYPE_PV2DI:
39242 case V4DI_FTYPE_PV4DI:
39243 case V32QI_FTYPE_PCCHAR:
39244 case V16QI_FTYPE_PCCHAR:
39245 case V8SF_FTYPE_PCV4SF:
39246 case V8SF_FTYPE_PCFLOAT:
39247 case V4SF_FTYPE_PCFLOAT:
39248 case V4DF_FTYPE_PCV2DF:
39249 case V4DF_FTYPE_PCDOUBLE:
39250 case V2DF_FTYPE_PCDOUBLE:
39251 case VOID_FTYPE_PVOID:
39252 case V8DI_FTYPE_PV8DI:
39253 nargs = 1;
39254 klass = load;
39255 memory = 0;
39256 switch (icode)
39257 {
39258 case CODE_FOR_sse4_1_movntdqa:
39259 case CODE_FOR_avx2_movntdqa:
39260 case CODE_FOR_avx512f_movntdqa:
39261 aligned_mem = true;
39262 break;
39263 default:
39264 break;
39265 }
39266 break;
39267 case VOID_FTYPE_PV2SF_V4SF:
39268 case VOID_FTYPE_PV8DI_V8DI:
39269 case VOID_FTYPE_PV4DI_V4DI:
39270 case VOID_FTYPE_PV2DI_V2DI:
39271 case VOID_FTYPE_PCHAR_V32QI:
39272 case VOID_FTYPE_PCHAR_V16QI:
39273 case VOID_FTYPE_PFLOAT_V16SF:
39274 case VOID_FTYPE_PFLOAT_V8SF:
39275 case VOID_FTYPE_PFLOAT_V4SF:
39276 case VOID_FTYPE_PDOUBLE_V8DF:
39277 case VOID_FTYPE_PDOUBLE_V4DF:
39278 case VOID_FTYPE_PDOUBLE_V2DF:
39279 case VOID_FTYPE_PLONGLONG_LONGLONG:
39280 case VOID_FTYPE_PULONGLONG_ULONGLONG:
39281 case VOID_FTYPE_PINT_INT:
39282 nargs = 1;
39283 klass = store;
39284 /* Reserve memory operand for target. */
39285 memory = ARRAY_SIZE (args);
39286 switch (icode)
39287 {
39288 /* These builtins and instructions require the memory
39289 to be properly aligned. */
39290 case CODE_FOR_avx_movntv4di:
39291 case CODE_FOR_sse2_movntv2di:
39292 case CODE_FOR_avx_movntv8sf:
39293 case CODE_FOR_sse_movntv4sf:
39294 case CODE_FOR_sse4a_vmmovntv4sf:
39295 case CODE_FOR_avx_movntv4df:
39296 case CODE_FOR_sse2_movntv2df:
39297 case CODE_FOR_sse4a_vmmovntv2df:
39298 case CODE_FOR_sse2_movntidi:
39299 case CODE_FOR_sse_movntq:
39300 case CODE_FOR_sse2_movntisi:
39301 case CODE_FOR_avx512f_movntv16sf:
39302 case CODE_FOR_avx512f_movntv8df:
39303 case CODE_FOR_avx512f_movntv8di:
39304 aligned_mem = true;
39305 break;
39306 default:
39307 break;
39308 }
39309 break;
39310 case V4SF_FTYPE_V4SF_PCV2SF:
39311 case V2DF_FTYPE_V2DF_PCDOUBLE:
39312 nargs = 2;
39313 klass = load;
39314 memory = 1;
39315 break;
39316 case V8SF_FTYPE_PCV8SF_V8SI:
39317 case V4DF_FTYPE_PCV4DF_V4DI:
39318 case V4SF_FTYPE_PCV4SF_V4SI:
39319 case V2DF_FTYPE_PCV2DF_V2DI:
39320 case V8SI_FTYPE_PCV8SI_V8SI:
39321 case V4DI_FTYPE_PCV4DI_V4DI:
39322 case V4SI_FTYPE_PCV4SI_V4SI:
39323 case V2DI_FTYPE_PCV2DI_V2DI:
39324 nargs = 2;
39325 klass = load;
39326 memory = 0;
39327 break;
39328 case VOID_FTYPE_PV8DF_V8DF_UQI:
39329 case VOID_FTYPE_PV16SF_V16SF_UHI:
39330 case VOID_FTYPE_PV8DI_V8DI_UQI:
39331 case VOID_FTYPE_PV4DI_V4DI_UQI:
39332 case VOID_FTYPE_PV2DI_V2DI_UQI:
39333 case VOID_FTYPE_PV16SI_V16SI_UHI:
39334 case VOID_FTYPE_PV8SI_V8SI_UQI:
39335 case VOID_FTYPE_PV4SI_V4SI_UQI:
39336 switch (icode)
39337 {
39338 /* These builtins and instructions require the memory
39339 to be properly aligned. */
39340 case CODE_FOR_avx512f_storev16sf_mask:
39341 case CODE_FOR_avx512f_storev16si_mask:
39342 case CODE_FOR_avx512f_storev8df_mask:
39343 case CODE_FOR_avx512f_storev8di_mask:
39344 case CODE_FOR_avx512vl_storev8sf_mask:
39345 case CODE_FOR_avx512vl_storev8si_mask:
39346 case CODE_FOR_avx512vl_storev4df_mask:
39347 case CODE_FOR_avx512vl_storev4di_mask:
39348 case CODE_FOR_avx512vl_storev4sf_mask:
39349 case CODE_FOR_avx512vl_storev4si_mask:
39350 case CODE_FOR_avx512vl_storev2df_mask:
39351 case CODE_FOR_avx512vl_storev2di_mask:
39352 aligned_mem = true;
39353 break;
39354 default:
39355 break;
39356 }
39357 /* FALLTHRU */
39358 case VOID_FTYPE_PV8SF_V8SI_V8SF:
39359 case VOID_FTYPE_PV4DF_V4DI_V4DF:
39360 case VOID_FTYPE_PV4SF_V4SI_V4SF:
39361 case VOID_FTYPE_PV2DF_V2DI_V2DF:
39362 case VOID_FTYPE_PV8SI_V8SI_V8SI:
39363 case VOID_FTYPE_PV4DI_V4DI_V4DI:
39364 case VOID_FTYPE_PV4SI_V4SI_V4SI:
39365 case VOID_FTYPE_PV2DI_V2DI_V2DI:
39366 case VOID_FTYPE_PV8SI_V8DI_UQI:
39367 case VOID_FTYPE_PV8HI_V8DI_UQI:
39368 case VOID_FTYPE_PV16HI_V16SI_UHI:
39369 case VOID_FTYPE_PV16QI_V8DI_UQI:
39370 case VOID_FTYPE_PV16QI_V16SI_UHI:
39371 case VOID_FTYPE_PV4SI_V4DI_UQI:
39372 case VOID_FTYPE_PV4SI_V2DI_UQI:
39373 case VOID_FTYPE_PV8HI_V4DI_UQI:
39374 case VOID_FTYPE_PV8HI_V2DI_UQI:
39375 case VOID_FTYPE_PV8HI_V8SI_UQI:
39376 case VOID_FTYPE_PV8HI_V4SI_UQI:
39377 case VOID_FTYPE_PV16QI_V4DI_UQI:
39378 case VOID_FTYPE_PV16QI_V2DI_UQI:
39379 case VOID_FTYPE_PV16QI_V8SI_UQI:
39380 case VOID_FTYPE_PV16QI_V4SI_UQI:
39381 case VOID_FTYPE_PV8HI_V8HI_UQI:
39382 case VOID_FTYPE_PV16HI_V16HI_UHI:
39383 case VOID_FTYPE_PV32HI_V32HI_USI:
39384 case VOID_FTYPE_PV16QI_V16QI_UHI:
39385 case VOID_FTYPE_PV32QI_V32QI_USI:
39386 case VOID_FTYPE_PV64QI_V64QI_UDI:
39387 case VOID_FTYPE_PV4DF_V4DF_UQI:
39388 case VOID_FTYPE_PV2DF_V2DF_UQI:
39389 case VOID_FTYPE_PV8SF_V8SF_UQI:
39390 case VOID_FTYPE_PV4SF_V4SF_UQI:
39391 nargs = 2;
39392 klass = store;
39393 /* Reserve memory operand for target. */
39394 memory = ARRAY_SIZE (args);
39395 break;
39396 case V4SF_FTYPE_PCV4SF_V4SF_UQI:
39397 case V8SF_FTYPE_PCV8SF_V8SF_UQI:
39398 case V16SF_FTYPE_PCV16SF_V16SF_UHI:
39399 case V4SI_FTYPE_PCV4SI_V4SI_UQI:
39400 case V8SI_FTYPE_PCV8SI_V8SI_UQI:
39401 case V16SI_FTYPE_PCV16SI_V16SI_UHI:
39402 case V2DF_FTYPE_PCV2DF_V2DF_UQI:
39403 case V4DF_FTYPE_PCV4DF_V4DF_UQI:
39404 case V8DF_FTYPE_PCV8DF_V8DF_UQI:
39405 case V2DI_FTYPE_PCV2DI_V2DI_UQI:
39406 case V4DI_FTYPE_PCV4DI_V4DI_UQI:
39407 case V8DI_FTYPE_PCV8DI_V8DI_UQI:
39408 case V8HI_FTYPE_PCV8HI_V8HI_UQI:
39409 case V16HI_FTYPE_PCV16HI_V16HI_UHI:
39410 case V32HI_FTYPE_PCV32HI_V32HI_USI:
39411 case V16QI_FTYPE_PCV16QI_V16QI_UHI:
39412 case V32QI_FTYPE_PCV32QI_V32QI_USI:
39413 case V64QI_FTYPE_PCV64QI_V64QI_UDI:
39414 nargs = 3;
39415 klass = load;
39416 memory = 0;
39417 switch (icode)
39418 {
39419 /* These builtins and instructions require the memory
39420 to be properly aligned. */
39421 case CODE_FOR_avx512f_loadv16sf_mask:
39422 case CODE_FOR_avx512f_loadv16si_mask:
39423 case CODE_FOR_avx512f_loadv8df_mask:
39424 case CODE_FOR_avx512f_loadv8di_mask:
39425 case CODE_FOR_avx512vl_loadv8sf_mask:
39426 case CODE_FOR_avx512vl_loadv8si_mask:
39427 case CODE_FOR_avx512vl_loadv4df_mask:
39428 case CODE_FOR_avx512vl_loadv4di_mask:
39429 case CODE_FOR_avx512vl_loadv4sf_mask:
39430 case CODE_FOR_avx512vl_loadv4si_mask:
39431 case CODE_FOR_avx512vl_loadv2df_mask:
39432 case CODE_FOR_avx512vl_loadv2di_mask:
39433 case CODE_FOR_avx512bw_loadv64qi_mask:
39434 case CODE_FOR_avx512vl_loadv32qi_mask:
39435 case CODE_FOR_avx512vl_loadv16qi_mask:
39436 case CODE_FOR_avx512bw_loadv32hi_mask:
39437 case CODE_FOR_avx512vl_loadv16hi_mask:
39438 case CODE_FOR_avx512vl_loadv8hi_mask:
39439 aligned_mem = true;
39440 break;
39441 default:
39442 break;
39443 }
39444 break;
39445 case VOID_FTYPE_UINT_UINT_UINT:
39446 case VOID_FTYPE_UINT64_UINT_UINT:
39447 case UCHAR_FTYPE_UINT_UINT_UINT:
39448 case UCHAR_FTYPE_UINT64_UINT_UINT:
39449 nargs = 3;
39450 klass = load;
39451 memory = ARRAY_SIZE (args);
39452 last_arg_constant = true;
39453 break;
39454 default:
39455 gcc_unreachable ();
39456 }
39457
39458 gcc_assert (nargs <= ARRAY_SIZE (args));
39459
39460 if (klass == store)
39461 {
39462 arg = CALL_EXPR_ARG (exp, 0);
39463 op = expand_normal (arg);
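/* Store builtins are expanded for their side effect only, so no
   target rtx should have been supplied.  */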
39464 gcc_assert (target == 0);
39465 if (memory)
39466 {
39467 op = ix86_zero_extend_to_Pmode (op);
39468 target = gen_rtx_MEM (tmode, op);
39469 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
39470 on it. Try to improve it using get_pointer_alignment,
39471 and if the special builtin is one that requires strict
39472 mode alignment, also from its GET_MODE_ALIGNMENT.
39473 Failure to do so could lead to ix86_legitimate_combined_insn
39474 rejecting all changes to such insns. */
39475 unsigned int align = get_pointer_alignment (arg);
39476 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
39477 align = GET_MODE_ALIGNMENT (tmode);
39478 if (MEM_ALIGN (target) < align)
39479 set_mem_align (target, align);
39480 }
39481 else
39482 target = force_reg (tmode, op);
39483 arg_adjust = 1;
39484 }
39485 else
39486 {
39487 arg_adjust = 0;
39488 if (optimize
39489 || target == 0
39490 || !register_operand (target, tmode)
39491 || GET_MODE (target) != tmode)
39492 target = gen_reg_rtx (tmode);
39493 }
39494
39495 for (i = 0; i < nargs; i++)
39496 {
39497 machine_mode mode = insn_p->operand[i + 1].mode;
39498 bool match;
39499
39500 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
39501 op = expand_normal (arg);
39502 match = insn_p->operand[i + 1].predicate (op, mode);
39503
39504 if (last_arg_constant && (i + 1) == nargs)
39505 {
39506 if (!match)
39507 {
39508 if (icode == CODE_FOR_lwp_lwpvalsi3
39509 || icode == CODE_FOR_lwp_lwpinssi3
39510 || icode == CODE_FOR_lwp_lwpvaldi3
39511 || icode == CODE_FOR_lwp_lwpinsdi3)
39512 error ("the last argument must be a 32-bit immediate");
39513 else
39514 error ("the last argument must be an 8-bit immediate");
39515 return const0_rtx;
39516 }
39517 }
39518 else
39519 {
39520 if (i == memory)
39521 {
39522 /* This must be the memory operand. */
39523 op = ix86_zero_extend_to_Pmode (op);
39524 op = gen_rtx_MEM (mode, op);
39525 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
39526 on it. Try to improve it using get_pointer_alignment,
39527 and if the special builtin is one that requires strict
39528 mode alignment, also from its GET_MODE_ALIGNMENT.
39529 Failure to do so could lead to ix86_legitimate_combined_insn
39530 rejecting all changes to such insns. */
39531 unsigned int align = get_pointer_alignment (arg);
39532 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
39533 align = GET_MODE_ALIGNMENT (mode);
39534 if (MEM_ALIGN (op) < align)
39535 set_mem_align (op, align);
39536 }
39537 else
39538 {
39539 /* This must be a register. */
39540 if (VECTOR_MODE_P (mode))
39541 op = safe_vector_operand (op, mode);
39542
39543 op = fixup_modeless_constant (op, mode);
39544
39545 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
39546 op = copy_to_mode_reg (mode, op);
39547 else
39548 {
39549 op = copy_to_reg (op);
39550 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
39551 }
39552 }
39553 }
39554
39555 args[i].op = op;
39556 args[i].mode = mode;
39557 }
39558
39559 switch (nargs)
39560 {
39561 case 0:
39562 pat = GEN_FCN (icode) (target);
39563 break;
39564 case 1:
39565 pat = GEN_FCN (icode) (target, args[0].op);
39566 break;
39567 case 2:
39568 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
39569 break;
39570 case 3:
39571 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
39572 break;
39573 default:
39574 gcc_unreachable ();
39575 }
39576
39577 if (! pat)
39578 return 0;
39579 emit_insn (pat);
39580 return klass == store ? 0 : target;
39581 }
39582
39583 /* Return the integer constant in ARG. Constrain it to be in the range
39584 of the subparts of VEC_TYPE; issue an error if not. */
39585
39586 static int
39587 get_element_number (tree vec_type, tree arg)
39588 {
39589 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
39590
39591 if (!tree_fits_uhwi_p (arg)
39592 || (elt = tree_to_uhwi (arg), elt > max))
39593 {
39594 error ("selector must be an integer constant in the range 0..%wi", max);
39595 return 0;
39596 }
39597
39598 return elt;
39599 }
39600
39601 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
39602 ix86_expand_vector_init. We DO have language-level syntax for this, in
39603 the form of (type){ init-list }. Except that since we can't place emms
39604 instructions from inside the compiler, we can't allow the use of MMX
39605 registers unless the user explicitly asks for it. So we do *not* define
39606 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
39607 we have builtins invoked by mmintrin.h that give us license to emit
39608 these sorts of instructions. */
39609
39610 static rtx
39611 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
39612 {
39613 machine_mode tmode = TYPE_MODE (type);
39614 machine_mode inner_mode = GET_MODE_INNER (tmode);
39615 int i, n_elt = GET_MODE_NUNITS (tmode);
39616 rtvec v = rtvec_alloc (n_elt);
39617
39618 gcc_assert (VECTOR_MODE_P (tmode));
39619 gcc_assert (call_expr_nargs (exp) == n_elt);
39620
39621 for (i = 0; i < n_elt; ++i)
39622 {
39623 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
39624 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
39625 }
39626
39627 if (!target || !register_operand (target, tmode))
39628 target = gen_reg_rtx (tmode);
39629
39630 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
39631 return target;
39632 }
39633
39634 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
39635 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
39636 had a language-level syntax for referencing vector elements. */
39637
39638 static rtx
39639 ix86_expand_vec_ext_builtin (tree exp, rtx target)
39640 {
39641 machine_mode tmode, mode0;
39642 tree arg0, arg1;
39643 int elt;
39644 rtx op0;
39645
39646 arg0 = CALL_EXPR_ARG (exp, 0);
39647 arg1 = CALL_EXPR_ARG (exp, 1);
39648
39649 op0 = expand_normal (arg0);
39650 elt = get_element_number (TREE_TYPE (arg0), arg1);
39651
39652 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
39653 mode0 = TYPE_MODE (TREE_TYPE (arg0));
39654 gcc_assert (VECTOR_MODE_P (mode0));
39655
39656 op0 = force_reg (mode0, op0);
39657
39658 if (optimize || !target || !register_operand (target, tmode))
39659 target = gen_reg_rtx (tmode);
39660
39661 ix86_expand_vector_extract (true, target, op0, elt);
39662
39663 return target;
39664 }
39665
39666 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
39667 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
39668 a language-level syntax for referencing vector elements. */
39669
39670 static rtx
39671 ix86_expand_vec_set_builtin (tree exp)
39672 {
39673 machine_mode tmode, mode1;
39674 tree arg0, arg1, arg2;
39675 int elt;
39676 rtx op0, op1, target;
39677
39678 arg0 = CALL_EXPR_ARG (exp, 0);
39679 arg1 = CALL_EXPR_ARG (exp, 1);
39680 arg2 = CALL_EXPR_ARG (exp, 2);
39681
39682 tmode = TYPE_MODE (TREE_TYPE (arg0));
39683 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
39684 gcc_assert (VECTOR_MODE_P (tmode));
39685
39686 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
39687 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
39688 elt = get_element_number (TREE_TYPE (arg0), arg2);
39689
39690 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
39691 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
39692
39693 op0 = force_reg (tmode, op0);
39694 op1 = force_reg (mode1, op1);
39695
39696 /* OP0 is the source of these builtin functions and shouldn't be
39697 modified. Create a copy, use it and return it as target. */
39698 target = gen_reg_rtx (tmode);
39699 emit_move_insn (target, op0);
39700 ix86_expand_vector_set (true, target, op1, elt);
39701
39702 return target;
39703 }
39704
39705 /* Emit conditional move of SRC to DST with condition
39706 OP1 CODE OP2. */
39707 static void
39708 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
39709 {
39710 rtx t;
39711
39712 if (TARGET_CMOVE)
39713 {
39714 t = ix86_expand_compare (code, op1, op2);
39715 emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
39716 src, dst)));
39717 }
39718 else
39719 {
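/* No CMOV available: branch around a plain move, jumping to NOMOVE
   when the condition does not hold.  */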
39720 rtx_code_label *nomove = gen_label_rtx ();
39721 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
39722 const0_rtx, GET_MODE (op1), 1, nomove);
39723 emit_move_insn (dst, src);
39724 emit_label (nomove);
39725 }
39726 }
39727
39728 /* Choose the unsigned max of DST and SRC and put it in DST. */
39729 static void
39730 ix86_emit_move_max (rtx dst, rtx src)
39731 {
39732 ix86_emit_cmove (dst, src, LTU, dst, src);
39733 }
39734
39735 /* Expand an expression EXP that calls a built-in function,
39736 with result going to TARGET if that's convenient
39737 (and in mode MODE if that's convenient).
39738 SUBTARGET may be used as the target for computing one of EXP's operands.
39739 IGNORE is nonzero if the value is to be ignored. */
39740
39741 static rtx
39742 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
39743 machine_mode mode, int ignore)
39744 {
39745 const struct builtin_description *d;
39746 size_t i;
39747 enum insn_code icode;
39748 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
39749 tree arg0, arg1, arg2, arg3, arg4;
39750 rtx op0, op1, op2, op3, op4, pat, insn;
39751 machine_mode mode0, mode1, mode2, mode3, mode4;
39752 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
39753
39754 /* For CPU builtins that can be folded, fold first and expand the fold. */
39755 switch (fcode)
39756 {
39757 case IX86_BUILTIN_CPU_INIT:
39758 {
39759 /* Make it call __cpu_indicator_init in libgcc. */
39760 tree call_expr, fndecl, type;
39761 type = build_function_type_list (integer_type_node, NULL_TREE);
39762 fndecl = build_fn_decl ("__cpu_indicator_init", type);
39763 call_expr = build_call_expr (fndecl, 0);
39764 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
39765 }
39766 case IX86_BUILTIN_CPU_IS:
39767 case IX86_BUILTIN_CPU_SUPPORTS:
39768 {
39769 tree arg0 = CALL_EXPR_ARG (exp, 0);
39770 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
39771 gcc_assert (fold_expr != NULL_TREE);
39772 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
39773 }
39774 }
39775
39776 /* Determine whether the builtin function is available under the current ISA.
39777 Originally the builtin was not created if it wasn't applicable to the
39778 current ISA based on the command line switches. With function specific
39779 options, we need to check in the context of the function making the call
39780 whether it is supported. */
39781 if (ix86_builtins_isa[fcode].isa
39782 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
39783 {
39784 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
39785 NULL, (enum fpmath_unit) 0, false);
39786
39787 if (!opts)
39788 error ("%qE needs unknown isa option", fndecl);
39789 else
39790 {
39791 gcc_assert (opts != NULL);
39792 error ("%qE needs isa option %s", fndecl, opts);
39793 free (opts);
39794 }
39795 return const0_rtx;
39796 }
39797
39798 switch (fcode)
39799 {
39800 case IX86_BUILTIN_BNDMK:
39801 if (!target
39802 || GET_MODE (target) != BNDmode
39803 || !register_operand (target, BNDmode))
39804 target = gen_reg_rtx (BNDmode);
39805
39806 arg0 = CALL_EXPR_ARG (exp, 0);
39807 arg1 = CALL_EXPR_ARG (exp, 1);
39808
39809 op0 = expand_normal (arg0);
39810 op1 = expand_normal (arg1);
39811
39812 if (!register_operand (op0, Pmode))
39813 op0 = ix86_zero_extend_to_Pmode (op0);
39814 if (!register_operand (op1, Pmode))
39815 op1 = ix86_zero_extend_to_Pmode (op1);
39816
39817 /* Builtin arg1 is the size of the block, but instruction op1 should
39818 be (size - 1). */
39819 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
39820 NULL_RTX, 1, OPTAB_DIRECT);
39821
39822 emit_insn (BNDmode == BND64mode
39823 ? gen_bnd64_mk (target, op0, op1)
39824 : gen_bnd32_mk (target, op0, op1));
39825 return target;
39826
39827 case IX86_BUILTIN_BNDSTX:
39828 arg0 = CALL_EXPR_ARG (exp, 0);
39829 arg1 = CALL_EXPR_ARG (exp, 1);
39830 arg2 = CALL_EXPR_ARG (exp, 2);
39831
39832 op0 = expand_normal (arg0);
39833 op1 = expand_normal (arg1);
39834 op2 = expand_normal (arg2);
39835
39836 if (!register_operand (op0, Pmode))
39837 op0 = ix86_zero_extend_to_Pmode (op0);
39838 if (!register_operand (op1, BNDmode))
39839 op1 = copy_to_mode_reg (BNDmode, op1);
39840 if (!register_operand (op2, Pmode))
39841 op2 = ix86_zero_extend_to_Pmode (op2);
39842
39843 emit_insn (BNDmode == BND64mode
39844 ? gen_bnd64_stx (op2, op0, op1)
39845 : gen_bnd32_stx (op2, op0, op1));
39846 return 0;
39847
39848 case IX86_BUILTIN_BNDLDX:
39849 if (!target
39850 || GET_MODE (target) != BNDmode
39851 || !register_operand (target, BNDmode))
39852 target = gen_reg_rtx (BNDmode);
39853
39854 arg0 = CALL_EXPR_ARG (exp, 0);
39855 arg1 = CALL_EXPR_ARG (exp, 1);
39856
39857 op0 = expand_normal (arg0);
39858 op1 = expand_normal (arg1);
39859
39860 if (!register_operand (op0, Pmode))
39861 op0 = ix86_zero_extend_to_Pmode (op0);
39862 if (!register_operand (op1, Pmode))
39863 op1 = ix86_zero_extend_to_Pmode (op1);
39864
39865 emit_insn (BNDmode == BND64mode
39866 ? gen_bnd64_ldx (target, op0, op1)
39867 : gen_bnd32_ldx (target, op0, op1));
39868 return target;
39869
39870 case IX86_BUILTIN_BNDCL:
39871 arg0 = CALL_EXPR_ARG (exp, 0);
39872 arg1 = CALL_EXPR_ARG (exp, 1);
39873
39874 op0 = expand_normal (arg0);
39875 op1 = expand_normal (arg1);
39876
39877 if (!register_operand (op0, Pmode))
39878 op0 = ix86_zero_extend_to_Pmode (op0);
39879 if (!register_operand (op1, BNDmode))
39880 op1 = copy_to_mode_reg (BNDmode, op1);
39881
39882 emit_insn (BNDmode == BND64mode
39883 ? gen_bnd64_cl (op1, op0)
39884 : gen_bnd32_cl (op1, op0));
39885 return 0;
39886
39887 case IX86_BUILTIN_BNDCU:
39888 arg0 = CALL_EXPR_ARG (exp, 0);
39889 arg1 = CALL_EXPR_ARG (exp, 1);
39890
39891 op0 = expand_normal (arg0);
39892 op1 = expand_normal (arg1);
39893
39894 if (!register_operand (op0, Pmode))
39895 op0 = ix86_zero_extend_to_Pmode (op0);
39896 if (!register_operand (op1, BNDmode))
39897 op1 = copy_to_mode_reg (BNDmode, op1);
39898
39899 emit_insn (BNDmode == BND64mode
39900 ? gen_bnd64_cu (op1, op0)
39901 : gen_bnd32_cu (op1, op0));
39902 return 0;
39903
39904 case IX86_BUILTIN_BNDRET:
39905 arg0 = CALL_EXPR_ARG (exp, 0);
39906 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
39907 target = chkp_get_rtl_bounds (arg0);
39908
39909 /* If no bounds were specified for the returned value,
39910 then use INIT bounds. This usually happens when
39911 some built-in function is expanded. */
39912 if (!target)
39913 {
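/* INIT bounds allow any access: lower bound 0 and upper bound -1
   (the maximal address).  */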
39914 rtx t1 = gen_reg_rtx (Pmode);
39915 rtx t2 = gen_reg_rtx (Pmode);
39916 target = gen_reg_rtx (BNDmode);
39917 emit_move_insn (t1, const0_rtx);
39918 emit_move_insn (t2, constm1_rtx);
39919 emit_insn (BNDmode == BND64mode
39920 ? gen_bnd64_mk (target, t1, t2)
39921 : gen_bnd32_mk (target, t1, t2));
39922 }
39923
39924 gcc_assert (target && REG_P (target));
39925 return target;
39926
39927 case IX86_BUILTIN_BNDNARROW:
39928 {
39929 rtx m1, m1h1, m1h2, lb, ub, t1;
39930
39931 /* Return value and lb. */
39932 arg0 = CALL_EXPR_ARG (exp, 0);
39933 /* Bounds. */
39934 arg1 = CALL_EXPR_ARG (exp, 1);
39935 /* Size. */
39936 arg2 = CALL_EXPR_ARG (exp, 2);
39937
39938 lb = expand_normal (arg0);
39939 op1 = expand_normal (arg1);
39940 op2 = expand_normal (arg2);
39941
39942 /* Size was passed but we need to use (size - 1) as for bndmk. */
39943 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
39944 NULL_RTX, 1, OPTAB_DIRECT);
39945
39946 /* Add LB to size and invert to get UB. */
39947 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
39948 op2, 1, OPTAB_DIRECT);
39949 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
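/* At this point UB holds, in effect, ~(LB + SIZE - 1), the one's
   complement encoding expected by the bound registers. For illustration
   with hypothetical values: LB == 0x1000 and SIZE == 0x10 give an
   encoded UB of ~0x100f. */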
39950
39951 if (!register_operand (lb, Pmode))
39952 lb = ix86_zero_extend_to_Pmode (lb);
39953 if (!register_operand (ub, Pmode))
39954 ub = ix86_zero_extend_to_Pmode (ub);
39955
39956 /* We need to move bounds to memory before any computations. */
39957 if (MEM_P (op1))
39958 m1 = op1;
39959 else
39960 {
39961 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
39962 emit_move_insn (m1, op1);
39963 }
39964
39965 /* Generate mem expressions to be used to access LB and UB. */
39966 m1h1 = adjust_address (m1, Pmode, 0);
39967 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
39968
39969 t1 = gen_reg_rtx (Pmode);
39970
39971 /* Compute LB. */
39972 emit_move_insn (t1, m1h1);
39973 ix86_emit_move_max (t1, lb);
39974 emit_move_insn (m1h1, t1);
39975
39976 /* Compute UB. UB is stored in 1's complement form. Therefore
39977 we also use max here. */
39978 emit_move_insn (t1, m1h2);
39979 ix86_emit_move_max (t1, ub);
39980 emit_move_insn (m1h2, t1);
39981
39982 op2 = gen_reg_rtx (BNDmode);
39983 emit_move_insn (op2, m1);
39984
39985 return chkp_join_splitted_slot (lb, op2);
39986 }
39987
39988 case IX86_BUILTIN_BNDINT:
39989 {
39990 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
39991
39992 if (!target
39993 || GET_MODE (target) != BNDmode
39994 || !register_operand (target, BNDmode))
39995 target = gen_reg_rtx (BNDmode);
39996
39997 arg0 = CALL_EXPR_ARG (exp, 0);
39998 arg1 = CALL_EXPR_ARG (exp, 1);
39999
40000 op0 = expand_normal (arg0);
40001 op1 = expand_normal (arg1);
40002
40003 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
40004 rh1 = adjust_address (res, Pmode, 0);
40005 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
40006
40007 /* Put the first bounds into temporaries. */
40008 lb1 = gen_reg_rtx (Pmode);
40009 ub1 = gen_reg_rtx (Pmode);
40010 if (MEM_P (op0))
40011 {
40012 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
40013 emit_move_insn (ub1, adjust_address (op0, Pmode,
40014 GET_MODE_SIZE (Pmode)));
40015 }
40016 else
40017 {
40018 emit_move_insn (res, op0);
40019 emit_move_insn (lb1, rh1);
40020 emit_move_insn (ub1, rh2);
40021 }
40022
40023 /* Put the second bounds into temporaries. */
40024 lb2 = gen_reg_rtx (Pmode);
40025 ub2 = gen_reg_rtx (Pmode);
40026 if (MEM_P (op1))
40027 {
40028 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
40029 emit_move_insn (ub2, adjust_address (op1, Pmode,
40030 GET_MODE_SIZE (Pmode)));
40031 }
40032 else
40033 {
40034 emit_move_insn (res, op1);
40035 emit_move_insn (lb2, rh1);
40036 emit_move_insn (ub2, rh2);
40037 }
40038
40039 /* Compute LB. */
40040 ix86_emit_move_max (lb1, lb2);
40041 emit_move_insn (rh1, lb1);
40042
40043 /* Compute UB. UB is stored in 1's complement form. Therefore
40044 we also use max here. */
40045 ix86_emit_move_max (ub1, ub2);
40046 emit_move_insn (rh2, ub1);
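/* Roughly: the intersection of [lb1, ub1] and [lb2, ub2] is
   [max (lb1, lb2), min (ub1, ub2)]; with the upper bounds kept in 1's
   complement form, min (ub1, ub2) corresponds to max (~ub1, ~ub2),
   which is why max is used for both halves above. */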
40047
40048 emit_move_insn (target, res);
40049
40050 return target;
40051 }
40052
40053 case IX86_BUILTIN_SIZEOF:
40054 {
40055 tree name;
40056 rtx symbol;
40057
40058 if (!target
40059 || GET_MODE (target) != Pmode
40060 || !register_operand (target, Pmode))
40061 target = gen_reg_rtx (Pmode);
40062
40063 arg0 = CALL_EXPR_ARG (exp, 0);
40064 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
40065
40066 name = DECL_ASSEMBLER_NAME (arg0);
40067 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
40068
40069 emit_insn (Pmode == SImode
40070 ? gen_move_size_reloc_si (target, symbol)
40071 : gen_move_size_reloc_di (target, symbol));
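/* The move_size_reloc patterns presumably load SYMBOL@SIZE, i.e. the
   object's size as resolved at link time, so no run-time computation
   of the size is needed here. */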
40072
40073 return target;
40074 }
40075
40076 case IX86_BUILTIN_BNDLOWER:
40077 {
40078 rtx mem, hmem;
40079
40080 if (!target
40081 || GET_MODE (target) != Pmode
40082 || !register_operand (target, Pmode))
40083 target = gen_reg_rtx (Pmode);
40084
40085 arg0 = CALL_EXPR_ARG (exp, 0);
40086 op0 = expand_normal (arg0);
40087
40088 /* We need to move bounds to memory first. */
40089 if (MEM_P (op0))
40090 mem = op0;
40091 else
40092 {
40093 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
40094 emit_move_insn (mem, op0);
40095 }
40096
40097 /* Generate mem expression to access LB and load it. */
40098 hmem = adjust_address (mem, Pmode, 0);
40099 emit_move_insn (target, hmem);
40100
40101 return target;
40102 }
40103
40104 case IX86_BUILTIN_BNDUPPER:
40105 {
40106 rtx mem, hmem, res;
40107
40108 if (!target
40109 || GET_MODE (target) != Pmode
40110 || !register_operand (target, Pmode))
40111 target = gen_reg_rtx (Pmode);
40112
40113 arg0 = CALL_EXPR_ARG (exp, 0);
40114 op0 = expand_normal (arg0);
40115
40116 /* We need to move bounds to memory first. */
40117 if (MEM_P (op0))
40118 mem = op0;
40119 else
40120 {
40121 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
40122 emit_move_insn (mem, op0);
40123 }
40124
40125 /* Generate mem expression to access UB. */
40126 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
40127
40128 /* We need to invert all bits of UB. */
40129 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
40130
40131 if (res != target)
40132 emit_move_insn (target, res);
40133
40134 return target;
40135 }
40136
40137 case IX86_BUILTIN_MASKMOVQ:
40138 case IX86_BUILTIN_MASKMOVDQU:
40139 icode = (fcode == IX86_BUILTIN_MASKMOVQ
40140 ? CODE_FOR_mmx_maskmovq
40141 : CODE_FOR_sse2_maskmovdqu);
40142 /* Note the arg order is different from the operand order. */
40143 arg1 = CALL_EXPR_ARG (exp, 0);
40144 arg2 = CALL_EXPR_ARG (exp, 1);
40145 arg0 = CALL_EXPR_ARG (exp, 2);
40146 op0 = expand_normal (arg0);
40147 op1 = expand_normal (arg1);
40148 op2 = expand_normal (arg2);
40149 mode0 = insn_data[icode].operand[0].mode;
40150 mode1 = insn_data[icode].operand[1].mode;
40151 mode2 = insn_data[icode].operand[2].mode;
40152
40153 op0 = ix86_zero_extend_to_Pmode (op0);
40154 op0 = gen_rtx_MEM (mode1, op0);
40155
40156 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40157 op0 = copy_to_mode_reg (mode0, op0);
40158 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40159 op1 = copy_to_mode_reg (mode1, op1);
40160 if (!insn_data[icode].operand[2].predicate (op2, mode2))
40161 op2 = copy_to_mode_reg (mode2, op2);
40162 pat = GEN_FCN (icode) (op0, op1, op2);
40163 if (! pat)
40164 return 0;
40165 emit_insn (pat);
40166 return 0;
40167
40168 case IX86_BUILTIN_LDMXCSR:
40169 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
40170 target = assign_386_stack_local (SImode, SLOT_TEMP);
40171 emit_move_insn (target, op0);
40172 emit_insn (gen_sse_ldmxcsr (target));
40173 return 0;
40174
40175 case IX86_BUILTIN_STMXCSR:
40176 target = assign_386_stack_local (SImode, SLOT_TEMP);
40177 emit_insn (gen_sse_stmxcsr (target));
40178 return copy_to_mode_reg (SImode, target);
40179
40180 case IX86_BUILTIN_CLFLUSH:
40181 arg0 = CALL_EXPR_ARG (exp, 0);
40182 op0 = expand_normal (arg0);
40183 icode = CODE_FOR_sse2_clflush;
40184 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40185 op0 = ix86_zero_extend_to_Pmode (op0);
40186
40187 emit_insn (gen_sse2_clflush (op0));
40188 return 0;
40189
40190 case IX86_BUILTIN_CLWB:
40191 arg0 = CALL_EXPR_ARG (exp, 0);
40192 op0 = expand_normal (arg0);
40193 icode = CODE_FOR_clwb;
40194 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40195 op0 = ix86_zero_extend_to_Pmode (op0);
40196
40197 emit_insn (gen_clwb (op0));
40198 return 0;
40199
40200 case IX86_BUILTIN_CLFLUSHOPT:
40201 arg0 = CALL_EXPR_ARG (exp, 0);
40202 op0 = expand_normal (arg0);
40203 icode = CODE_FOR_clflushopt;
40204 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40205 op0 = ix86_zero_extend_to_Pmode (op0);
40206
40207 emit_insn (gen_clflushopt (op0));
40208 return 0;
40209
40210 case IX86_BUILTIN_MONITOR:
40211 case IX86_BUILTIN_MONITORX:
40212 arg0 = CALL_EXPR_ARG (exp, 0);
40213 arg1 = CALL_EXPR_ARG (exp, 1);
40214 arg2 = CALL_EXPR_ARG (exp, 2);
40215 op0 = expand_normal (arg0);
40216 op1 = expand_normal (arg1);
40217 op2 = expand_normal (arg2);
40218 if (!REG_P (op0))
40219 op0 = ix86_zero_extend_to_Pmode (op0);
40220 if (!REG_P (op1))
40221 op1 = copy_to_mode_reg (SImode, op1);
40222 if (!REG_P (op2))
40223 op2 = copy_to_mode_reg (SImode, op2);
40224
40225 emit_insn (fcode == IX86_BUILTIN_MONITOR
40226 ? ix86_gen_monitor (op0, op1, op2)
40227 : ix86_gen_monitorx (op0, op1, op2));
40228 return 0;
40229
40230 case IX86_BUILTIN_MWAIT:
40231 arg0 = CALL_EXPR_ARG (exp, 0);
40232 arg1 = CALL_EXPR_ARG (exp, 1);
40233 op0 = expand_normal (arg0);
40234 op1 = expand_normal (arg1);
40235 if (!REG_P (op0))
40236 op0 = copy_to_mode_reg (SImode, op0);
40237 if (!REG_P (op1))
40238 op1 = copy_to_mode_reg (SImode, op1);
40239 emit_insn (gen_sse3_mwait (op0, op1));
40240 return 0;
40241
40242 case IX86_BUILTIN_MWAITX:
40243 arg0 = CALL_EXPR_ARG (exp, 0);
40244 arg1 = CALL_EXPR_ARG (exp, 1);
40245 arg2 = CALL_EXPR_ARG (exp, 2);
40246 op0 = expand_normal (arg0);
40247 op1 = expand_normal (arg1);
40248 op2 = expand_normal (arg2);
40249 if (!REG_P (op0))
40250 op0 = copy_to_mode_reg (SImode, op0);
40251 if (!REG_P (op1))
40252 op1 = copy_to_mode_reg (SImode, op1);
40253 if (!REG_P (op2))
40254 op2 = copy_to_mode_reg (SImode, op2);
40255 emit_insn (gen_mwaitx (op0, op1, op2));
40256 return 0;
40257
40258 case IX86_BUILTIN_VEC_INIT_V2SI:
40259 case IX86_BUILTIN_VEC_INIT_V4HI:
40260 case IX86_BUILTIN_VEC_INIT_V8QI:
40261 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
40262
40263 case IX86_BUILTIN_VEC_EXT_V2DF:
40264 case IX86_BUILTIN_VEC_EXT_V2DI:
40265 case IX86_BUILTIN_VEC_EXT_V4SF:
40266 case IX86_BUILTIN_VEC_EXT_V4SI:
40267 case IX86_BUILTIN_VEC_EXT_V8HI:
40268 case IX86_BUILTIN_VEC_EXT_V2SI:
40269 case IX86_BUILTIN_VEC_EXT_V4HI:
40270 case IX86_BUILTIN_VEC_EXT_V16QI:
40271 return ix86_expand_vec_ext_builtin (exp, target);
40272
40273 case IX86_BUILTIN_VEC_SET_V2DI:
40274 case IX86_BUILTIN_VEC_SET_V4SF:
40275 case IX86_BUILTIN_VEC_SET_V4SI:
40276 case IX86_BUILTIN_VEC_SET_V8HI:
40277 case IX86_BUILTIN_VEC_SET_V4HI:
40278 case IX86_BUILTIN_VEC_SET_V16QI:
40279 return ix86_expand_vec_set_builtin (exp);
40280
40281 case IX86_BUILTIN_INFQ:
40282 case IX86_BUILTIN_HUGE_VALQ:
40283 {
40284 REAL_VALUE_TYPE inf;
40285 rtx tmp;
40286
40287 real_inf (&inf);
40288 tmp = const_double_from_real_value (inf, mode);
40289
40290 tmp = validize_mem (force_const_mem (mode, tmp));
40291
40292 if (target == 0)
40293 target = gen_reg_rtx (mode);
40294
40295 emit_move_insn (target, tmp);
40296 return target;
40297 }
40298
40299 case IX86_BUILTIN_RDPMC:
40300 case IX86_BUILTIN_RDTSC:
40301 case IX86_BUILTIN_RDTSCP:
40302
40303 op0 = gen_reg_rtx (DImode);
40304 op1 = gen_reg_rtx (DImode);
40305
40306 if (fcode == IX86_BUILTIN_RDPMC)
40307 {
40308 arg0 = CALL_EXPR_ARG (exp, 0);
40309 op2 = expand_normal (arg0);
40310 if (!register_operand (op2, SImode))
40311 op2 = copy_to_mode_reg (SImode, op2);
40312
40313 insn = (TARGET_64BIT
40314 ? gen_rdpmc_rex64 (op0, op1, op2)
40315 : gen_rdpmc (op0, op2));
40316 emit_insn (insn);
40317 }
40318 else if (fcode == IX86_BUILTIN_RDTSC)
40319 {
40320 insn = (TARGET_64BIT
40321 ? gen_rdtsc_rex64 (op0, op1)
40322 : gen_rdtsc (op0));
40323 emit_insn (insn);
40324 }
40325 else
40326 {
40327 op2 = gen_reg_rtx (SImode);
40328
40329 insn = (TARGET_64BIT
40330 ? gen_rdtscp_rex64 (op0, op1, op2)
40331 : gen_rdtscp (op0, op2));
40332 emit_insn (insn);
40333
40334 arg0 = CALL_EXPR_ARG (exp, 0);
40335 op4 = expand_normal (arg0);
40336 if (!address_operand (op4, VOIDmode))
40337 {
40338 op4 = convert_memory_address (Pmode, op4);
40339 op4 = copy_addr_to_reg (op4);
40340 }
40341 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
40342 }
40343
40344 if (target == 0)
40345 {
40346 /* mode is VOIDmode if __builtin_rd* has been called
40347 without lhs. */
40348 if (mode == VOIDmode)
40349 return target;
40350 target = gen_reg_rtx (mode);
40351 }
40352
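/* On 64-bit targets the expanders above return the result split into a
   low half (op0) and a high half (op1), mirroring the EDX:EAX output of
   the instructions; the shift/IOR below merges them into a single
   DImode value. */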
40353 if (TARGET_64BIT)
40354 {
40355 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
40356 op1, 1, OPTAB_DIRECT);
40357 op0 = expand_simple_binop (DImode, IOR, op0, op1,
40358 op0, 1, OPTAB_DIRECT);
40359 }
40360
40361 emit_move_insn (target, op0);
40362 return target;
40363
40364 case IX86_BUILTIN_FXSAVE:
40365 case IX86_BUILTIN_FXRSTOR:
40366 case IX86_BUILTIN_FXSAVE64:
40367 case IX86_BUILTIN_FXRSTOR64:
40368 case IX86_BUILTIN_FNSTENV:
40369 case IX86_BUILTIN_FLDENV:
40370 mode0 = BLKmode;
40371 switch (fcode)
40372 {
40373 case IX86_BUILTIN_FXSAVE:
40374 icode = CODE_FOR_fxsave;
40375 break;
40376 case IX86_BUILTIN_FXRSTOR:
40377 icode = CODE_FOR_fxrstor;
40378 break;
40379 case IX86_BUILTIN_FXSAVE64:
40380 icode = CODE_FOR_fxsave64;
40381 break;
40382 case IX86_BUILTIN_FXRSTOR64:
40383 icode = CODE_FOR_fxrstor64;
40384 break;
40385 case IX86_BUILTIN_FNSTENV:
40386 icode = CODE_FOR_fnstenv;
40387 break;
40388 case IX86_BUILTIN_FLDENV:
40389 icode = CODE_FOR_fldenv;
40390 break;
40391 default:
40392 gcc_unreachable ();
40393 }
40394
40395 arg0 = CALL_EXPR_ARG (exp, 0);
40396 op0 = expand_normal (arg0);
40397
40398 if (!address_operand (op0, VOIDmode))
40399 {
40400 op0 = convert_memory_address (Pmode, op0);
40401 op0 = copy_addr_to_reg (op0);
40402 }
40403 op0 = gen_rtx_MEM (mode0, op0);
40404
40405 pat = GEN_FCN (icode) (op0);
40406 if (pat)
40407 emit_insn (pat);
40408 return 0;
40409
40410 case IX86_BUILTIN_XSAVE:
40411 case IX86_BUILTIN_XRSTOR:
40412 case IX86_BUILTIN_XSAVE64:
40413 case IX86_BUILTIN_XRSTOR64:
40414 case IX86_BUILTIN_XSAVEOPT:
40415 case IX86_BUILTIN_XSAVEOPT64:
40416 case IX86_BUILTIN_XSAVES:
40417 case IX86_BUILTIN_XRSTORS:
40418 case IX86_BUILTIN_XSAVES64:
40419 case IX86_BUILTIN_XRSTORS64:
40420 case IX86_BUILTIN_XSAVEC:
40421 case IX86_BUILTIN_XSAVEC64:
40422 arg0 = CALL_EXPR_ARG (exp, 0);
40423 arg1 = CALL_EXPR_ARG (exp, 1);
40424 op0 = expand_normal (arg0);
40425 op1 = expand_normal (arg1);
40426
40427 if (!address_operand (op0, VOIDmode))
40428 {
40429 op0 = convert_memory_address (Pmode, op0);
40430 op0 = copy_addr_to_reg (op0);
40431 }
40432 op0 = gen_rtx_MEM (BLKmode, op0);
40433
40434 op1 = force_reg (DImode, op1);
40435
40436 if (TARGET_64BIT)
40437 {
40438 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
40439 NULL, 1, OPTAB_DIRECT);
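/* The XSAVE-family instructions take the 64-bit feature mask in
   EDX:EAX; the shift above extracts the high 32 bits into op2, and the
   gen_lowpart calls below narrow op1/op2 to the SImode operands
   expected by the pattern. */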
40440 switch (fcode)
40441 {
40442 case IX86_BUILTIN_XSAVE:
40443 icode = CODE_FOR_xsave_rex64;
40444 break;
40445 case IX86_BUILTIN_XRSTOR:
40446 icode = CODE_FOR_xrstor_rex64;
40447 break;
40448 case IX86_BUILTIN_XSAVE64:
40449 icode = CODE_FOR_xsave64;
40450 break;
40451 case IX86_BUILTIN_XRSTOR64:
40452 icode = CODE_FOR_xrstor64;
40453 break;
40454 case IX86_BUILTIN_XSAVEOPT:
40455 icode = CODE_FOR_xsaveopt_rex64;
40456 break;
40457 case IX86_BUILTIN_XSAVEOPT64:
40458 icode = CODE_FOR_xsaveopt64;
40459 break;
40460 case IX86_BUILTIN_XSAVES:
40461 icode = CODE_FOR_xsaves_rex64;
40462 break;
40463 case IX86_BUILTIN_XRSTORS:
40464 icode = CODE_FOR_xrstors_rex64;
40465 break;
40466 case IX86_BUILTIN_XSAVES64:
40467 icode = CODE_FOR_xsaves64;
40468 break;
40469 case IX86_BUILTIN_XRSTORS64:
40470 icode = CODE_FOR_xrstors64;
40471 break;
40472 case IX86_BUILTIN_XSAVEC:
40473 icode = CODE_FOR_xsavec_rex64;
40474 break;
40475 case IX86_BUILTIN_XSAVEC64:
40476 icode = CODE_FOR_xsavec64;
40477 break;
40478 default:
40479 gcc_unreachable ();
40480 }
40481
40482 op2 = gen_lowpart (SImode, op2);
40483 op1 = gen_lowpart (SImode, op1);
40484 pat = GEN_FCN (icode) (op0, op1, op2);
40485 }
40486 else
40487 {
40488 switch (fcode)
40489 {
40490 case IX86_BUILTIN_XSAVE:
40491 icode = CODE_FOR_xsave;
40492 break;
40493 case IX86_BUILTIN_XRSTOR:
40494 icode = CODE_FOR_xrstor;
40495 break;
40496 case IX86_BUILTIN_XSAVEOPT:
40497 icode = CODE_FOR_xsaveopt;
40498 break;
40499 case IX86_BUILTIN_XSAVES:
40500 icode = CODE_FOR_xsaves;
40501 break;
40502 case IX86_BUILTIN_XRSTORS:
40503 icode = CODE_FOR_xrstors;
40504 break;
40505 case IX86_BUILTIN_XSAVEC:
40506 icode = CODE_FOR_xsavec;
40507 break;
40508 default:
40509 gcc_unreachable ();
40510 }
40511 pat = GEN_FCN (icode) (op0, op1);
40512 }
40513
40514 if (pat)
40515 emit_insn (pat);
40516 return 0;
40517
40518 case IX86_BUILTIN_LLWPCB:
40519 arg0 = CALL_EXPR_ARG (exp, 0);
40520 op0 = expand_normal (arg0);
40521 icode = CODE_FOR_lwp_llwpcb;
40522 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40523 op0 = ix86_zero_extend_to_Pmode (op0);
40524 emit_insn (gen_lwp_llwpcb (op0));
40525 return 0;
40526
40527 case IX86_BUILTIN_SLWPCB:
40528 icode = CODE_FOR_lwp_slwpcb;
40529 if (!target
40530 || !insn_data[icode].operand[0].predicate (target, Pmode))
40531 target = gen_reg_rtx (Pmode);
40532 emit_insn (gen_lwp_slwpcb (target));
40533 return target;
40534
40535 case IX86_BUILTIN_BEXTRI32:
40536 case IX86_BUILTIN_BEXTRI64:
40537 arg0 = CALL_EXPR_ARG (exp, 0);
40538 arg1 = CALL_EXPR_ARG (exp, 1);
40539 op0 = expand_normal (arg0);
40540 op1 = expand_normal (arg1);
40541 icode = (fcode == IX86_BUILTIN_BEXTRI32
40542 ? CODE_FOR_tbm_bextri_si
40543 : CODE_FOR_tbm_bextri_di);
40544 if (!CONST_INT_P (op1))
40545 {
40546 error ("last argument must be an immediate");
40547 return const0_rtx;
40548 }
40549 else
40550 {
40551 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
40552 unsigned char lsb_index = INTVAL (op1) & 0xFF;
40553 op1 = GEN_INT (length);
40554 op2 = GEN_INT (lsb_index);
40555 pat = GEN_FCN (icode) (target, op0, op1, op2);
40556 if (pat)
40557 emit_insn (pat);
40558 return target;
40559 }
40560
40561 case IX86_BUILTIN_RDRAND16_STEP:
40562 icode = CODE_FOR_rdrandhi_1;
40563 mode0 = HImode;
40564 goto rdrand_step;
40565
40566 case IX86_BUILTIN_RDRAND32_STEP:
40567 icode = CODE_FOR_rdrandsi_1;
40568 mode0 = SImode;
40569 goto rdrand_step;
40570
40571 case IX86_BUILTIN_RDRAND64_STEP:
40572 icode = CODE_FOR_rdranddi_1;
40573 mode0 = DImode;
40574
40575 rdrand_step:
40576 op0 = gen_reg_rtx (mode0);
40577 emit_insn (GEN_FCN (icode) (op0));
40578
40579 arg0 = CALL_EXPR_ARG (exp, 0);
40580 op1 = expand_normal (arg0);
40581 if (!address_operand (op1, VOIDmode))
40582 {
40583 op1 = convert_memory_address (Pmode, op1);
40584 op1 = copy_addr_to_reg (op1);
40585 }
40586 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
40587
40588 op1 = gen_reg_rtx (SImode);
40589 emit_move_insn (op1, CONST1_RTX (SImode));
40590
40591 /* Emit SImode conditional move. */
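/* The conditional move turns the carry flag set by rdrand (CF=1 on
   success) into the builtin's return value: nonzero when a random
   value was stored through the pointer, zero otherwise. */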
40592 if (mode0 == HImode)
40593 {
40594 op2 = gen_reg_rtx (SImode);
40595 emit_insn (gen_zero_extendhisi2 (op2, op0));
40596 }
40597 else if (mode0 == SImode)
40598 op2 = op0;
40599 else
40600 op2 = gen_rtx_SUBREG (SImode, op0, 0);
40601
40602 if (target == 0
40603 || !register_operand (target, SImode))
40604 target = gen_reg_rtx (SImode);
40605
40606 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
40607 const0_rtx);
40608 emit_insn (gen_rtx_SET (target,
40609 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
40610 return target;
40611
40612 case IX86_BUILTIN_RDSEED16_STEP:
40613 icode = CODE_FOR_rdseedhi_1;
40614 mode0 = HImode;
40615 goto rdseed_step;
40616
40617 case IX86_BUILTIN_RDSEED32_STEP:
40618 icode = CODE_FOR_rdseedsi_1;
40619 mode0 = SImode;
40620 goto rdseed_step;
40621
40622 case IX86_BUILTIN_RDSEED64_STEP:
40623 icode = CODE_FOR_rdseeddi_1;
40624 mode0 = DImode;
40625
40626 rdseed_step:
40627 op0 = gen_reg_rtx (mode0);
40628 emit_insn (GEN_FCN (icode) (op0));
40629
40630 arg0 = CALL_EXPR_ARG (exp, 0);
40631 op1 = expand_normal (arg0);
40632 if (!address_operand (op1, VOIDmode))
40633 {
40634 op1 = convert_memory_address (Pmode, op1);
40635 op1 = copy_addr_to_reg (op1);
40636 }
40637 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
40638
40639 op2 = gen_reg_rtx (QImode);
40640
40641 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
40642 const0_rtx);
40643 emit_insn (gen_rtx_SET (op2, pat));
40644
40645 if (target == 0
40646 || !register_operand (target, SImode))
40647 target = gen_reg_rtx (SImode);
40648
40649 emit_insn (gen_zero_extendqisi2 (target, op2));
40650 return target;
40651
40652 case IX86_BUILTIN_SBB32:
40653 icode = CODE_FOR_subborrowsi;
40654 mode0 = SImode;
40655 goto handlecarry;
40656
40657 case IX86_BUILTIN_SBB64:
40658 icode = CODE_FOR_subborrowdi;
40659 mode0 = DImode;
40660 goto handlecarry;
40661
40662 case IX86_BUILTIN_ADDCARRYX32:
40663 icode = CODE_FOR_addcarrysi;
40664 mode0 = SImode;
40665 goto handlecarry;
40666
40667 case IX86_BUILTIN_ADDCARRYX64:
40668 icode = CODE_FOR_addcarrydi;
40669 mode0 = DImode;
40670
40671 handlecarry:
40672 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
40673 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
40674 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
40675 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
40676
40677 op1 = expand_normal (arg0);
40678 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
40679
40680 op2 = expand_normal (arg1);
40681 if (!register_operand (op2, mode0))
40682 op2 = copy_to_mode_reg (mode0, op2);
40683
40684 op3 = expand_normal (arg2);
40685 if (!register_operand (op3, mode0))
40686 op3 = copy_to_mode_reg (mode0, op3);
40687
40688 op4 = expand_normal (arg3);
40689 if (!address_operand (op4, VOIDmode))
40690 {
40691 op4 = convert_memory_address (Pmode, op4);
40692 op4 = copy_addr_to_reg (op4);
40693 }
40694
40695 /* Generate CF from input operand. */
40696 emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
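/* Adding -1 (0xff in QImode) to the carry-in byte sets CF exactly when
   the byte is non-zero, so the flag consumed by the adc/sbb pattern
   below is CF = (c_in != 0). */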
40697
40698 /* Generate instruction that consumes CF. */
40699 op0 = gen_reg_rtx (mode0);
40700
40701 op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
40702 pat = gen_rtx_LTU (mode0, op1, const0_rtx);
40703 emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat));
40704
40705 /* Return current CF value. */
40706 if (target == 0)
40707 target = gen_reg_rtx (QImode);
40708
40709 PUT_MODE (pat, QImode);
40710 emit_insn (gen_rtx_SET (target, pat));
40711
40712 /* Store the result. */
40713 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
40714
40715 return target;
40716
40717 case IX86_BUILTIN_READ_FLAGS:
40718 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
40719
40720 if (optimize
40721 || target == NULL_RTX
40722 || !nonimmediate_operand (target, word_mode)
40723 || GET_MODE (target) != word_mode)
40724 target = gen_reg_rtx (word_mode);
40725
40726 emit_insn (gen_pop (target));
40727 return target;
40728
40729 case IX86_BUILTIN_WRITE_FLAGS:
40730
40731 arg0 = CALL_EXPR_ARG (exp, 0);
40732 op0 = expand_normal (arg0);
40733 if (!general_no_elim_operand (op0, word_mode))
40734 op0 = copy_to_mode_reg (word_mode, op0);
40735
40736 emit_insn (gen_push (op0));
40737 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
40738 return 0;
40739
40740 case IX86_BUILTIN_KORTESTC16:
40741 icode = CODE_FOR_kortestchi;
40742 mode0 = HImode;
40743 mode1 = CCCmode;
40744 goto kortest;
40745
40746 case IX86_BUILTIN_KORTESTZ16:
40747 icode = CODE_FOR_kortestzhi;
40748 mode0 = HImode;
40749 mode1 = CCZmode;
40750
40751 kortest:
40752 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
40753 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
40754 op0 = expand_normal (arg0);
40755 op1 = expand_normal (arg1);
40756
40757 op0 = copy_to_reg (op0);
40758 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
40759 op1 = copy_to_reg (op1);
40760 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
40761
40762 target = gen_reg_rtx (QImode);
40763 emit_insn (gen_rtx_SET (target, const0_rtx));
40764
40765 /* Emit kortest. */
40766 emit_insn (GEN_FCN (icode) (op0, op1));
40767 /* And use setcc to return result from flags. */
40768 ix86_expand_setcc (target, EQ,
40769 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
40770 return target;
40771
40772 case IX86_BUILTIN_GATHERSIV2DF:
40773 icode = CODE_FOR_avx2_gathersiv2df;
40774 goto gather_gen;
40775 case IX86_BUILTIN_GATHERSIV4DF:
40776 icode = CODE_FOR_avx2_gathersiv4df;
40777 goto gather_gen;
40778 case IX86_BUILTIN_GATHERDIV2DF:
40779 icode = CODE_FOR_avx2_gatherdiv2df;
40780 goto gather_gen;
40781 case IX86_BUILTIN_GATHERDIV4DF:
40782 icode = CODE_FOR_avx2_gatherdiv4df;
40783 goto gather_gen;
40784 case IX86_BUILTIN_GATHERSIV4SF:
40785 icode = CODE_FOR_avx2_gathersiv4sf;
40786 goto gather_gen;
40787 case IX86_BUILTIN_GATHERSIV8SF:
40788 icode = CODE_FOR_avx2_gathersiv8sf;
40789 goto gather_gen;
40790 case IX86_BUILTIN_GATHERDIV4SF:
40791 icode = CODE_FOR_avx2_gatherdiv4sf;
40792 goto gather_gen;
40793 case IX86_BUILTIN_GATHERDIV8SF:
40794 icode = CODE_FOR_avx2_gatherdiv8sf;
40795 goto gather_gen;
40796 case IX86_BUILTIN_GATHERSIV2DI:
40797 icode = CODE_FOR_avx2_gathersiv2di;
40798 goto gather_gen;
40799 case IX86_BUILTIN_GATHERSIV4DI:
40800 icode = CODE_FOR_avx2_gathersiv4di;
40801 goto gather_gen;
40802 case IX86_BUILTIN_GATHERDIV2DI:
40803 icode = CODE_FOR_avx2_gatherdiv2di;
40804 goto gather_gen;
40805 case IX86_BUILTIN_GATHERDIV4DI:
40806 icode = CODE_FOR_avx2_gatherdiv4di;
40807 goto gather_gen;
40808 case IX86_BUILTIN_GATHERSIV4SI:
40809 icode = CODE_FOR_avx2_gathersiv4si;
40810 goto gather_gen;
40811 case IX86_BUILTIN_GATHERSIV8SI:
40812 icode = CODE_FOR_avx2_gathersiv8si;
40813 goto gather_gen;
40814 case IX86_BUILTIN_GATHERDIV4SI:
40815 icode = CODE_FOR_avx2_gatherdiv4si;
40816 goto gather_gen;
40817 case IX86_BUILTIN_GATHERDIV8SI:
40818 icode = CODE_FOR_avx2_gatherdiv8si;
40819 goto gather_gen;
40820 case IX86_BUILTIN_GATHERALTSIV4DF:
40821 icode = CODE_FOR_avx2_gathersiv4df;
40822 goto gather_gen;
40823 case IX86_BUILTIN_GATHERALTDIV8SF:
40824 icode = CODE_FOR_avx2_gatherdiv8sf;
40825 goto gather_gen;
40826 case IX86_BUILTIN_GATHERALTSIV4DI:
40827 icode = CODE_FOR_avx2_gathersiv4di;
40828 goto gather_gen;
40829 case IX86_BUILTIN_GATHERALTDIV8SI:
40830 icode = CODE_FOR_avx2_gatherdiv8si;
40831 goto gather_gen;
40832 case IX86_BUILTIN_GATHER3SIV16SF:
40833 icode = CODE_FOR_avx512f_gathersiv16sf;
40834 goto gather_gen;
40835 case IX86_BUILTIN_GATHER3SIV8DF:
40836 icode = CODE_FOR_avx512f_gathersiv8df;
40837 goto gather_gen;
40838 case IX86_BUILTIN_GATHER3DIV16SF:
40839 icode = CODE_FOR_avx512f_gatherdiv16sf;
40840 goto gather_gen;
40841 case IX86_BUILTIN_GATHER3DIV8DF:
40842 icode = CODE_FOR_avx512f_gatherdiv8df;
40843 goto gather_gen;
40844 case IX86_BUILTIN_GATHER3SIV16SI:
40845 icode = CODE_FOR_avx512f_gathersiv16si;
40846 goto gather_gen;
40847 case IX86_BUILTIN_GATHER3SIV8DI:
40848 icode = CODE_FOR_avx512f_gathersiv8di;
40849 goto gather_gen;
40850 case IX86_BUILTIN_GATHER3DIV16SI:
40851 icode = CODE_FOR_avx512f_gatherdiv16si;
40852 goto gather_gen;
40853 case IX86_BUILTIN_GATHER3DIV8DI:
40854 icode = CODE_FOR_avx512f_gatherdiv8di;
40855 goto gather_gen;
40856 case IX86_BUILTIN_GATHER3ALTSIV8DF:
40857 icode = CODE_FOR_avx512f_gathersiv8df;
40858 goto gather_gen;
40859 case IX86_BUILTIN_GATHER3ALTDIV16SF:
40860 icode = CODE_FOR_avx512f_gatherdiv16sf;
40861 goto gather_gen;
40862 case IX86_BUILTIN_GATHER3ALTSIV8DI:
40863 icode = CODE_FOR_avx512f_gathersiv8di;
40864 goto gather_gen;
40865 case IX86_BUILTIN_GATHER3ALTDIV16SI:
40866 icode = CODE_FOR_avx512f_gatherdiv16si;
40867 goto gather_gen;
40868 case IX86_BUILTIN_GATHER3SIV2DF:
40869 icode = CODE_FOR_avx512vl_gathersiv2df;
40870 goto gather_gen;
40871 case IX86_BUILTIN_GATHER3SIV4DF:
40872 icode = CODE_FOR_avx512vl_gathersiv4df;
40873 goto gather_gen;
40874 case IX86_BUILTIN_GATHER3DIV2DF:
40875 icode = CODE_FOR_avx512vl_gatherdiv2df;
40876 goto gather_gen;
40877 case IX86_BUILTIN_GATHER3DIV4DF:
40878 icode = CODE_FOR_avx512vl_gatherdiv4df;
40879 goto gather_gen;
40880 case IX86_BUILTIN_GATHER3SIV4SF:
40881 icode = CODE_FOR_avx512vl_gathersiv4sf;
40882 goto gather_gen;
40883 case IX86_BUILTIN_GATHER3SIV8SF:
40884 icode = CODE_FOR_avx512vl_gathersiv8sf;
40885 goto gather_gen;
40886 case IX86_BUILTIN_GATHER3DIV4SF:
40887 icode = CODE_FOR_avx512vl_gatherdiv4sf;
40888 goto gather_gen;
40889 case IX86_BUILTIN_GATHER3DIV8SF:
40890 icode = CODE_FOR_avx512vl_gatherdiv8sf;
40891 goto gather_gen;
40892 case IX86_BUILTIN_GATHER3SIV2DI:
40893 icode = CODE_FOR_avx512vl_gathersiv2di;
40894 goto gather_gen;
40895 case IX86_BUILTIN_GATHER3SIV4DI:
40896 icode = CODE_FOR_avx512vl_gathersiv4di;
40897 goto gather_gen;
40898 case IX86_BUILTIN_GATHER3DIV2DI:
40899 icode = CODE_FOR_avx512vl_gatherdiv2di;
40900 goto gather_gen;
40901 case IX86_BUILTIN_GATHER3DIV4DI:
40902 icode = CODE_FOR_avx512vl_gatherdiv4di;
40903 goto gather_gen;
40904 case IX86_BUILTIN_GATHER3SIV4SI:
40905 icode = CODE_FOR_avx512vl_gathersiv4si;
40906 goto gather_gen;
40907 case IX86_BUILTIN_GATHER3SIV8SI:
40908 icode = CODE_FOR_avx512vl_gathersiv8si;
40909 goto gather_gen;
40910 case IX86_BUILTIN_GATHER3DIV4SI:
40911 icode = CODE_FOR_avx512vl_gatherdiv4si;
40912 goto gather_gen;
40913 case IX86_BUILTIN_GATHER3DIV8SI:
40914 icode = CODE_FOR_avx512vl_gatherdiv8si;
40915 goto gather_gen;
40916 case IX86_BUILTIN_GATHER3ALTSIV4DF:
40917 icode = CODE_FOR_avx512vl_gathersiv4df;
40918 goto gather_gen;
40919 case IX86_BUILTIN_GATHER3ALTDIV8SF:
40920 icode = CODE_FOR_avx512vl_gatherdiv8sf;
40921 goto gather_gen;
40922 case IX86_BUILTIN_GATHER3ALTSIV4DI:
40923 icode = CODE_FOR_avx512vl_gathersiv4di;
40924 goto gather_gen;
40925 case IX86_BUILTIN_GATHER3ALTDIV8SI:
40926 icode = CODE_FOR_avx512vl_gatherdiv8si;
40927 goto gather_gen;
40928 case IX86_BUILTIN_SCATTERSIV16SF:
40929 icode = CODE_FOR_avx512f_scattersiv16sf;
40930 goto scatter_gen;
40931 case IX86_BUILTIN_SCATTERSIV8DF:
40932 icode = CODE_FOR_avx512f_scattersiv8df;
40933 goto scatter_gen;
40934 case IX86_BUILTIN_SCATTERDIV16SF:
40935 icode = CODE_FOR_avx512f_scatterdiv16sf;
40936 goto scatter_gen;
40937 case IX86_BUILTIN_SCATTERDIV8DF:
40938 icode = CODE_FOR_avx512f_scatterdiv8df;
40939 goto scatter_gen;
40940 case IX86_BUILTIN_SCATTERSIV16SI:
40941 icode = CODE_FOR_avx512f_scattersiv16si;
40942 goto scatter_gen;
40943 case IX86_BUILTIN_SCATTERSIV8DI:
40944 icode = CODE_FOR_avx512f_scattersiv8di;
40945 goto scatter_gen;
40946 case IX86_BUILTIN_SCATTERDIV16SI:
40947 icode = CODE_FOR_avx512f_scatterdiv16si;
40948 goto scatter_gen;
40949 case IX86_BUILTIN_SCATTERDIV8DI:
40950 icode = CODE_FOR_avx512f_scatterdiv8di;
40951 goto scatter_gen;
40952 case IX86_BUILTIN_SCATTERSIV8SF:
40953 icode = CODE_FOR_avx512vl_scattersiv8sf;
40954 goto scatter_gen;
40955 case IX86_BUILTIN_SCATTERSIV4SF:
40956 icode = CODE_FOR_avx512vl_scattersiv4sf;
40957 goto scatter_gen;
40958 case IX86_BUILTIN_SCATTERSIV4DF:
40959 icode = CODE_FOR_avx512vl_scattersiv4df;
40960 goto scatter_gen;
40961 case IX86_BUILTIN_SCATTERSIV2DF:
40962 icode = CODE_FOR_avx512vl_scattersiv2df;
40963 goto scatter_gen;
40964 case IX86_BUILTIN_SCATTERDIV8SF:
40965 icode = CODE_FOR_avx512vl_scatterdiv8sf;
40966 goto scatter_gen;
40967 case IX86_BUILTIN_SCATTERDIV4SF:
40968 icode = CODE_FOR_avx512vl_scatterdiv4sf;
40969 goto scatter_gen;
40970 case IX86_BUILTIN_SCATTERDIV4DF:
40971 icode = CODE_FOR_avx512vl_scatterdiv4df;
40972 goto scatter_gen;
40973 case IX86_BUILTIN_SCATTERDIV2DF:
40974 icode = CODE_FOR_avx512vl_scatterdiv2df;
40975 goto scatter_gen;
40976 case IX86_BUILTIN_SCATTERSIV8SI:
40977 icode = CODE_FOR_avx512vl_scattersiv8si;
40978 goto scatter_gen;
40979 case IX86_BUILTIN_SCATTERSIV4SI:
40980 icode = CODE_FOR_avx512vl_scattersiv4si;
40981 goto scatter_gen;
40982 case IX86_BUILTIN_SCATTERSIV4DI:
40983 icode = CODE_FOR_avx512vl_scattersiv4di;
40984 goto scatter_gen;
40985 case IX86_BUILTIN_SCATTERSIV2DI:
40986 icode = CODE_FOR_avx512vl_scattersiv2di;
40987 goto scatter_gen;
40988 case IX86_BUILTIN_SCATTERDIV8SI:
40989 icode = CODE_FOR_avx512vl_scatterdiv8si;
40990 goto scatter_gen;
40991 case IX86_BUILTIN_SCATTERDIV4SI:
40992 icode = CODE_FOR_avx512vl_scatterdiv4si;
40993 goto scatter_gen;
40994 case IX86_BUILTIN_SCATTERDIV4DI:
40995 icode = CODE_FOR_avx512vl_scatterdiv4di;
40996 goto scatter_gen;
40997 case IX86_BUILTIN_SCATTERDIV2DI:
40998 icode = CODE_FOR_avx512vl_scatterdiv2di;
40999 goto scatter_gen;
41000 case IX86_BUILTIN_GATHERPFDPD:
41001 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
41002 goto vec_prefetch_gen;
41003 case IX86_BUILTIN_SCATTERALTSIV8DF:
41004 icode = CODE_FOR_avx512f_scattersiv8df;
41005 goto scatter_gen;
41006 case IX86_BUILTIN_SCATTERALTDIV16SF:
41007 icode = CODE_FOR_avx512f_scatterdiv16sf;
41008 goto scatter_gen;
41009 case IX86_BUILTIN_SCATTERALTSIV8DI:
41010 icode = CODE_FOR_avx512f_scattersiv8di;
41011 goto scatter_gen;
41012 case IX86_BUILTIN_SCATTERALTDIV16SI:
41013 icode = CODE_FOR_avx512f_scatterdiv16si;
41014 goto scatter_gen;
41015 case IX86_BUILTIN_GATHERPFDPS:
41016 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
41017 goto vec_prefetch_gen;
41018 case IX86_BUILTIN_GATHERPFQPD:
41019 icode = CODE_FOR_avx512pf_gatherpfv8didf;
41020 goto vec_prefetch_gen;
41021 case IX86_BUILTIN_GATHERPFQPS:
41022 icode = CODE_FOR_avx512pf_gatherpfv8disf;
41023 goto vec_prefetch_gen;
41024 case IX86_BUILTIN_SCATTERPFDPD:
41025 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
41026 goto vec_prefetch_gen;
41027 case IX86_BUILTIN_SCATTERPFDPS:
41028 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
41029 goto vec_prefetch_gen;
41030 case IX86_BUILTIN_SCATTERPFQPD:
41031 icode = CODE_FOR_avx512pf_scatterpfv8didf;
41032 goto vec_prefetch_gen;
41033 case IX86_BUILTIN_SCATTERPFQPS:
41034 icode = CODE_FOR_avx512pf_scatterpfv8disf;
41035 goto vec_prefetch_gen;
41036
41037 gather_gen:
41038 rtx half;
41039 rtx (*gen) (rtx, rtx);
41040
41041 arg0 = CALL_EXPR_ARG (exp, 0);
41042 arg1 = CALL_EXPR_ARG (exp, 1);
41043 arg2 = CALL_EXPR_ARG (exp, 2);
41044 arg3 = CALL_EXPR_ARG (exp, 3);
41045 arg4 = CALL_EXPR_ARG (exp, 4);
41046 op0 = expand_normal (arg0);
41047 op1 = expand_normal (arg1);
41048 op2 = expand_normal (arg2);
41049 op3 = expand_normal (arg3);
41050 op4 = expand_normal (arg4);
41051 /* Note the arg order is different from the operand order. */
41052 mode0 = insn_data[icode].operand[1].mode;
41053 mode2 = insn_data[icode].operand[3].mode;
41054 mode3 = insn_data[icode].operand[4].mode;
41055 mode4 = insn_data[icode].operand[5].mode;
41056
41057 if (target == NULL_RTX
41058 || GET_MODE (target) != insn_data[icode].operand[0].mode
41059 || !insn_data[icode].operand[0].predicate (target,
41060 GET_MODE (target)))
41061 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
41062 else
41063 subtarget = target;
41064
41065 switch (fcode)
41066 {
41067 case IX86_BUILTIN_GATHER3ALTSIV8DF:
41068 case IX86_BUILTIN_GATHER3ALTSIV8DI:
41069 half = gen_reg_rtx (V8SImode);
41070 if (!nonimmediate_operand (op2, V16SImode))
41071 op2 = copy_to_mode_reg (V16SImode, op2);
41072 emit_insn (gen_vec_extract_lo_v16si (half, op2));
41073 op2 = half;
41074 break;
41075 case IX86_BUILTIN_GATHER3ALTSIV4DF:
41076 case IX86_BUILTIN_GATHER3ALTSIV4DI:
41077 case IX86_BUILTIN_GATHERALTSIV4DF:
41078 case IX86_BUILTIN_GATHERALTSIV4DI:
41079 half = gen_reg_rtx (V4SImode);
41080 if (!nonimmediate_operand (op2, V8SImode))
41081 op2 = copy_to_mode_reg (V8SImode, op2);
41082 emit_insn (gen_vec_extract_lo_v8si (half, op2));
41083 op2 = half;
41084 break;
41085 case IX86_BUILTIN_GATHER3ALTDIV16SF:
41086 case IX86_BUILTIN_GATHER3ALTDIV16SI:
41087 half = gen_reg_rtx (mode0);
41088 if (mode0 == V8SFmode)
41089 gen = gen_vec_extract_lo_v16sf;
41090 else
41091 gen = gen_vec_extract_lo_v16si;
41092 if (!nonimmediate_operand (op0, GET_MODE (op0)))
41093 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
41094 emit_insn (gen (half, op0));
41095 op0 = half;
41096 if (GET_MODE (op3) != VOIDmode)
41097 {
41098 if (!nonimmediate_operand (op3, GET_MODE (op3)))
41099 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
41100 emit_insn (gen (half, op3));
41101 op3 = half;
41102 }
41103 break;
41104 case IX86_BUILTIN_GATHER3ALTDIV8SF:
41105 case IX86_BUILTIN_GATHER3ALTDIV8SI:
41106 case IX86_BUILTIN_GATHERALTDIV8SF:
41107 case IX86_BUILTIN_GATHERALTDIV8SI:
41108 half = gen_reg_rtx (mode0);
41109 if (mode0 == V4SFmode)
41110 gen = gen_vec_extract_lo_v8sf;
41111 else
41112 gen = gen_vec_extract_lo_v8si;
41113 if (!nonimmediate_operand (op0, GET_MODE (op0)))
41114 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
41115 emit_insn (gen (half, op0));
41116 op0 = half;
41117 if (GET_MODE (op3) != VOIDmode)
41118 {
41119 if (!nonimmediate_operand (op3, GET_MODE (op3)))
41120 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
41121 emit_insn (gen (half, op3));
41122 op3 = half;
41123 }
41124 break;
41125 default:
41126 break;
41127 }
41128
41129 /* Force the memory operand to use only a base register here. But we
41130 don't want to do this for memory operands of other builtin
41131 functions. */
41132 op1 = ix86_zero_extend_to_Pmode (op1);
41133
41134 if (!insn_data[icode].operand[1].predicate (op0, mode0))
41135 op0 = copy_to_mode_reg (mode0, op0);
41136 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
41137 op1 = copy_to_mode_reg (Pmode, op1);
41138 if (!insn_data[icode].operand[3].predicate (op2, mode2))
41139 op2 = copy_to_mode_reg (mode2, op2);
41140
41141 op3 = fixup_modeless_constant (op3, mode3);
41142
41143 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
41144 {
41145 if (!insn_data[icode].operand[4].predicate (op3, mode3))
41146 op3 = copy_to_mode_reg (mode3, op3);
41147 }
41148 else
41149 {
41150 op3 = copy_to_reg (op3);
41151 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
41152 }
41153 if (!insn_data[icode].operand[5].predicate (op4, mode4))
41154 {
41155 error ("the last argument must be scale 1, 2, 4, 8");
41156 return const0_rtx;
41157 }
41158
41159 /* Optimize. If mask is known to have all high bits set,
41160 replace op0 with pc_rtx to signal that the instruction
41161 overwrites the whole destination and doesn't use its
41162 previous contents. */
41163 if (optimize)
41164 {
41165 if (TREE_CODE (arg3) == INTEGER_CST)
41166 {
41167 if (integer_all_onesp (arg3))
41168 op0 = pc_rtx;
41169 }
41170 else if (TREE_CODE (arg3) == VECTOR_CST)
41171 {
41172 unsigned int negative = 0;
41173 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
41174 {
41175 tree cst = VECTOR_CST_ELT (arg3, i);
41176 if (TREE_CODE (cst) == INTEGER_CST
41177 && tree_int_cst_sign_bit (cst))
41178 negative++;
41179 else if (TREE_CODE (cst) == REAL_CST
41180 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
41181 negative++;
41182 }
41183 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
41184 op0 = pc_rtx;
41185 }
41186 else if (TREE_CODE (arg3) == SSA_NAME
41187 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
41188 {
41189 /* Recognize also when mask is like:
41190 __v2df src = _mm_setzero_pd ();
41191 __v2df mask = _mm_cmpeq_pd (src, src);
41192 or
41193 __v8sf src = _mm256_setzero_ps ();
41194 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
41195 as that is a cheaper way to load all ones into
41196 a register than having to load a constant from
41197 memory. */
41198 gimple *def_stmt = SSA_NAME_DEF_STMT (arg3);
41199 if (is_gimple_call (def_stmt))
41200 {
41201 tree fndecl = gimple_call_fndecl (def_stmt);
41202 if (fndecl
41203 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
41204 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
41205 {
41206 case IX86_BUILTIN_CMPPD:
41207 case IX86_BUILTIN_CMPPS:
41208 case IX86_BUILTIN_CMPPD256:
41209 case IX86_BUILTIN_CMPPS256:
41210 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
41211 break;
41212 /* FALLTHRU */
41213 case IX86_BUILTIN_CMPEQPD:
41214 case IX86_BUILTIN_CMPEQPS:
41215 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
41216 && initializer_zerop (gimple_call_arg (def_stmt,
41217 1)))
41218 op0 = pc_rtx;
41219 break;
41220 default:
41221 break;
41222 }
41223 }
41224 }
41225 }
41226
41227 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
41228 if (! pat)
41229 return const0_rtx;
41230 emit_insn (pat);
41231
41232 switch (fcode)
41233 {
41234 case IX86_BUILTIN_GATHER3DIV16SF:
41235 if (target == NULL_RTX)
41236 target = gen_reg_rtx (V8SFmode);
41237 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
41238 break;
41239 case IX86_BUILTIN_GATHER3DIV16SI:
41240 if (target == NULL_RTX)
41241 target = gen_reg_rtx (V8SImode);
41242 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
41243 break;
41244 case IX86_BUILTIN_GATHER3DIV8SF:
41245 case IX86_BUILTIN_GATHERDIV8SF:
41246 if (target == NULL_RTX)
41247 target = gen_reg_rtx (V4SFmode);
41248 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
41249 break;
41250 case IX86_BUILTIN_GATHER3DIV8SI:
41251 case IX86_BUILTIN_GATHERDIV8SI:
41252 if (target == NULL_RTX)
41253 target = gen_reg_rtx (V4SImode);
41254 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
41255 break;
41256 default:
41257 target = subtarget;
41258 break;
41259 }
41260 return target;
41261
41262 scatter_gen:
41263 arg0 = CALL_EXPR_ARG (exp, 0);
41264 arg1 = CALL_EXPR_ARG (exp, 1);
41265 arg2 = CALL_EXPR_ARG (exp, 2);
41266 arg3 = CALL_EXPR_ARG (exp, 3);
41267 arg4 = CALL_EXPR_ARG (exp, 4);
41268 op0 = expand_normal (arg0);
41269 op1 = expand_normal (arg1);
41270 op2 = expand_normal (arg2);
41271 op3 = expand_normal (arg3);
41272 op4 = expand_normal (arg4);
41273 mode1 = insn_data[icode].operand[1].mode;
41274 mode2 = insn_data[icode].operand[2].mode;
41275 mode3 = insn_data[icode].operand[3].mode;
41276 mode4 = insn_data[icode].operand[4].mode;
41277
41278 /* Scatter instruction stores operand op3 to memory with
41279 indices from op2 and scale from op4 under writemask op1.
41280 If index operand op2 has more elements than source operand
41281 op3, only its low half needs to be used. And vice versa. */
41282 switch (fcode)
41283 {
41284 case IX86_BUILTIN_SCATTERALTSIV8DF:
41285 case IX86_BUILTIN_SCATTERALTSIV8DI:
41286 half = gen_reg_rtx (V8SImode);
41287 if (!nonimmediate_operand (op2, V16SImode))
41288 op2 = copy_to_mode_reg (V16SImode, op2);
41289 emit_insn (gen_vec_extract_lo_v16si (half, op2));
41290 op2 = half;
41291 break;
41292 case IX86_BUILTIN_SCATTERALTDIV16SF:
41293 case IX86_BUILTIN_SCATTERALTDIV16SI:
41294 half = gen_reg_rtx (mode3);
41295 if (mode3 == V8SFmode)
41296 gen = gen_vec_extract_lo_v16sf;
41297 else
41298 gen = gen_vec_extract_lo_v16si;
41299 if (!nonimmediate_operand (op3, GET_MODE (op3)))
41300 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
41301 emit_insn (gen (half, op3));
41302 op3 = half;
41303 break;
41304 default:
41305 break;
41306 }
41307
41308 /* Force the memory operand to use only a base register here. But we
41309 don't want to do this for memory operands of other builtin
41310 functions. */
41311 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
41312
41313 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
41314 op0 = copy_to_mode_reg (Pmode, op0);
41315
41316 op1 = fixup_modeless_constant (op1, mode1);
41317
41318 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
41319 {
41320 if (!insn_data[icode].operand[1].predicate (op1, mode1))
41321 op1 = copy_to_mode_reg (mode1, op1);
41322 }
41323 else
41324 {
41325 op1 = copy_to_reg (op1);
41326 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
41327 }
41328
41329 if (!insn_data[icode].operand[2].predicate (op2, mode2))
41330 op2 = copy_to_mode_reg (mode2, op2);
41331
41332 if (!insn_data[icode].operand[3].predicate (op3, mode3))
41333 op3 = copy_to_mode_reg (mode3, op3);
41334
41335 if (!insn_data[icode].operand[4].predicate (op4, mode4))
41336 {
41337 error ("the last argument must be scale 1, 2, 4, 8");
41338 return const0_rtx;
41339 }
41340
41341 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
41342 if (! pat)
41343 return const0_rtx;
41344
41345 emit_insn (pat);
41346 return 0;
41347
41348 vec_prefetch_gen:
41349 arg0 = CALL_EXPR_ARG (exp, 0);
41350 arg1 = CALL_EXPR_ARG (exp, 1);
41351 arg2 = CALL_EXPR_ARG (exp, 2);
41352 arg3 = CALL_EXPR_ARG (exp, 3);
41353 arg4 = CALL_EXPR_ARG (exp, 4);
41354 op0 = expand_normal (arg0);
41355 op1 = expand_normal (arg1);
41356 op2 = expand_normal (arg2);
41357 op3 = expand_normal (arg3);
41358 op4 = expand_normal (arg4);
41359 mode0 = insn_data[icode].operand[0].mode;
41360 mode1 = insn_data[icode].operand[1].mode;
41361 mode3 = insn_data[icode].operand[3].mode;
41362 mode4 = insn_data[icode].operand[4].mode;
41363
41364 op0 = fixup_modeless_constant (op0, mode0);
41365
41366 if (GET_MODE (op0) == mode0
41367 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
41368 {
41369 if (!insn_data[icode].operand[0].predicate (op0, mode0))
41370 op0 = copy_to_mode_reg (mode0, op0);
41371 }
41372 else if (op0 != constm1_rtx)
41373 {
41374 op0 = copy_to_reg (op0);
41375 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
41376 }
41377
41378 if (!insn_data[icode].operand[1].predicate (op1, mode1))
41379 op1 = copy_to_mode_reg (mode1, op1);
41380
41381 /* Force the memory operand to use only a base register here. But we
41382 don't want to do this for memory operands of other builtin
41383 functions. */
41384 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
41385
41386 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
41387 op2 = copy_to_mode_reg (Pmode, op2);
41388
41389 if (!insn_data[icode].operand[3].predicate (op3, mode3))
41390 {
41391 error ("the forth argument must be scale 1, 2, 4, 8");
41392 return const0_rtx;
41393 }
41394
41395 if (!insn_data[icode].operand[4].predicate (op4, mode4))
41396 {
41397 error ("incorrect hint operand");
41398 return const0_rtx;
41399 }
41400
41401 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
41402 if (! pat)
41403 return const0_rtx;
41404
41405 emit_insn (pat);
41406
41407 return 0;
41408
41409 case IX86_BUILTIN_XABORT:
41410 icode = CODE_FOR_xabort;
41411 arg0 = CALL_EXPR_ARG (exp, 0);
41412 op0 = expand_normal (arg0);
41413 mode0 = insn_data[icode].operand[0].mode;
41414 if (!insn_data[icode].operand[0].predicate (op0, mode0))
41415 {
41416 error ("the xabort's argument must be an 8-bit immediate");
41417 return const0_rtx;
41418 }
41419 emit_insn (gen_xabort (op0));
41420 return 0;
41421
41422 default:
41423 break;
41424 }
41425
41426 for (i = 0, d = bdesc_special_args;
41427 i < ARRAY_SIZE (bdesc_special_args);
41428 i++, d++)
41429 if (d->code == fcode)
41430 return ix86_expand_special_args_builtin (d, exp, target);
41431
41432 for (i = 0, d = bdesc_args;
41433 i < ARRAY_SIZE (bdesc_args);
41434 i++, d++)
41435 if (d->code == fcode)
41436 switch (fcode)
41437 {
41438 case IX86_BUILTIN_FABSQ:
41439 case IX86_BUILTIN_COPYSIGNQ:
41440 if (!TARGET_SSE)
41441 /* Emit a normal call if SSE isn't available. */
41442 return expand_call (exp, target, ignore);
41443 default:
41444 return ix86_expand_args_builtin (d, exp, target);
41445 }
41446
41447 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
41448 if (d->code == fcode)
41449 return ix86_expand_sse_comi (d, exp, target);
41450
41451 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
41452 if (d->code == fcode)
41453 return ix86_expand_round_builtin (d, exp, target);
41454
41455 for (i = 0, d = bdesc_pcmpestr;
41456 i < ARRAY_SIZE (bdesc_pcmpestr);
41457 i++, d++)
41458 if (d->code == fcode)
41459 return ix86_expand_sse_pcmpestr (d, exp, target);
41460
41461 for (i = 0, d = bdesc_pcmpistr;
41462 i < ARRAY_SIZE (bdesc_pcmpistr);
41463 i++, d++)
41464 if (d->code == fcode)
41465 return ix86_expand_sse_pcmpistr (d, exp, target);
41466
41467 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
41468 if (d->code == fcode)
41469 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
41470 (enum ix86_builtin_func_type)
41471 d->flag, d->comparison);
41472
41473 gcc_unreachable ();
41474 }
41475
41476 /* This returns the target-specific builtin with code CODE if
41477 current_function_decl has visibility on this builtin, which is checked
41478 using isa flags. Returns NULL_TREE otherwise. */
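/* For illustration: the ISA flags come from the function's target
   attribute when present (e.g. a hypothetical function declared with
   __attribute__((target ("avx2")))), falling back to the global
   default, so a builtin may be visible in one function and not in
   another within the same translation unit. */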
41479
41480 static tree ix86_get_builtin (enum ix86_builtins code)
41481 {
41482 struct cl_target_option *opts;
41483 tree target_tree = NULL_TREE;
41484
41485 /* Determine the isa flags of current_function_decl. */
41486
41487 if (current_function_decl)
41488 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
41489
41490 if (target_tree == NULL)
41491 target_tree = target_option_default_node;
41492
41493 opts = TREE_TARGET_OPTION (target_tree);
41494
41495 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
41496 return ix86_builtin_decl (code, true);
41497 else
41498 return NULL_TREE;
41499 }
41500
41501 /* Return the function decl for the target specific builtin
41502 for the given MPX builtin passed in FCODE. */
41503 static tree
41504 ix86_builtin_mpx_function (unsigned fcode)
41505 {
41506 switch (fcode)
41507 {
41508 case BUILT_IN_CHKP_BNDMK:
41509 return ix86_builtins[IX86_BUILTIN_BNDMK];
41510
41511 case BUILT_IN_CHKP_BNDSTX:
41512 return ix86_builtins[IX86_BUILTIN_BNDSTX];
41513
41514 case BUILT_IN_CHKP_BNDLDX:
41515 return ix86_builtins[IX86_BUILTIN_BNDLDX];
41516
41517 case BUILT_IN_CHKP_BNDCL:
41518 return ix86_builtins[IX86_BUILTIN_BNDCL];
41519
41520 case BUILT_IN_CHKP_BNDCU:
41521 return ix86_builtins[IX86_BUILTIN_BNDCU];
41522
41523 case BUILT_IN_CHKP_BNDRET:
41524 return ix86_builtins[IX86_BUILTIN_BNDRET];
41525
41526 case BUILT_IN_CHKP_INTERSECT:
41527 return ix86_builtins[IX86_BUILTIN_BNDINT];
41528
41529 case BUILT_IN_CHKP_NARROW:
41530 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
41531
41532 case BUILT_IN_CHKP_SIZEOF:
41533 return ix86_builtins[IX86_BUILTIN_SIZEOF];
41534
41535 case BUILT_IN_CHKP_EXTRACT_LOWER:
41536 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
41537
41538 case BUILT_IN_CHKP_EXTRACT_UPPER:
41539 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
41540
41541 default:
41542 return NULL_TREE;
41543 }
41544
41545 gcc_unreachable ();
41546 }
41547
41548 /* Helper function for ix86_load_bounds and ix86_store_bounds.
41549
41550 Return an address to be used to load/store bounds for the pointer
41551 passed in SLOT.
41552
41553 SLOT_NO is an integer constant holding the number of a target
41554 dependent special slot to be used in case SLOT is not a memory.
41555
41556 SPECIAL_BASE is a pointer to be used as the base of fake addresses
41557 for accessing special slots in the Bounds Table. SPECIAL_BASE[-1],
41558 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
41559
41560 static rtx
41561 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
41562 {
41563 rtx addr = NULL;
41564
41565 /* A NULL slot means we pass bounds for a pointer not passed to the
41566 function at all. A register slot means we pass the pointer in a
41567 register. In both these cases bounds are passed via the Bounds
41568 Table. Since we do not have an actual pointer stored in memory,
41569 we have to use fake addresses to access the Bounds Table. We
41570 start with (special_base - sizeof (void*)) and decrease this
41571 address by the pointer size to get addresses for other slots. */
41572 if (!slot || REG_P (slot))
41573 {
41574 gcc_assert (CONST_INT_P (slot_no));
41575 addr = plus_constant (Pmode, special_base,
41576 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
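/* For illustration with 64-bit pointers: slot_no 0 yields
   special_base - 8, slot_no 1 yields special_base - 16, and so on.
   These fake addresses are presumably never dereferenced; they only
   serve as keys for the BNDLDX/BNDSTX Bounds Table lookup. */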
41577 }
41578 /* If pointer is passed in a memory then its address is used to
41579 access Bounds Table. */
41580 else if (MEM_P (slot))
41581 {
41582 addr = XEXP (slot, 0);
41583 if (!register_operand (addr, Pmode))
41584 addr = copy_addr_to_reg (addr);
41585 }
41586 else
41587 gcc_unreachable ();
41588
41589 return addr;
41590 }
41591
41592 /* The expand pass uses this hook to load bounds for function parameter
41593 PTR passed in SLOT in case its bounds are not passed in a register.
41594
41595 If SLOT is a memory, then bounds are loaded as for a regular pointer
41596 loaded from memory. PTR may be NULL in case SLOT is a memory.
41597 In that case the value of PTR (if required) may be loaded from SLOT.
41598
41599 If SLOT is NULL or a register then SLOT_NO is an integer constant
41600 holding the number of the target dependent special slot which should
41601 be used to obtain bounds.
41602
41603 Return the loaded bounds. */
41604
41605 static rtx
41606 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
41607 {
41608 rtx reg = gen_reg_rtx (BNDmode);
41609 rtx addr;
41610
41611 /* Get the address to be used to access the Bounds Table. Special slots
41612 start at the location of the return address of the current function. */
41613 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
41614
41615 /* Load pointer value from a memory if we don't have it. */
41616 if (!ptr)
41617 {
41618 gcc_assert (MEM_P (slot));
41619 ptr = copy_addr_to_reg (slot);
41620 }
41621
41622 if (!register_operand (ptr, Pmode))
41623 ptr = ix86_zero_extend_to_Pmode (ptr);
41624
41625 emit_insn (BNDmode == BND64mode
41626 ? gen_bnd64_ldx (reg, addr, ptr)
41627 : gen_bnd32_ldx (reg, addr, ptr));
41628
41629 return reg;
41630 }
41631
41632 /* The expand pass uses this hook to store BOUNDS for call argument PTR
41633 passed in SLOT in case BOUNDS are not passed in a register.
41634
41635 If SLOT is a memory, then BOUNDS are stored as for a regular pointer
41636 stored in memory. PTR may be NULL in case SLOT is a memory.
41637 In that case the value of PTR (if required) may be loaded from SLOT.
41638
41639 If SLOT is NULL or a register then SLOT_NO is an integer constant
41640 holding the number of the target dependent special slot which should
41641 be used to store BOUNDS. */
41642
41643 static void
41644 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
41645 {
41646 rtx addr;
41647
41648 /* Get the address to be used to access the Bounds Table. Special slots
41649 start at the location of the return address of a called function. */
41650 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
41651
41652 /* Load pointer value from a memory if we don't have it. */
41653 if (!ptr)
41654 {
41655 gcc_assert (MEM_P (slot));
41656 ptr = copy_addr_to_reg (slot);
41657 }
41658
41659 if (!register_operand (ptr, Pmode))
41660 ptr = ix86_zero_extend_to_Pmode (ptr);
41661
41662 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
41663 if (!register_operand (bounds, BNDmode))
41664 bounds = copy_to_mode_reg (BNDmode, bounds);
41665
41666 emit_insn (BNDmode == BND64mode
41667 ? gen_bnd64_stx (addr, ptr, bounds)
41668 : gen_bnd32_stx (addr, ptr, bounds));
41669 }
41670
41671 /* Load and return bounds returned by function in SLOT. */
41672
41673 static rtx
41674 ix86_load_returned_bounds (rtx slot)
41675 {
41676 rtx res;
41677
41678 gcc_assert (REG_P (slot));
41679 res = gen_reg_rtx (BNDmode);
41680 emit_move_insn (res, slot);
41681
41682 return res;
41683 }
41684
41685 /* Store BOUNDS returned by function into SLOT. */
41686
41687 static void
41688 ix86_store_returned_bounds (rtx slot, rtx bounds)
41689 {
41690 gcc_assert (REG_P (slot));
41691 emit_move_insn (slot, bounds);
41692 }
41693
41694 /* Returns a function decl for a vectorized version of the builtin function
41695 FNDECL, with result vector type TYPE_OUT and argument vector type TYPE_IN,
41696 or NULL_TREE if it is not available. */
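/* For example, when the vectorizer asks for a V2DF -> V2DF variant of
   sqrt, the BUILT_IN_SQRT case below returns the SQRTPD builtin (when
   its ISA is enabled), so the scalar calls can be replaced by a single
   vector instruction. */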
41697
41698 static tree
41699 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
41700 tree type_in)
41701 {
41702 machine_mode in_mode, out_mode;
41703 int in_n, out_n;
41704 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
41705
41706 if (TREE_CODE (type_out) != VECTOR_TYPE
41707 || TREE_CODE (type_in) != VECTOR_TYPE
41708 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
41709 return NULL_TREE;
41710
41711 out_mode = TYPE_MODE (TREE_TYPE (type_out));
41712 out_n = TYPE_VECTOR_SUBPARTS (type_out);
41713 in_mode = TYPE_MODE (TREE_TYPE (type_in));
41714 in_n = TYPE_VECTOR_SUBPARTS (type_in);
41715
41716 switch (fn)
41717 {
41718 case BUILT_IN_SQRT:
41719 if (out_mode == DFmode && in_mode == DFmode)
41720 {
41721 if (out_n == 2 && in_n == 2)
41722 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
41723 else if (out_n == 4 && in_n == 4)
41724 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
41725 else if (out_n == 8 && in_n == 8)
41726 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
41727 }
41728 break;
41729
41730 case BUILT_IN_EXP2F:
41731 if (out_mode == SFmode && in_mode == SFmode)
41732 {
41733 if (out_n == 16 && in_n == 16)
41734 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
41735 }
41736 break;
41737
41738 case BUILT_IN_SQRTF:
41739 if (out_mode == SFmode && in_mode == SFmode)
41740 {
41741 if (out_n == 4 && in_n == 4)
41742 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
41743 else if (out_n == 8 && in_n == 8)
41744 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
41745 else if (out_n == 16 && in_n == 16)
41746 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
41747 }
41748 break;
41749
41750 case BUILT_IN_IFLOOR:
41751 case BUILT_IN_LFLOOR:
41752 case BUILT_IN_LLFLOOR:
41753 /* The round insn does not trap on denormals. */
41754 if (flag_trapping_math || !TARGET_ROUND)
41755 break;
41756
41757 if (out_mode == SImode && in_mode == DFmode)
41758 {
41759 if (out_n == 4 && in_n == 2)
41760 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
41761 else if (out_n == 8 && in_n == 4)
41762 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
41763 else if (out_n == 16 && in_n == 8)
41764 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
41765 }
41766 break;
41767
41768 case BUILT_IN_IFLOORF:
41769 case BUILT_IN_LFLOORF:
41770 case BUILT_IN_LLFLOORF:
41771 /* The round insn does not trap on denormals. */
41772 if (flag_trapping_math || !TARGET_ROUND)
41773 break;
41774
41775 if (out_mode == SImode && in_mode == SFmode)
41776 {
41777 if (out_n == 4 && in_n == 4)
41778 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
41779 else if (out_n == 8 && in_n == 8)
41780 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
41781 }
41782 break;
41783
41784 case BUILT_IN_ICEIL:
41785 case BUILT_IN_LCEIL:
41786 case BUILT_IN_LLCEIL:
41787 /* The round insn does not trap on denormals. */
41788 if (flag_trapping_math || !TARGET_ROUND)
41789 break;
41790
41791 if (out_mode == SImode && in_mode == DFmode)
41792 {
41793 if (out_n == 4 && in_n == 2)
41794 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
41795 else if (out_n == 8 && in_n == 4)
41796 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
41797 else if (out_n == 16 && in_n == 8)
41798 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
41799 }
41800 break;
41801
41802 case BUILT_IN_ICEILF:
41803 case BUILT_IN_LCEILF:
41804 case BUILT_IN_LLCEILF:
41805 /* The round insn does not trap on denormals. */
41806 if (flag_trapping_math || !TARGET_ROUND)
41807 break;
41808
41809 if (out_mode == SImode && in_mode == SFmode)
41810 {
41811 if (out_n == 4 && in_n == 4)
41812 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
41813 else if (out_n == 8 && in_n == 8)
41814 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
41815 }
41816 break;
41817
41818 case BUILT_IN_IRINT:
41819 case BUILT_IN_LRINT:
41820 case BUILT_IN_LLRINT:
41821 if (out_mode == SImode && in_mode == DFmode)
41822 {
41823 if (out_n == 4 && in_n == 2)
41824 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
41825 else if (out_n == 8 && in_n == 4)
41826 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
41827 }
41828 break;
41829
41830 case BUILT_IN_IRINTF:
41831 case BUILT_IN_LRINTF:
41832 case BUILT_IN_LLRINTF:
41833 if (out_mode == SImode && in_mode == SFmode)
41834 {
41835 if (out_n == 4 && in_n == 4)
41836 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
41837 else if (out_n == 8 && in_n == 8)
41838 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
41839 }
41840 break;
41841
41842 case BUILT_IN_IROUND:
41843 case BUILT_IN_LROUND:
41844 case BUILT_IN_LLROUND:
41845 /* The round insn does not trap on denormals. */
41846 if (flag_trapping_math || !TARGET_ROUND)
41847 break;
41848
41849 if (out_mode == SImode && in_mode == DFmode)
41850 {
41851 if (out_n == 4 && in_n == 2)
41852 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
41853 else if (out_n == 8 && in_n == 4)
41854 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
41855 else if (out_n == 16 && in_n == 8)
41856 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
41857 }
41858 break;
41859
41860 case BUILT_IN_IROUNDF:
41861 case BUILT_IN_LROUNDF:
41862 case BUILT_IN_LLROUNDF:
41863 /* The round insn does not trap on denormals. */
41864 if (flag_trapping_math || !TARGET_ROUND)
41865 break;
41866
41867 if (out_mode == SImode && in_mode == SFmode)
41868 {
41869 if (out_n == 4 && in_n == 4)
41870 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
41871 else if (out_n == 8 && in_n == 8)
41872 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
41873 }
41874 break;
41875
41876 case BUILT_IN_COPYSIGN:
41877 if (out_mode == DFmode && in_mode == DFmode)
41878 {
41879 if (out_n == 2 && in_n == 2)
41880 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
41881 else if (out_n == 4 && in_n == 4)
41882 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
41883 else if (out_n == 8 && in_n == 8)
41884 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
41885 }
41886 break;
41887
41888 case BUILT_IN_COPYSIGNF:
41889 if (out_mode == SFmode && in_mode == SFmode)
41890 {
41891 if (out_n == 4 && in_n == 4)
41892 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
41893 else if (out_n == 8 && in_n == 8)
41894 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
41895 else if (out_n == 16 && in_n == 16)
41896 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
41897 }
41898 break;
41899
41900 case BUILT_IN_FLOOR:
41901 /* The round insn does not trap on denormals. */
41902 if (flag_trapping_math || !TARGET_ROUND)
41903 break;
41904
41905 if (out_mode == DFmode && in_mode == DFmode)
41906 {
41907 if (out_n == 2 && in_n == 2)
41908 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
41909 else if (out_n == 4 && in_n == 4)
41910 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
41911 }
41912 break;
41913
41914 case BUILT_IN_FLOORF:
41915 /* The round insn does not trap on denormals. */
41916 if (flag_trapping_math || !TARGET_ROUND)
41917 break;
41918
41919 if (out_mode == SFmode && in_mode == SFmode)
41920 {
41921 if (out_n == 4 && in_n == 4)
41922 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
41923 else if (out_n == 8 && in_n == 8)
41924 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
41925 }
41926 break;
41927
41928 case BUILT_IN_CEIL:
41929 /* The round insn does not trap on denormals. */
41930 if (flag_trapping_math || !TARGET_ROUND)
41931 break;
41932
41933 if (out_mode == DFmode && in_mode == DFmode)
41934 {
41935 if (out_n == 2 && in_n == 2)
41936 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
41937 else if (out_n == 4 && in_n == 4)
41938 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
41939 }
41940 break;
41941
41942 case BUILT_IN_CEILF:
41943 /* The round insn does not trap on denormals. */
41944 if (flag_trapping_math || !TARGET_ROUND)
41945 break;
41946
41947 if (out_mode == SFmode && in_mode == SFmode)
41948 {
41949 if (out_n == 4 && in_n == 4)
41950 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
41951 else if (out_n == 8 && in_n == 8)
41952 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
41953 }
41954 break;
41955
41956 case BUILT_IN_TRUNC:
41957 /* The round insn does not trap on denormals. */
41958 if (flag_trapping_math || !TARGET_ROUND)
41959 break;
41960
41961 if (out_mode == DFmode && in_mode == DFmode)
41962 {
41963 if (out_n == 2 && in_n == 2)
41964 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
41965 else if (out_n == 4 && in_n == 4)
41966 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
41967 }
41968 break;
41969
41970 case BUILT_IN_TRUNCF:
41971 /* The round insn does not trap on denormals. */
41972 if (flag_trapping_math || !TARGET_ROUND)
41973 break;
41974
41975 if (out_mode == SFmode && in_mode == SFmode)
41976 {
41977 if (out_n == 4 && in_n == 4)
41978 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
41979 else if (out_n == 8 && in_n == 8)
41980 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
41981 }
41982 break;
41983
41984 case BUILT_IN_RINT:
41985 /* The round insn does not trap on denormals. */
41986 if (flag_trapping_math || !TARGET_ROUND)
41987 break;
41988
41989 if (out_mode == DFmode && in_mode == DFmode)
41990 {
41991 if (out_n == 2 && in_n == 2)
41992 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
41993 else if (out_n == 4 && in_n == 4)
41994 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
41995 }
41996 break;
41997
41998 case BUILT_IN_RINTF:
41999 /* The round insn does not trap on denormals. */
42000 if (flag_trapping_math || !TARGET_ROUND)
42001 break;
42002
42003 if (out_mode == SFmode && in_mode == SFmode)
42004 {
42005 if (out_n == 4 && in_n == 4)
42006 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
42007 else if (out_n == 8 && in_n == 8)
42008 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
42009 }
42010 break;
42011
42012 case BUILT_IN_ROUND:
42013 /* The round insn does not trap on denormals. */
42014 if (flag_trapping_math || !TARGET_ROUND)
42015 break;
42016
42017 if (out_mode == DFmode && in_mode == DFmode)
42018 {
42019 if (out_n == 2 && in_n == 2)
42020 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
42021 else if (out_n == 4 && in_n == 4)
42022 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
42023 }
42024 break;
42025
42026 case BUILT_IN_ROUNDF:
42027 /* The round insn does not trap on denormals. */
42028 if (flag_trapping_math || !TARGET_ROUND)
42029 break;
42030
42031 if (out_mode == SFmode && in_mode == SFmode)
42032 {
42033 if (out_n == 4 && in_n == 4)
42034 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
42035 else if (out_n == 8 && in_n == 8)
42036 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
42037 }
42038 break;
42039
42040 case BUILT_IN_FMA:
42041 if (out_mode == DFmode && in_mode == DFmode)
42042 {
42043 if (out_n == 2 && in_n == 2)
42044 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
42045 if (out_n == 4 && in_n == 4)
42046 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
42047 }
42048 break;
42049
42050 case BUILT_IN_FMAF:
42051 if (out_mode == SFmode && in_mode == SFmode)
42052 {
42053 if (out_n == 4 && in_n == 4)
42054 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
42055 if (out_n == 8 && in_n == 8)
42056 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
42057 }
42058 break;
42059
42060 default:
42061 break;
42062 }
42063
42064 /* Dispatch to a handler for a vectorization library. */
42065 if (ix86_veclib_handler)
42066 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
42067 type_in);
42068
42069 return NULL_TREE;
42070 }
42071
42072 /* Handler for an SVML-style interface to
42073 a library with vectorized intrinsics. */
42074
42075 static tree
42076 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
42077 {
42078 char name[20];
42079 tree fntype, new_fndecl, args;
42080 unsigned arity;
42081 const char *bname;
42082 machine_mode el_mode, in_mode;
42083 int n, in_n;
42084
42085 /* SVML is suitable for unsafe math only.  */
42086 if (!flag_unsafe_math_optimizations)
42087 return NULL_TREE;
42088
42089 el_mode = TYPE_MODE (TREE_TYPE (type_out));
42090 n = TYPE_VECTOR_SUBPARTS (type_out);
42091 in_mode = TYPE_MODE (TREE_TYPE (type_in));
42092 in_n = TYPE_VECTOR_SUBPARTS (type_in);
42093 if (el_mode != in_mode
42094 || n != in_n)
42095 return NULL_TREE;
42096
42097 switch (fn)
42098 {
42099 case BUILT_IN_EXP:
42100 case BUILT_IN_LOG:
42101 case BUILT_IN_LOG10:
42102 case BUILT_IN_POW:
42103 case BUILT_IN_TANH:
42104 case BUILT_IN_TAN:
42105 case BUILT_IN_ATAN:
42106 case BUILT_IN_ATAN2:
42107 case BUILT_IN_ATANH:
42108 case BUILT_IN_CBRT:
42109 case BUILT_IN_SINH:
42110 case BUILT_IN_SIN:
42111 case BUILT_IN_ASINH:
42112 case BUILT_IN_ASIN:
42113 case BUILT_IN_COSH:
42114 case BUILT_IN_COS:
42115 case BUILT_IN_ACOSH:
42116 case BUILT_IN_ACOS:
42117 if (el_mode != DFmode || n != 2)
42118 return NULL_TREE;
42119 break;
42120
42121 case BUILT_IN_EXPF:
42122 case BUILT_IN_LOGF:
42123 case BUILT_IN_LOG10F:
42124 case BUILT_IN_POWF:
42125 case BUILT_IN_TANHF:
42126 case BUILT_IN_TANF:
42127 case BUILT_IN_ATANF:
42128 case BUILT_IN_ATAN2F:
42129 case BUILT_IN_ATANHF:
42130 case BUILT_IN_CBRTF:
42131 case BUILT_IN_SINHF:
42132 case BUILT_IN_SINF:
42133 case BUILT_IN_ASINHF:
42134 case BUILT_IN_ASINF:
42135 case BUILT_IN_COSHF:
42136 case BUILT_IN_COSF:
42137 case BUILT_IN_ACOSHF:
42138 case BUILT_IN_ACOSF:
42139 if (el_mode != SFmode || n != 4)
42140 return NULL_TREE;
42141 break;
42142
42143 default:
42144 return NULL_TREE;
42145 }
42146
42147 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
42148
42149 if (fn == BUILT_IN_LOGF)
42150 strcpy (name, "vmlsLn4");
42151 else if (fn == BUILT_IN_LOG)
42152 strcpy (name, "vmldLn2");
42153 else if (n == 4)
42154 {
42155 sprintf (name, "vmls%s", bname+10);
42156 name[strlen (name)-1] = '4';
42157 }
42158 else
42159 sprintf (name, "vmld%s2", bname+10);
42160
42161 /* Convert the first letter of the function name part to uppercase.  */
42162 name[4] &= ~0x20;
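  /* For example, BUILT_IN_SINF (decl name "__builtin_sinf", so BNAME + 10
     is "sinf") with n == 4 yields "vmlssin4" above, which becomes
     "vmlsSin4" here; BUILT_IN_SIN analogously becomes "vmldSin2".  */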
42163
42164 arity = 0;
42165 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
42166 args;
42167 args = TREE_CHAIN (args))
42168 arity++;
42169
42170 if (arity == 1)
42171 fntype = build_function_type_list (type_out, type_in, NULL);
42172 else
42173 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
42174
42175 /* Build a function declaration for the vectorized function. */
42176 new_fndecl = build_decl (BUILTINS_LOCATION,
42177 FUNCTION_DECL, get_identifier (name), fntype);
42178 TREE_PUBLIC (new_fndecl) = 1;
42179 DECL_EXTERNAL (new_fndecl) = 1;
42180 DECL_IS_NOVOPS (new_fndecl) = 1;
42181 TREE_READONLY (new_fndecl) = 1;
42182
42183 return new_fndecl;
42184 }
42185
42186 /* Handler for an ACML-style interface to
42187 a library with vectorized intrinsics. */
42188
42189 static tree
42190 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
42191 {
42192 char name[20] = "__vr.._";
42193 tree fntype, new_fndecl, args;
42194 unsigned arity;
42195 const char *bname;
42196 machine_mode el_mode, in_mode;
42197 int n, in_n;
42198
42199 /* ACML is 64-bit only and suitable for unsafe math only, as
42200 it does not correctly support parts of IEEE with the required
42201 precision, such as denormals.  */
42202 if (!TARGET_64BIT
42203 || !flag_unsafe_math_optimizations)
42204 return NULL_TREE;
42205
42206 el_mode = TYPE_MODE (TREE_TYPE (type_out));
42207 n = TYPE_VECTOR_SUBPARTS (type_out);
42208 in_mode = TYPE_MODE (TREE_TYPE (type_in));
42209 in_n = TYPE_VECTOR_SUBPARTS (type_in);
42210 if (el_mode != in_mode
42211 || n != in_n)
42212 return NULL_TREE;
42213
42214 switch (fn)
42215 {
42216 case BUILT_IN_SIN:
42217 case BUILT_IN_COS:
42218 case BUILT_IN_EXP:
42219 case BUILT_IN_LOG:
42220 case BUILT_IN_LOG2:
42221 case BUILT_IN_LOG10:
42222 name[4] = 'd';
42223 name[5] = '2';
42224 if (el_mode != DFmode
42225 || n != 2)
42226 return NULL_TREE;
42227 break;
42228
42229 case BUILT_IN_SINF:
42230 case BUILT_IN_COSF:
42231 case BUILT_IN_EXPF:
42232 case BUILT_IN_POWF:
42233 case BUILT_IN_LOGF:
42234 case BUILT_IN_LOG2F:
42235 case BUILT_IN_LOG10F:
42236 name[4] = 's';
42237 name[5] = '4';
42238 if (el_mode != SFmode
42239 || n != 4)
42240 return NULL_TREE;
42241 break;
42242
42243 default:
42244 return NULL_TREE;
42245 }
42246
42247 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
42248 sprintf (name + 7, "%s", bname+10);
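  /* For example, BUILT_IN_SIN (decl name "__builtin_sin") yields
     "__vrd2_sin", and BUILT_IN_LOG2F yields "__vrs4_log2f".  */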
42249
42250 arity = 0;
42251 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
42252 args;
42253 args = TREE_CHAIN (args))
42254 arity++;
42255
42256 if (arity == 1)
42257 fntype = build_function_type_list (type_out, type_in, NULL);
42258 else
42259 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
42260
42261 /* Build a function declaration for the vectorized function. */
42262 new_fndecl = build_decl (BUILTINS_LOCATION,
42263 FUNCTION_DECL, get_identifier (name), fntype);
42264 TREE_PUBLIC (new_fndecl) = 1;
42265 DECL_EXTERNAL (new_fndecl) = 1;
42266 DECL_IS_NOVOPS (new_fndecl) = 1;
42267 TREE_READONLY (new_fndecl) = 1;
42268
42269 return new_fndecl;
42270 }
42271
42272 /* Returns a decl of a function that implements a gather load with
42273 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
42274 Return NULL_TREE if it is not available.  */
42275
42276 static tree
42277 ix86_vectorize_builtin_gather (const_tree mem_vectype,
42278 const_tree index_type, int scale)
42279 {
42280 bool si;
42281 enum ix86_builtins code;
42282
42283 if (! TARGET_AVX2)
42284 return NULL_TREE;
42285
42286 if ((TREE_CODE (index_type) != INTEGER_TYPE
42287 && !POINTER_TYPE_P (index_type))
42288 || (TYPE_MODE (index_type) != SImode
42289 && TYPE_MODE (index_type) != DImode))
42290 return NULL_TREE;
42291
42292 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
42293 return NULL_TREE;
42294
42295 /* The v*gather* insns sign-extend the index to pointer mode.  */
42296 if (TYPE_PRECISION (index_type) < POINTER_SIZE
42297 && TYPE_UNSIGNED (index_type))
42298 return NULL_TREE;
42299
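  /* The scale must be a power of two no larger than 8, i.e. 1, 2, 4 or 8;
     (scale & (scale - 1)) is zero exactly for powers of two, e.g.
     4 & 3 == 0 while 6 & 5 == 4.  */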
42300 if (scale <= 0
42301 || scale > 8
42302 || (scale & (scale - 1)) != 0)
42303 return NULL_TREE;
42304
42305 si = TYPE_MODE (index_type) == SImode;
42306 switch (TYPE_MODE (mem_vectype))
42307 {
42308 case V2DFmode:
42309 if (TARGET_AVX512VL)
42310 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
42311 else
42312 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
42313 break;
42314 case V4DFmode:
42315 if (TARGET_AVX512VL)
42316 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
42317 else
42318 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
42319 break;
42320 case V2DImode:
42321 if (TARGET_AVX512VL)
42322 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
42323 else
42324 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
42325 break;
42326 case V4DImode:
42327 if (TARGET_AVX512VL)
42328 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
42329 else
42330 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
42331 break;
42332 case V4SFmode:
42333 if (TARGET_AVX512VL)
42334 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
42335 else
42336 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
42337 break;
42338 case V8SFmode:
42339 if (TARGET_AVX512VL)
42340 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
42341 else
42342 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
42343 break;
42344 case V4SImode:
42345 if (TARGET_AVX512VL)
42346 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
42347 else
42348 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
42349 break;
42350 case V8SImode:
42351 if (TARGET_AVX512VL)
42352 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
42353 else
42354 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
42355 break;
42356 case V8DFmode:
42357 if (TARGET_AVX512F)
42358 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
42359 else
42360 return NULL_TREE;
42361 break;
42362 case V8DImode:
42363 if (TARGET_AVX512F)
42364 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
42365 else
42366 return NULL_TREE;
42367 break;
42368 case V16SFmode:
42369 if (TARGET_AVX512F)
42370 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
42371 else
42372 return NULL_TREE;
42373 break;
42374 case V16SImode:
42375 if (TARGET_AVX512F)
42376 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
42377 else
42378 return NULL_TREE;
42379 break;
42380 default:
42381 return NULL_TREE;
42382 }
42383
42384 return ix86_get_builtin (code);
42385 }
42386
42387 /* Returns a decl of a function that implements a scatter store with
42388 register type VECTYPE, index type INDEX_TYPE and scale SCALE.
42389 Return NULL_TREE if it is not available.  */
42390
42391 static tree
42392 ix86_vectorize_builtin_scatter (const_tree vectype,
42393 const_tree index_type, int scale)
42394 {
42395 bool si;
42396 enum ix86_builtins code;
42397
42398 if (!TARGET_AVX512F)
42399 return NULL_TREE;
42400
42401 if ((TREE_CODE (index_type) != INTEGER_TYPE
42402 && !POINTER_TYPE_P (index_type))
42403 || (TYPE_MODE (index_type) != SImode
42404 && TYPE_MODE (index_type) != DImode))
42405 return NULL_TREE;
42406
42407 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
42408 return NULL_TREE;
42409
42410 /* The v*scatter* insns sign-extend the index to pointer mode.  */
42411 if (TYPE_PRECISION (index_type) < POINTER_SIZE
42412 && TYPE_UNSIGNED (index_type))
42413 return NULL_TREE;
42414
42415 /* Scale can be 1, 2, 4 or 8. */
42416 if (scale <= 0
42417 || scale > 8
42418 || (scale & (scale - 1)) != 0)
42419 return NULL_TREE;
42420
42421 si = TYPE_MODE (index_type) == SImode;
42422 switch (TYPE_MODE (vectype))
42423 {
42424 case V8DFmode:
42425 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
42426 break;
42427 case V8DImode:
42428 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
42429 break;
42430 case V16SFmode:
42431 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
42432 break;
42433 case V16SImode:
42434 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
42435 break;
42436 default:
42437 return NULL_TREE;
42438 }
42439
42440 return ix86_builtins[code];
42441 }
42442
42443 /* Returns a decl for a target-specific builtin that implements
42444 the reciprocal of the function FN, or NULL_TREE if not available.  */
42445
42446 static tree
42447 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
42448 {
42449 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
42450 && flag_finite_math_only && !flag_trapping_math
42451 && flag_unsafe_math_optimizations))
42452 return NULL_TREE;
42453
42454 if (md_fn)
42455 /* Machine dependent builtins. */
42456 switch (fn)
42457 {
42458 /* Vectorized version of sqrt to rsqrt conversion. */
42459 case IX86_BUILTIN_SQRTPS_NR:
42460 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
42461
42462 case IX86_BUILTIN_SQRTPS_NR256:
42463 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
42464
42465 default:
42466 return NULL_TREE;
42467 }
42468 else
42469 /* Normal builtins. */
42470 switch (fn)
42471 {
42472 /* Sqrt to rsqrt conversion. */
42473 case BUILT_IN_SQRTF:
42474 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
42475
42476 default:
42477 return NULL_TREE;
42478 }
42479 }
42480 \f
42481 /* Helper for avx_vpermilps256_operand et al. This is also used by
42482 the expansion functions to turn the parallel back into a mask.
42483 The return value is 0 for no match and the imm8+1 for a match. */
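/* For example, for V4SFmode a parallel selecting elements (3 2 1 0)
   encodes as imm8 0x1b (3 | 2 << 2 | 1 << 4 | 0 << 6), so the function
   returns 0x1c.  */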
42484
42485 int
42486 avx_vpermilp_parallel (rtx par, machine_mode mode)
42487 {
42488 unsigned i, nelt = GET_MODE_NUNITS (mode);
42489 unsigned mask = 0;
42490 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
42491
42492 if (XVECLEN (par, 0) != (int) nelt)
42493 return 0;
42494
42495 /* Validate that all of the elements are constants, and not totally
42496 out of range. Copy the data into an integral array to make the
42497 subsequent checks easier. */
42498 for (i = 0; i < nelt; ++i)
42499 {
42500 rtx er = XVECEXP (par, 0, i);
42501 unsigned HOST_WIDE_INT ei;
42502
42503 if (!CONST_INT_P (er))
42504 return 0;
42505 ei = INTVAL (er);
42506 if (ei >= nelt)
42507 return 0;
42508 ipar[i] = ei;
42509 }
42510
42511 switch (mode)
42512 {
42513 case V8DFmode:
42514 /* In the 512-bit DFmode case, we can only move elements within
42515 a 128-bit lane. First fill the second part of the mask,
42516 then fallthru. */
42517 for (i = 4; i < 6; ++i)
42518 {
42519 if (ipar[i] < 4 || ipar[i] >= 6)
42520 return 0;
42521 mask |= (ipar[i] - 4) << i;
42522 }
42523 for (i = 6; i < 8; ++i)
42524 {
42525 if (ipar[i] < 6)
42526 return 0;
42527 mask |= (ipar[i] - 6) << i;
42528 }
42529 /* FALLTHRU */
42530
42531 case V4DFmode:
42532 /* In the 256-bit DFmode case, we can only move elements within
42533 a 128-bit lane. */
42534 for (i = 0; i < 2; ++i)
42535 {
42536 if (ipar[i] >= 2)
42537 return 0;
42538 mask |= ipar[i] << i;
42539 }
42540 for (i = 2; i < 4; ++i)
42541 {
42542 if (ipar[i] < 2)
42543 return 0;
42544 mask |= (ipar[i] - 2) << i;
42545 }
42546 break;
42547
42548 case V16SFmode:
42549 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
42550 must mirror the permutation in the lower 256 bits.  */
42551 for (i = 0; i < 8; ++i)
42552 if (ipar[i] + 8 != ipar[i + 8])
42553 return 0;
42554 /* FALLTHRU */
42555
42556 case V8SFmode:
42557 /* In the 256-bit SFmode case, we have full freedom of
42558 movement within the low 128-bit lane, but the high 128-bit
42559 lane must mirror the exact same pattern. */
42560 for (i = 0; i < 4; ++i)
42561 if (ipar[i] + 4 != ipar[i + 4])
42562 return 0;
42563 nelt = 4;
42564 /* FALLTHRU */
42565
42566 case V2DFmode:
42567 case V4SFmode:
42568 /* In the 128-bit case, we have full freedom in the placement of
42569 the elements from the source operand. */
42570 for (i = 0; i < nelt; ++i)
42571 mask |= ipar[i] << (i * (nelt / 2));
42572 break;
42573
42574 default:
42575 gcc_unreachable ();
42576 }
42577
42578 /* Make sure success has a non-zero value by adding one. */
42579 return mask + 1;
42580 }
42581
42582 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
42583 the expansion functions to turn the parallel back into a mask.
42584 The return value is 0 for no match and the imm8+1 for a match. */
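/* For example, for V4DFmode a parallel selecting elements (2 3 4 5),
   i.e. the high half of the first operand followed by the low half of
   the second, encodes as imm8 0x21, so the function returns 0x22.  */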
42585
42586 int
42587 avx_vperm2f128_parallel (rtx par, machine_mode mode)
42588 {
42589 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
42590 unsigned mask = 0;
42591 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
42592
42593 if (XVECLEN (par, 0) != (int) nelt)
42594 return 0;
42595
42596 /* Validate that all of the elements are constants, and not totally
42597 out of range. Copy the data into an integral array to make the
42598 subsequent checks easier. */
42599 for (i = 0; i < nelt; ++i)
42600 {
42601 rtx er = XVECEXP (par, 0, i);
42602 unsigned HOST_WIDE_INT ei;
42603
42604 if (!CONST_INT_P (er))
42605 return 0;
42606 ei = INTVAL (er);
42607 if (ei >= 2 * nelt)
42608 return 0;
42609 ipar[i] = ei;
42610 }
42611
42612 /* Validate that each half of the permute selects a run of consecutive elements.  */
42613 for (i = 0; i < nelt2 - 1; ++i)
42614 if (ipar[i] + 1 != ipar[i + 1])
42615 return 0;
42616 for (i = nelt2; i < nelt - 1; ++i)
42617 if (ipar[i] + 1 != ipar[i + 1])
42618 return 0;
42619
42620 /* Reconstruct the mask. */
42621 for (i = 0; i < 2; ++i)
42622 {
42623 unsigned e = ipar[i * nelt2];
42624 if (e % nelt2)
42625 return 0;
42626 e /= nelt2;
42627 mask |= e << (i * 4);
42628 }
42629
42630 /* Make sure success has a non-zero value by adding one. */
42631 return mask + 1;
42632 }
42633 \f
42634 /* Return a register priority for hard reg REGNO. */
42635 static int
42636 ix86_register_priority (int hard_regno)
42637 {
42638 /* ebp and r13 as the base always want a displacement, and r12 as the
42639 base always wants an index.  So discourage their use in an
42640 address.  */
42641 if (hard_regno == R12_REG || hard_regno == R13_REG)
42642 return 0;
42643 if (hard_regno == BP_REG)
42644 return 1;
42645 /* New x86-64 int registers result in bigger code size. Discourage
42646 them. */
42647 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
42648 return 2;
42649 /* New x86-64 SSE registers result in bigger code size. Discourage
42650 them. */
42651 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
42652 return 2;
42653 /* Use of the AX register results in smaller code.  Prefer it.  */
42654 if (hard_regno == AX_REG)
42655 return 4;
42656 return 3;
42657 }
42658
42659 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
42660
42661 Put float CONST_DOUBLE in the constant pool instead of fp regs.
42662 QImode must go into class Q_REGS.
42663 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
42664 movdf to do mem-to-mem moves through integer regs. */
42665
42666 static reg_class_t
42667 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
42668 {
42669 machine_mode mode = GET_MODE (x);
42670
42671 /* We're only allowed to return a subclass of CLASS. Many of the
42672 following checks fail for NO_REGS, so eliminate that early. */
42673 if (regclass == NO_REGS)
42674 return NO_REGS;
42675
42676 /* All classes can load zeros. */
42677 if (x == CONST0_RTX (mode))
42678 return regclass;
42679
42680 /* Force constants into memory if we are loading a (nonzero) constant into
42681 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
42682 instructions to load from a constant. */
42683 if (CONSTANT_P (x)
42684 && (MAYBE_MMX_CLASS_P (regclass)
42685 || MAYBE_SSE_CLASS_P (regclass)
42686 || MAYBE_MASK_CLASS_P (regclass)))
42687 return NO_REGS;
42688
42689 /* Prefer SSE regs only if we can use them for math.  */
42690 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
42691 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
42692
42693 /* Floating-point constants need more complex checks. */
42694 if (CONST_DOUBLE_P (x))
42695 {
42696 /* General regs can load everything. */
42697 if (reg_class_subset_p (regclass, GENERAL_REGS))
42698 return regclass;
42699
42700 /* Floats can load 0 and 1 plus some others. Note that we eliminated
42701 zero above. We only want to wind up preferring 80387 registers if
42702 we plan on doing computation with them. */
42703 if (TARGET_80387
42704 && standard_80387_constant_p (x) > 0)
42705 {
42706 /* Limit class to non-sse. */
42707 if (regclass == FLOAT_SSE_REGS)
42708 return FLOAT_REGS;
42709 if (regclass == FP_TOP_SSE_REGS)
42710 return FP_TOP_REG;
42711 if (regclass == FP_SECOND_SSE_REGS)
42712 return FP_SECOND_REG;
42713 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
42714 return regclass;
42715 }
42716
42717 return NO_REGS;
42718 }
42719
42720 /* Generally when we see PLUS here, it's the function invariant
42721 (plus soft-fp const_int), which can only be computed into general
42722 regs.  */
42723 if (GET_CODE (x) == PLUS)
42724 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
42725
42726 /* QImode constants are easy to load, but non-constant QImode data
42727 must go into Q_REGS. */
42728 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
42729 {
42730 if (reg_class_subset_p (regclass, Q_REGS))
42731 return regclass;
42732 if (reg_class_subset_p (Q_REGS, regclass))
42733 return Q_REGS;
42734 return NO_REGS;
42735 }
42736
42737 return regclass;
42738 }
42739
42740 /* Discourage putting floating-point values in SSE registers unless
42741 SSE math is being used, and likewise for the 387 registers. */
42742 static reg_class_t
42743 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
42744 {
42745 machine_mode mode = GET_MODE (x);
42746
42747 /* Restrict the output reload class to the register bank that we are doing
42748 math on. If we would like not to return a subset of CLASS, reject this
42749 alternative: if reload cannot do this, it will still use its choice. */
42750 mode = GET_MODE (x);
42751 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
42752 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
42753
42754 if (X87_FLOAT_MODE_P (mode))
42755 {
42756 if (regclass == FP_TOP_SSE_REGS)
42757 return FP_TOP_REG;
42758 else if (regclass == FP_SECOND_SSE_REGS)
42759 return FP_SECOND_REG;
42760 else
42761 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
42762 }
42763
42764 return regclass;
42765 }
42766
42767 static reg_class_t
42768 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
42769 machine_mode mode, secondary_reload_info *sri)
42770 {
42771 /* Double-word spills from general registers to non-offsettable memory
42772 references (zero-extended addresses) require special handling. */
42773 if (TARGET_64BIT
42774 && MEM_P (x)
42775 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
42776 && INTEGER_CLASS_P (rclass)
42777 && !offsettable_memref_p (x))
42778 {
42779 sri->icode = (in_p
42780 ? CODE_FOR_reload_noff_load
42781 : CODE_FOR_reload_noff_store);
42782 /* Add the cost of moving address to a temporary. */
42783 sri->extra_cost = 1;
42784
42785 return NO_REGS;
42786 }
42787
42788 /* QImode spills from non-QI registers require an
42789 intermediate register on 32-bit targets.  */
42790 if (mode == QImode
42791 && (MAYBE_MASK_CLASS_P (rclass)
42792 || (!TARGET_64BIT && !in_p
42793 && INTEGER_CLASS_P (rclass)
42794 && MAYBE_NON_Q_CLASS_P (rclass))))
42795 {
42796 int regno;
42797
42798 if (REG_P (x))
42799 regno = REGNO (x);
42800 else
42801 regno = -1;
42802
42803 if (regno >= FIRST_PSEUDO_REGISTER || SUBREG_P (x))
42804 regno = true_regnum (x);
42805
42806 /* Return Q_REGS if the operand is in memory. */
42807 if (regno == -1)
42808 return Q_REGS;
42809 }
42810
42811 /* This condition handles the corner case where an expression involving
42812 pointers gets vectorized. We're trying to use the address of a
42813 stack slot as a vector initializer.
42814
42815 (set (reg:V2DI 74 [ vect_cst_.2 ])
42816 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
42817
42818 Eventually frame gets turned into sp+offset like this:
42819
42820 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
42821 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
42822 (const_int 392 [0x188]))))
42823
42824 That later gets turned into:
42825
42826 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
42827 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
42828 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
42829
42830 We'll have the following reload recorded:
42831
42832 Reload 0: reload_in (DI) =
42833 (plus:DI (reg/f:DI 7 sp)
42834 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
42835 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
42836 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
42837 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
42838 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
42839 reload_reg_rtx: (reg:V2DI 22 xmm1)
42840
42841 This isn't going to work, since SSE instructions can't handle scalar
42842 additions.  Returning GENERAL_REGS forces the addition into an integer
42843 register, and reload can handle subsequent reloads without problems.  */
42844
42845 if (in_p && GET_CODE (x) == PLUS
42846 && SSE_CLASS_P (rclass)
42847 && SCALAR_INT_MODE_P (mode))
42848 return GENERAL_REGS;
42849
42850 return NO_REGS;
42851 }
42852
42853 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
42854
42855 static bool
42856 ix86_class_likely_spilled_p (reg_class_t rclass)
42857 {
42858 switch (rclass)
42859 {
42860 case AREG:
42861 case DREG:
42862 case CREG:
42863 case BREG:
42864 case AD_REGS:
42865 case SIREG:
42866 case DIREG:
42867 case SSE_FIRST_REG:
42868 case FP_TOP_REG:
42869 case FP_SECOND_REG:
42870 case BND_REGS:
42871 return true;
42872
42873 default:
42874 break;
42875 }
42876
42877 return false;
42878 }
42879
42880 /* If we are copying between general and FP registers, we need a memory
42881 location. The same is true for SSE and MMX registers.
42882
42883 To optimize register_move_cost performance, allow the inline variant.
42884
42885 The macro can't work reliably when one of the CLASSES is a class containing
42886 registers from multiple units (SSE, MMX, integer).  We avoid this by never
42887 combining those units in a single alternative in the machine description.
42888 Ensure that this constraint holds to avoid unexpected surprises.
42889
42890 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
42891 enforce these sanity checks. */
42892
42893 static inline bool
42894 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
42895 machine_mode mode, int strict)
42896 {
42897 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
42898 return false;
42899 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
42900 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
42901 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
42902 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
42903 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
42904 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
42905 {
42906 gcc_assert (!strict || lra_in_progress);
42907 return true;
42908 }
42909
42910 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
42911 return true;
42912
42913 /* Between mask and general registers, we have moves no larger than word size.  */
42914 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
42915 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
42916 return true;
42917
42918 /* ??? This is a lie.  We do have moves between mmx/general and between
42919 mmx/sse2.  But by saying we need secondary memory we discourage the
42920 register allocator from using the mmx registers unless needed. */
42921 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
42922 return true;
42923
42924 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
42925 {
42926 /* SSE1 doesn't have any direct moves from other classes. */
42927 if (!TARGET_SSE2)
42928 return true;
42929
42930 /* If the target says that inter-unit moves are more expensive
42931 than moving through memory, then don't generate them. */
42932 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
42933 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
42934 return true;
42935
42936 /* Between SSE and general registers, we have moves no larger than word size.  */
42937 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42938 return true;
42939 }
42940
42941 return false;
42942 }
42943
42944 bool
42945 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
42946 machine_mode mode, int strict)
42947 {
42948 return inline_secondary_memory_needed (class1, class2, mode, strict);
42949 }
42950
42951 /* Implement the TARGET_CLASS_MAX_NREGS hook.
42952
42953 On the 80386, this is the size of MODE in words,
42954 except in the FP regs, where a single reg is always enough. */
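/* For example, XFmode needs three word-sized general registers on a
   32-bit target (two on a 64-bit target), but only a single x87
   register.  */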
42955
42956 static unsigned char
42957 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
42958 {
42959 if (MAYBE_INTEGER_CLASS_P (rclass))
42960 {
42961 if (mode == XFmode)
42962 return (TARGET_64BIT ? 2 : 3);
42963 else if (mode == XCmode)
42964 return (TARGET_64BIT ? 4 : 6);
42965 else
42966 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
42967 }
42968 else
42969 {
42970 if (COMPLEX_MODE_P (mode))
42971 return 2;
42972 else
42973 return 1;
42974 }
42975 }
42976
42977 /* Return true if the registers in CLASS cannot represent the change from
42978 modes FROM to TO. */
42979
42980 bool
42981 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
42982 enum reg_class regclass)
42983 {
42984 if (from == to)
42985 return false;
42986
42987 /* x87 registers can't do subreg at all, as all values are reformatted
42988 to extended precision. */
42989 if (MAYBE_FLOAT_CLASS_P (regclass))
42990 return true;
42991
42992 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
42993 {
42994 /* Vector registers do not support QI or HImode loads. If we don't
42995 disallow a change to these modes, reload will assume it's ok to
42996 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
42997 the vec_dupv4hi pattern. */
42998 if (GET_MODE_SIZE (from) < 4)
42999 return true;
43000 }
43001
43002 return false;
43003 }
43004
43005 /* Return the cost of moving data of mode M between a
43006 register and memory. A value of 2 is the default; this cost is
43007 relative to those in `REGISTER_MOVE_COST'.
43008
43009 This function is used extensively by register_move_cost, which is used to
43010 build tables at startup.  Make it inline in this case.
43011 When IN is 2, return the maximum of the load and store cost.
43012
43013 If moving between registers and memory is more expensive than
43014 between two registers, you should define this macro to express the
43015 relative cost.
43016
43017 Also model the increased cost of moving QImode registers in
43018 non-Q_REGS classes.
43019 */
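/* For example, on a 32-bit target a DImode value moved through
   GENERAL_REGS is costed below as two word-sized moves, i.e.
   2 * int_load[2] for a load and 2 * int_store[2] for a store.  */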
43020 static inline int
43021 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
43022 int in)
43023 {
43024 int cost;
43025 if (FLOAT_CLASS_P (regclass))
43026 {
43027 int index;
43028 switch (mode)
43029 {
43030 case SFmode:
43031 index = 0;
43032 break;
43033 case DFmode:
43034 index = 1;
43035 break;
43036 case XFmode:
43037 index = 2;
43038 break;
43039 default:
43040 return 100;
43041 }
43042 if (in == 2)
43043 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
43044 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
43045 }
43046 if (SSE_CLASS_P (regclass))
43047 {
43048 int index;
43049 switch (GET_MODE_SIZE (mode))
43050 {
43051 case 4:
43052 index = 0;
43053 break;
43054 case 8:
43055 index = 1;
43056 break;
43057 case 16:
43058 index = 2;
43059 break;
43060 default:
43061 return 100;
43062 }
43063 if (in == 2)
43064 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
43065 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
43066 }
43067 if (MMX_CLASS_P (regclass))
43068 {
43069 int index;
43070 switch (GET_MODE_SIZE (mode))
43071 {
43072 case 4:
43073 index = 0;
43074 break;
43075 case 8:
43076 index = 1;
43077 break;
43078 default:
43079 return 100;
43080 }
43081 if (in == 2)
43082 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
43083 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
43084 }
43085 switch (GET_MODE_SIZE (mode))
43086 {
43087 case 1:
43088 if (Q_CLASS_P (regclass) || TARGET_64BIT)
43089 {
43090 if (!in)
43091 return ix86_cost->int_store[0];
43092 if (TARGET_PARTIAL_REG_DEPENDENCY
43093 && optimize_function_for_speed_p (cfun))
43094 cost = ix86_cost->movzbl_load;
43095 else
43096 cost = ix86_cost->int_load[0];
43097 if (in == 2)
43098 return MAX (cost, ix86_cost->int_store[0]);
43099 return cost;
43100 }
43101 else
43102 {
43103 if (in == 2)
43104 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
43105 if (in)
43106 return ix86_cost->movzbl_load;
43107 else
43108 return ix86_cost->int_store[0] + 4;
43109 }
43110 break;
43111 case 2:
43112 if (in == 2)
43113 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
43114 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
43115 default:
43116 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
43117 if (mode == TFmode)
43118 mode = XFmode;
43119 if (in == 2)
43120 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
43121 else if (in)
43122 cost = ix86_cost->int_load[2];
43123 else
43124 cost = ix86_cost->int_store[2];
43125 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
43126 }
43127 }
43128
43129 static int
43130 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
43131 bool in)
43132 {
43133 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
43134 }
43135
43136
43137 /* Return the cost of moving data from a register in class CLASS1 to
43138 one in class CLASS2.
43139
43140 It is not required that the cost always equal 2 when FROM is the same as TO;
43141 on some machines it is expensive to move between registers if they are not
43142 general registers. */
43143
43144 static int
43145 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
43146 reg_class_t class2_i)
43147 {
43148 enum reg_class class1 = (enum reg_class) class1_i;
43149 enum reg_class class2 = (enum reg_class) class2_i;
43150
43151 /* In case we require secondary memory, compute the cost of the store
43152 followed by the load.  In order to avoid bad register allocation choices,
43153 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
43154
43155 if (inline_secondary_memory_needed (class1, class2, mode, 0))
43156 {
43157 int cost = 1;
43158
43159 cost += inline_memory_move_cost (mode, class1, 2);
43160 cost += inline_memory_move_cost (mode, class2, 2);
43161
43162 /* In case of copying from a general purpose register we may emit multiple
43163 stores followed by a single load, causing a memory size mismatch stall.
43164 Count this as an arbitrarily high cost of 20.  */
43165 if (targetm.class_max_nregs (class1, mode)
43166 > targetm.class_max_nregs (class2, mode))
43167 cost += 20;
43168
43169 /* In the case of FP/MMX moves, the registers actually overlap, and we
43170 have to switch modes in order to treat them differently. */
43171 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
43172 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
43173 cost += 20;
43174
43175 return cost;
43176 }
43177
43178 /* Moves between SSE/MMX and integer unit are expensive. */
43179 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
43180 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
43181
43182 /* ??? By keeping the returned value relatively high, we limit the number
43183 of moves between integer and MMX/SSE registers for all targets.
43184 Additionally, the high value prevents a problem with ix86_modes_tieable_p (),
43185 where integer modes in MMX/SSE registers are not tieable
43186 because of missing QImode and HImode moves to, from or between
43187 MMX/SSE registers.  */
43188 return MAX (8, ix86_cost->mmxsse_to_integer);
43189
43190 if (MAYBE_FLOAT_CLASS_P (class1))
43191 return ix86_cost->fp_move;
43192 if (MAYBE_SSE_CLASS_P (class1))
43193 return ix86_cost->sse_move;
43194 if (MAYBE_MMX_CLASS_P (class1))
43195 return ix86_cost->mmx_move;
43196 return 2;
43197 }
43198
43199 /* Return TRUE if hard register REGNO can hold a value of machine-mode
43200 MODE. */
43201
43202 bool
43203 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
43204 {
43205 /* Only the flags registers can hold CCmode values, and they can hold nothing else.  */
43206 if (CC_REGNO_P (regno))
43207 return GET_MODE_CLASS (mode) == MODE_CC;
43208 if (GET_MODE_CLASS (mode) == MODE_CC
43209 || GET_MODE_CLASS (mode) == MODE_RANDOM
43210 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
43211 return false;
43212 if (STACK_REGNO_P (regno))
43213 return VALID_FP_MODE_P (mode);
43214 if (MASK_REGNO_P (regno))
43215 return (VALID_MASK_REG_MODE (mode)
43216 || (TARGET_AVX512BW
43217 && VALID_MASK_AVX512BW_MODE (mode)));
43218 if (BND_REGNO_P (regno))
43219 return VALID_BND_REG_MODE (mode);
43220 if (SSE_REGNO_P (regno))
43221 {
43222 /* We implement the move patterns for all vector modes into and
43223 out of SSE registers, even when no operation instructions
43224 are available. */
43225
43226 /* For AVX-512 we allow, regardless of regno:
43227 - XI mode
43228 - any 512-bit wide vector mode
43229 - any scalar mode. */
43230 if (TARGET_AVX512F
43231 && (mode == XImode
43232 || VALID_AVX512F_REG_MODE (mode)
43233 || VALID_AVX512F_SCALAR_MODE (mode)))
43234 return true;
43235
43236 /* TODO check for QI/HI scalars. */
43237 /* AVX512VL allows SSE regs 16+ for 128/256-bit modes.  */
43238 if (TARGET_AVX512VL
43239 && (mode == OImode
43240 || mode == TImode
43241 || VALID_AVX256_REG_MODE (mode)
43242 || VALID_AVX512VL_128_REG_MODE (mode)))
43243 return true;
43244
43245 /* xmm16-xmm31 are only available for AVX-512. */
43246 if (EXT_REX_SSE_REGNO_P (regno))
43247 return false;
43248
43249 /* OImode and AVX modes are available only when AVX is enabled. */
43250 return ((TARGET_AVX
43251 && VALID_AVX256_REG_OR_OI_MODE (mode))
43252 || VALID_SSE_REG_MODE (mode)
43253 || VALID_SSE2_REG_MODE (mode)
43254 || VALID_MMX_REG_MODE (mode)
43255 || VALID_MMX_REG_MODE_3DNOW (mode));
43256 }
43257 if (MMX_REGNO_P (regno))
43258 {
43259 /* We implement the move patterns for 3DNOW modes even in MMX mode,
43260 so if the register is available at all, then we can move data of
43261 the given mode into or out of it. */
43262 return (VALID_MMX_REG_MODE (mode)
43263 || VALID_MMX_REG_MODE_3DNOW (mode));
43264 }
43265
43266 if (mode == QImode)
43267 {
43268 /* Take care with QImode values - they can be in non-QI regs,
43269 but then they do cause partial register stalls. */
43270 if (ANY_QI_REGNO_P (regno))
43271 return true;
43272 if (!TARGET_PARTIAL_REG_STALL)
43273 return true;
43274 /* LRA checks if the hard register is OK for the given mode.
43275 QImode values can live in non-QI regs, so we allow all
43276 registers here. */
43277 if (lra_in_progress)
43278 return true;
43279 return !can_create_pseudo_p ();
43280 }
43281 /* We handle both integers and floats in the general purpose registers.  */
43282 else if (VALID_INT_MODE_P (mode))
43283 return true;
43284 else if (VALID_FP_MODE_P (mode))
43285 return true;
43286 else if (VALID_DFP_MODE_P (mode))
43287 return true;
43288 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
43289 on to use that value in smaller contexts, this can easily force a
43290 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
43291 supporting DImode, allow it. */
43292 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
43293 return true;
43294
43295 return false;
43296 }
43297
43298 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
43299 tieable integer mode. */
43300
43301 static bool
43302 ix86_tieable_integer_mode_p (machine_mode mode)
43303 {
43304 switch (mode)
43305 {
43306 case HImode:
43307 case SImode:
43308 return true;
43309
43310 case QImode:
43311 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
43312
43313 case DImode:
43314 return TARGET_64BIT;
43315
43316 default:
43317 return false;
43318 }
43319 }
43320
43321 /* Return true if MODE1 is accessible in a register that can hold MODE2
43322 without copying. That is, all register classes that can hold MODE2
43323 can also hold MODE1. */
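/* For example, HImode and SImode are mutually tieable, DImode ties with
   them only on 64-bit targets, and two 16-byte vector modes such as
   V4SFmode and V2DImode tie because both are valid in SSE registers.  */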
43324
43325 bool
43326 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
43327 {
43328 if (mode1 == mode2)
43329 return true;
43330
43331 if (ix86_tieable_integer_mode_p (mode1)
43332 && ix86_tieable_integer_mode_p (mode2))
43333 return true;
43334
43335 /* MODE2 being XFmode implies fp stack or general regs, which means we
43336 can tie any smaller floating point modes to it. Note that we do not
43337 tie this with TFmode. */
43338 if (mode2 == XFmode)
43339 return mode1 == SFmode || mode1 == DFmode;
43340
43341 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
43342 that we can tie it with SFmode. */
43343 if (mode2 == DFmode)
43344 return mode1 == SFmode;
43345
43346 /* If MODE2 is only appropriate for an SSE register, then tie with
43347 any other mode acceptable to SSE registers. */
43348 if (GET_MODE_SIZE (mode2) == 32
43349 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
43350 return (GET_MODE_SIZE (mode1) == 32
43351 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
43352 if (GET_MODE_SIZE (mode2) == 16
43353 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
43354 return (GET_MODE_SIZE (mode1) == 16
43355 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
43356
43357 /* If MODE2 is appropriate for an MMX register, then tie
43358 with any other mode acceptable to MMX registers. */
43359 if (GET_MODE_SIZE (mode2) == 8
43360 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
43361 return (GET_MODE_SIZE (mode1) == 8
43362 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
43363
43364 return false;
43365 }
43366
43367 /* Return the cost of moving between two registers of mode MODE. */
43368
43369 static int
43370 ix86_set_reg_reg_cost (machine_mode mode)
43371 {
43372 unsigned int units = UNITS_PER_WORD;
43373
43374 switch (GET_MODE_CLASS (mode))
43375 {
43376 default:
43377 break;
43378
43379 case MODE_CC:
43380 units = GET_MODE_SIZE (CCmode);
43381 break;
43382
43383 case MODE_FLOAT:
43384 if ((TARGET_SSE && mode == TFmode)
43385 || (TARGET_80387 && mode == XFmode)
43386 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
43387 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
43388 units = GET_MODE_SIZE (mode);
43389 break;
43390
43391 case MODE_COMPLEX_FLOAT:
43392 if ((TARGET_SSE && mode == TCmode)
43393 || (TARGET_80387 && mode == XCmode)
43394 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
43395 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
43396 units = GET_MODE_SIZE (mode);
43397 break;
43398
43399 case MODE_VECTOR_INT:
43400 case MODE_VECTOR_FLOAT:
43401 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
43402 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
43403 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
43404 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
43405 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
43406 units = GET_MODE_SIZE (mode);
43407 }
43408
43409 /* Return the cost of moving between two registers of mode MODE,
43410 assuming that the move will be in pieces of at most UNITS bytes. */
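  /* For example, a 32-byte vector mode leaves UNITS at UNITS_PER_WORD
     when AVX is not enabled, so on x86_64 such a move is costed as
     COSTS_N_INSNS (4); with AVX enabled UNITS becomes 32 and the cost
     is COSTS_N_INSNS (1).  */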
43411 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
43412 }
43413
43414 /* Compute a (partial) cost for rtx X. Return true if the complete
43415 cost has been computed, and false if subexpressions should be
43416 scanned. In either case, *TOTAL contains the cost result. */
43417
43418 static bool
43419 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
43420 int *total, bool speed)
43421 {
43422 rtx mask;
43423 enum rtx_code code = GET_CODE (x);
43424 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
43425 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
43426
43427 switch (code)
43428 {
43429 case SET:
43430 if (register_operand (SET_DEST (x), VOIDmode)
43431 && reg_or_0_operand (SET_SRC (x), VOIDmode))
43432 {
43433 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
43434 return true;
43435 }
43436 return false;
43437
43438 case CONST_INT:
43439 case CONST:
43440 case LABEL_REF:
43441 case SYMBOL_REF:
43442 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
43443 *total = 3;
43444 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
43445 *total = 2;
43446 else if (flag_pic && SYMBOLIC_CONST (x)
43447 && !(TARGET_64BIT
43448 && (GET_CODE (x) == LABEL_REF
43449 || (GET_CODE (x) == SYMBOL_REF
43450 && SYMBOL_REF_LOCAL_P (x))))
43451 /* Use 0 cost for CONST to improve its propagation. */
43452 && (TARGET_64BIT || GET_CODE (x) != CONST))
43453 *total = 1;
43454 else
43455 *total = 0;
43456 return true;
43457
43458 case CONST_WIDE_INT:
43459 *total = 0;
43460 return true;
43461
43462 case CONST_DOUBLE:
43463 switch (standard_80387_constant_p (x))
43464 {
43465 case 1: /* 0.0 */
43466 *total = 1;
43467 return true;
43468 default: /* Other constants */
43469 *total = 2;
43470 return true;
43471 case 0:
43472 case -1:
43473 break;
43474 }
43475 if (SSE_FLOAT_MODE_P (mode))
43476 {
43477 case CONST_VECTOR:
43478 switch (standard_sse_constant_p (x))
43479 {
43480 case 0:
43481 break;
43482 case 1: /* 0: xor eliminates false dependency */
43483 *total = 0;
43484 return true;
43485 default: /* -1: cmp contains false dependency */
43486 *total = 1;
43487 return true;
43488 }
43489 }
43490 /* Fall back to (MEM (SYMBOL_REF)), since that's where
43491 it'll probably end up. Add a penalty for size. */
43492 *total = (COSTS_N_INSNS (1)
43493 + (flag_pic != 0 && !TARGET_64BIT)
43494 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
43495 return true;
43496
43497 case ZERO_EXTEND:
43498 /* The zero extension is often completely free on x86_64, so make
43499 it as cheap as possible. */
43500 if (TARGET_64BIT && mode == DImode
43501 && GET_MODE (XEXP (x, 0)) == SImode)
43502 *total = 1;
43503 else if (TARGET_ZERO_EXTEND_WITH_AND)
43504 *total = cost->add;
43505 else
43506 *total = cost->movzx;
43507 return false;
43508
43509 case SIGN_EXTEND:
43510 *total = cost->movsx;
43511 return false;
43512
43513 case ASHIFT:
43514 if (SCALAR_INT_MODE_P (mode)
43515 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
43516 && CONST_INT_P (XEXP (x, 1)))
43517 {
43518 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
43519 if (value == 1)
43520 {
43521 *total = cost->add;
43522 return false;
43523 }
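      /* A shift left by 2 or 3 can be done with lea via its *4 or *8
         index scale (e.g. "leal 0(,%eax,4), %eax" shifts left by 2),
         so prefer lea when it is no more expensive than a constant
         shift.  */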
43524 if ((value == 2 || value == 3)
43525 && cost->lea <= cost->shift_const)
43526 {
43527 *total = cost->lea;
43528 return false;
43529 }
43530 }
43531 /* FALLTHRU */
43532
43533 case ROTATE:
43534 case ASHIFTRT:
43535 case LSHIFTRT:
43536 case ROTATERT:
43537 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
43538 {
43539 /* ??? Should be SSE vector operation cost. */
43540 /* At least for published AMD latencies, this really is the same
43541 as the latency for a simple fpu operation like fabs. */
43542 /* V*QImode is emulated with 1-11 insns. */
43543 if (mode == V16QImode || mode == V32QImode)
43544 {
43545 int count = 11;
43546 if (TARGET_XOP && mode == V16QImode)
43547 {
43548 /* For XOP we use vpshab, which requires a broadcast of the
43549 value to the variable shift insn. For constants this
43550 means a V16Q const in mem; even when we can perform the
43551 shift with one insn, set the cost to prefer paddb.  */
43552 if (CONSTANT_P (XEXP (x, 1)))
43553 {
43554 *total = (cost->fabs
43555 + rtx_cost (XEXP (x, 0), mode, code, 0, speed)
43556 + (speed ? 2 : COSTS_N_BYTES (16)));
43557 return true;
43558 }
43559 count = 3;
43560 }
43561 else if (TARGET_SSSE3)
43562 count = 7;
43563 *total = cost->fabs * count;
43564 }
43565 else
43566 *total = cost->fabs;
43567 }
43568 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
43569 {
43570 if (CONST_INT_P (XEXP (x, 1)))
43571 {
43572 if (INTVAL (XEXP (x, 1)) > 32)
43573 *total = cost->shift_const + COSTS_N_INSNS (2);
43574 else
43575 *total = cost->shift_const * 2;
43576 }
43577 else
43578 {
43579 if (GET_CODE (XEXP (x, 1)) == AND)
43580 *total = cost->shift_var * 2;
43581 else
43582 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
43583 }
43584 }
43585 else
43586 {
43587 if (CONST_INT_P (XEXP (x, 1)))
43588 *total = cost->shift_const;
43589 else if (SUBREG_P (XEXP (x, 1))
43590 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
43591 {
43592 /* Return the cost after shift-and truncation. */
43593 *total = cost->shift_var;
43594 return true;
43595 }
43596 else
43597 *total = cost->shift_var;
43598 }
43599 return false;
43600
43601 case FMA:
43602 {
43603 rtx sub;
43604
43605 gcc_assert (FLOAT_MODE_P (mode));
43606 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
43607
43608 /* ??? SSE scalar/vector cost should be used here. */
43609 /* ??? Bald assumption that fma has the same cost as fmul. */
43610 *total = cost->fmul;
43611 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
43612
43613 /* A negation in op0 or op2 is free: FMS, FNMA, FNMS. */
43614 sub = XEXP (x, 0);
43615 if (GET_CODE (sub) == NEG)
43616 sub = XEXP (sub, 0);
43617 *total += rtx_cost (sub, mode, FMA, 0, speed);
43618
43619 sub = XEXP (x, 2);
43620 if (GET_CODE (sub) == NEG)
43621 sub = XEXP (sub, 0);
43622 *total += rtx_cost (sub, mode, FMA, 2, speed);
43623 return true;
43624 }
43625
43626 case MULT:
43627 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
43628 {
43629 /* ??? SSE scalar cost should be used here. */
43630 *total = cost->fmul;
43631 return false;
43632 }
43633 else if (X87_FLOAT_MODE_P (mode))
43634 {
43635 *total = cost->fmul;
43636 return false;
43637 }
43638 else if (FLOAT_MODE_P (mode))
43639 {
43640 /* ??? SSE vector cost should be used here. */
43641 *total = cost->fmul;
43642 return false;
43643 }
43644 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
43645 {
43646 /* V*QImode is emulated with 7-13 insns. */
43647 if (mode == V16QImode || mode == V32QImode)
43648 {
43649 int extra = 11;
43650 if (TARGET_XOP && mode == V16QImode)
43651 extra = 5;
43652 else if (TARGET_SSSE3)
43653 extra = 6;
43654 *total = cost->fmul * 2 + cost->fabs * extra;
43655 }
43656 /* V*DImode is emulated with 5-8 insns. */
43657 else if (mode == V2DImode || mode == V4DImode)
43658 {
43659 if (TARGET_XOP && mode == V2DImode)
43660 *total = cost->fmul * 2 + cost->fabs * 3;
43661 else
43662 *total = cost->fmul * 3 + cost->fabs * 5;
43663 }
43664 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
43665 insns, including two PMULUDQ. */
43666 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
43667 *total = cost->fmul * 2 + cost->fabs * 5;
43668 else
43669 *total = cost->fmul;
43670 return false;
43671 }
43672 else
43673 {
43674 rtx op0 = XEXP (x, 0);
43675 rtx op1 = XEXP (x, 1);
43676 int nbits;
43677 if (CONST_INT_P (XEXP (x, 1)))
43678 {
43679 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
43680 for (nbits = 0; value != 0; value &= value - 1)
43681 nbits++;
43682 }
43683 else
43684 /* This is arbitrary. */
43685 nbits = 7;
43686
43687 /* Compute costs correctly for widening multiplication. */
43688 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
43689 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
43690 == GET_MODE_SIZE (mode))
43691 {
43692 int is_mulwiden = 0;
43693 machine_mode inner_mode = GET_MODE (op0);
43694
43695 if (GET_CODE (op0) == GET_CODE (op1))
43696 is_mulwiden = 1, op1 = XEXP (op1, 0);
43697 else if (CONST_INT_P (op1))
43698 {
43699 if (GET_CODE (op0) == SIGN_EXTEND)
43700 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
43701 == INTVAL (op1);
43702 else
43703 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
43704 }
43705
43706 if (is_mulwiden)
43707 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
43708 }
43709
43710 *total = (cost->mult_init[MODE_INDEX (mode)]
43711 + nbits * cost->mult_bit
43712 + rtx_cost (op0, mode, outer_code, opno, speed)
43713 + rtx_cost (op1, mode, outer_code, opno, speed));
43714
43715 return true;
43716 }
43717
43718 case DIV:
43719 case UDIV:
43720 case MOD:
43721 case UMOD:
43722 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
43723 /* ??? SSE cost should be used here. */
43724 *total = cost->fdiv;
43725 else if (X87_FLOAT_MODE_P (mode))
43726 *total = cost->fdiv;
43727 else if (FLOAT_MODE_P (mode))
43728 /* ??? SSE vector cost should be used here. */
43729 *total = cost->fdiv;
43730 else
43731 *total = cost->divide[MODE_INDEX (mode)];
43732 return false;
43733
43734 case PLUS:
43735 if (GET_MODE_CLASS (mode) == MODE_INT
43736 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
43737 {
43738 if (GET_CODE (XEXP (x, 0)) == PLUS
43739 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
43740 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
43741 && CONSTANT_P (XEXP (x, 1)))
43742 {
43743 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
43744 if (val == 2 || val == 4 || val == 8)
43745 {
43746 *total = cost->lea;
43747 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
43748 outer_code, opno, speed);
43749 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
43750 outer_code, opno, speed);
43751 *total += rtx_cost (XEXP (x, 1), mode,
43752 outer_code, opno, speed);
43753 return true;
43754 }
43755 }
43756 else if (GET_CODE (XEXP (x, 0)) == MULT
43757 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
43758 {
43759 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
43760 if (val == 2 || val == 4 || val == 8)
43761 {
43762 *total = cost->lea;
43763 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
43764 outer_code, opno, speed);
43765 *total += rtx_cost (XEXP (x, 1), mode,
43766 outer_code, opno, speed);
43767 return true;
43768 }
43769 }
43770 else if (GET_CODE (XEXP (x, 0)) == PLUS)
43771 {
43772 *total = cost->lea;
43773 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
43774 outer_code, opno, speed);
43775 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
43776 outer_code, opno, speed);
43777 *total += rtx_cost (XEXP (x, 1), mode,
43778 outer_code, opno, speed);
43779 return true;
43780 }
43781 }
43782 /* FALLTHRU */
43783
43784 case MINUS:
43785 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
43786 {
43787 /* ??? SSE cost should be used here. */
43788 *total = cost->fadd;
43789 return false;
43790 }
43791 else if (X87_FLOAT_MODE_P (mode))
43792 {
43793 *total = cost->fadd;
43794 return false;
43795 }
43796 else if (FLOAT_MODE_P (mode))
43797 {
43798 /* ??? SSE vector cost should be used here. */
43799 *total = cost->fadd;
43800 return false;
43801 }
43802 /* FALLTHRU */
43803
43804 case AND:
43805 case IOR:
43806 case XOR:
43807 if (GET_MODE_CLASS (mode) == MODE_INT
43808 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
43809 {
43810 *total = (cost->add * 2
43811 + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
43812 << (GET_MODE (XEXP (x, 0)) != DImode))
43813 + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
43814 << (GET_MODE (XEXP (x, 1)) != DImode)));
43815 return true;
43816 }
43817 /* FALLTHRU */
43818
43819 case NEG:
43820 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
43821 {
43822 /* ??? SSE cost should be used here. */
43823 *total = cost->fchs;
43824 return false;
43825 }
43826 else if (X87_FLOAT_MODE_P (mode))
43827 {
43828 *total = cost->fchs;
43829 return false;
43830 }
43831 else if (FLOAT_MODE_P (mode))
43832 {
43833 /* ??? SSE vector cost should be used here. */
43834 *total = cost->fchs;
43835 return false;
43836 }
43837 /* FALLTHRU */
43838
43839 case NOT:
43840 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
43841 {
43842 /* ??? Should be SSE vector operation cost. */
43843 /* At least for published AMD latencies, this really is the same
43844 as the latency for a simple fpu operation like fabs. */
43845 *total = cost->fabs;
43846 }
43847 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
43848 *total = cost->add * 2;
43849 else
43850 *total = cost->add;
43851 return false;
43852
43853 case COMPARE:
43854 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
43855 && XEXP (XEXP (x, 0), 1) == const1_rtx
43856 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
43857 && XEXP (x, 1) == const0_rtx)
43858 {
43859 /* This kind of construct is implemented using test[bwl].
43860 Treat it as if we had an AND. */
43861 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
43862 *total = (cost->add
43863 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
43864 opno, speed)
43865 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
43866 return true;
43867 }
43868
43869 /* The embedded comparison operand is completely free. */
43870 if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
43871 && XEXP (x, 1) == const0_rtx)
43872 *total = 0;
43873
43874 return false;
43875
43876 case FLOAT_EXTEND:
43877 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
43878 *total = 0;
43879 return false;
43880
43881 case ABS:
43882 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
43883 /* ??? SSE cost should be used here. */
43884 *total = cost->fabs;
43885 else if (X87_FLOAT_MODE_P (mode))
43886 *total = cost->fabs;
43887 else if (FLOAT_MODE_P (mode))
43888 /* ??? SSE vector cost should be used here. */
43889 *total = cost->fabs;
43890 return false;
43891
43892 case SQRT:
43893 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
43894 /* ??? SSE cost should be used here. */
43895 *total = cost->fsqrt;
43896 else if (X87_FLOAT_MODE_P (mode))
43897 *total = cost->fsqrt;
43898 else if (FLOAT_MODE_P (mode))
43899 /* ??? SSE vector cost should be used here. */
43900 *total = cost->fsqrt;
43901 return false;
43902
43903 case UNSPEC:
43904 if (XINT (x, 1) == UNSPEC_TP)
43905 *total = 0;
43906 return false;
43907
43908 case VEC_SELECT:
43909 case VEC_CONCAT:
43910 case VEC_DUPLICATE:
43911 /* ??? Assume all of these vector manipulation patterns are
43912 recognizable, in which case they all pretty much have the
43913 same cost. */
43914 *total = cost->fabs;
43915 return true;
43916 case VEC_MERGE:
43917 mask = XEXP (x, 2);
43918 /* This is a masked instruction; assume the same cost
43919 as the nonmasked variant. */
43920 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
43921 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
43922 else
43923 *total = cost->fabs;
43924 return true;
43925
43926 default:
43927 return false;
43928 }
43929 }
43930
43931 #if TARGET_MACHO
43932
43933 static int current_machopic_label_num;
43934
43935 /* Given a symbol name and its associated stub, write out the
43936 definition of the stub. */
43937
43938 void
43939 machopic_output_stub (FILE *file, const char *symb, const char *stub)
43940 {
43941 unsigned int length;
43942 char *binder_name, *symbol_name, lazy_ptr_name[32];
43943 int label = ++current_machopic_label_num;
43944
43945 /* For 64-bit we shouldn't get here. */
43946 gcc_assert (!TARGET_64BIT);
43947
43948 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
43949 symb = targetm.strip_name_encoding (symb);
43950
43951 length = strlen (stub);
43952 binder_name = XALLOCAVEC (char, length + 32);
43953 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
43954
43955 length = strlen (symb);
43956 symbol_name = XALLOCAVEC (char, length + 32);
43957 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
43958
43959 sprintf (lazy_ptr_name, "L%d$lz", label);
43960
43961 if (MACHOPIC_ATT_STUB)
43962 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
43963 else if (MACHOPIC_PURE)
43964 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
43965 else
43966 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
43967
43968 fprintf (file, "%s:\n", stub);
43969 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
43970
43971 if (MACHOPIC_ATT_STUB)
43972 {
43973 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
43974 }
43975 else if (MACHOPIC_PURE)
43976 {
43977 /* PIC stub. */
43978 /* 25-byte PIC stub using "CALL get_pc_thunk". */
43979 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
43980 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
43981 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
43982 label, lazy_ptr_name, label);
43983 fprintf (file, "\tjmp\t*%%ecx\n");
43984 }
43985 else
43986 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
43987
43988 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
43989 it needs no stub-binding-helper. */
43990 if (MACHOPIC_ATT_STUB)
43991 return;
43992
43993 fprintf (file, "%s:\n", binder_name);
43994
43995 if (MACHOPIC_PURE)
43996 {
43997 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
43998 fprintf (file, "\tpushl\t%%ecx\n");
43999 }
44000 else
44001 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
44002
44003 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
44004
44005 /* N.B. Keep the correspondence of these
44006 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
44007 old-pic/new-pic/non-pic stubs; altering this will break
44008 compatibility with existing dylibs. */
44009 if (MACHOPIC_PURE)
44010 {
44011 /* 25-byte PIC stub using "CALL get_pc_thunk". */
44012 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
44013 }
44014 else
44015 /* 16-byte -mdynamic-no-pic stub. */
44016 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
44017
44018 fprintf (file, "%s:\n", lazy_ptr_name);
44019 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
44020 fprintf (file, ASM_LONG "%s\n", binder_name);
44021 }
44022 #endif /* TARGET_MACHO */
44023
44024 /* Order the registers for register allocator. */
44025
44026 void
44027 x86_order_regs_for_local_alloc (void)
44028 {
44029 int pos = 0;
44030 int i;
44031
44032 /* First allocate the local general purpose registers. */
44033 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
44034 if (GENERAL_REGNO_P (i) && call_used_regs[i])
44035 reg_alloc_order [pos++] = i;
44036
44037 /* Global general purpose registers. */
44038 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
44039 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
44040 reg_alloc_order [pos++] = i;
44041
44042 /* x87 registers come first in case we are doing FP math
44043 using them. */
44044 if (!TARGET_SSE_MATH)
44045 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
44046 reg_alloc_order [pos++] = i;
44047
44048 /* SSE registers. */
44049 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
44050 reg_alloc_order [pos++] = i;
44051 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
44052 reg_alloc_order [pos++] = i;
44053
44054 /* Extended REX SSE registers. */
44055 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
44056 reg_alloc_order [pos++] = i;
44057
44058 /* Mask registers. */
44059 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
44060 reg_alloc_order [pos++] = i;
44061
44062 /* MPX bound registers. */
44063 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
44064 reg_alloc_order [pos++] = i;
44065
44066 /* x87 registers. */
44067 if (TARGET_SSE_MATH)
44068 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
44069 reg_alloc_order [pos++] = i;
44070
44071 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
44072 reg_alloc_order [pos++] = i;
44073
44074 /* Initialize the rest of the array, as we do not allocate some registers
44075 at all. */
44076 while (pos < FIRST_PSEUDO_REGISTER)
44077 reg_alloc_order [pos++] = 0;
44078 }
44079
44080 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
44081 in struct attribute_spec handler. */
44082 static tree
44083 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
44084 tree args,
44085 int,
44086 bool *no_add_attrs)
44087 {
44088 if (TREE_CODE (*node) != FUNCTION_TYPE
44089 && TREE_CODE (*node) != METHOD_TYPE
44090 && TREE_CODE (*node) != FIELD_DECL
44091 && TREE_CODE (*node) != TYPE_DECL)
44092 {
44093 warning (OPT_Wattributes, "%qE attribute only applies to functions",
44094 name);
44095 *no_add_attrs = true;
44096 return NULL_TREE;
44097 }
44098 if (TARGET_64BIT)
44099 {
44100 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
44101 name);
44102 *no_add_attrs = true;
44103 return NULL_TREE;
44104 }
44105 if (is_attribute_p ("callee_pop_aggregate_return", name))
44106 {
44107 tree cst;
44108
44109 cst = TREE_VALUE (args);
44110 if (TREE_CODE (cst) != INTEGER_CST)
44111 {
44112 warning (OPT_Wattributes,
44113 "%qE attribute requires an integer constant argument",
44114 name);
44115 *no_add_attrs = true;
44116 }
44117 else if (compare_tree_int (cst, 0) != 0
44118 && compare_tree_int (cst, 1) != 0)
44119 {
44120 warning (OPT_Wattributes,
44121 "argument to %qE attribute is neither zero, nor one",
44122 name);
44123 *no_add_attrs = true;
44124 }
44125
44126 return NULL_TREE;
44127 }
44128
44129 return NULL_TREE;
44130 }
44131
44132 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
44133 struct attribute_spec.handler. */
44134 static tree
44135 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
44136 bool *no_add_attrs)
44137 {
44138 if (TREE_CODE (*node) != FUNCTION_TYPE
44139 && TREE_CODE (*node) != METHOD_TYPE
44140 && TREE_CODE (*node) != FIELD_DECL
44141 && TREE_CODE (*node) != TYPE_DECL)
44142 {
44143 warning (OPT_Wattributes, "%qE attribute only applies to functions",
44144 name);
44145 *no_add_attrs = true;
44146 return NULL_TREE;
44147 }
44148
44149 /* The ms_abi and sysv_abi attributes are mutually exclusive. */
44150 if (is_attribute_p ("ms_abi", name))
44151 {
44152 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
44153 {
44154 error ("ms_abi and sysv_abi attributes are not compatible");
44155 }
44156
44157 return NULL_TREE;
44158 }
44159 else if (is_attribute_p ("sysv_abi", name))
44160 {
44161 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
44162 {
44163 error ("ms_abi and sysv_abi attributes are not compatible");
44164 }
44165
44166 return NULL_TREE;
44167 }
44168
44169 return NULL_TREE;
44170 }
44171
44172 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
44173 struct attribute_spec.handler. */
44174 static tree
44175 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
44176 bool *no_add_attrs)
44177 {
44178 tree *type = NULL;
44179 if (DECL_P (*node))
44180 {
44181 if (TREE_CODE (*node) == TYPE_DECL)
44182 type = &TREE_TYPE (*node);
44183 }
44184 else
44185 type = node;
44186
44187 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
44188 {
44189 warning (OPT_Wattributes, "%qE attribute ignored",
44190 name);
44191 *no_add_attrs = true;
44192 }
44193
44194 else if ((is_attribute_p ("ms_struct", name)
44195 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
44196 || ((is_attribute_p ("gcc_struct", name)
44197 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
44198 {
44199 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
44200 name);
44201 *no_add_attrs = true;
44202 }
44203
44204 return NULL_TREE;
44205 }
44206
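/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler.  */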
44207 static tree
44208 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
44209 bool *no_add_attrs)
44210 {
44211 if (TREE_CODE (*node) != FUNCTION_DECL)
44212 {
44213 warning (OPT_Wattributes, "%qE attribute only applies to functions",
44214 name);
44215 *no_add_attrs = true;
44216 }
44217 return NULL_TREE;
44218 }
44219
44220 static bool
44221 ix86_ms_bitfield_layout_p (const_tree record_type)
44222 {
44223 return ((TARGET_MS_BITFIELD_LAYOUT
44224 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
44225 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
44226 }
44227
44228 /* Returns an expression indicating where the this parameter is
44229 located on entry to the FUNCTION. */
44230
44231 static rtx
44232 x86_this_parameter (tree function)
44233 {
44234 tree type = TREE_TYPE (function);
44235 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
44236 int nregs;
44237
44238 if (TARGET_64BIT)
44239 {
44240 const int *parm_regs;
44241
44242 if (ix86_function_type_abi (type) == MS_ABI)
44243 parm_regs = x86_64_ms_abi_int_parameter_registers;
44244 else
44245 parm_regs = x86_64_int_parameter_registers;
44246 return gen_rtx_REG (Pmode, parm_regs[aggr]);
44247 }
44248
44249 nregs = ix86_function_regparm (type, function);
44250
44251 if (nregs > 0 && !stdarg_p (type))
44252 {
44253 int regno;
44254 unsigned int ccvt = ix86_get_callcvt (type);
44255
44256 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
44257 regno = aggr ? DX_REG : CX_REG;
44258 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
44259 {
44260 regno = CX_REG;
44261 if (aggr)
44262 return gen_rtx_MEM (SImode,
44263 plus_constant (Pmode, stack_pointer_rtx, 4));
44264 }
44265 else
44266 {
44267 regno = AX_REG;
44268 if (aggr)
44269 {
44270 regno = DX_REG;
44271 if (nregs == 1)
44272 return gen_rtx_MEM (SImode,
44273 plus_constant (Pmode,
44274 stack_pointer_rtx, 4));
44275 }
44276 }
44277 return gen_rtx_REG (SImode, regno);
44278 }
44279
44280 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
44281 aggr ? 8 : 4));
44282 }
44283
44284 /* Determine whether x86_output_mi_thunk can succeed. */
44285
44286 static bool
44287 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
44288 const_tree function)
44289 {
44290 /* 64-bit can handle anything. */
44291 if (TARGET_64BIT)
44292 return true;
44293
44294 /* For 32-bit, everything's fine if we have one free register. */
44295 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
44296 return true;
44297
44298 /* Need a free register for vcall_offset. */
44299 if (vcall_offset)
44300 return false;
44301
44302 /* Need a free register for GOT references. */
44303 if (flag_pic && !targetm.binds_local_p (function))
44304 return false;
44305
44306 /* Otherwise ok. */
44307 return true;
44308 }
44309
44310 /* Output the assembler code for a thunk function. THUNK_DECL is the
44311 declaration for the thunk function itself, FUNCTION is the decl for
44312 the target function. DELTA is an immediate constant offset to be
44313 added to THIS. If VCALL_OFFSET is nonzero, the word at
44314 *(*this + vcall_offset) should be added to THIS. */
44315
44316 static void
44317 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
44318 HOST_WIDE_INT vcall_offset, tree function)
44319 {
44320 rtx this_param = x86_this_parameter (function);
44321 rtx this_reg, tmp, fnaddr;
44322 unsigned int tmp_regno;
44323 rtx_insn *insn;
44324
44325 if (TARGET_64BIT)
44326 tmp_regno = R10_REG;
44327 else
44328 {
44329 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
44330 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
44331 tmp_regno = AX_REG;
44332 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
44333 tmp_regno = DX_REG;
44334 else
44335 tmp_regno = CX_REG;
44336 }
44337
44338 emit_note (NOTE_INSN_PROLOGUE_END);
44339
44340 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
44341 pull it in now and let DELTA benefit. */
44342 if (REG_P (this_param))
44343 this_reg = this_param;
44344 else if (vcall_offset)
44345 {
44346 /* Put the this parameter into %eax. */
44347 this_reg = gen_rtx_REG (Pmode, AX_REG);
44348 emit_move_insn (this_reg, this_param);
44349 }
44350 else
44351 this_reg = NULL_RTX;
44352
44353 /* Adjust the this parameter by a fixed constant. */
44354 if (delta)
44355 {
44356 rtx delta_rtx = GEN_INT (delta);
44357 rtx delta_dst = this_reg ? this_reg : this_param;
44358
44359 if (TARGET_64BIT)
44360 {
44361 if (!x86_64_general_operand (delta_rtx, Pmode))
44362 {
44363 tmp = gen_rtx_REG (Pmode, tmp_regno);
44364 emit_move_insn (tmp, delta_rtx);
44365 delta_rtx = tmp;
44366 }
44367 }
44368
44369 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
44370 }
44371
44372 /* Adjust the this parameter by a value stored in the vtable. */
44373 if (vcall_offset)
44374 {
44375 rtx vcall_addr, vcall_mem, this_mem;
44376
44377 tmp = gen_rtx_REG (Pmode, tmp_regno);
44378
44379 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
44380 if (Pmode != ptr_mode)
44381 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
44382 emit_move_insn (tmp, this_mem);
44383
44384 /* Adjust the this parameter. */
44385 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
44386 if (TARGET_64BIT
44387 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
44388 {
44389 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
44390 emit_move_insn (tmp2, GEN_INT (vcall_offset));
44391 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
44392 }
44393
44394 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
44395 if (Pmode != ptr_mode)
44396 emit_insn (gen_addsi_1_zext (this_reg,
44397 gen_rtx_REG (ptr_mode,
44398 REGNO (this_reg)),
44399 vcall_mem));
44400 else
44401 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
44402 }
44403
44404 /* If necessary, drop THIS back to its stack slot. */
44405 if (this_reg && this_reg != this_param)
44406 emit_move_insn (this_param, this_reg);
44407
44408 fnaddr = XEXP (DECL_RTL (function), 0);
44409 if (TARGET_64BIT)
44410 {
44411 if (!flag_pic || targetm.binds_local_p (function)
44412 || TARGET_PECOFF)
44413 ;
44414 else
44415 {
44416 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
44417 tmp = gen_rtx_CONST (Pmode, tmp);
44418 fnaddr = gen_const_mem (Pmode, tmp);
44419 }
44420 }
44421 else
44422 {
44423 if (!flag_pic || targetm.binds_local_p (function))
44424 ;
44425 #if TARGET_MACHO
44426 else if (TARGET_MACHO)
44427 {
44428 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
44429 fnaddr = XEXP (fnaddr, 0);
44430 }
44431 #endif /* TARGET_MACHO */
44432 else
44433 {
44434 tmp = gen_rtx_REG (Pmode, CX_REG);
44435 output_set_got (tmp, NULL_RTX);
44436
44437 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
44438 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
44439 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
44440 fnaddr = gen_const_mem (Pmode, fnaddr);
44441 }
44442 }
44443
44444 /* Our sibling call patterns do not allow memories, because we have no
44445 predicate that can distinguish between frame and non-frame memory.
44446 For our purposes here, we can get away with (ab)using a jump pattern,
44447 because we're going to do no optimization. */
44448 if (MEM_P (fnaddr))
44449 {
44450 if (sibcall_insn_operand (fnaddr, word_mode))
44451 {
44452 fnaddr = XEXP (DECL_RTL (function), 0);
44453 tmp = gen_rtx_MEM (QImode, fnaddr);
44454 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
44455 tmp = emit_call_insn (tmp);
44456 SIBLING_CALL_P (tmp) = 1;
44457 }
44458 else
44459 emit_jump_insn (gen_indirect_jump (fnaddr));
44460 }
44461 else
44462 {
44463 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
44464 {
44465 // CM_LARGE_PIC always uses pseudo PIC register which is
44466 // uninitialized. Since FUNCTION is local and calling it
44467 // doesn't go through PLT, we use scratch register %r11 as
44468 // PIC register and initialize it here.
44469 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
44470 ix86_init_large_pic_reg (tmp_regno);
44471 fnaddr = legitimize_pic_address (fnaddr,
44472 gen_rtx_REG (Pmode, tmp_regno));
44473 }
44474
44475 if (!sibcall_insn_operand (fnaddr, word_mode))
44476 {
44477 tmp = gen_rtx_REG (word_mode, tmp_regno);
44478 if (GET_MODE (fnaddr) != word_mode)
44479 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
44480 emit_move_insn (tmp, fnaddr);
44481 fnaddr = tmp;
44482 }
44483
44484 tmp = gen_rtx_MEM (QImode, fnaddr);
44485 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
44486 tmp = emit_call_insn (tmp);
44487 SIBLING_CALL_P (tmp) = 1;
44488 }
44489 emit_barrier ();
44490
44491 /* Emit just enough of rest_of_compilation to get the insns emitted.
44492 Note that use_thunk calls assemble_start_function et al. */
44493 insn = get_insns ();
44494 shorten_branches (insn);
44495 final_start_function (insn, file, 1);
44496 final (insn, file, 1);
44497 final_end_function ();
44498 }
44499
44500 static void
44501 x86_file_start (void)
44502 {
44503 default_file_start ();
44504 if (TARGET_16BIT)
44505 fputs ("\t.code16gcc\n", asm_out_file);
44506 #if TARGET_MACHO
44507 darwin_file_start ();
44508 #endif
44509 if (X86_FILE_START_VERSION_DIRECTIVE)
44510 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
44511 if (X86_FILE_START_FLTUSED)
44512 fputs ("\t.global\t__fltused\n", asm_out_file);
44513 if (ix86_asm_dialect == ASM_INTEL)
44514 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
44515 }
44516
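/* Return the alignment to use for FIELD, whose natural alignment is
   COMPUTED.  Without -malign-double, 32-bit targets cap DFmode, DCmode
   and integer fields at 32-bit alignment; IAMCU follows its own psABI
   rules.  */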
44517 int
44518 x86_field_alignment (tree field, int computed)
44519 {
44520 machine_mode mode;
44521 tree type = TREE_TYPE (field);
44522
44523 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
44524 return computed;
44525 if (TARGET_IAMCU)
44526 return iamcu_alignment (type, computed);
44527 mode = TYPE_MODE (strip_array_types (type));
44528 if (mode == DFmode || mode == DCmode
44529 || GET_MODE_CLASS (mode) == MODE_INT
44530 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
44531 return MIN (32, computed);
44532 return computed;
44533 }
44534
44535 /* Print call to TARGET to FILE. */
44536
44537 static void
44538 x86_print_call_or_nop (FILE *file, const char *target)
44539 {
44540 if (flag_nop_mcount)
44541 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
44542 else
44543 fprintf (file, "1:\tcall\t%s\n", target);
44544 }
44545
44546 /* Output assembler code to FILE to increment profiler label # LABELNO
44547 for profiling a function entry. */
44548 void
44549 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
44550 {
44551 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
44552 : MCOUNT_NAME);
44553 if (TARGET_64BIT)
44554 {
44555 #ifndef NO_PROFILE_COUNTERS
44556 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
44557 #endif
44558
44559 if (!TARGET_PECOFF && flag_pic)
44560 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
44561 else
44562 x86_print_call_or_nop (file, mcount_name);
44563 }
44564 else if (flag_pic)
44565 {
44566 #ifndef NO_PROFILE_COUNTERS
44567 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
44568 LPREFIX, labelno);
44569 #endif
44570 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
44571 }
44572 else
44573 {
44574 #ifndef NO_PROFILE_COUNTERS
44575 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
44576 LPREFIX, labelno);
44577 #endif
44578 x86_print_call_or_nop (file, mcount_name);
44579 }
44580
44581 if (flag_record_mcount)
44582 {
44583 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
44584 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
44585 fprintf (file, "\t.previous\n");
44586 }
44587 }
44588
44589 /* We don't have exact information about the insn sizes, but we may assume
44590 quite safely that we are informed about all 1 byte insns and memory
44591 address sizes. This is enough to eliminate unnecessary padding in
44592 99% of cases. */
44593
44594 static int
44595 min_insn_size (rtx_insn *insn)
44596 {
44597 int l = 0, len;
44598
44599 if (!INSN_P (insn) || !active_insn_p (insn))
44600 return 0;
44601
44602 /* Discard alignments we've emitted and jump instructions. */
44603 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
44604 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
44605 return 0;
44606
44607 /* Important case - calls are always 5 bytes.
44608 It is common to have many calls in a row. */
44609 if (CALL_P (insn)
44610 && symbolic_reference_mentioned_p (PATTERN (insn))
44611 && !SIBLING_CALL_P (insn))
44612 return 5;
44613 len = get_attr_length (insn);
44614 if (len <= 1)
44615 return 1;
44616
44617 /* For normal instructions we rely on get_attr_length being exact,
44618 with a few exceptions. */
44619 if (!JUMP_P (insn))
44620 {
44621 enum attr_type type = get_attr_type (insn);
44622
44623 switch (type)
44624 {
44625 case TYPE_MULTI:
44626 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
44627 || asm_noperands (PATTERN (insn)) >= 0)
44628 return 0;
44629 break;
44630 case TYPE_OTHER:
44631 case TYPE_FCMP:
44632 break;
44633 default:
44634 /* Otherwise trust get_attr_length. */
44635 return len;
44636 }
44637
44638 l = get_attr_length_address (insn);
44639 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
44640 l = 4;
44641 }
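/* One opcode byte plus the address bytes when the address length is
   known; otherwise assume a 2 byte minimum.  */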
44642 if (l)
44643 return 1+l;
44644 else
44645 return 2;
44646 }
44647
44648 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
44649
44650 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
44651 16 byte window. */
44652
44653 static void
44654 ix86_avoid_jump_mispredicts (void)
44655 {
44656 rtx_insn *insn, *start = get_insns ();
44657 int nbytes = 0, njumps = 0;
44658 bool isjump = false;
44659
44660 /* Look for all minimal intervals of instructions containing 4 jumps.
44661 The intervals are bounded by START and INSN. NBYTES is the total
44662 size of instructions in the interval including INSN and not including
44663 START. When NBYTES is smaller than 16 bytes, it is possible
44664 that the ends of START and INSN land in the same 16 byte page.
44665 
44666 The smallest offset in the page at which INSN can start is the case where
44667 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
44668 We add a p2align to the 16 byte window with maxskip 15 - NBYTES + sizeof (INSN).
44669 
44670 Don't consider an asm goto as a jump; while it can contain a jump, it doesn't
44671 have to, control transfer to label(s) can be performed through other
44672 means, and we also estimate the minimum length of all asm stmts as 0. */
44673 for (insn = start; insn; insn = NEXT_INSN (insn))
44674 {
44675 int min_size;
44676
44677 if (LABEL_P (insn))
44678 {
44679 int align = label_to_alignment (insn);
44680 int max_skip = label_to_max_skip (insn);
44681
44682 if (max_skip > 15)
44683 max_skip = 15;
44684 /* If align > 3, only up to 16 - max_skip - 1 bytes can already
44685 be in the current 16 byte page, because otherwise
44686 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
44687 bytes to reach a 16 byte boundary. */
44688 if (align <= 0
44689 || (align <= 3 && max_skip != (1 << align) - 1))
44690 max_skip = 0;
44691 if (dump_file)
44692 fprintf (dump_file, "Label %i with max_skip %i\n",
44693 INSN_UID (insn), max_skip);
44694 if (max_skip)
44695 {
44696 while (nbytes + max_skip >= 16)
44697 {
44698 start = NEXT_INSN (start);
44699 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
44700 || CALL_P (start))
44701 njumps--, isjump = true;
44702 else
44703 isjump = false;
44704 nbytes -= min_insn_size (start);
44705 }
44706 }
44707 continue;
44708 }
44709
44710 min_size = min_insn_size (insn);
44711 nbytes += min_size;
44712 if (dump_file)
44713 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
44714 INSN_UID (insn), min_size);
44715 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
44716 || CALL_P (insn))
44717 njumps++;
44718 else
44719 continue;
44720
44721 while (njumps > 3)
44722 {
44723 start = NEXT_INSN (start);
44724 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
44725 || CALL_P (start))
44726 njumps--, isjump = true;
44727 else
44728 isjump = false;
44729 nbytes -= min_insn_size (start);
44730 }
44731 gcc_assert (njumps >= 0);
44732 if (dump_file)
44733 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
44734 INSN_UID (start), INSN_UID (insn), nbytes);
44735
44736 if (njumps == 3 && isjump && nbytes < 16)
44737 {
44738 int padsize = 15 - nbytes + min_insn_size (insn);
44739
44740 if (dump_file)
44741 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
44742 INSN_UID (insn), padsize);
44743 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
44744 }
44745 }
44746 }
44747 #endif
44748
44749 /* AMD Athlon works faster
44750 when RET is not the destination of a conditional jump or directly preceded
44751 by another jump instruction. We avoid the penalty by inserting a NOP just
44752 before the RET instruction in such cases. */
44753 static void
44754 ix86_pad_returns (void)
44755 {
44756 edge e;
44757 edge_iterator ei;
44758
44759 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
44760 {
44761 basic_block bb = e->src;
44762 rtx_insn *ret = BB_END (bb);
44763 rtx_insn *prev;
44764 bool replace = false;
44765
44766 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
44767 || optimize_bb_for_size_p (bb))
44768 continue;
44769 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
44770 if (active_insn_p (prev) || LABEL_P (prev))
44771 break;
44772 if (prev && LABEL_P (prev))
44773 {
44774 edge e;
44775 edge_iterator ei;
44776
44777 FOR_EACH_EDGE (e, ei, bb->preds)
44778 if (EDGE_FREQUENCY (e) && e->src->index >= 0
44779 && !(e->flags & EDGE_FALLTHRU))
44780 {
44781 replace = true;
44782 break;
44783 }
44784 }
44785 if (!replace)
44786 {
44787 prev = prev_active_insn (ret);
44788 if (prev
44789 && ((JUMP_P (prev) && any_condjump_p (prev))
44790 || CALL_P (prev)))
44791 replace = true;
44792 /* Empty functions get a branch mispredict even when
44793 the jump destination is not visible to us. */
44794 if (!prev && !optimize_function_for_size_p (cfun))
44795 replace = true;
44796 }
44797 if (replace)
44798 {
44799 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
44800 delete_insn (ret);
44801 }
44802 }
44803 }
44804
44805 /* Count the minimum number of instructions in BB. Return 4 if the
44806 number of instructions >= 4. */
44807
44808 static int
44809 ix86_count_insn_bb (basic_block bb)
44810 {
44811 rtx_insn *insn;
44812 int insn_count = 0;
44813
44814 /* Count number of instructions in this block. Return 4 if the number
44815 of instructions >= 4. */
44816 FOR_BB_INSNS (bb, insn)
44817 {
44818 /* This only happens in exit blocks. */
44819 if (JUMP_P (insn)
44820 && ANY_RETURN_P (PATTERN (insn)))
44821 break;
44822
44823 if (NONDEBUG_INSN_P (insn)
44824 && GET_CODE (PATTERN (insn)) != USE
44825 && GET_CODE (PATTERN (insn)) != CLOBBER)
44826 {
44827 insn_count++;
44828 if (insn_count >= 4)
44829 return insn_count;
44830 }
44831 }
44832
44833 return insn_count;
44834 }
44835
44836
44837 /* Count the minimum number of instructions in the code path through BB.
44838 Return 4 if the number of instructions >= 4. */
44839
44840 static int
44841 ix86_count_insn (basic_block bb)
44842 {
44843 edge e;
44844 edge_iterator ei;
44845 int min_prev_count;
44846
44847 /* Only bother counting instructions along paths with no
44848 more than 2 basic blocks between entry and exit. Given
44849 that BB has an edge to exit, determine if a predecessor
44850 of BB has an edge from entry. If so, compute the number
44851 of instructions in the predecessor block. If there
44852 happen to be multiple such blocks, compute the minimum. */
44853 min_prev_count = 4;
44854 FOR_EACH_EDGE (e, ei, bb->preds)
44855 {
44856 edge prev_e;
44857 edge_iterator prev_ei;
44858
44859 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
44860 {
44861 min_prev_count = 0;
44862 break;
44863 }
44864 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
44865 {
44866 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
44867 {
44868 int count = ix86_count_insn_bb (e->src);
44869 if (count < min_prev_count)
44870 min_prev_count = count;
44871 break;
44872 }
44873 }
44874 }
44875
44876 if (min_prev_count < 4)
44877 min_prev_count += ix86_count_insn_bb (bb);
44878
44879 return min_prev_count;
44880 }
44881
44882 /* Pad short function to 4 instructions. */
44883
44884 static void
44885 ix86_pad_short_function (void)
44886 {
44887 edge e;
44888 edge_iterator ei;
44889
44890 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
44891 {
44892 rtx_insn *ret = BB_END (e->src);
44893 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
44894 {
44895 int insn_count = ix86_count_insn (e->src);
44896
44897 /* Pad short function. */
44898 if (insn_count < 4)
44899 {
44900 rtx_insn *insn = ret;
44901
44902 /* Find epilogue. */
44903 while (insn
44904 && (!NOTE_P (insn)
44905 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
44906 insn = PREV_INSN (insn);
44907
44908 if (!insn)
44909 insn = ret;
44910
44911 /* Two NOPs count as one instruction. */
44912 insn_count = 2 * (4 - insn_count);
44913 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
44914 }
44915 }
44916 }
44917 }
44918
44919 /* Fix up a Windows system unwinder issue. If an EH region falls through into
44920 the epilogue, the Windows system unwinder will apply epilogue logic and
44921 produce incorrect offsets. This can be avoided by adding a nop between
44922 the last insn that can throw and the first insn of the epilogue. */
44923
44924 static void
44925 ix86_seh_fixup_eh_fallthru (void)
44926 {
44927 edge e;
44928 edge_iterator ei;
44929
44930 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
44931 {
44932 rtx_insn *insn, *next;
44933
44934 /* Find the beginning of the epilogue. */
44935 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
44936 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
44937 break;
44938 if (insn == NULL)
44939 continue;
44940
44941 /* We only care about preceding insns that can throw. */
44942 insn = prev_active_insn (insn);
44943 if (insn == NULL || !can_throw_internal (insn))
44944 continue;
44945
44946 /* Do not separate calls from their debug information. */
44947 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
44948 if (NOTE_P (next)
44949 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
44950 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
44951 insn = next;
44952 else
44953 break;
44954
44955 emit_insn_after (gen_nops (const1_rtx), insn);
44956 }
44957 }
44958
44959 /* Implement machine specific optimizations. We implement padding of returns
44960 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
44961 static void
44962 ix86_reorg (void)
44963 {
44964 /* We are freeing block_for_insn in the toplev to keep compatibility
44965 with old MDEP_REORGS that are not CFG based. Recompute it now. */
44966 compute_bb_for_insn ();
44967
44968 if (TARGET_SEH && current_function_has_exception_handlers ())
44969 ix86_seh_fixup_eh_fallthru ();
44970
44971 if (optimize && optimize_function_for_speed_p (cfun))
44972 {
44973 if (TARGET_PAD_SHORT_FUNCTION)
44974 ix86_pad_short_function ();
44975 else if (TARGET_PAD_RETURNS)
44976 ix86_pad_returns ();
44977 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
44978 if (TARGET_FOUR_JUMP_LIMIT)
44979 ix86_avoid_jump_mispredicts ();
44980 #endif
44981 }
44982 }
44983
44984 /* Return nonzero when a QImode register that must be represented via a REX
44985 prefix is used. */
44986 bool
44987 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
44988 {
44989 int i;
44990 extract_insn_cached (insn);
44991 for (i = 0; i < recog_data.n_operands; i++)
44992 if (GENERAL_REG_P (recog_data.operand[i])
44993 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
44994 return true;
44995 return false;
44996 }
44997
44998 /* Return true when INSN mentions a register that must be encoded using a
44999 REX prefix. */
45000 bool
45001 x86_extended_reg_mentioned_p (rtx insn)
45002 {
45003 subrtx_iterator::array_type array;
45004 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
45005 {
45006 const_rtx x = *iter;
45007 if (REG_P (x)
45008 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
45009 return true;
45010 }
45011 return false;
45012 }
45013
45014 /* If profitable, negate (without causing overflow) integer constant
45015 of mode MODE at location LOC. Return true in this case. */
45016 bool
45017 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
45018 {
45019 HOST_WIDE_INT val;
45020
45021 if (!CONST_INT_P (*loc))
45022 return false;
45023
45024 switch (mode)
45025 {
45026 case DImode:
45027 /* DImode x86_64 constants must fit in 32 bits. */
45028 gcc_assert (x86_64_immediate_operand (*loc, mode));
45029
45030 mode = SImode;
45031 break;
45032
45033 case SImode:
45034 case HImode:
45035 case QImode:
45036 break;
45037
45038 default:
45039 gcc_unreachable ();
45040 }
45041
45042 /* Avoid overflows. */
45043 if (mode_signbit_p (mode, *loc))
45044 return false;
45045
45046 val = INTVAL (*loc);
45047
45048 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
45049 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
45050 if ((val < 0 && val != -128)
45051 || val == 128)
45052 {
45053 *loc = GEN_INT (-val);
45054 return true;
45055 }
45056
45057 return false;
45058 }
45059
45060 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
45061 optabs would emit if we didn't have TFmode patterns. */
45062
45063 void
45064 x86_emit_floatuns (rtx operands[2])
45065 {
45066 rtx_code_label *neglab, *donelab;
45067 rtx i0, i1, f0, in, out;
45068 machine_mode mode, inmode;
45069
45070 inmode = GET_MODE (operands[1]);
45071 gcc_assert (inmode == SImode || inmode == DImode);
45072
45073 out = operands[0];
45074 in = force_reg (inmode, operands[1]);
45075 mode = GET_MODE (out);
45076 neglab = gen_label_rtx ();
45077 donelab = gen_label_rtx ();
45078 f0 = gen_reg_rtx (mode);
45079
45080 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
45081
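/* If the sign bit is clear, a plain signed conversion gives the
   correct result.  */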
45082 expand_float (out, in, 0);
45083
45084 emit_jump_insn (gen_jump (donelab));
45085 emit_barrier ();
45086
45087 emit_label (neglab);
45088
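/* The sign bit is set.  Compute (in >> 1) | (in & 1) so the discarded
   low bit still participates in rounding, convert that to FP, and
   double the result to recover the unsigned value.  */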
45089 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
45090 1, OPTAB_DIRECT);
45091 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
45092 1, OPTAB_DIRECT);
45093 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
45094
45095 expand_float (f0, i0, 0);
45096
45097 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
45098
45099 emit_label (donelab);
45100 }
45101 \f
45102 static bool canonicalize_perm (struct expand_vec_perm_d *d);
45103 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
45104 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
45105 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
45106
45107 /* Get a vector mode of the same size as the original but with elements
45108 twice as wide. This is only guaranteed to apply to integral vectors. */
45109
45110 static inline machine_mode
45111 get_mode_wider_vector (machine_mode o)
45112 {
45113 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
45114 machine_mode n = GET_MODE_WIDER_MODE (o);
45115 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
45116 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
45117 return n;
45118 }
45119
45120 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
45121 fill TARGET with VAL via vec_duplicate. */
45122
45123 static bool
45124 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
45125 {
45126 bool ok;
45127 rtx_insn *insn;
45128 rtx dup;
45129
45130 /* First attempt to recognize VAL as-is. */
45131 dup = gen_rtx_VEC_DUPLICATE (mode, val);
45132 insn = emit_insn (gen_rtx_SET (target, dup));
45133 if (recog_memoized (insn) < 0)
45134 {
45135 rtx_insn *seq;
45136 /* If that fails, force VAL into a register. */
45137
45138 start_sequence ();
45139 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
45140 seq = get_insns ();
45141 end_sequence ();
45142 if (seq)
45143 emit_insn_before (seq, insn);
45144
45145 ok = recog_memoized (insn) >= 0;
45146 gcc_assert (ok);
45147 }
45148 return true;
45149 }
45150
45151 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
45152 with all elements equal to VAR. Return true if successful. */
45153
45154 static bool
45155 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
45156 rtx target, rtx val)
45157 {
45158 bool ok;
45159
45160 switch (mode)
45161 {
45162 case V2SImode:
45163 case V2SFmode:
45164 if (!mmx_ok)
45165 return false;
45166 /* FALLTHRU */
45167
45168 case V4DFmode:
45169 case V4DImode:
45170 case V8SFmode:
45171 case V8SImode:
45172 case V2DFmode:
45173 case V2DImode:
45174 case V4SFmode:
45175 case V4SImode:
45176 case V16SImode:
45177 case V8DImode:
45178 case V16SFmode:
45179 case V8DFmode:
45180 return ix86_vector_duplicate_value (mode, target, val);
45181
45182 case V4HImode:
45183 if (!mmx_ok)
45184 return false;
45185 if (TARGET_SSE || TARGET_3DNOW_A)
45186 {
45187 rtx x;
45188
45189 val = gen_lowpart (SImode, val);
45190 x = gen_rtx_TRUNCATE (HImode, val);
45191 x = gen_rtx_VEC_DUPLICATE (mode, x);
45192 emit_insn (gen_rtx_SET (target, x));
45193 return true;
45194 }
45195 goto widen;
45196
45197 case V8QImode:
45198 if (!mmx_ok)
45199 return false;
45200 goto widen;
45201
45202 case V8HImode:
45203 if (TARGET_AVX2)
45204 return ix86_vector_duplicate_value (mode, target, val);
45205
45206 if (TARGET_SSE2)
45207 {
45208 struct expand_vec_perm_d dperm;
45209 rtx tmp1, tmp2;
45210
45211 permute:
45212 memset (&dperm, 0, sizeof (dperm));
45213 dperm.target = target;
45214 dperm.vmode = mode;
45215 dperm.nelt = GET_MODE_NUNITS (mode);
45216 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
45217 dperm.one_operand_p = true;
45218
45219 /* Extend to SImode using a paradoxical SUBREG. */
45220 tmp1 = gen_reg_rtx (SImode);
45221 emit_move_insn (tmp1, gen_lowpart (SImode, val));
45222
45223 /* Insert the SImode value as low element of a V4SImode vector. */
45224 tmp2 = gen_reg_rtx (V4SImode);
45225 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
45226 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
45227
45228 ok = (expand_vec_perm_1 (&dperm)
45229 || expand_vec_perm_broadcast_1 (&dperm));
45230 gcc_assert (ok);
45231 return ok;
45232 }
45233 goto widen;
45234
45235 case V16QImode:
45236 if (TARGET_AVX2)
45237 return ix86_vector_duplicate_value (mode, target, val);
45238
45239 if (TARGET_SSE2)
45240 goto permute;
45241 goto widen;
45242
45243 widen:
45244 /* Replicate the value once into the next wider mode and recurse. */
45245 {
45246 machine_mode smode, wsmode, wvmode;
45247 rtx x;
45248
45249 smode = GET_MODE_INNER (mode);
45250 wvmode = get_mode_wider_vector (mode);
45251 wsmode = GET_MODE_INNER (wvmode);
45252
45253 val = convert_modes (wsmode, smode, val, true);
45254 x = expand_simple_binop (wsmode, ASHIFT, val,
45255 GEN_INT (GET_MODE_BITSIZE (smode)),
45256 NULL_RTX, 1, OPTAB_LIB_WIDEN);
45257 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
45258
45259 x = gen_reg_rtx (wvmode);
45260 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
45261 gcc_assert (ok);
45262 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
45263 return ok;
45264 }
45265
45266 case V16HImode:
45267 case V32QImode:
45268 if (TARGET_AVX2)
45269 return ix86_vector_duplicate_value (mode, target, val);
45270 else
45271 {
45272 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
45273 rtx x = gen_reg_rtx (hvmode);
45274
45275 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
45276 gcc_assert (ok);
45277
45278 x = gen_rtx_VEC_CONCAT (mode, x, x);
45279 emit_insn (gen_rtx_SET (target, x));
45280 }
45281 return true;
45282
45283 case V64QImode:
45284 case V32HImode:
45285 if (TARGET_AVX512BW)
45286 return ix86_vector_duplicate_value (mode, target, val);
45287 else
45288 {
45289 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
45290 rtx x = gen_reg_rtx (hvmode);
45291
45292 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
45293 gcc_assert (ok);
45294
45295 x = gen_rtx_VEC_CONCAT (mode, x, x);
45296 emit_insn (gen_rtx_SET (target, x));
45297 }
45298 return true;
45299
45300 default:
45301 return false;
45302 }
45303 }
45304
45305 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
45306 whose ONE_VAR element is VAR, and other elements are zero. Return true
45307 if successful. */
45308
45309 static bool
45310 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
45311 rtx target, rtx var, int one_var)
45312 {
45313 machine_mode vsimode;
45314 rtx new_target;
45315 rtx x, tmp;
45316 bool use_vector_set = false;
45317
45318 switch (mode)
45319 {
45320 case V2DImode:
45321 /* For SSE4.1, we normally use vector set. But if the second
45322 element is zero and inter-unit moves are OK, we use movq
45323 instead. */
45324 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
45325 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
45326 && one_var == 0));
45327 break;
45328 case V16QImode:
45329 case V4SImode:
45330 case V4SFmode:
45331 use_vector_set = TARGET_SSE4_1;
45332 break;
45333 case V8HImode:
45334 use_vector_set = TARGET_SSE2;
45335 break;
45336 case V4HImode:
45337 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
45338 break;
45339 case V32QImode:
45340 case V16HImode:
45341 case V8SImode:
45342 case V8SFmode:
45343 case V4DFmode:
45344 use_vector_set = TARGET_AVX;
45345 break;
45346 case V4DImode:
45347 /* Use ix86_expand_vector_set in 64bit mode only. */
45348 use_vector_set = TARGET_AVX && TARGET_64BIT;
45349 break;
45350 default:
45351 break;
45352 }
45353
45354 if (use_vector_set)
45355 {
45356 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
45357 var = force_reg (GET_MODE_INNER (mode), var);
45358 ix86_expand_vector_set (mmx_ok, target, var, one_var);
45359 return true;
45360 }
45361
45362 switch (mode)
45363 {
45364 case V2SFmode:
45365 case V2SImode:
45366 if (!mmx_ok)
45367 return false;
45368 /* FALLTHRU */
45369
45370 case V2DFmode:
45371 case V2DImode:
45372 if (one_var != 0)
45373 return false;
45374 var = force_reg (GET_MODE_INNER (mode), var);
45375 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
45376 emit_insn (gen_rtx_SET (target, x));
45377 return true;
45378
45379 case V4SFmode:
45380 case V4SImode:
45381 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
45382 new_target = gen_reg_rtx (mode);
45383 else
45384 new_target = target;
45385 var = force_reg (GET_MODE_INNER (mode), var);
45386 x = gen_rtx_VEC_DUPLICATE (mode, var);
45387 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
45388 emit_insn (gen_rtx_SET (new_target, x));
45389 if (one_var != 0)
45390 {
45391 /* We need to shuffle the value to the correct position, so
45392 create a new pseudo to store the intermediate result. */
45393
45394 /* With SSE2, we can use the integer shuffle insns. */
45395 if (mode != V4SFmode && TARGET_SSE2)
45396 {
45397 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
45398 const1_rtx,
45399 GEN_INT (one_var == 1 ? 0 : 1),
45400 GEN_INT (one_var == 2 ? 0 : 1),
45401 GEN_INT (one_var == 3 ? 0 : 1)));
45402 if (target != new_target)
45403 emit_move_insn (target, new_target);
45404 return true;
45405 }
45406
45407 /* Otherwise convert the intermediate result to V4SFmode and
45408 use the SSE1 shuffle instructions. */
45409 if (mode != V4SFmode)
45410 {
45411 tmp = gen_reg_rtx (V4SFmode);
45412 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
45413 }
45414 else
45415 tmp = new_target;
45416
45417 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
45418 const1_rtx,
45419 GEN_INT (one_var == 1 ? 0 : 1),
45420 GEN_INT (one_var == 2 ? 0+4 : 1+4),
45421 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
45422
45423 if (mode != V4SFmode)
45424 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
45425 else if (tmp != target)
45426 emit_move_insn (target, tmp);
45427 }
45428 else if (target != new_target)
45429 emit_move_insn (target, new_target);
45430 return true;
45431
45432 case V8HImode:
45433 case V16QImode:
45434 vsimode = V4SImode;
45435 goto widen;
45436 case V4HImode:
45437 case V8QImode:
45438 if (!mmx_ok)
45439 return false;
45440 vsimode = V2SImode;
45441 goto widen;
45442 widen:
45443 if (one_var != 0)
45444 return false;
45445
45446 /* Zero extend the variable element to SImode and recurse. */
45447 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
45448
45449 x = gen_reg_rtx (vsimode);
45450 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
45451 var, one_var))
45452 gcc_unreachable ();
45453
45454 emit_move_insn (target, gen_lowpart (mode, x));
45455 return true;
45456
45457 default:
45458 return false;
45459 }
45460 }
45461
45462 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
45463 consisting of the values in VALS. It is known that all elements
45464 except ONE_VAR are constants. Return true if successful. */
45465
45466 static bool
45467 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
45468 rtx target, rtx vals, int one_var)
45469 {
45470 rtx var = XVECEXP (vals, 0, one_var);
45471 machine_mode wmode;
45472 rtx const_vec, x;
45473
45474 const_vec = copy_rtx (vals);
45475 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
45476 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
45477
45478 switch (mode)
45479 {
45480 case V2DFmode:
45481 case V2DImode:
45482 case V2SFmode:
45483 case V2SImode:
45484 /* For the two element vectors, it's just as easy to use
45485 the general case. */
45486 return false;
45487
45488 case V4DImode:
45489 /* Use ix86_expand_vector_set in 64bit mode only. */
45490 if (!TARGET_64BIT)
45491 return false;
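/* FALLTHRU */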
45492 case V4DFmode:
45493 case V8SFmode:
45494 case V8SImode:
45495 case V16HImode:
45496 case V32QImode:
45497 case V4SFmode:
45498 case V4SImode:
45499 case V8HImode:
45500 case V4HImode:
45501 break;
45502
45503 case V16QImode:
45504 if (TARGET_SSE4_1)
45505 break;
45506 wmode = V8HImode;
45507 goto widen;
45508 case V8QImode:
45509 wmode = V4HImode;
45510 goto widen;
45511 widen:
45512 /* There's no way to set one QImode entry easily. Combine
45513 the variable value with its adjacent constant value, and
45514 promote to an HImode set. */
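/* Rough C picture of the combination below (illustrative only; an
   odd-indexed byte is the high half of its HImode pair on x86):

     if (one_var & 1)
       hi = (var << 8) | (adjacent_const & 0xff);
     else
       hi = var | (adjacent_const << 8);

   The HImode value is then inserted at lane one_var >> 1 of the wider
   vector, the rest of which comes from the constant vector.  */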
45515 x = XVECEXP (vals, 0, one_var ^ 1);
45516 if (one_var & 1)
45517 {
45518 var = convert_modes (HImode, QImode, var, true);
45519 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
45520 NULL_RTX, 1, OPTAB_LIB_WIDEN);
45521 x = GEN_INT (INTVAL (x) & 0xff);
45522 }
45523 else
45524 {
45525 var = convert_modes (HImode, QImode, var, true);
45526 x = gen_int_mode (INTVAL (x) << 8, HImode);
45527 }
45528 if (x != const0_rtx)
45529 var = expand_simple_binop (HImode, IOR, var, x, var,
45530 1, OPTAB_LIB_WIDEN);
45531
45532 x = gen_reg_rtx (wmode);
45533 emit_move_insn (x, gen_lowpart (wmode, const_vec));
45534 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
45535
45536 emit_move_insn (target, gen_lowpart (mode, x));
45537 return true;
45538
45539 default:
45540 return false;
45541 }
45542
45543 emit_move_insn (target, const_vec);
45544 ix86_expand_vector_set (mmx_ok, target, var, one_var);
45545 return true;
45546 }
45547
45548 /* A subroutine of ix86_expand_vector_init_general. Use vector
45549 concatenate to handle the most general case: all values variable,
45550 and none identical. */
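/* A rough sketch of the strategy used below (illustrative only): pair
   up the N scalar inputs into N/2 half-width vectors, then keep
   concatenating pairs until a single full-width vector remains, e.g.
   for N == 4:

     first[0] = { ops[0], ops[1] };   first[1] = { ops[2], ops[3] };
     target   = VEC_CONCAT (first[0], first[1]);

   For N == 8 and N == 16 one or two more rounds of pairwise
   concatenation are added.  */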
45551
45552 static void
45553 ix86_expand_vector_init_concat (machine_mode mode,
45554 rtx target, rtx *ops, int n)
45555 {
45556 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
45557 rtx first[16], second[8], third[4];
45558 rtvec v;
45559 int i, j;
45560
45561 switch (n)
45562 {
45563 case 2:
45564 switch (mode)
45565 {
45566 case V16SImode:
45567 cmode = V8SImode;
45568 break;
45569 case V16SFmode:
45570 cmode = V8SFmode;
45571 break;
45572 case V8DImode:
45573 cmode = V4DImode;
45574 break;
45575 case V8DFmode:
45576 cmode = V4DFmode;
45577 break;
45578 case V8SImode:
45579 cmode = V4SImode;
45580 break;
45581 case V8SFmode:
45582 cmode = V4SFmode;
45583 break;
45584 case V4DImode:
45585 cmode = V2DImode;
45586 break;
45587 case V4DFmode:
45588 cmode = V2DFmode;
45589 break;
45590 case V4SImode:
45591 cmode = V2SImode;
45592 break;
45593 case V4SFmode:
45594 cmode = V2SFmode;
45595 break;
45596 case V2DImode:
45597 cmode = DImode;
45598 break;
45599 case V2SImode:
45600 cmode = SImode;
45601 break;
45602 case V2DFmode:
45603 cmode = DFmode;
45604 break;
45605 case V2SFmode:
45606 cmode = SFmode;
45607 break;
45608 default:
45609 gcc_unreachable ();
45610 }
45611
45612 if (!register_operand (ops[1], cmode))
45613 ops[1] = force_reg (cmode, ops[1]);
45614 if (!register_operand (ops[0], cmode))
45615 ops[0] = force_reg (cmode, ops[0]);
45616 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
45617 ops[1])));
45618 break;
45619
45620 case 4:
45621 switch (mode)
45622 {
45623 case V4DImode:
45624 cmode = V2DImode;
45625 break;
45626 case V4DFmode:
45627 cmode = V2DFmode;
45628 break;
45629 case V4SImode:
45630 cmode = V2SImode;
45631 break;
45632 case V4SFmode:
45633 cmode = V2SFmode;
45634 break;
45635 default:
45636 gcc_unreachable ();
45637 }
45638 goto half;
45639
45640 case 8:
45641 switch (mode)
45642 {
45643 case V8DImode:
45644 cmode = V2DImode;
45645 hmode = V4DImode;
45646 break;
45647 case V8DFmode:
45648 cmode = V2DFmode;
45649 hmode = V4DFmode;
45650 break;
45651 case V8SImode:
45652 cmode = V2SImode;
45653 hmode = V4SImode;
45654 break;
45655 case V8SFmode:
45656 cmode = V2SFmode;
45657 hmode = V4SFmode;
45658 break;
45659 default:
45660 gcc_unreachable ();
45661 }
45662 goto half;
45663
45664 case 16:
45665 switch (mode)
45666 {
45667 case V16SImode:
45668 cmode = V2SImode;
45669 hmode = V4SImode;
45670 gmode = V8SImode;
45671 break;
45672 case V16SFmode:
45673 cmode = V2SFmode;
45674 hmode = V4SFmode;
45675 gmode = V8SFmode;
45676 break;
45677 default:
45678 gcc_unreachable ();
45679 }
45680 goto half;
45681
45682 half:
45683 /* FIXME: We process inputs backward to help RA. PR 36222. */
45684 i = n - 1;
45685 j = (n >> 1) - 1;
45686 for (; i > 0; i -= 2, j--)
45687 {
45688 first[j] = gen_reg_rtx (cmode);
45689 v = gen_rtvec (2, ops[i - 1], ops[i]);
45690 ix86_expand_vector_init (false, first[j],
45691 gen_rtx_PARALLEL (cmode, v));
45692 }
45693
45694 n >>= 1;
45695 if (n > 4)
45696 {
45697 gcc_assert (hmode != VOIDmode);
45698 gcc_assert (gmode != VOIDmode);
45699 for (i = j = 0; i < n; i += 2, j++)
45700 {
45701 second[j] = gen_reg_rtx (hmode);
45702 ix86_expand_vector_init_concat (hmode, second [j],
45703 &first [i], 2);
45704 }
45705 n >>= 1;
45706 for (i = j = 0; i < n; i += 2, j++)
45707 {
45708 third[j] = gen_reg_rtx (gmode);
45709 ix86_expand_vector_init_concat (gmode, third[j],
45710 &second[i], 2);
45711 }
45712 n >>= 1;
45713 ix86_expand_vector_init_concat (mode, target, third, n);
45714 }
45715 else if (n > 2)
45716 {
45717 gcc_assert (hmode != VOIDmode);
45718 for (i = j = 0; i < n; i += 2, j++)
45719 {
45720 second[j] = gen_reg_rtx (hmode);
45721 ix86_expand_vector_init_concat (hmode, second [j],
45722 &first [i], 2);
45723 }
45724 n >>= 1;
45725 ix86_expand_vector_init_concat (mode, target, second, n);
45726 }
45727 else
45728 ix86_expand_vector_init_concat (mode, target, first, n);
45729 break;
45730
45731 default:
45732 gcc_unreachable ();
45733 }
45734 }
45735
45736 /* A subroutine of ix86_expand_vector_init_general. Use vector
45737 interleave to handle the most general case: all values variable,
45738 and none identical. */
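/* Rough outline of the interleave strategy below (illustrative only),
   e.g. for V8HImode with elements e0..e7:

     1. For each pair (e2i, e2i+1) build a vector whose two low HImode
        lanes hold that pair (a movd-style insert of the even element
        followed by a pinsrw-style insert of the odd one).
     2. Interleave the low SImode lanes of those vectors pairwise, so
        each result holds four consecutive elements in its low half.
     3. One final low DImode interleave produces the full vector.  */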
45739
45740 static void
45741 ix86_expand_vector_init_interleave (machine_mode mode,
45742 rtx target, rtx *ops, int n)
45743 {
45744 machine_mode first_imode, second_imode, third_imode, inner_mode;
45745 int i, j;
45746 rtx op0, op1;
45747 rtx (*gen_load_even) (rtx, rtx, rtx);
45748 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
45749 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
45750
45751 switch (mode)
45752 {
45753 case V8HImode:
45754 gen_load_even = gen_vec_setv8hi;
45755 gen_interleave_first_low = gen_vec_interleave_lowv4si;
45756 gen_interleave_second_low = gen_vec_interleave_lowv2di;
45757 inner_mode = HImode;
45758 first_imode = V4SImode;
45759 second_imode = V2DImode;
45760 third_imode = VOIDmode;
45761 break;
45762 case V16QImode:
45763 gen_load_even = gen_vec_setv16qi;
45764 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
45765 gen_interleave_second_low = gen_vec_interleave_lowv4si;
45766 inner_mode = QImode;
45767 first_imode = V8HImode;
45768 second_imode = V4SImode;
45769 third_imode = V2DImode;
45770 break;
45771 default:
45772 gcc_unreachable ();
45773 }
45774
45775 for (i = 0; i < n; i++)
45776 {
45777 /* Extend the odd element to SImode using a paradoxical SUBREG. */
45778 op0 = gen_reg_rtx (SImode);
45779 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
45780
45781 /* Insert the SImode value as low element of V4SImode vector. */
45782 op1 = gen_reg_rtx (V4SImode);
45783 op0 = gen_rtx_VEC_MERGE (V4SImode,
45784 gen_rtx_VEC_DUPLICATE (V4SImode,
45785 op0),
45786 CONST0_RTX (V4SImode),
45787 const1_rtx);
45788 emit_insn (gen_rtx_SET (op1, op0));
45789
45790 /* Cast the V4SImode vector back to a vector in the original mode. */
45791 op0 = gen_reg_rtx (mode);
45792 emit_move_insn (op0, gen_lowpart (mode, op1));
45793
45794 /* Load even elements into the second position. */
45795 emit_insn (gen_load_even (op0,
45796 force_reg (inner_mode,
45797 ops [i + i + 1]),
45798 const1_rtx));
45799
45800 /* Cast vector to FIRST_IMODE vector. */
45801 ops[i] = gen_reg_rtx (first_imode);
45802 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
45803 }
45804
45805 /* Interleave low FIRST_IMODE vectors. */
45806 for (i = j = 0; i < n; i += 2, j++)
45807 {
45808 op0 = gen_reg_rtx (first_imode);
45809 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
45810
45811 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
45812 ops[j] = gen_reg_rtx (second_imode);
45813 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
45814 }
45815
45816 /* Interleave low SECOND_IMODE vectors. */
45817 switch (second_imode)
45818 {
45819 case V4SImode:
45820 for (i = j = 0; i < n / 2; i += 2, j++)
45821 {
45822 op0 = gen_reg_rtx (second_imode);
45823 emit_insn (gen_interleave_second_low (op0, ops[i],
45824 ops[i + 1]));
45825
45826 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
45827 vector. */
45828 ops[j] = gen_reg_rtx (third_imode);
45829 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
45830 }
45831 second_imode = V2DImode;
45832 gen_interleave_second_low = gen_vec_interleave_lowv2di;
45833 /* FALLTHRU */
45834
45835 case V2DImode:
45836 op0 = gen_reg_rtx (second_imode);
45837 emit_insn (gen_interleave_second_low (op0, ops[0],
45838 ops[1]));
45839
45840 /* Cast the SECOND_IMODE vector back to a vector in the original
45841 mode. */
45842 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
45843 break;
45844
45845 default:
45846 gcc_unreachable ();
45847 }
45848 }
45849
45850 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
45851 all values variable, and none identical. */
45852
45853 static void
45854 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
45855 rtx target, rtx vals)
45856 {
45857 rtx ops[64], op0, op1, op2, op3, op4, op5;
45858 machine_mode half_mode = VOIDmode;
45859 machine_mode quarter_mode = VOIDmode;
45860 int n, i;
45861
45862 switch (mode)
45863 {
45864 case V2SFmode:
45865 case V2SImode:
45866 if (!mmx_ok && !TARGET_SSE)
45867 break;
45868 /* FALLTHRU */
45869
45870 case V16SImode:
45871 case V16SFmode:
45872 case V8DFmode:
45873 case V8DImode:
45874 case V8SFmode:
45875 case V8SImode:
45876 case V4DFmode:
45877 case V4DImode:
45878 case V4SFmode:
45879 case V4SImode:
45880 case V2DFmode:
45881 case V2DImode:
45882 n = GET_MODE_NUNITS (mode);
45883 for (i = 0; i < n; i++)
45884 ops[i] = XVECEXP (vals, 0, i);
45885 ix86_expand_vector_init_concat (mode, target, ops, n);
45886 return;
45887
45888 case V32QImode:
45889 half_mode = V16QImode;
45890 goto half;
45891
45892 case V16HImode:
45893 half_mode = V8HImode;
45894 goto half;
45895
45896 half:
45897 n = GET_MODE_NUNITS (mode);
45898 for (i = 0; i < n; i++)
45899 ops[i] = XVECEXP (vals, 0, i);
45900 op0 = gen_reg_rtx (half_mode);
45901 op1 = gen_reg_rtx (half_mode);
45902 ix86_expand_vector_init_interleave (half_mode, op0, ops,
45903 n >> 2);
45904 ix86_expand_vector_init_interleave (half_mode, op1,
45905 &ops [n >> 1], n >> 2);
45906 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
45907 return;
45908
45909 case V64QImode:
45910 quarter_mode = V16QImode;
45911 half_mode = V32QImode;
45912 goto quarter;
45913
45914 case V32HImode:
45915 quarter_mode = V8HImode;
45916 half_mode = V16HImode;
45917 goto quarter;
45918
45919 quarter:
45920 n = GET_MODE_NUNITS (mode);
45921 for (i = 0; i < n; i++)
45922 ops[i] = XVECEXP (vals, 0, i);
45923 op0 = gen_reg_rtx (quarter_mode);
45924 op1 = gen_reg_rtx (quarter_mode);
45925 op2 = gen_reg_rtx (quarter_mode);
45926 op3 = gen_reg_rtx (quarter_mode);
45927 op4 = gen_reg_rtx (half_mode);
45928 op5 = gen_reg_rtx (half_mode);
45929 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
45930 n >> 3);
45931 ix86_expand_vector_init_interleave (quarter_mode, op1,
45932 &ops [n >> 2], n >> 3);
45933 ix86_expand_vector_init_interleave (quarter_mode, op2,
45934 &ops [n >> 1], n >> 3);
45935 ix86_expand_vector_init_interleave (quarter_mode, op3,
45936 &ops [(n >> 1) | (n >> 2)], n >> 3);
45937 emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
45938 emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
45939 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
45940 return;
45941
45942 case V16QImode:
45943 if (!TARGET_SSE4_1)
45944 break;
45945 /* FALLTHRU */
45946
45947 case V8HImode:
45948 if (!TARGET_SSE2)
45949 break;
45950
45951 /* Don't use ix86_expand_vector_init_interleave if we can't
45952 move from GPR to SSE register directly. */
45953 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
45954 break;
45955
45956 n = GET_MODE_NUNITS (mode);
45957 for (i = 0; i < n; i++)
45958 ops[i] = XVECEXP (vals, 0, i);
45959 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
45960 return;
45961
45962 case V4HImode:
45963 case V8QImode:
45964 break;
45965
45966 default:
45967 gcc_unreachable ();
45968 }
45969
45970 {
45971 int i, j, n_elts, n_words, n_elt_per_word;
45972 machine_mode inner_mode;
45973 rtx words[4], shift;
45974
45975 inner_mode = GET_MODE_INNER (mode);
45976 n_elts = GET_MODE_NUNITS (mode);
45977 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
45978 n_elt_per_word = n_elts / n_words;
45979 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
45980
45981 for (i = 0; i < n_words; ++i)
45982 {
45983 rtx word = NULL_RTX;
45984
45985 for (j = 0; j < n_elt_per_word; ++j)
45986 {
45987 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
45988 elt = convert_modes (word_mode, inner_mode, elt, true);
45989
45990 if (j == 0)
45991 word = elt;
45992 else
45993 {
45994 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
45995 word, 1, OPTAB_LIB_WIDEN);
45996 word = expand_simple_binop (word_mode, IOR, word, elt,
45997 word, 1, OPTAB_LIB_WIDEN);
45998 }
45999 }
46000
46001 words[i] = word;
46002 }
46003
46004 if (n_words == 1)
46005 emit_move_insn (target, gen_lowpart (mode, words[0]));
46006 else if (n_words == 2)
46007 {
46008 rtx tmp = gen_reg_rtx (mode);
46009 emit_clobber (tmp);
46010 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
46011 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
46012 emit_move_insn (target, tmp);
46013 }
46014 else if (n_words == 4)
46015 {
46016 rtx tmp = gen_reg_rtx (V4SImode);
46017 gcc_assert (word_mode == SImode);
46018 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
46019 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
46020 emit_move_insn (target, gen_lowpart (mode, tmp));
46021 }
46022 else
46023 gcc_unreachable ();
46024 }
46025 }
46026
46027 /* Initialize vector TARGET via VALS. Suppress the use of MMX
46028 instructions unless MMX_OK is true. */
46029
46030 void
46031 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
46032 {
46033 machine_mode mode = GET_MODE (target);
46034 machine_mode inner_mode = GET_MODE_INNER (mode);
46035 int n_elts = GET_MODE_NUNITS (mode);
46036 int n_var = 0, one_var = -1;
46037 bool all_same = true, all_const_zero = true;
46038 int i;
46039 rtx x;
46040
46041 for (i = 0; i < n_elts; ++i)
46042 {
46043 x = XVECEXP (vals, 0, i);
46044 if (!(CONST_SCALAR_INT_P (x)
46045 || CONST_DOUBLE_P (x)
46046 || CONST_FIXED_P (x)))
46047 n_var++, one_var = i;
46048 else if (x != CONST0_RTX (inner_mode))
46049 all_const_zero = false;
46050 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
46051 all_same = false;
46052 }
46053
46054 /* Constants are best loaded from the constant pool. */
46055 if (n_var == 0)
46056 {
46057 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
46058 return;
46059 }
46060
46061 /* If all values are identical, broadcast the value. */
46062 if (all_same
46063 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
46064 XVECEXP (vals, 0, 0)))
46065 return;
46066
46067 /* Values where only one field is non-constant are best loaded from
46068 the pool and overwritten via move later. */
46069 if (n_var == 1)
46070 {
46071 if (all_const_zero
46072 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
46073 XVECEXP (vals, 0, one_var),
46074 one_var))
46075 return;
46076
46077 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
46078 return;
46079 }
46080
46081 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
46082 }
46083
46084 void
46085 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
46086 {
46087 machine_mode mode = GET_MODE (target);
46088 machine_mode inner_mode = GET_MODE_INNER (mode);
46089 machine_mode half_mode;
46090 bool use_vec_merge = false;
46091 rtx tmp;
46092 static rtx (*gen_extract[6][2]) (rtx, rtx)
46093 = {
46094 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
46095 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
46096 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
46097 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
46098 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
46099 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
46100 };
46101 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
46102 = {
46103 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
46104 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
46105 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
46106 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
46107 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
46108 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
46109 };
46110 int i, j, n;
46111 machine_mode mmode = VOIDmode;
46112 rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
46113
46114 switch (mode)
46115 {
46116 case V2SFmode:
46117 case V2SImode:
46118 if (mmx_ok)
46119 {
46120 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
46121 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
46122 if (elt == 0)
46123 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
46124 else
46125 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
46126 emit_insn (gen_rtx_SET (target, tmp));
46127 return;
46128 }
46129 break;
46130
46131 case V2DImode:
46132 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
46133 if (use_vec_merge)
46134 break;
46135
46136 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
46137 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
46138 if (elt == 0)
46139 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
46140 else
46141 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
46142 emit_insn (gen_rtx_SET (target, tmp));
46143 return;
46144
46145 case V2DFmode:
46146 {
46147 rtx op0, op1;
46148
46149 /* For the two element vectors, we implement a VEC_CONCAT with
46150 the extraction of the other element. */
46151
46152 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
46153 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
46154
46155 if (elt == 0)
46156 op0 = val, op1 = tmp;
46157 else
46158 op0 = tmp, op1 = val;
46159
46160 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
46161 emit_insn (gen_rtx_SET (target, tmp));
46162 }
46163 return;
46164
46165 case V4SFmode:
46166 use_vec_merge = TARGET_SSE4_1;
46167 if (use_vec_merge)
46168 break;
46169
46170 switch (elt)
46171 {
46172 case 0:
46173 use_vec_merge = true;
46174 break;
46175
46176 case 1:
46177 /* tmp = target = A B C D */
46178 tmp = copy_to_reg (target);
46179 /* target = A A B B */
46180 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
46181 /* target = X A B B */
46182 ix86_expand_vector_set (false, target, val, 0);
46183 /* target = A X C D */
46184 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
46185 const1_rtx, const0_rtx,
46186 GEN_INT (2+4), GEN_INT (3+4)));
46187 return;
46188
46189 case 2:
46190 /* tmp = target = A B C D */
46191 tmp = copy_to_reg (target);
46192 /* tmp = X B C D */
46193 ix86_expand_vector_set (false, tmp, val, 0);
46194 /* target = A B X D */
46195 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
46196 const0_rtx, const1_rtx,
46197 GEN_INT (0+4), GEN_INT (3+4)));
46198 return;
46199
46200 case 3:
46201 /* tmp = target = A B C D */
46202 tmp = copy_to_reg (target);
46203 /* tmp = X B C D */
46204 ix86_expand_vector_set (false, tmp, val, 0);
46205 /* target = A B C X */
46206 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
46207 const0_rtx, const1_rtx,
46208 GEN_INT (2+4), GEN_INT (0+4)));
46209 return;
46210
46211 default:
46212 gcc_unreachable ();
46213 }
46214 break;
46215
46216 case V4SImode:
46217 use_vec_merge = TARGET_SSE4_1;
46218 if (use_vec_merge)
46219 break;
46220
46221 /* Element 0 handled by vec_merge below. */
46222 if (elt == 0)
46223 {
46224 use_vec_merge = true;
46225 break;
46226 }
46227
46228 if (TARGET_SSE2)
46229 {
46230 /* With SSE2, use integer shuffles to swap element 0 and ELT,
46231 store into element 0, then shuffle them back. */
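/* Illustrative example (not in the original): for elt == 2 the order[]
   array below is { 2, 1, 0, 3 }, so with target = { A, B, C, D }:

     pshufd  -> { C, B, A, D }   (elements 0 and 2 swapped)
     set #0  -> { X, B, A, D }
     pshufd  -> { A, B, X, D }   (the same permutation swaps them back)  */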
46232
46233 rtx order[4];
46234
46235 order[0] = GEN_INT (elt);
46236 order[1] = const1_rtx;
46237 order[2] = const2_rtx;
46238 order[3] = GEN_INT (3);
46239 order[elt] = const0_rtx;
46240
46241 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
46242 order[1], order[2], order[3]));
46243
46244 ix86_expand_vector_set (false, target, val, 0);
46245
46246 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
46247 order[1], order[2], order[3]));
46248 }
46249 else
46250 {
46251 /* For SSE1, we have to reuse the V4SF code. */
46252 rtx t = gen_reg_rtx (V4SFmode);
46253 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
46254 emit_move_insn (target, gen_lowpart (mode, t));
46255 }
46256 return;
46257
46258 case V8HImode:
46259 use_vec_merge = TARGET_SSE2;
46260 break;
46261 case V4HImode:
46262 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
46263 break;
46264
46265 case V16QImode:
46266 use_vec_merge = TARGET_SSE4_1;
46267 break;
46268
46269 case V8QImode:
46270 break;
46271
46272 case V32QImode:
46273 half_mode = V16QImode;
46274 j = 0;
46275 n = 16;
46276 goto half;
46277
46278 case V16HImode:
46279 half_mode = V8HImode;
46280 j = 1;
46281 n = 8;
46282 goto half;
46283
46284 case V8SImode:
46285 half_mode = V4SImode;
46286 j = 2;
46287 n = 4;
46288 goto half;
46289
46290 case V4DImode:
46291 half_mode = V2DImode;
46292 j = 3;
46293 n = 2;
46294 goto half;
46295
46296 case V8SFmode:
46297 half_mode = V4SFmode;
46298 j = 4;
46299 n = 4;
46300 goto half;
46301
46302 case V4DFmode:
46303 half_mode = V2DFmode;
46304 j = 5;
46305 n = 2;
46306 goto half;
46307
46308 half:
46309 /* Compute offset. */
46310 i = elt / n;
46311 elt %= n;
46312
46313 gcc_assert (i <= 1);
46314
46315 /* Extract the half. */
46316 tmp = gen_reg_rtx (half_mode);
46317 emit_insn (gen_extract[j][i] (tmp, target));
46318
46319 /* Put val in tmp at elt. */
46320 ix86_expand_vector_set (false, tmp, val, elt);
46321
46322 /* Put it back. */
46323 emit_insn (gen_insert[j][i] (target, target, tmp));
46324 return;
46325
46326 case V8DFmode:
46327 if (TARGET_AVX512F)
46328 {
46329 mmode = QImode;
46330 gen_blendm = gen_avx512f_blendmv8df;
46331 }
46332 break;
46333
46334 case V8DImode:
46335 if (TARGET_AVX512F)
46336 {
46337 mmode = QImode;
46338 gen_blendm = gen_avx512f_blendmv8di;
46339 }
46340 break;
46341
46342 case V16SFmode:
46343 if (TARGET_AVX512F)
46344 {
46345 mmode = HImode;
46346 gen_blendm = gen_avx512f_blendmv16sf;
46347 }
46348 break;
46349
46350 case V16SImode:
46351 if (TARGET_AVX512F)
46352 {
46353 mmode = HImode;
46354 gen_blendm = gen_avx512f_blendmv16si;
46355 }
46356 break;
46357
46358 case V32HImode:
46359 if (TARGET_AVX512F && TARGET_AVX512BW)
46360 {
46361 mmode = SImode;
46362 gen_blendm = gen_avx512bw_blendmv32hi;
46363 }
46364 break;
46365
46366 case V64QImode:
46367 if (TARGET_AVX512F && TARGET_AVX512BW)
46368 {
46369 mmode = DImode;
46370 gen_blendm = gen_avx512bw_blendmv64qi;
46371 }
46372 break;
46373
46374 default:
46375 break;
46376 }
46377
46378 if (mmode != VOIDmode)
46379 {
46380 tmp = gen_reg_rtx (mode);
46381 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
46382 emit_insn (gen_blendm (target, tmp, target,
46383 force_reg (mmode,
46384 gen_int_mode (1 << elt, mmode))));
46385 }
46386 else if (use_vec_merge)
46387 {
46388 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
46389 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
46390 emit_insn (gen_rtx_SET (target, tmp));
46391 }
46392 else
46393 {
46394 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
46395
46396 emit_move_insn (mem, target);
46397
46398 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
46399 emit_move_insn (tmp, val);
46400
46401 emit_move_insn (target, mem);
46402 }
46403 }
46404
46405 void
46406 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
46407 {
46408 machine_mode mode = GET_MODE (vec);
46409 machine_mode inner_mode = GET_MODE_INNER (mode);
46410 bool use_vec_extr = false;
46411 rtx tmp;
46412
46413 switch (mode)
46414 {
46415 case V2SImode:
46416 case V2SFmode:
46417 if (!mmx_ok)
46418 break;
46419 /* FALLTHRU */
46420
46421 case V2DFmode:
46422 case V2DImode:
46423 use_vec_extr = true;
46424 break;
46425
46426 case V4SFmode:
46427 use_vec_extr = TARGET_SSE4_1;
46428 if (use_vec_extr)
46429 break;
46430
46431 switch (elt)
46432 {
46433 case 0:
46434 tmp = vec;
46435 break;
46436
46437 case 1:
46438 case 3:
46439 tmp = gen_reg_rtx (mode);
46440 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
46441 GEN_INT (elt), GEN_INT (elt),
46442 GEN_INT (elt+4), GEN_INT (elt+4)));
46443 break;
46444
46445 case 2:
46446 tmp = gen_reg_rtx (mode);
46447 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
46448 break;
46449
46450 default:
46451 gcc_unreachable ();
46452 }
46453 vec = tmp;
46454 use_vec_extr = true;
46455 elt = 0;
46456 break;
46457
46458 case V4SImode:
46459 use_vec_extr = TARGET_SSE4_1;
46460 if (use_vec_extr)
46461 break;
46462
46463 if (TARGET_SSE2)
46464 {
46465 switch (elt)
46466 {
46467 case 0:
46468 tmp = vec;
46469 break;
46470
46471 case 1:
46472 case 3:
46473 tmp = gen_reg_rtx (mode);
46474 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
46475 GEN_INT (elt), GEN_INT (elt),
46476 GEN_INT (elt), GEN_INT (elt)));
46477 break;
46478
46479 case 2:
46480 tmp = gen_reg_rtx (mode);
46481 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
46482 break;
46483
46484 default:
46485 gcc_unreachable ();
46486 }
46487 vec = tmp;
46488 use_vec_extr = true;
46489 elt = 0;
46490 }
46491 else
46492 {
46493 /* For SSE1, we have to reuse the V4SF code. */
46494 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
46495 gen_lowpart (V4SFmode, vec), elt);
46496 return;
46497 }
46498 break;
46499
46500 case V8HImode:
46501 use_vec_extr = TARGET_SSE2;
46502 break;
46503 case V4HImode:
46504 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
46505 break;
46506
46507 case V16QImode:
46508 use_vec_extr = TARGET_SSE4_1;
46509 break;
46510
46511 case V8SFmode:
46512 if (TARGET_AVX)
46513 {
46514 tmp = gen_reg_rtx (V4SFmode);
46515 if (elt < 4)
46516 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
46517 else
46518 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
46519 ix86_expand_vector_extract (false, target, tmp, elt & 3);
46520 return;
46521 }
46522 break;
46523
46524 case V4DFmode:
46525 if (TARGET_AVX)
46526 {
46527 tmp = gen_reg_rtx (V2DFmode);
46528 if (elt < 2)
46529 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
46530 else
46531 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
46532 ix86_expand_vector_extract (false, target, tmp, elt & 1);
46533 return;
46534 }
46535 break;
46536
46537 case V32QImode:
46538 if (TARGET_AVX)
46539 {
46540 tmp = gen_reg_rtx (V16QImode);
46541 if (elt < 16)
46542 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
46543 else
46544 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
46545 ix86_expand_vector_extract (false, target, tmp, elt & 15);
46546 return;
46547 }
46548 break;
46549
46550 case V16HImode:
46551 if (TARGET_AVX)
46552 {
46553 tmp = gen_reg_rtx (V8HImode);
46554 if (elt < 8)
46555 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
46556 else
46557 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
46558 ix86_expand_vector_extract (false, target, tmp, elt & 7);
46559 return;
46560 }
46561 break;
46562
46563 case V8SImode:
46564 if (TARGET_AVX)
46565 {
46566 tmp = gen_reg_rtx (V4SImode);
46567 if (elt < 4)
46568 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
46569 else
46570 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
46571 ix86_expand_vector_extract (false, target, tmp, elt & 3);
46572 return;
46573 }
46574 break;
46575
46576 case V4DImode:
46577 if (TARGET_AVX)
46578 {
46579 tmp = gen_reg_rtx (V2DImode);
46580 if (elt < 2)
46581 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
46582 else
46583 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
46584 ix86_expand_vector_extract (false, target, tmp, elt & 1);
46585 return;
46586 }
46587 break;
46588
46589 case V32HImode:
46590 if (TARGET_AVX512BW)
46591 {
46592 tmp = gen_reg_rtx (V16HImode);
46593 if (elt < 16)
46594 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
46595 else
46596 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
46597 ix86_expand_vector_extract (false, target, tmp, elt & 15);
46598 return;
46599 }
46600 break;
46601
46602 case V64QImode:
46603 if (TARGET_AVX512BW)
46604 {
46605 tmp = gen_reg_rtx (V32QImode);
46606 if (elt < 32)
46607 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
46608 else
46609 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
46610 ix86_expand_vector_extract (false, target, tmp, elt & 31);
46611 return;
46612 }
46613 break;
46614
46615 case V16SFmode:
46616 tmp = gen_reg_rtx (V8SFmode);
46617 if (elt < 8)
46618 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
46619 else
46620 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
46621 ix86_expand_vector_extract (false, target, tmp, elt & 7);
46622 return;
46623
46624 case V8DFmode:
46625 tmp = gen_reg_rtx (V4DFmode);
46626 if (elt < 4)
46627 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
46628 else
46629 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
46630 ix86_expand_vector_extract (false, target, tmp, elt & 3);
46631 return;
46632
46633 case V16SImode:
46634 tmp = gen_reg_rtx (V8SImode);
46635 if (elt < 8)
46636 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
46637 else
46638 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
46639 ix86_expand_vector_extract (false, target, tmp, elt & 7);
46640 return;
46641
46642 case V8DImode:
46643 tmp = gen_reg_rtx (V4DImode);
46644 if (elt < 4)
46645 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
46646 else
46647 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
46648 ix86_expand_vector_extract (false, target, tmp, elt & 3);
46649 return;
46650
46651 case V8QImode:
46652 /* ??? Could extract the appropriate HImode element and shift. */
46653 default:
46654 break;
46655 }
46656
46657 if (use_vec_extr)
46658 {
46659 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
46660 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
46661
46662 /* Let the rtl optimizers know about the zero extension performed. */
46663 if (inner_mode == QImode || inner_mode == HImode)
46664 {
46665 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
46666 target = gen_lowpart (SImode, target);
46667 }
46668
46669 emit_insn (gen_rtx_SET (target, tmp));
46670 }
46671 else
46672 {
46673 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
46674
46675 emit_move_insn (mem, vec);
46676
46677 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
46678 emit_move_insn (target, tmp);
46679 }
46680 }
46681
46682 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
46683 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
46684 The upper bits of DEST are undefined, though they shouldn't cause
46685 exceptions (some bits from src or all zeros are ok). */
46686
46687 static void
46688 emit_reduc_half (rtx dest, rtx src, int i)
46689 {
46690 rtx tem, d = dest;
46691 switch (GET_MODE (src))
46692 {
46693 case V4SFmode:
46694 if (i == 128)
46695 tem = gen_sse_movhlps (dest, src, src);
46696 else
46697 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
46698 GEN_INT (1 + 4), GEN_INT (1 + 4));
46699 break;
46700 case V2DFmode:
46701 tem = gen_vec_interleave_highv2df (dest, src, src);
46702 break;
46703 case V16QImode:
46704 case V8HImode:
46705 case V4SImode:
46706 case V2DImode:
46707 d = gen_reg_rtx (V1TImode);
46708 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
46709 GEN_INT (i / 2));
46710 break;
46711 case V8SFmode:
46712 if (i == 256)
46713 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
46714 else
46715 tem = gen_avx_shufps256 (dest, src, src,
46716 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
46717 break;
46718 case V4DFmode:
46719 if (i == 256)
46720 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
46721 else
46722 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
46723 break;
46724 case V32QImode:
46725 case V16HImode:
46726 case V8SImode:
46727 case V4DImode:
46728 if (i == 256)
46729 {
46730 if (GET_MODE (dest) != V4DImode)
46731 d = gen_reg_rtx (V4DImode);
46732 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
46733 gen_lowpart (V4DImode, src),
46734 const1_rtx);
46735 }
46736 else
46737 {
46738 d = gen_reg_rtx (V2TImode);
46739 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
46740 GEN_INT (i / 2));
46741 }
46742 break;
46743 case V64QImode:
46744 case V32HImode:
46745 case V16SImode:
46746 case V16SFmode:
46747 case V8DImode:
46748 case V8DFmode:
46749 if (i > 128)
46750 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
46751 gen_lowpart (V16SImode, src),
46752 gen_lowpart (V16SImode, src),
46753 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
46754 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
46755 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
46756 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
46757 GEN_INT (0xC), GEN_INT (0xD),
46758 GEN_INT (0xE), GEN_INT (0xF),
46759 GEN_INT (0x10), GEN_INT (0x11),
46760 GEN_INT (0x12), GEN_INT (0x13),
46761 GEN_INT (0x14), GEN_INT (0x15),
46762 GEN_INT (0x16), GEN_INT (0x17));
46763 else
46764 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
46765 gen_lowpart (V16SImode, src),
46766 GEN_INT (i == 128 ? 0x2 : 0x1),
46767 GEN_INT (0x3),
46768 GEN_INT (0x3),
46769 GEN_INT (0x3),
46770 GEN_INT (i == 128 ? 0x6 : 0x5),
46771 GEN_INT (0x7),
46772 GEN_INT (0x7),
46773 GEN_INT (0x7),
46774 GEN_INT (i == 128 ? 0xA : 0x9),
46775 GEN_INT (0xB),
46776 GEN_INT (0xB),
46777 GEN_INT (0xB),
46778 GEN_INT (i == 128 ? 0xE : 0xD),
46779 GEN_INT (0xF),
46780 GEN_INT (0xF),
46781 GEN_INT (0xF));
46782 break;
46783 default:
46784 gcc_unreachable ();
46785 }
46786 emit_insn (tem);
46787 if (d != dest)
46788 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
46789 }
46790
46791 /* Expand a vector reduction. FN is the binary pattern to reduce;
46792 DEST is the destination; IN is the input vector. */
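/* Rough picture of the log-step reduction emitted below (illustrative
   only), e.g. for a four-element vector and binary operation FN:

     v0 = { a, b, c, d }
     v1 = FN (v0, { c, d, ?, ? })       = { FN(a,c), FN(b,d), ?, ? }
     v2 = FN (v1, { FN(b,d), ?, ?, ? }) = { FN(FN(a,c), FN(b,d)), ?, ?, ? }

   Each step emit_reduc_half shifts the live upper half down to element
   0, so after log2(nelts) steps element 0 of DEST holds the full
   reduction ('?' marks don't-care lanes).  */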
46793
46794 void
46795 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
46796 {
46797 rtx half, dst, vec = in;
46798 machine_mode mode = GET_MODE (in);
46799 int i;
46800
46801 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
46802 if (TARGET_SSE4_1
46803 && mode == V8HImode
46804 && fn == gen_uminv8hi3)
46805 {
46806 emit_insn (gen_sse4_1_phminposuw (dest, in));
46807 return;
46808 }
46809
46810 for (i = GET_MODE_BITSIZE (mode);
46811 i > GET_MODE_UNIT_BITSIZE (mode);
46812 i >>= 1)
46813 {
46814 half = gen_reg_rtx (mode);
46815 emit_reduc_half (half, vec, i);
46816 if (i == GET_MODE_UNIT_BITSIZE (mode) * 2)
46817 dst = dest;
46818 else
46819 dst = gen_reg_rtx (mode);
46820 emit_insn (fn (dst, half, vec));
46821 vec = dst;
46822 }
46823 }
46824 \f
46825 /* Target hook for scalar_mode_supported_p. */
46826 static bool
46827 ix86_scalar_mode_supported_p (machine_mode mode)
46828 {
46829 if (DECIMAL_FLOAT_MODE_P (mode))
46830 return default_decimal_float_supported_p ();
46831 else if (mode == TFmode)
46832 return true;
46833 else
46834 return default_scalar_mode_supported_p (mode);
46835 }
46836
46837 /* Implements target hook vector_mode_supported_p. */
46838 static bool
46839 ix86_vector_mode_supported_p (machine_mode mode)
46840 {
46841 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
46842 return true;
46843 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
46844 return true;
46845 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
46846 return true;
46847 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
46848 return true;
46849 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
46850 return true;
46851 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
46852 return true;
46853 return false;
46854 }
46855
46856 /* Implement target hook libgcc_floating_mode_supported_p. */
46857 static bool
46858 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
46859 {
46860 switch (mode)
46861 {
46862 case SFmode:
46863 case DFmode:
46864 case XFmode:
46865 return true;
46866
46867 case TFmode:
46868 #ifdef IX86_NO_LIBGCC_TFMODE
46869 return false;
46870 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
46871 return TARGET_LONG_DOUBLE_128;
46872 #else
46873 return true;
46874 #endif
46875
46876 default:
46877 return false;
46878 }
46879 }
46880
46881 /* Target hook for c_mode_for_suffix. */
46882 static machine_mode
46883 ix86_c_mode_for_suffix (char suffix)
46884 {
46885 if (suffix == 'q')
46886 return TFmode;
46887 if (suffix == 'w')
46888 return XFmode;
46889
46890 return VOIDmode;
46891 }
46892
46893 /* Worker function for TARGET_MD_ASM_ADJUST.
46894
46895 We implement asm flag outputs, and maintain source compatibility
46896 with the old cc0-based compiler. */
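/* Illustrative user-level example of the flag-output syntax handled
   here (not part of this file); the "=@cc<cond>" constraint binds an
   output to a condition on the flags register as left by the asm:

     char carry;
     asm ("addl %2, %0" : "+r" (x), "=@ccc" (carry) : "r" (y));

   "carry" receives 1 if the addition set the carry flag, else 0.  */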
46897
46898 static rtx_insn *
46899 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
46900 vec<const char *> &constraints,
46901 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
46902 {
46903 clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
46904 SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);
46905
46906 bool saw_asm_flag = false;
46907
46908 start_sequence ();
46909 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
46910 {
46911 const char *con = constraints[i];
46912 if (strncmp (con, "=@cc", 4) != 0)
46913 continue;
46914 con += 4;
46915 if (strchr (con, ',') != NULL)
46916 {
46917 error ("alternatives not allowed in asm flag output");
46918 continue;
46919 }
46920
46921 bool invert = false;
46922 if (con[0] == 'n')
46923 invert = true, con++;
46924
46925 machine_mode mode = CCmode;
46926 rtx_code code = UNKNOWN;
46927
46928 switch (con[0])
46929 {
46930 case 'a':
46931 if (con[1] == 0)
46932 mode = CCAmode, code = EQ;
46933 else if (con[1] == 'e' && con[2] == 0)
46934 mode = CCCmode, code = EQ;
46935 break;
46936 case 'b':
46937 if (con[1] == 0)
46938 mode = CCCmode, code = EQ;
46939 else if (con[1] == 'e' && con[2] == 0)
46940 mode = CCAmode, code = NE;
46941 break;
46942 case 'c':
46943 if (con[1] == 0)
46944 mode = CCCmode, code = EQ;
46945 break;
46946 case 'e':
46947 if (con[1] == 0)
46948 mode = CCZmode, code = EQ;
46949 break;
46950 case 'g':
46951 if (con[1] == 0)
46952 mode = CCGCmode, code = GT;
46953 else if (con[1] == 'e' && con[2] == 0)
46954 mode = CCGCmode, code = GE;
46955 break;
46956 case 'l':
46957 if (con[1] == 0)
46958 mode = CCGCmode, code = LT;
46959 else if (con[1] == 'e' && con[2] == 0)
46960 mode = CCGCmode, code = LE;
46961 break;
46962 case 'o':
46963 if (con[1] == 0)
46964 mode = CCOmode, code = EQ;
46965 break;
46966 case 'p':
46967 if (con[1] == 0)
46968 mode = CCPmode, code = EQ;
46969 break;
46970 case 's':
46971 if (con[1] == 0)
46972 mode = CCSmode, code = EQ;
46973 break;
46974 case 'z':
46975 if (con[1] == 0)
46976 mode = CCZmode, code = EQ;
46977 break;
46978 }
46979 if (code == UNKNOWN)
46980 {
46981 error ("unknown asm flag output %qs", constraints[i]);
46982 continue;
46983 }
46984 if (invert)
46985 code = reverse_condition (code);
46986
46987 rtx dest = outputs[i];
46988 if (!saw_asm_flag)
46989 {
46990 /* This is the first asm flag output. Here we put the flags
46991 register in as the real output and adjust the condition to
46992 allow it. */
46993 constraints[i] = "=Bf";
46994 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
46995 saw_asm_flag = true;
46996 }
46997 else
46998 {
46999 /* We don't need the flags register as output twice. */
47000 constraints[i] = "=X";
47001 outputs[i] = gen_rtx_SCRATCH (SImode);
47002 }
47003
47004 rtx x = gen_rtx_REG (mode, FLAGS_REG);
47005 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
47006
47007 machine_mode dest_mode = GET_MODE (dest);
47008 if (!SCALAR_INT_MODE_P (dest_mode))
47009 {
47010 error ("invalid type for asm flag output");
47011 continue;
47012 }
47013
47014 if (dest_mode == DImode && !TARGET_64BIT)
47015 dest_mode = SImode;
47016
47017 if (dest_mode != QImode)
47018 {
47019 rtx destqi = gen_reg_rtx (QImode);
47020 emit_insn (gen_rtx_SET (destqi, x));
47021
47022 if (TARGET_ZERO_EXTEND_WITH_AND
47023 && optimize_function_for_speed_p (cfun))
47024 {
47025 x = force_reg (dest_mode, const0_rtx);
47026
47027 emit_insn (gen_movstrictqi
47028 (gen_lowpart (QImode, x), destqi));
47029 }
47030 else
47031 x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
47032 }
47033
47034 if (dest_mode != GET_MODE (dest))
47035 {
47036 rtx tmp = gen_reg_rtx (SImode);
47037
47038 emit_insn (gen_rtx_SET (tmp, x));
47039 emit_insn (gen_zero_extendsidi2 (dest, tmp));
47040 }
47041 else
47042 emit_insn (gen_rtx_SET (dest, x));
47043 }
47044 rtx_insn *seq = get_insns ();
47045 end_sequence ();
47046
47047 if (saw_asm_flag)
47048 return seq;
47049 else
47050 {
47051 /* If we had no asm flag outputs, clobber the flags. */
47052 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
47053 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
47054 return NULL;
47055 }
47056 }
47057
47058 /* Implements the target hook targetm.asm.encode_section_info. */
47059
47060 static void ATTRIBUTE_UNUSED
47061 ix86_encode_section_info (tree decl, rtx rtl, int first)
47062 {
47063 default_encode_section_info (decl, rtl, first);
47064
47065 if (ix86_in_large_data_p (decl))
47066 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
47067 }
47068
47069 /* Worker function for REVERSE_CONDITION. */
47070
47071 enum rtx_code
47072 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
47073 {
47074 return (mode != CCFPmode && mode != CCFPUmode
47075 ? reverse_condition (code)
47076 : reverse_condition_maybe_unordered (code));
47077 }
47078
47079 /* Output code to perform an x87 FP register move, from OPERANDS[1]
47080 to OPERANDS[0]. */
47081
47082 const char *
47083 output_387_reg_move (rtx insn, rtx *operands)
47084 {
47085 if (REG_P (operands[0]))
47086 {
47087 if (REG_P (operands[1])
47088 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
47089 {
47090 if (REGNO (operands[0]) == FIRST_STACK_REG)
47091 return output_387_ffreep (operands, 0);
47092 return "fstp\t%y0";
47093 }
47094 if (STACK_TOP_P (operands[0]))
47095 return "fld%Z1\t%y1";
47096 return "fst\t%y0";
47097 }
47098 else if (MEM_P (operands[0]))
47099 {
47100 gcc_assert (REG_P (operands[1]));
47101 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
47102 return "fstp%Z0\t%y0";
47103 else
47104 {
47105 /* There is no non-popping store to memory for XFmode.
47106 So if we need one, follow the store with a load. */
47107 if (GET_MODE (operands[0]) == XFmode)
47108 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
47109 else
47110 return "fst%Z0\t%y0";
47111 }
47112 }
47113 else
47114 gcc_unreachable();
47115 }
47116
47117 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
47118 the FP status register is set. */
47119
47120 void
47121 ix86_emit_fp_unordered_jump (rtx label)
47122 {
47123 rtx reg = gen_reg_rtx (HImode);
47124 rtx temp;
47125
47126 emit_insn (gen_x86_fnstsw_1 (reg));
47127
47128 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
47129 {
47130 emit_insn (gen_x86_sahf_1 (reg));
47131
47132 temp = gen_rtx_REG (CCmode, FLAGS_REG);
47133 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
47134 }
47135 else
47136 {
47137 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
47138
47139 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
47140 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
47141 }
47142
47143 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
47144 gen_rtx_LABEL_REF (VOIDmode, label),
47145 pc_rtx);
47146 temp = gen_rtx_SET (pc_rtx, temp);
47147
47148 emit_jump_insn (temp);
47149 predict_jump (REG_BR_PROB_BASE * 10 / 100);
47150 }
47151
47152 /* Output code to perform a log1p XFmode calculation. */
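/* Rough C equivalent of the sequence emitted below (illustrative
   only).  fyl2xp1 is only accurate for |x| < 1 - sqrt(2)/2, hence the
   ~0.2929 threshold:

     if (fabs (x) >= 0.29289321881345247...)
       result = ln2 * log2 (1.0 + x);    // fldln2; fyl2x
     else
       result = ln2 * log2(1 + x) via fyl2xp1, without forming 1 + x;

   both branches compute log1p (x).  */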
47153
47154 void ix86_emit_i387_log1p (rtx op0, rtx op1)
47155 {
47156 rtx_code_label *label1 = gen_label_rtx ();
47157 rtx_code_label *label2 = gen_label_rtx ();
47158
47159 rtx tmp = gen_reg_rtx (XFmode);
47160 rtx tmp2 = gen_reg_rtx (XFmode);
47161 rtx test;
47162
47163 emit_insn (gen_absxf2 (tmp, op1));
47164 test = gen_rtx_GE (VOIDmode, tmp,
47165 const_double_from_real_value (
47166 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
47167 XFmode));
47168 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
47169
47170 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
47171 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
47172 emit_jump (label2);
47173
47174 emit_label (label1);
47175 emit_move_insn (tmp, CONST1_RTX (XFmode));
47176 emit_insn (gen_addxf3 (tmp, op1, tmp));
47177 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
47178 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
47179
47180 emit_label (label2);
47181 }
47182
47183 /* Emit code for round calculation. */
47184 void ix86_emit_i387_round (rtx op0, rtx op1)
47185 {
47186 machine_mode inmode = GET_MODE (op1);
47187 machine_mode outmode = GET_MODE (op0);
47188 rtx e1, e2, res, tmp, tmp1, half;
47189 rtx scratch = gen_reg_rtx (HImode);
47190 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
47191 rtx_code_label *jump_label = gen_label_rtx ();
47192 rtx insn;
47193 rtx (*gen_abs) (rtx, rtx);
47194 rtx (*gen_neg) (rtx, rtx);
47195
47196 switch (inmode)
47197 {
47198 case SFmode:
47199 gen_abs = gen_abssf2;
47200 break;
47201 case DFmode:
47202 gen_abs = gen_absdf2;
47203 break;
47204 case XFmode:
47205 gen_abs = gen_absxf2;
47206 break;
47207 default:
47208 gcc_unreachable ();
47209 }
47210
47211 switch (outmode)
47212 {
47213 case SFmode:
47214 gen_neg = gen_negsf2;
47215 break;
47216 case DFmode:
47217 gen_neg = gen_negdf2;
47218 break;
47219 case XFmode:
47220 gen_neg = gen_negxf2;
47221 break;
47222 case HImode:
47223 gen_neg = gen_neghi2;
47224 break;
47225 case SImode:
47226 gen_neg = gen_negsi2;
47227 break;
47228 case DImode:
47229 gen_neg = gen_negdi2;
47230 break;
47231 default:
47232 gcc_unreachable ();
47233 }
47234
47235 e1 = gen_reg_rtx (inmode);
47236 e2 = gen_reg_rtx (inmode);
47237 res = gen_reg_rtx (outmode);
47238
47239 half = const_double_from_real_value (dconsthalf, inmode);
47240
47241 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
47242
47243 /* scratch = fxam(op1) */
47244 emit_insn (gen_rtx_SET (scratch,
47245 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
47246 UNSPEC_FXAM)));
47247 /* e1 = fabs(op1) */
47248 emit_insn (gen_abs (e1, op1));
47249
47250 /* e2 = e1 + 0.5 */
47251 half = force_reg (inmode, half);
47252 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (inmode, e1, half)));
47253
47254 /* res = floor(e2) */
47255 if (inmode != XFmode)
47256 {
47257 tmp1 = gen_reg_rtx (XFmode);
47258
47259 emit_insn (gen_rtx_SET (tmp1, gen_rtx_FLOAT_EXTEND (XFmode, e2)));
47260 }
47261 else
47262 tmp1 = e2;
47263
47264 switch (outmode)
47265 {
47266 case SFmode:
47267 case DFmode:
47268 {
47269 rtx tmp0 = gen_reg_rtx (XFmode);
47270
47271 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
47272
47273 emit_insn (gen_rtx_SET (res,
47274 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
47275 UNSPEC_TRUNC_NOOP)));
47276 }
47277 break;
47278 case XFmode:
47279 emit_insn (gen_frndintxf2_floor (res, tmp1));
47280 break;
47281 case HImode:
47282 emit_insn (gen_lfloorxfhi2 (res, tmp1));
47283 break;
47284 case SImode:
47285 emit_insn (gen_lfloorxfsi2 (res, tmp1));
47286 break;
47287 case DImode:
47288 emit_insn (gen_lfloorxfdi2 (res, tmp1));
47289 break;
47290 default:
47291 gcc_unreachable ();
47292 }
47293
47294 /* flags = signbit(a) */
47295 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
47296
47297 /* if (flags) then res = -res */
47298 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
47299 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
47300 gen_rtx_LABEL_REF (VOIDmode, jump_label),
47301 pc_rtx);
47302 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
47303 predict_jump (REG_BR_PROB_BASE * 50 / 100);
47304 JUMP_LABEL (insn) = jump_label;
47305
47306 emit_insn (gen_neg (res, res));
47307
47308 emit_label (jump_label);
47309 LABEL_NUSES (jump_label) = 1;
47310
47311 emit_move_insn (op0, res);
47312 }
47313
47314 /* Output code to perform a Newton-Raphson approximation of a single precision
47315 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
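/* Derivation of the formula used below (illustrative only): one
   Newton-Raphson step for the reciprocal refines x0 ~= 1/b as

     x1 = x0 * (2 - b * x0) = (x0 + x0) - (b * x0 * x0)

   and the quotient is then approximated by a * x1, which is exactly
   the e1 - e0 form computed in the body.  */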
47316
47317 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
47318 {
47319 rtx x0, x1, e0, e1;
47320
47321 x0 = gen_reg_rtx (mode);
47322 e0 = gen_reg_rtx (mode);
47323 e1 = gen_reg_rtx (mode);
47324 x1 = gen_reg_rtx (mode);
47325
47326 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
47327
47328 b = force_reg (mode, b);
47329
47330 /* x0 = rcp(b) estimate */
47331 if (mode == V16SFmode || mode == V8DFmode)
47332 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
47333 UNSPEC_RCP14)));
47334 else
47335 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
47336 UNSPEC_RCP)));
47337
47338 /* e0 = x0 * b */
47339 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
47340
47341 /* e0 = x0 * e0 */
47342 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
47343
47344 /* e1 = x0 + x0 */
47345 emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
47346
47347 /* x1 = e1 - e0 */
47348 emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
47349
47350 /* res = a * x1 */
47351 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
47352 }
47353
47354 /* Output code to perform a Newton-Raphson approximation of a
47355 single precision floating point [reciprocal] square root. */
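/* Derivation of the formulas quoted in the body (illustrative only):
   one Newton-Raphson step for the reciprocal square root refines
   r ~= 1/sqrt(a) as

     r' = r * (3 - a*r*r) / 2 = -0.5 * r * (a*r*r - 3)

   and sqrt(a) = a * rsqrt(a) gives the corresponding sqrt form
   -0.5 * a * r * (a*r*r - 3).  */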
47356
47357 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
47358 bool recip)
47359 {
47360 rtx x0, e0, e1, e2, e3, mthree, mhalf;
47361 REAL_VALUE_TYPE r;
47362 int unspec;
47363
47364 x0 = gen_reg_rtx (mode);
47365 e0 = gen_reg_rtx (mode);
47366 e1 = gen_reg_rtx (mode);
47367 e2 = gen_reg_rtx (mode);
47368 e3 = gen_reg_rtx (mode);
47369
47370 real_from_integer (&r, VOIDmode, -3, SIGNED);
47371 mthree = const_double_from_real_value (r, SFmode);
47372
47373 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
47374 mhalf = const_double_from_real_value (r, SFmode);
47375 unspec = UNSPEC_RSQRT;
47376
47377 if (VECTOR_MODE_P (mode))
47378 {
47379 mthree = ix86_build_const_vector (mode, true, mthree);
47380 mhalf = ix86_build_const_vector (mode, true, mhalf);
47381 /* There is no 512-bit rsqrt. There is however rsqrt14. */
47382 if (GET_MODE_SIZE (mode) == 64)
47383 unspec = UNSPEC_RSQRT14;
47384 }
47385
47386 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
47387 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
47388
47389 a = force_reg (mode, a);
47390
47391 /* x0 = rsqrt(a) estimate */
47392 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
47393 unspec)));
47394
47395 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent a NaN result for sqrt (0.0). */
47396 if (!recip)
47397 {
47398 rtx zero, mask;
47399
47400 zero = gen_reg_rtx (mode);
47401 mask = gen_reg_rtx (mode);
47402
47403 zero = force_reg (mode, CONST0_RTX(mode));
47404
47405 /* Handle masked compare. */
47406 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
47407 {
47408 mask = gen_reg_rtx (HImode);
47409 /* Imm value 0x4 corresponds to not-equal comparison. */
47410 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
47411 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
47412 }
47413 else
47414 {
47415 emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a)));
47416
47417 emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask)));
47418 }
47419 }
47420
47421 /* e0 = x0 * a */
47422 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
47423 /* e1 = e0 * x0 */
47424 emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
47425
47426 /* e2 = e1 - 3. */
47427 mthree = force_reg (mode, mthree);
47428 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
47429
47430 mhalf = force_reg (mode, mhalf);
47431 if (recip)
47432 /* e3 = -.5 * x0 */
47433 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
47434 else
47435 /* e3 = -.5 * e0 */
47436 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
47437 /* ret = e2 * e3 */
47438 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
47439 }
47440
47441 #ifdef TARGET_SOLARIS
47442 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
47443
47444 static void
47445 i386_solaris_elf_named_section (const char *name, unsigned int flags,
47446 tree decl)
47447 {
47448 /* With Binutils 2.15, the "@unwind" marker must be specified on
47449 every occurrence of the ".eh_frame" section, not just the first
47450 one. */
47451 if (TARGET_64BIT
47452 && strcmp (name, ".eh_frame") == 0)
47453 {
47454 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
47455 flags & SECTION_WRITE ? "aw" : "a");
47456 return;
47457 }
47458
47459 #ifndef USE_GAS
47460 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
47461 {
47462 solaris_elf_asm_comdat_section (name, flags, decl);
47463 return;
47464 }
47465 #endif
47466
47467 default_elf_asm_named_section (name, flags, decl);
47468 }
47469 #endif /* TARGET_SOLARIS */
47470
47471 /* Return the mangling of TYPE if it is an extended fundamental type. */
47472
47473 static const char *
47474 ix86_mangle_type (const_tree type)
47475 {
47476 type = TYPE_MAIN_VARIANT (type);
47477
47478 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
47479 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
47480 return NULL;
47481
47482 switch (TYPE_MODE (type))
47483 {
47484 case TFmode:
47485 /* __float128 is "g". */
47486 return "g";
47487 case XFmode:
47488 /* "long double" or __float80 is "e". */
47489 return "e";
47490 default:
47491 return NULL;
47492 }
47493 }
47494
47495 /* For 32-bit code we can save PIC register setup by using the
47496 __stack_chk_fail_local hidden function instead of calling
47497 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
47498 register, so it is better to call __stack_chk_fail directly. */
47499
47500 static tree ATTRIBUTE_UNUSED
47501 ix86_stack_protect_fail (void)
47502 {
47503 return TARGET_64BIT
47504 ? default_external_stack_protect_fail ()
47505 : default_hidden_stack_protect_fail ();
47506 }
47507
47508 /* Select a format to encode pointers in exception handling data. CODE
47509 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
47510 true if the symbol may be affected by dynamic relocations.
47511
47512 ??? All x86 object file formats are capable of representing this.
47513 After all, the relocation needed is the same as for the call insn.
47514 Whether or not a particular assembler allows us to enter such, I
47515 guess we'll have to see. */
47516 int
47517 asm_preferred_eh_data_format (int code, int global)
47518 {
47519 if (flag_pic)
47520 {
47521 int type = DW_EH_PE_sdata8;
47522 if (!TARGET_64BIT
47523 || ix86_cmodel == CM_SMALL_PIC
47524 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
47525 type = DW_EH_PE_sdata4;
47526 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
47527 }
47528 if (ix86_cmodel == CM_SMALL
47529 || (ix86_cmodel == CM_MEDIUM && code))
47530 return DW_EH_PE_udata4;
47531 return DW_EH_PE_absptr;
47532 }
47533 \f
47534 /* Expand copysign from SIGN to the positive value ABS_VALUE
47535 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
47536 the sign-bit. */
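/* A rough C picture of the bit operation emitted below (illustrative
   only), where SIGNBIT is a constant with only the sign bit set:

     result = abs_value | (sign & SIGNBIT);

   When MASK is supplied it is the complement of SIGNBIT, so the code
   ANDs SIGN with ~MASK instead.  */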
47537 static void
47538 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
47539 {
47540 machine_mode mode = GET_MODE (sign);
47541 rtx sgn = gen_reg_rtx (mode);
47542 if (mask == NULL_RTX)
47543 {
47544 machine_mode vmode;
47545
47546 if (mode == SFmode)
47547 vmode = V4SFmode;
47548 else if (mode == DFmode)
47549 vmode = V2DFmode;
47550 else
47551 vmode = mode;
47552
47553 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
47554 if (!VECTOR_MODE_P (mode))
47555 {
47556 /* We need to generate a scalar mode mask in this case. */
47557 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
47558 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
47559 mask = gen_reg_rtx (mode);
47560 emit_insn (gen_rtx_SET (mask, tmp));
47561 }
47562 }
47563 else
47564 mask = gen_rtx_NOT (mode, mask);
47565 emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign)));
47566 emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn)));
47567 }
47568
47569 /* Expand fabs (OP0) and return a new rtx that holds the result. The
47570 mask for masking out the sign-bit is stored in *SMASK, if that is
47571 non-null. */
47572 static rtx
47573 ix86_expand_sse_fabs (rtx op0, rtx *smask)
47574 {
47575 machine_mode vmode, mode = GET_MODE (op0);
47576 rtx xa, mask;
47577
47578 xa = gen_reg_rtx (mode);
47579 if (mode == SFmode)
47580 vmode = V4SFmode;
47581 else if (mode == DFmode)
47582 vmode = V2DFmode;
47583 else
47584 vmode = mode;
47585 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
47586 if (!VECTOR_MODE_P (mode))
47587 {
47588 /* We need to generate a scalar mode mask in this case. */
47589 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
47590 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
47591 mask = gen_reg_rtx (mode);
47592 emit_insn (gen_rtx_SET (mask, tmp));
47593 }
47594 emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask)));
47595
47596 if (smask)
47597 *smask = mask;
47598
47599 return xa;
47600 }
47601
47602 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
47603 swapping the operands if SWAP_OPERANDS is true. The expanded
47604 code is a forward jump to a newly created label in case the
47605 comparison is true. The generated label rtx is returned. */
47606 static rtx_code_label *
47607 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
47608 bool swap_operands)
47609 {
47610 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
47611 rtx_code_label *label;
47612 rtx tmp;
47613
47614 if (swap_operands)
47615 std::swap (op0, op1);
47616
47617 label = gen_label_rtx ();
47618 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
47619 emit_insn (gen_rtx_SET (tmp, gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
47620 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
47621 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
47622 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
47623 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
47624 JUMP_LABEL (tmp) = label;
47625
47626 return label;
47627 }
47628
47629 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
47630 using comparison code CODE. Operands are swapped for the comparison if
47631 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
47632 static rtx
47633 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
47634 bool swap_operands)
47635 {
47636 rtx (*insn)(rtx, rtx, rtx, rtx);
47637 machine_mode mode = GET_MODE (op0);
47638 rtx mask = gen_reg_rtx (mode);
47639
47640 if (swap_operands)
47641 std::swap (op0, op1);
47642
47643 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
47644
47645 emit_insn (insn (mask, op0, op1,
47646 gen_rtx_fmt_ee (code, mode, op0, op1)));
47647 return mask;
47648 }
47649
47650 /* Generate and return a rtx of mode MODE for 2**n where n is the number
47651 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
47652 static rtx
47653 ix86_gen_TWO52 (machine_mode mode)
47654 {
47655 REAL_VALUE_TYPE TWO52r;
47656 rtx TWO52;
47657
47658 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
47659 TWO52 = const_double_from_real_value (TWO52r, mode);
47660 TWO52 = force_reg (mode, TWO52);
47661
47662 return TWO52;
47663 }
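
/* Editorial note, illustrative sketch only (not part of the original
   sources): the expanders below rely on the classic add-and-subtract
   trick with this constant.  For any double with 0 <= x < 2**52, the sum
   x + 2**52 has no fractional bits left in its 52-bit mantissa, so the
   addition itself rounds x to an integer according to the current
   rounding mode; subtracting 2**52 again yields that integer as a
   double.  A minimal scalar equivalent, assuming round-to-nearest and
   0 <= x < 2**52:

     static double
     nearbyint_via_two52 (double x)
     {
       const double two52 = 4503599627370496.0;   // 2**52
       return (x + two52) - two52;
     }

   The expanders apply this to fabs (x) and then copy the sign back, so
   that negative inputs and -0.0 are handled correctly.  */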
47664
47665 /* Expand SSE sequence for computing lround from OP1 storing
47666 into OP0. */
47667 void
47668 ix86_expand_lround (rtx op0, rtx op1)
47669 {
47670 /* C code for the stuff we're doing below:
47671 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
47672 return (long)tmp;
47673 */
47674 machine_mode mode = GET_MODE (op1);
47675 const struct real_format *fmt;
47676 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
47677 rtx adj;
47678
47679 /* load nextafter (0.5, 0.0) */
47680 fmt = REAL_MODE_FORMAT (mode);
47681 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
47682 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
47683
47684 /* adj = copysign (0.5, op1) */
47685 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
47686 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
47687
47688 /* adj = op1 + adj */
47689 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
47690
47691 /* op0 = (imode)adj */
47692 expand_fix (op0, adj, 0);
47693 }
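
/* Editorial note, illustrative only (not part of the original sources):
   the adjustment above is nextafter (0.5, 0.0), i.e. 0.5 - 2**-54 for
   double, rather than 0.5 itself.  Using exactly 0.5 would give a wrong
   result for the largest double below 0.5:

     x = 0.49999999999999994          (0.5 - 2**-54)
     x + 0.5      -> 1.0 - 2**-54, not representable, rounds up to 1.0
     (long) 1.0   -> 1, but lround (x) must be 0

   With the slightly smaller increment the sum stays strictly below 1.0
   and truncates to 0, while lround (0.5) still becomes 1 as required.  */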
47694
47695 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
47696 into OP0. */
47697 void
47698 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
47699 {
47700 /* C code for the stuff we're doing below (for do_floor):
47701 xi = (long)op1;
47702 xi -= (double)xi > op1 ? 1 : 0;
47703 return xi;
47704 */
47705 machine_mode fmode = GET_MODE (op1);
47706 machine_mode imode = GET_MODE (op0);
47707 rtx ireg, freg, tmp;
47708 rtx_code_label *label;
47709
47710 /* reg = (long)op1 */
47711 ireg = gen_reg_rtx (imode);
47712 expand_fix (ireg, op1, 0);
47713
47714 /* freg = (double)reg */
47715 freg = gen_reg_rtx (fmode);
47716 expand_float (freg, ireg, 0);
47717
47718 /* ireg = (freg > op1) ? ireg - 1 : ireg */
47719 label = ix86_expand_sse_compare_and_jump (UNLE,
47720 freg, op1, !do_floor);
47721 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
47722 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
47723 emit_move_insn (ireg, tmp);
47724
47725 emit_label (label);
47726 LABEL_NUSES (label) = 1;
47727
47728 emit_move_insn (op0, ireg);
47729 }
47730
47731 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
47732 result in OPERAND0. */
47733 void
47734 ix86_expand_rint (rtx operand0, rtx operand1)
47735 {
47736 /* C code for the stuff we're doing below:
47737 xa = fabs (operand1);
47738 if (!isless (xa, 2**52))
47739 return operand1;
47740 xa = xa + 2**52 - 2**52;
47741 return copysign (xa, operand1);
47742 */
47743 machine_mode mode = GET_MODE (operand0);
47744 rtx res, xa, TWO52, mask;
47745 rtx_code_label *label;
47746
47747 res = gen_reg_rtx (mode);
47748 emit_move_insn (res, operand1);
47749
47750 /* xa = abs (operand1) */
47751 xa = ix86_expand_sse_fabs (res, &mask);
47752
47753 /* if (!isless (xa, TWO52)) goto label; */
47754 TWO52 = ix86_gen_TWO52 (mode);
47755 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
47756
47757 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
47758 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
47759
47760 ix86_sse_copysign_to_positive (res, xa, res, mask);
47761
47762 emit_label (label);
47763 LABEL_NUSES (label) = 1;
47764
47765 emit_move_insn (operand0, res);
47766 }
47767
47768 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
47769 into OPERAND0. */
47770 void
47771 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
47772 {
47773 /* C code for the stuff we expand below.
47774 double xa = fabs (x), x2;
47775 if (!isless (xa, TWO52))
47776 return x;
47777 xa = xa + TWO52 - TWO52;
47778 x2 = copysign (xa, x);
47779 Compensate. Floor:
47780 if (x2 > x)
47781 x2 -= 1;
47782 Compensate. Ceil:
47783 if (x2 < x)
47784 x2 -= -1;
47785 return x2;
47786 */
47787 machine_mode mode = GET_MODE (operand0);
47788 rtx xa, TWO52, tmp, one, res, mask;
47789 rtx_code_label *label;
47790
47791 TWO52 = ix86_gen_TWO52 (mode);
47792
47793 /* Temporary for holding the result, initialized to the input
47794 operand to ease control flow. */
47795 res = gen_reg_rtx (mode);
47796 emit_move_insn (res, operand1);
47797
47798 /* xa = abs (operand1) */
47799 xa = ix86_expand_sse_fabs (res, &mask);
47800
47801 /* if (!isless (xa, TWO52)) goto label; */
47802 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
47803
47804 /* xa = xa + TWO52 - TWO52; */
47805 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
47806 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
47807
47808 /* xa = copysign (xa, operand1) */
47809 ix86_sse_copysign_to_positive (xa, xa, res, mask);
47810
47811 /* generate 1.0 or -1.0 */
47812 one = force_reg (mode,
47813 const_double_from_real_value (do_floor
47814 ? dconst1 : dconstm1, mode));
47815
47816 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
47817 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
47818 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
47819 /* We always need to subtract here to preserve signed zero. */
47820 tmp = expand_simple_binop (mode, MINUS,
47821 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
47822 emit_move_insn (res, tmp);
47823
47824 emit_label (label);
47825 LABEL_NUSES (label) = 1;
47826
47827 emit_move_insn (operand0, res);
47828 }
47829
47830 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
47831 into OPERAND0. */
47832 void
47833 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
47834 {
47835 /* C code for the stuff we expand below.
47836 double xa = fabs (x), x2;
47837 if (!isless (xa, TWO52))
47838 return x;
47839 x2 = (double)(long)x;
47840 Compensate. Floor:
47841 if (x2 > x)
47842 x2 -= 1;
47843 Compensate. Ceil:
47844 if (x2 < x)
47845 x2 += 1;
47846 if (HONOR_SIGNED_ZEROS (mode))
47847 return copysign (x2, x);
47848 return x2;
47849 */
47850 machine_mode mode = GET_MODE (operand0);
47851 rtx xa, xi, TWO52, tmp, one, res, mask;
47852 rtx_code_label *label;
47853
47854 TWO52 = ix86_gen_TWO52 (mode);
47855
47856 /* Temporary for holding the result, initialized to the input
47857 operand to ease control flow. */
47858 res = gen_reg_rtx (mode);
47859 emit_move_insn (res, operand1);
47860
47861 /* xa = abs (operand1) */
47862 xa = ix86_expand_sse_fabs (res, &mask);
47863
47864 /* if (!isless (xa, TWO52)) goto label; */
47865 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
47866
47867 /* xa = (double)(long)x */
47868 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
47869 expand_fix (xi, res, 0);
47870 expand_float (xa, xi, 0);
47871
47872 /* generate 1.0 */
47873 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
47874
47875 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
47876 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
47877 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
47878 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
47879 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
47880 emit_move_insn (res, tmp);
47881
47882 if (HONOR_SIGNED_ZEROS (mode))
47883 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
47884
47885 emit_label (label);
47886 LABEL_NUSES (label) = 1;
47887
47888 emit_move_insn (operand0, res);
47889 }
47890
47891 /* Expand SSE sequence for computing round from OPERAND1 storing
47892 into OPERAND0. Sequence that works without relying on DImode truncation
47893 via cvttsd2siq that is only available on 64bit targets. */
47894 void
47895 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
47896 {
47897 /* C code for the stuff we expand below.
47898 double xa = fabs (x), xa2, x2;
47899 if (!isless (xa, TWO52))
47900 return x;
47901 Using the absolute value and copying back sign makes
47902 -0.0 -> -0.0 correct.
47903 xa2 = xa + TWO52 - TWO52;
47904 Compensate.
47905 dxa = xa2 - xa;
47906 if (dxa <= -0.5)
47907 xa2 += 1;
47908 else if (dxa > 0.5)
47909 xa2 -= 1;
47910 x2 = copysign (xa2, x);
47911 return x2;
47912 */
47913 machine_mode mode = GET_MODE (operand0);
47914 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
47915 rtx_code_label *label;
47916
47917 TWO52 = ix86_gen_TWO52 (mode);
47918
47919 /* Temporary for holding the result, initialized to the input
47920 operand to ease control flow. */
47921 res = gen_reg_rtx (mode);
47922 emit_move_insn (res, operand1);
47923
47924 /* xa = abs (operand1) */
47925 xa = ix86_expand_sse_fabs (res, &mask);
47926
47927 /* if (!isless (xa, TWO52)) goto label; */
47928 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
47929
47930 /* xa2 = xa + TWO52 - TWO52; */
47931 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
47932 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
47933
47934 /* dxa = xa2 - xa; */
47935 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
47936
47937 /* generate 0.5, 1.0 and -0.5 */
47938 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
47939 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
47940 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
47941 0, OPTAB_DIRECT);
47942
47943 /* Compensate. */
47944 tmp = gen_reg_rtx (mode);
47945 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
47946 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
47947 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
47948 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
47949 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
47950 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
47951 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
47952 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
47953
47954 /* res = copysign (xa2, operand1) */
47955 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
47956
47957 emit_label (label);
47958 LABEL_NUSES (label) = 1;
47959
47960 emit_move_insn (operand0, res);
47961 }
47962
47963 /* Expand SSE sequence for computing trunc from OPERAND1 storing
47964 into OPERAND0. */
47965 void
47966 ix86_expand_trunc (rtx operand0, rtx operand1)
47967 {
47968 /* C code for SSE variant we expand below.
47969 double xa = fabs (x), x2;
47970 if (!isless (xa, TWO52))
47971 return x;
47972 x2 = (double)(long)x;
47973 if (HONOR_SIGNED_ZEROS (mode))
47974 return copysign (x2, x);
47975 return x2;
47976 */
47977 machine_mode mode = GET_MODE (operand0);
47978 rtx xa, xi, TWO52, res, mask;
47979 rtx_code_label *label;
47980
47981 TWO52 = ix86_gen_TWO52 (mode);
47982
47983 /* Temporary for holding the result, initialized to the input
47984 operand to ease control flow. */
47985 res = gen_reg_rtx (mode);
47986 emit_move_insn (res, operand1);
47987
47988 /* xa = abs (operand1) */
47989 xa = ix86_expand_sse_fabs (res, &mask);
47990
47991 /* if (!isless (xa, TWO52)) goto label; */
47992 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
47993
47994 /* x = (double)(long)x */
47995 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
47996 expand_fix (xi, res, 0);
47997 expand_float (res, xi, 0);
47998
47999 if (HONOR_SIGNED_ZEROS (mode))
48000 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
48001
48002 emit_label (label);
48003 LABEL_NUSES (label) = 1;
48004
48005 emit_move_insn (operand0, res);
48006 }
48007
48008 /* Expand SSE sequence for computing trunc from OPERAND1 storing into
48009 OPERAND0, without relying on DImode truncation via cvttsd2siq. */
48010 void
48011 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
48012 {
48013 machine_mode mode = GET_MODE (operand0);
48014 rtx xa, mask, TWO52, one, res, smask, tmp;
48015 rtx_code_label *label;
48016
48017 /* C code for SSE variant we expand below.
48018 double xa = fabs (x), xa2, x2;
48019 if (!isless (xa, TWO52))
48020 return x;
48021 xa2 = xa + TWO52 - TWO52;
48022 Compensate:
48023 if (xa2 > xa)
48024 xa2 -= 1.0;
48025 x2 = copysign (xa2, x);
48026 return x2;
48027 */
48028
48029 TWO52 = ix86_gen_TWO52 (mode);
48030
48031 /* Temporary for holding the result, initialized to the input
48032 operand to ease control flow. */
48033 res = gen_reg_rtx (mode);
48034 emit_move_insn (res, operand1);
48035
48036 /* xa = abs (operand1) */
48037 xa = ix86_expand_sse_fabs (res, &smask);
48038
48039 /* if (!isless (xa, TWO52)) goto label; */
48040 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48041
48042 /* res = xa + TWO52 - TWO52; */
48043 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
48044 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
48045 emit_move_insn (res, tmp);
48046
48047 /* generate 1.0 */
48048 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
48049
48050 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
48051 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
48052 emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
48053 tmp = expand_simple_binop (mode, MINUS,
48054 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
48055 emit_move_insn (res, tmp);
48056
48057 /* res = copysign (res, operand1) */
48058 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
48059
48060 emit_label (label);
48061 LABEL_NUSES (label) = 1;
48062
48063 emit_move_insn (operand0, res);
48064 }
48065
48066 /* Expand SSE sequence for computing round from OPERAND1 storing
48067 into OPERAND0. */
48068 void
48069 ix86_expand_round (rtx operand0, rtx operand1)
48070 {
48071 /* C code for the stuff we're doing below:
48072 double xa = fabs (x);
48073 if (!isless (xa, TWO52))
48074 return x;
48075 xa = (double)(long)(xa + nextafter (0.5, 0.0));
48076 return copysign (xa, x);
48077 */
48078 machine_mode mode = GET_MODE (operand0);
48079 rtx res, TWO52, xa, xi, half, mask;
48080 rtx_code_label *label;
48081 const struct real_format *fmt;
48082 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
48083
48084 /* Temporary for holding the result, initialized to the input
48085 operand to ease control flow. */
48086 res = gen_reg_rtx (mode);
48087 emit_move_insn (res, operand1);
48088
48089 TWO52 = ix86_gen_TWO52 (mode);
48090 xa = ix86_expand_sse_fabs (res, &mask);
48091 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48092
48093 /* load nextafter (0.5, 0.0) */
48094 fmt = REAL_MODE_FORMAT (mode);
48095 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
48096 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
48097
48098 /* xa = xa + 0.5 */
48099 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
48100 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
48101
48102 /* xa = (double)(int64_t)xa */
48103 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
48104 expand_fix (xi, xa, 0);
48105 expand_float (xa, xi, 0);
48106
48107 /* res = copysign (xa, operand1) */
48108 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
48109
48110 emit_label (label);
48111 LABEL_NUSES (label) = 1;
48112
48113 emit_move_insn (operand0, res);
48114 }
48115
48116 /* Expand SSE sequence for computing round
48117 from OP1 storing into OP0 using sse4 round insn. */
48118 void
48119 ix86_expand_round_sse4 (rtx op0, rtx op1)
48120 {
48121 machine_mode mode = GET_MODE (op0);
48122 rtx e1, e2, res, half;
48123 const struct real_format *fmt;
48124 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
48125 rtx (*gen_copysign) (rtx, rtx, rtx);
48126 rtx (*gen_round) (rtx, rtx, rtx);
48127
48128 switch (mode)
48129 {
48130 case SFmode:
48131 gen_copysign = gen_copysignsf3;
48132 gen_round = gen_sse4_1_roundsf2;
48133 break;
48134 case DFmode:
48135 gen_copysign = gen_copysigndf3;
48136 gen_round = gen_sse4_1_rounddf2;
48137 break;
48138 default:
48139 gcc_unreachable ();
48140 }
48141
48142 /* round (a) = trunc (a + copysign (0.5, a)) */
48143
48144 /* load nextafter (0.5, 0.0) */
48145 fmt = REAL_MODE_FORMAT (mode);
48146 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
48147 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
48148 half = const_double_from_real_value (pred_half, mode);
48149
48150 /* e1 = copysign (0.5, op1) */
48151 e1 = gen_reg_rtx (mode);
48152 emit_insn (gen_copysign (e1, half, op1));
48153
48154 /* e2 = op1 + e1 */
48155 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
48156
48157 /* res = trunc (e2) */
48158 res = gen_reg_rtx (mode);
48159 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
48160
48161 emit_move_insn (op0, res);
48162 }
48163 \f
48164
48165 /* Table of valid machine attributes. */
48166 static const struct attribute_spec ix86_attribute_table[] =
48167 {
48168 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
48169 affects_type_identity } */
48170 /* Stdcall attribute says callee is responsible for popping arguments
48171 if they are not variable. */
48172 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48173 true },
48174 /* Fastcall attribute says callee is responsible for popping arguments
48175 if they are not variable. */
48176 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48177 true },
48178 /* Thiscall attribute says callee is responsible for popping arguments
48179 if they are not variable. */
48180 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48181 true },
48182 /* Cdecl attribute says the callee is a normal C declaration */
48183 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48184 true },
48185 /* Regparm attribute specifies how many integer arguments are to be
48186 passed in registers. */
48187 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
48188 true },
48189 /* Sseregparm attribute says we are using x86_64 calling conventions
48190 for FP arguments. */
48191 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48192 true },
48193 /* The transactional memory builtins are implicitly regparm or fastcall
48194 depending on the ABI. Override the generic do-nothing attribute that
48195 these builtins were declared with. */
48196 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
48197 true },
48198 /* force_align_arg_pointer says this function realigns the stack at entry. */
48199 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
48200 false, true, true, ix86_handle_force_align_arg_pointer_attribute, false },
48201 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
48202 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
48203 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
48204 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
48205 false },
48206 #endif
48207 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
48208 false },
48209 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
48210 false },
48211 #ifdef SUBTARGET_ATTRIBUTE_TABLE
48212 SUBTARGET_ATTRIBUTE_TABLE,
48213 #endif
48214 /* ms_abi and sysv_abi calling convention function attributes. */
48215 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
48216 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
48217 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
48218 false },
48219 { "callee_pop_aggregate_return", 1, 1, false, true, true,
48220 ix86_handle_callee_pop_aggregate_return, true },
48221 /* End element. */
48222 { NULL, 0, 0, false, false, false, NULL, false }
48223 };
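
/* Editorial note, illustrative only (not part of the original sources):
   the attributes registered above appear on user declarations roughly
   like this (a sketch, not exhaustive):

     int  __attribute__((regparm (3)))  fast3 (int a, int b, int c);
     void __attribute__((stdcall))      win_callback (int code);
     long __attribute__((ms_abi))       cross_abi (void *p);
     struct __attribute__((ms_struct))  msvc_layout { char c; int i; };

   The handler functions named in the table (ix86_handle_cconv_attribute
   and friends) validate the arguments and diagnose combinations that do
   not make sense, e.g. stdcall together with fastcall.  */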
48224
48225 /* Implement targetm.vectorize.builtin_vectorization_cost. */
48226 static int
48227 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
48228 tree vectype, int)
48229 {
48230 unsigned elements;
48231
48232 switch (type_of_cost)
48233 {
48234 case scalar_stmt:
48235 return ix86_cost->scalar_stmt_cost;
48236
48237 case scalar_load:
48238 return ix86_cost->scalar_load_cost;
48239
48240 case scalar_store:
48241 return ix86_cost->scalar_store_cost;
48242
48243 case vector_stmt:
48244 return ix86_cost->vec_stmt_cost;
48245
48246 case vector_load:
48247 return ix86_cost->vec_align_load_cost;
48248
48249 case vector_store:
48250 return ix86_cost->vec_store_cost;
48251
48252 case vec_to_scalar:
48253 return ix86_cost->vec_to_scalar_cost;
48254
48255 case scalar_to_vec:
48256 return ix86_cost->scalar_to_vec_cost;
48257
48258 case unaligned_load:
48259 case unaligned_store:
48260 return ix86_cost->vec_unalign_load_cost;
48261
48262 case cond_branch_taken:
48263 return ix86_cost->cond_taken_branch_cost;
48264
48265 case cond_branch_not_taken:
48266 return ix86_cost->cond_not_taken_branch_cost;
48267
48268 case vec_perm:
48269 case vec_promote_demote:
48270 return ix86_cost->vec_stmt_cost;
48271
48272 case vec_construct:
48273 elements = TYPE_VECTOR_SUBPARTS (vectype);
48274 return ix86_cost->vec_stmt_cost * (elements / 2 + 1);
48275
48276 default:
48277 gcc_unreachable ();
48278 }
48279 }
48280
48281 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
48282 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
48283 insn every time. */
48284
48285 static GTY(()) rtx_insn *vselect_insn;
48286
48287 /* Initialize vselect_insn. */
48288
48289 static void
48290 init_vselect_insn (void)
48291 {
48292 unsigned i;
48293 rtx x;
48294
48295 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
48296 for (i = 0; i < MAX_VECT_LEN; ++i)
48297 XVECEXP (x, 0, i) = const0_rtx;
48298 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
48299 const0_rtx), x);
48300 x = gen_rtx_SET (const0_rtx, x);
48301 start_sequence ();
48302 vselect_insn = emit_insn (x);
48303 end_sequence ();
48304 }
48305
48306 /* Construct (set target (vec_select op0 (parallel perm))) and
48307 return true if that's a valid instruction in the active ISA. */
48308
48309 static bool
48310 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
48311 unsigned nelt, bool testing_p)
48312 {
48313 unsigned int i;
48314 rtx x, save_vconcat;
48315 int icode;
48316
48317 if (vselect_insn == NULL_RTX)
48318 init_vselect_insn ();
48319
48320 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
48321 PUT_NUM_ELEM (XVEC (x, 0), nelt);
48322 for (i = 0; i < nelt; ++i)
48323 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
48324 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
48325 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
48326 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
48327 SET_DEST (PATTERN (vselect_insn)) = target;
48328 icode = recog_memoized (vselect_insn);
48329
48330 if (icode >= 0 && !testing_p)
48331 emit_insn (copy_rtx (PATTERN (vselect_insn)));
48332
48333 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
48334 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
48335 INSN_CODE (vselect_insn) = -1;
48336
48337 return icode >= 0;
48338 }
48339
48340 /* Similar, but generate a vec_concat from op0 and op1 as well. */
48341
48342 static bool
48343 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
48344 const unsigned char *perm, unsigned nelt,
48345 bool testing_p)
48346 {
48347 machine_mode v2mode;
48348 rtx x;
48349 bool ok;
48350
48351 if (vselect_insn == NULL_RTX)
48352 init_vselect_insn ();
48353
48354 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
48355 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
48356 PUT_MODE (x, v2mode);
48357 XEXP (x, 0) = op0;
48358 XEXP (x, 1) = op1;
48359 ok = expand_vselect (target, x, perm, nelt, testing_p);
48360 XEXP (x, 0) = const0_rtx;
48361 XEXP (x, 1) = const0_rtx;
48362 return ok;
48363 }
48364
48365 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
48366 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
48367
48368 static bool
48369 expand_vec_perm_blend (struct expand_vec_perm_d *d)
48370 {
48371 machine_mode mmode, vmode = d->vmode;
48372 unsigned i, mask, nelt = d->nelt;
48373 rtx target, op0, op1, maskop, x;
48374 rtx rperm[32], vperm;
48375
48376 if (d->one_operand_p)
48377 return false;
48378 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
48379 && (TARGET_AVX512BW
48380 || GET_MODE_UNIT_SIZE (vmode) >= 4))
48381 ;
48382 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
48383 ;
48384 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
48385 ;
48386 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
48387 ;
48388 else
48389 return false;
48390
48391 /* This is a blend, not a permute. Elements must stay in their
48392 respective lanes. */
48393 for (i = 0; i < nelt; ++i)
48394 {
48395 unsigned e = d->perm[i];
48396 if (!(e == i || e == i + nelt))
48397 return false;
48398 }
48399
48400 if (d->testing_p)
48401 return true;
48402
48403 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
48404 decision should be extracted elsewhere, so that we only try that
48405 sequence once all budget==3 options have been tried. */
48406 target = d->target;
48407 op0 = d->op0;
48408 op1 = d->op1;
48409 mask = 0;
48410
48411 switch (vmode)
48412 {
48413 case V8DFmode:
48414 case V16SFmode:
48415 case V4DFmode:
48416 case V8SFmode:
48417 case V2DFmode:
48418 case V4SFmode:
48419 case V8HImode:
48420 case V8SImode:
48421 case V32HImode:
48422 case V64QImode:
48423 case V16SImode:
48424 case V8DImode:
48425 for (i = 0; i < nelt; ++i)
48426 mask |= (d->perm[i] >= nelt) << i;
48427 break;
48428
48429 case V2DImode:
48430 for (i = 0; i < 2; ++i)
48431 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
48432 vmode = V8HImode;
48433 goto do_subreg;
48434
48435 case V4SImode:
48436 for (i = 0; i < 4; ++i)
48437 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
48438 vmode = V8HImode;
48439 goto do_subreg;
48440
48441 case V16QImode:
48442 /* See if bytes move in pairs so we can use pblendw with
48443 an immediate argument, rather than pblendvb with a vector
48444 argument. */
48445 for (i = 0; i < 16; i += 2)
48446 if (d->perm[i] + 1 != d->perm[i + 1])
48447 {
48448 use_pblendvb:
48449 for (i = 0; i < nelt; ++i)
48450 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
48451
48452 finish_pblendvb:
48453 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
48454 vperm = force_reg (vmode, vperm);
48455
48456 if (GET_MODE_SIZE (vmode) == 16)
48457 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
48458 else
48459 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
48460 if (target != d->target)
48461 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
48462 return true;
48463 }
48464
48465 for (i = 0; i < 8; ++i)
48466 mask |= (d->perm[i * 2] >= 16) << i;
48467 vmode = V8HImode;
48468 /* FALLTHRU */
48469
48470 do_subreg:
48471 target = gen_reg_rtx (vmode);
48472 op0 = gen_lowpart (vmode, op0);
48473 op1 = gen_lowpart (vmode, op1);
48474 break;
48475
48476 case V32QImode:
48477 /* See if bytes move in pairs. If not, vpblendvb must be used. */
48478 for (i = 0; i < 32; i += 2)
48479 if (d->perm[i] + 1 != d->perm[i + 1])
48480 goto use_pblendvb;
48481 /* See if bytes move in quadruplets. If yes, vpblendd
48482 with immediate can be used. */
48483 for (i = 0; i < 32; i += 4)
48484 if (d->perm[i] + 2 != d->perm[i + 2])
48485 break;
48486 if (i < 32)
48487 {
48488 /* See if bytes move the same in both lanes. If yes,
48489 vpblendw with immediate can be used. */
48490 for (i = 0; i < 16; i += 2)
48491 if (d->perm[i] + 16 != d->perm[i + 16])
48492 goto use_pblendvb;
48493
48494 /* Use vpblendw. */
48495 for (i = 0; i < 16; ++i)
48496 mask |= (d->perm[i * 2] >= 32) << i;
48497 vmode = V16HImode;
48498 goto do_subreg;
48499 }
48500
48501 /* Use vpblendd. */
48502 for (i = 0; i < 8; ++i)
48503 mask |= (d->perm[i * 4] >= 32) << i;
48504 vmode = V8SImode;
48505 goto do_subreg;
48506
48507 case V16HImode:
48508 /* See if words move in pairs. If yes, vpblendd can be used. */
48509 for (i = 0; i < 16; i += 2)
48510 if (d->perm[i] + 1 != d->perm[i + 1])
48511 break;
48512 if (i < 16)
48513 {
48514 /* See if words move the same in both lanes. If not,
48515 vpblendvb must be used. */
48516 for (i = 0; i < 8; i++)
48517 if (d->perm[i] + 8 != d->perm[i + 8])
48518 {
48519 /* Use vpblendvb. */
48520 for (i = 0; i < 32; ++i)
48521 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
48522
48523 vmode = V32QImode;
48524 nelt = 32;
48525 target = gen_reg_rtx (vmode);
48526 op0 = gen_lowpart (vmode, op0);
48527 op1 = gen_lowpart (vmode, op1);
48528 goto finish_pblendvb;
48529 }
48530
48531 /* Use vpblendw. */
48532 for (i = 0; i < 16; ++i)
48533 mask |= (d->perm[i] >= 16) << i;
48534 break;
48535 }
48536
48537 /* Use vpblendd. */
48538 for (i = 0; i < 8; ++i)
48539 mask |= (d->perm[i * 2] >= 16) << i;
48540 vmode = V8SImode;
48541 goto do_subreg;
48542
48543 case V4DImode:
48544 /* Use vpblendd. */
48545 for (i = 0; i < 4; ++i)
48546 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
48547 vmode = V8SImode;
48548 goto do_subreg;
48549
48550 default:
48551 gcc_unreachable ();
48552 }
48553
48554 switch (vmode)
48555 {
48556 case V8DFmode:
48557 case V8DImode:
48558 mmode = QImode;
48559 break;
48560 case V16SFmode:
48561 case V16SImode:
48562 mmode = HImode;
48563 break;
48564 case V32HImode:
48565 mmode = SImode;
48566 break;
48567 case V64QImode:
48568 mmode = DImode;
48569 break;
48570 default:
48571 mmode = VOIDmode;
48572 }
48573
48574 if (mmode != VOIDmode)
48575 maskop = force_reg (mmode, gen_int_mode (mask, mmode));
48576 else
48577 maskop = GEN_INT (mask);
48578
48579 /* This matches five different patterns with the different modes. */
48580 x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
48581 x = gen_rtx_SET (target, x);
48582 emit_insn (x);
48583 if (target != d->target)
48584 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
48585
48586 return true;
48587 }
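
/* Editorial note, illustrative only (not part of the original sources):
   a worked example of the mask computation above.  For V4SFmode with
   d->perm = { 0, 5, 2, 7 } and nelt = 4, every element either stays
   (0, 2) or is replaced by the same-position element of op1 (5 == 1+4,
   7 == 3+4), so this is a blend.  The loop sets

     mask = (5 >= 4) << 1 | (7 >= 4) << 3 = 0b1010 = 10

   and, since mmode is VOIDmode for V4SFmode, the mask becomes the
   immediate operand of the blendps-style vec_merge pattern, selecting
   op1 in positions 1 and 3 and op0 elsewhere.  */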
48588
48589 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
48590 in terms of the variable form of vpermilps.
48591
48592 Note that we will have already failed the immediate input vpermilps,
48593 which requires that the high and low part shuffle be identical; the
48594 variable form doesn't require that. */
48595
48596 static bool
48597 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
48598 {
48599 rtx rperm[8], vperm;
48600 unsigned i;
48601
48602 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
48603 return false;
48604
48605 /* We can only permute within the 128-bit lane. */
48606 for (i = 0; i < 8; ++i)
48607 {
48608 unsigned e = d->perm[i];
48609 if (i < 4 ? e >= 4 : e < 4)
48610 return false;
48611 }
48612
48613 if (d->testing_p)
48614 return true;
48615
48616 for (i = 0; i < 8; ++i)
48617 {
48618 unsigned e = d->perm[i];
48619
48620 /* Within each 128-bit lane, the elements of op0 are numbered
48621 from 0 and the elements of op1 are numbered from 4. */
48622 if (e >= 8 + 4)
48623 e -= 8;
48624 else if (e >= 4)
48625 e -= 4;
48626
48627 rperm[i] = GEN_INT (e);
48628 }
48629
48630 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
48631 vperm = force_reg (V8SImode, vperm);
48632 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
48633
48634 return true;
48635 }
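
/* Editorial note, illustrative only (not part of the original sources):
   an in-lane V8SFmode permutation such as { 2, 0, 3, 1, 6, 4, 7, 5 }
   passes the check above (indices 0-3 stay in the low lane, 4-7 in the
   high lane) and is rewritten into the variable-index form

     index vector = { 2, 0, 3, 1, 2, 0, 3, 1 }

   because within each 128-bit lane vpermilps counts the source elements
   from 0, so 6, 4, 7, 5 become 2, 0, 3, 1 after subtracting 4.  */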
48636
48637 /* Return true if permutation D can be performed as VMODE permutation
48638 instead. */
48639
48640 static bool
48641 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
48642 {
48643 unsigned int i, j, chunk;
48644
48645 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
48646 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
48647 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
48648 return false;
48649
48650 if (GET_MODE_NUNITS (vmode) >= d->nelt)
48651 return true;
48652
48653 chunk = d->nelt / GET_MODE_NUNITS (vmode);
48654 for (i = 0; i < d->nelt; i += chunk)
48655 if (d->perm[i] & (chunk - 1))
48656 return false;
48657 else
48658 for (j = 1; j < chunk; ++j)
48659 if (d->perm[i] + j != d->perm[i + j])
48660 return false;
48661
48662 return true;
48663 }
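
/* Editorial note, illustrative only (not part of the original sources):
   a worked example for the check above.  The V16QImode permutation

     { 4 5 6 7  0 1 2 3  12 13 14 15  8 9 10 11 }

   moves bytes in aligned, contiguous groups of chunk = 16/4 = 4, so it
   is valid as the V4SImode permutation { 1, 0, 3, 2 } (each group's
   starting index divided by 4) and can be done on wider elements.  */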
48664
48665 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
48666 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
48667
48668 static bool
48669 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
48670 {
48671 unsigned i, nelt, eltsz, mask;
48672 unsigned char perm[64];
48673 machine_mode vmode = V16QImode;
48674 rtx rperm[64], vperm, target, op0, op1;
48675
48676 nelt = d->nelt;
48677
48678 if (!d->one_operand_p)
48679 {
48680 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
48681 {
48682 if (TARGET_AVX2
48683 && valid_perm_using_mode_p (V2TImode, d))
48684 {
48685 if (d->testing_p)
48686 return true;
48687
48688 /* Use vperm2i128 insn. The pattern uses
48689 V4DImode instead of V2TImode. */
48690 target = d->target;
48691 if (d->vmode != V4DImode)
48692 target = gen_reg_rtx (V4DImode);
48693 op0 = gen_lowpart (V4DImode, d->op0);
48694 op1 = gen_lowpart (V4DImode, d->op1);
48695 rperm[0]
48696 = GEN_INT ((d->perm[0] / (nelt / 2))
48697 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
48698 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
48699 if (target != d->target)
48700 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
48701 return true;
48702 }
48703 return false;
48704 }
48705 }
48706 else
48707 {
48708 if (GET_MODE_SIZE (d->vmode) == 16)
48709 {
48710 if (!TARGET_SSSE3)
48711 return false;
48712 }
48713 else if (GET_MODE_SIZE (d->vmode) == 32)
48714 {
48715 if (!TARGET_AVX2)
48716 return false;
48717
48718 /* V4DImode should already have been handled through
48719 expand_vselect by the vpermq instruction. */
48720 gcc_assert (d->vmode != V4DImode);
48721
48722 vmode = V32QImode;
48723 if (d->vmode == V8SImode
48724 || d->vmode == V16HImode
48725 || d->vmode == V32QImode)
48726 {
48727 /* First see if vpermq can be used for
48728 V8SImode/V16HImode/V32QImode. */
48729 if (valid_perm_using_mode_p (V4DImode, d))
48730 {
48731 for (i = 0; i < 4; i++)
48732 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
48733 if (d->testing_p)
48734 return true;
48735 target = gen_reg_rtx (V4DImode);
48736 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
48737 perm, 4, false))
48738 {
48739 emit_move_insn (d->target,
48740 gen_lowpart (d->vmode, target));
48741 return true;
48742 }
48743 return false;
48744 }
48745
48746 /* Next see if vpermd can be used. */
48747 if (valid_perm_using_mode_p (V8SImode, d))
48748 vmode = V8SImode;
48749 }
48750 /* Or if vpermps can be used. */
48751 else if (d->vmode == V8SFmode)
48752 vmode = V8SImode;
48753
48754 if (vmode == V32QImode)
48755 {
48756 /* vpshufb only works intra lanes; it is not
48757 possible to shuffle bytes in between the lanes. */
48758 for (i = 0; i < nelt; ++i)
48759 if ((d->perm[i] ^ i) & (nelt / 2))
48760 return false;
48761 }
48762 }
48763 else if (GET_MODE_SIZE (d->vmode) == 64)
48764 {
48765 if (!TARGET_AVX512BW)
48766 return false;
48767
48768 /* If vpermq didn't work, vpshufb won't work either. */
48769 if (d->vmode == V8DFmode || d->vmode == V8DImode)
48770 return false;
48771
48772 vmode = V64QImode;
48773 if (d->vmode == V16SImode
48774 || d->vmode == V32HImode
48775 || d->vmode == V64QImode)
48776 {
48777 /* First see if vpermq can be used for
48778 V16SImode/V32HImode/V64QImode. */
48779 if (valid_perm_using_mode_p (V8DImode, d))
48780 {
48781 for (i = 0; i < 8; i++)
48782 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
48783 if (d->testing_p)
48784 return true;
48785 target = gen_reg_rtx (V8DImode);
48786 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
48787 perm, 8, false))
48788 {
48789 emit_move_insn (d->target,
48790 gen_lowpart (d->vmode, target));
48791 return true;
48792 }
48793 return false;
48794 }
48795
48796 /* Next see if vpermd can be used. */
48797 if (valid_perm_using_mode_p (V16SImode, d))
48798 vmode = V16SImode;
48799 }
48800 /* Or if vpermps can be used. */
48801 else if (d->vmode == V16SFmode)
48802 vmode = V16SImode;
48803 if (vmode == V64QImode)
48804 {
48805 /* vpshufb only works intra lanes; it is not
48806 possible to shuffle bytes in between the lanes. */
48807 for (i = 0; i < nelt; ++i)
48808 if ((d->perm[i] ^ i) & (nelt / 4))
48809 return false;
48810 }
48811 }
48812 else
48813 return false;
48814 }
48815
48816 if (d->testing_p)
48817 return true;
48818
48819 if (vmode == V8SImode)
48820 for (i = 0; i < 8; ++i)
48821 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
48822 else if (vmode == V16SImode)
48823 for (i = 0; i < 16; ++i)
48824 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
48825 else
48826 {
48827 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
48828 if (!d->one_operand_p)
48829 mask = 2 * nelt - 1;
48830 else if (vmode == V16QImode)
48831 mask = nelt - 1;
48832 else if (vmode == V64QImode)
48833 mask = nelt / 4 - 1;
48834 else
48835 mask = nelt / 2 - 1;
48836
48837 for (i = 0; i < nelt; ++i)
48838 {
48839 unsigned j, e = d->perm[i] & mask;
48840 for (j = 0; j < eltsz; ++j)
48841 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
48842 }
48843 }
48844
48845 vperm = gen_rtx_CONST_VECTOR (vmode,
48846 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
48847 vperm = force_reg (vmode, vperm);
48848
48849 target = d->target;
48850 if (d->vmode != vmode)
48851 target = gen_reg_rtx (vmode);
48852 op0 = gen_lowpart (vmode, d->op0);
48853 if (d->one_operand_p)
48854 {
48855 if (vmode == V16QImode)
48856 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
48857 else if (vmode == V32QImode)
48858 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
48859 else if (vmode == V64QImode)
48860 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
48861 else if (vmode == V8SFmode)
48862 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
48863 else if (vmode == V8SImode)
48864 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
48865 else if (vmode == V16SFmode)
48866 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
48867 else if (vmode == V16SImode)
48868 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
48869 else
48870 gcc_unreachable ();
48871 }
48872 else
48873 {
48874 op1 = gen_lowpart (vmode, d->op1);
48875 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
48876 }
48877 if (target != d->target)
48878 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
48879
48880 return true;
48881 }
48882
48883 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
48884 in a single instruction. */
48885
48886 static bool
48887 expand_vec_perm_1 (struct expand_vec_perm_d *d)
48888 {
48889 unsigned i, nelt = d->nelt;
48890 unsigned char perm2[MAX_VECT_LEN];
48891
48892 /* Check plain VEC_SELECT first, because AVX has instructions that could
48893 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
48894 input where SEL+CONCAT may not. */
48895 if (d->one_operand_p)
48896 {
48897 int mask = nelt - 1;
48898 bool identity_perm = true;
48899 bool broadcast_perm = true;
48900
48901 for (i = 0; i < nelt; i++)
48902 {
48903 perm2[i] = d->perm[i] & mask;
48904 if (perm2[i] != i)
48905 identity_perm = false;
48906 if (perm2[i])
48907 broadcast_perm = false;
48908 }
48909
48910 if (identity_perm)
48911 {
48912 if (!d->testing_p)
48913 emit_move_insn (d->target, d->op0);
48914 return true;
48915 }
48916 else if (broadcast_perm && TARGET_AVX2)
48917 {
48918 /* Use vpbroadcast{b,w,d}. */
48919 rtx (*gen) (rtx, rtx) = NULL;
48920 switch (d->vmode)
48921 {
48922 case V64QImode:
48923 if (TARGET_AVX512BW)
48924 gen = gen_avx512bw_vec_dupv64qi_1;
48925 break;
48926 case V32QImode:
48927 gen = gen_avx2_pbroadcastv32qi_1;
48928 break;
48929 case V32HImode:
48930 if (TARGET_AVX512BW)
48931 gen = gen_avx512bw_vec_dupv32hi_1;
48932 break;
48933 case V16HImode:
48934 gen = gen_avx2_pbroadcastv16hi_1;
48935 break;
48936 case V16SImode:
48937 if (TARGET_AVX512F)
48938 gen = gen_avx512f_vec_dupv16si_1;
48939 break;
48940 case V8SImode:
48941 gen = gen_avx2_pbroadcastv8si_1;
48942 break;
48943 case V16QImode:
48944 gen = gen_avx2_pbroadcastv16qi;
48945 break;
48946 case V8HImode:
48947 gen = gen_avx2_pbroadcastv8hi;
48948 break;
48949 case V16SFmode:
48950 if (TARGET_AVX512F)
48951 gen = gen_avx512f_vec_dupv16sf_1;
48952 break;
48953 case V8SFmode:
48954 gen = gen_avx2_vec_dupv8sf_1;
48955 break;
48956 case V8DFmode:
48957 if (TARGET_AVX512F)
48958 gen = gen_avx512f_vec_dupv8df_1;
48959 break;
48960 case V8DImode:
48961 if (TARGET_AVX512F)
48962 gen = gen_avx512f_vec_dupv8di_1;
48963 break;
48964 /* For other modes, prefer the other shuffles this function creates. */
48965 default: break;
48966 }
48967 if (gen != NULL)
48968 {
48969 if (!d->testing_p)
48970 emit_insn (gen (d->target, d->op0));
48971 return true;
48972 }
48973 }
48974
48975 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
48976 return true;
48977
48978 /* There are plenty of patterns in sse.md that are written for
48979 SEL+CONCAT and are not replicated for a single op. Perhaps
48980 that should be changed, to avoid the nastiness here. */
48981
48982 /* Recognize interleave style patterns, which means incrementing
48983 every other permutation operand. */
48984 for (i = 0; i < nelt; i += 2)
48985 {
48986 perm2[i] = d->perm[i] & mask;
48987 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
48988 }
48989 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
48990 d->testing_p))
48991 return true;
48992
48993 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
48994 if (nelt >= 4)
48995 {
48996 for (i = 0; i < nelt; i += 4)
48997 {
48998 perm2[i + 0] = d->perm[i + 0] & mask;
48999 perm2[i + 1] = d->perm[i + 1] & mask;
49000 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
49001 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
49002 }
49003
49004 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
49005 d->testing_p))
49006 return true;
49007 }
49008 }
49009
49010 /* Finally, try the fully general two operand permute. */
49011 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
49012 d->testing_p))
49013 return true;
49014
49015 /* Recognize interleave style patterns with reversed operands. */
49016 if (!d->one_operand_p)
49017 {
49018 for (i = 0; i < nelt; ++i)
49019 {
49020 unsigned e = d->perm[i];
49021 if (e >= nelt)
49022 e -= nelt;
49023 else
49024 e += nelt;
49025 perm2[i] = e;
49026 }
49027
49028 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
49029 d->testing_p))
49030 return true;
49031 }
49032
49033 /* Try the SSE4.1 blend variable merge instructions. */
49034 if (expand_vec_perm_blend (d))
49035 return true;
49036
49037 /* Try one of the AVX vpermil variable permutations. */
49038 if (expand_vec_perm_vpermil (d))
49039 return true;
49040
49041 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
49042 vpshufb, vpermd, vpermps or vpermq variable permutation. */
49043 if (expand_vec_perm_pshufb (d))
49044 return true;
49045
49046 /* Try the AVX2 vpalignr instruction. */
49047 if (expand_vec_perm_palignr (d, true))
49048 return true;
49049
49050 /* Try the AVX512F vpermi2 instructions. */
49051 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
49052 return true;
49053
49054 return false;
49055 }
49056
49057 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
49058 in terms of a pair of pshuflw + pshufhw instructions. */
49059
49060 static bool
49061 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
49062 {
49063 unsigned char perm2[MAX_VECT_LEN];
49064 unsigned i;
49065 bool ok;
49066
49067 if (d->vmode != V8HImode || !d->one_operand_p)
49068 return false;
49069
49070 /* The two permutations only operate in 64-bit lanes. */
49071 for (i = 0; i < 4; ++i)
49072 if (d->perm[i] >= 4)
49073 return false;
49074 for (i = 4; i < 8; ++i)
49075 if (d->perm[i] < 4)
49076 return false;
49077
49078 if (d->testing_p)
49079 return true;
49080
49081 /* Emit the pshuflw. */
49082 memcpy (perm2, d->perm, 4);
49083 for (i = 4; i < 8; ++i)
49084 perm2[i] = i;
49085 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
49086 gcc_assert (ok);
49087
49088 /* Emit the pshufhw. */
49089 memcpy (perm2 + 4, d->perm + 4, 4);
49090 for (i = 0; i < 4; ++i)
49091 perm2[i] = i;
49092 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
49093 gcc_assert (ok);
49094
49095 return true;
49096 }
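
/* Editorial note, illustrative only (not part of the original sources):
   for example, the V8HImode permutation { 3, 1, 2, 0, 7, 6, 5, 4 }
   keeps indices 0-3 in the low quadword and 4-7 in the high quadword,
   so it is emitted as

     pshuflw with { 3, 1, 2, 0, 4, 5, 6, 7 }   (high half untouched)
     pshufhw with { 0, 1, 2, 3, 7, 6, 5, 4 }   (low half untouched)

   each of which only permutes within its own 64-bit lane.  */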
49097
49098 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
49099 the permutation using the SSSE3 palignr instruction. This succeeds
49100 when all of the elements in PERM fit within one vector and we merely
49101 need to shift them down so that a single vector permutation has a
49102 chance to succeed. If SINGLE_INSN_ONLY_P, succeed if only
49103 the vpalignr instruction itself can perform the requested permutation. */
49104
49105 static bool
49106 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
49107 {
49108 unsigned i, nelt = d->nelt;
49109 unsigned min, max, minswap, maxswap;
49110 bool in_order, ok, swap = false;
49111 rtx shift, target;
49112 struct expand_vec_perm_d dcopy;
49113
49114 /* Even with AVX, palignr only operates on 128-bit vectors;
49115 in AVX2 palignr operates on both 128-bit lanes. */
49116 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
49117 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
49118 return false;
49119
49120 min = 2 * nelt;
49121 max = 0;
49122 minswap = 2 * nelt;
49123 maxswap = 0;
49124 for (i = 0; i < nelt; ++i)
49125 {
49126 unsigned e = d->perm[i];
49127 unsigned eswap = d->perm[i] ^ nelt;
49128 if (GET_MODE_SIZE (d->vmode) == 32)
49129 {
49130 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
49131 eswap = e ^ (nelt / 2);
49132 }
49133 if (e < min)
49134 min = e;
49135 if (e > max)
49136 max = e;
49137 if (eswap < minswap)
49138 minswap = eswap;
49139 if (eswap > maxswap)
49140 maxswap = eswap;
49141 }
49142 if (min == 0
49143 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
49144 {
49145 if (d->one_operand_p
49146 || minswap == 0
49147 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
49148 ? nelt / 2 : nelt))
49149 return false;
49150 swap = true;
49151 min = minswap;
49152 max = maxswap;
49153 }
49154
49155 /* Given that we have SSSE3, we know we'll be able to implement the
49156 single operand permutation after the palignr with pshufb for
49157 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
49158 first. */
49159 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
49160 return true;
49161
49162 dcopy = *d;
49163 if (swap)
49164 {
49165 dcopy.op0 = d->op1;
49166 dcopy.op1 = d->op0;
49167 for (i = 0; i < nelt; ++i)
49168 dcopy.perm[i] ^= nelt;
49169 }
49170
49171 in_order = true;
49172 for (i = 0; i < nelt; ++i)
49173 {
49174 unsigned e = dcopy.perm[i];
49175 if (GET_MODE_SIZE (d->vmode) == 32
49176 && e >= nelt
49177 && (e & (nelt / 2 - 1)) < min)
49178 e = e - min - (nelt / 2);
49179 else
49180 e = e - min;
49181 if (e != i)
49182 in_order = false;
49183 dcopy.perm[i] = e;
49184 }
49185 dcopy.one_operand_p = true;
49186
49187 if (single_insn_only_p && !in_order)
49188 return false;
49189
49190 /* For AVX2, test whether we can permute the result in one instruction. */
49191 if (d->testing_p)
49192 {
49193 if (in_order)
49194 return true;
49195 dcopy.op1 = dcopy.op0;
49196 return expand_vec_perm_1 (&dcopy);
49197 }
49198
49199 shift = GEN_INT (min * GET_MODE_UNIT_BITSIZE (d->vmode));
49200 if (GET_MODE_SIZE (d->vmode) == 16)
49201 {
49202 target = gen_reg_rtx (TImode);
49203 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
49204 gen_lowpart (TImode, dcopy.op0), shift));
49205 }
49206 else
49207 {
49208 target = gen_reg_rtx (V2TImode);
49209 emit_insn (gen_avx2_palignrv2ti (target,
49210 gen_lowpart (V2TImode, dcopy.op1),
49211 gen_lowpart (V2TImode, dcopy.op0),
49212 shift));
49213 }
49214
49215 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
49216
49217 /* Test for the degenerate case where the alignment by itself
49218 produces the desired permutation. */
49219 if (in_order)
49220 {
49221 emit_move_insn (d->target, dcopy.op0);
49222 return true;
49223 }
49224
49225 ok = expand_vec_perm_1 (&dcopy);
49226 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
49227
49228 return ok;
49229 }
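
/* Editorial note, illustrative only (not part of the original sources):
   a simple case of the above.  For V16QImode with
   d->perm = { 5, 6, ..., 20 } (sixteen consecutive indices), min is 5
   and max - min is 15 < nelt, so the two inputs are concatenated and
   shifted right by 5 bytes with palignr; the remapped permutation is
   then the identity (in_order), and the alignment alone produces the
   desired result without a follow-up pshufb.  */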
49230
49231 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
49232 the permutation using the SSE4_1 pblendv instruction. Potentially
49233 reduces the permutation from 2 pshufb insns plus an or to 1 pshufb plus a pblendv. */
49234
49235 static bool
49236 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
49237 {
49238 unsigned i, which, nelt = d->nelt;
49239 struct expand_vec_perm_d dcopy, dcopy1;
49240 machine_mode vmode = d->vmode;
49241 bool ok;
49242
49243 /* Use the same checks as in expand_vec_perm_blend. */
49244 if (d->one_operand_p)
49245 return false;
49246 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
49247 ;
49248 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
49249 ;
49250 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
49251 ;
49252 else
49253 return false;
49254
49255 /* Figure out where permutation elements do not stay in their
49256 respective lanes. */
49257 for (i = 0, which = 0; i < nelt; ++i)
49258 {
49259 unsigned e = d->perm[i];
49260 if (e != i)
49261 which |= (e < nelt ? 1 : 2);
49262 }
49263 /* We can pblend the part where elements do not stay in their
49264 respective lanes only when these elements are all taken from one
49265 half of the permutation.
49266 {0 1 8 3 4 5 9 7} is ok as 8, 9 are not at their respective
49267 lanes, but both 8 and 9 >= 8
49268 {0 1 8 3 4 5 2 7} is not ok as 2 and 8 are not at their
49269 respective lanes and 8 >= 8, but 2 is not. */
49270 if (which != 1 && which != 2)
49271 return false;
49272 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
49273 return true;
49274
49275 /* First we apply a one operand permutation to the part where
49276 elements do not stay in their respective lanes. */
49277 dcopy = *d;
49278 if (which == 2)
49279 dcopy.op0 = dcopy.op1 = d->op1;
49280 else
49281 dcopy.op0 = dcopy.op1 = d->op0;
49282 if (!d->testing_p)
49283 dcopy.target = gen_reg_rtx (vmode);
49284 dcopy.one_operand_p = true;
49285
49286 for (i = 0; i < nelt; ++i)
49287 dcopy.perm[i] = d->perm[i] & (nelt - 1);
49288
49289 ok = expand_vec_perm_1 (&dcopy);
49290 if (GET_MODE_SIZE (vmode) != 16 && !ok)
49291 return false;
49292 else
49293 gcc_assert (ok);
49294 if (d->testing_p)
49295 return true;
49296
49297 /* Next we put permuted elements into their positions. */
49298 dcopy1 = *d;
49299 if (which == 2)
49300 dcopy1.op1 = dcopy.target;
49301 else
49302 dcopy1.op0 = dcopy.target;
49303
49304 for (i = 0; i < nelt; ++i)
49305 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
49306
49307 ok = expand_vec_perm_blend (&dcopy1);
49308 gcc_assert (ok);
49309
49310 return true;
49311 }
49312
49313 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
49314
49315 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
49316 a two vector permutation into a single vector permutation by using
49317 an interleave operation to merge the vectors. */
49318
49319 static bool
49320 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
49321 {
49322 struct expand_vec_perm_d dremap, dfinal;
49323 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
49324 unsigned HOST_WIDE_INT contents;
49325 unsigned char remap[2 * MAX_VECT_LEN];
49326 rtx_insn *seq;
49327 bool ok, same_halves = false;
49328
49329 if (GET_MODE_SIZE (d->vmode) == 16)
49330 {
49331 if (d->one_operand_p)
49332 return false;
49333 }
49334 else if (GET_MODE_SIZE (d->vmode) == 32)
49335 {
49336 if (!TARGET_AVX)
49337 return false;
49338 /* For 32-byte modes allow even d->one_operand_p.
49339 The lack of cross-lane shuffling in some instructions
49340 might prevent a single insn shuffle. */
49341 dfinal = *d;
49342 dfinal.testing_p = true;
49343 /* If expand_vec_perm_interleave3 can expand this into
49344 a 3 insn sequence, give up and let it be expanded as
49345 a 3 insn sequence. While that is one insn longer,
49346 it doesn't need a memory operand, and in the common
49347 case where the interleave low and interleave high
49348 permutations with the same operands are adjacent, it
49349 needs only 4 insns for both after CSE. */
49350 if (expand_vec_perm_interleave3 (&dfinal))
49351 return false;
49352 }
49353 else
49354 return false;
49355
49356 /* Examine from whence the elements come. */
49357 contents = 0;
49358 for (i = 0; i < nelt; ++i)
49359 contents |= HOST_WIDE_INT_1U << d->perm[i];
49360
49361 memset (remap, 0xff, sizeof (remap));
49362 dremap = *d;
49363
49364 if (GET_MODE_SIZE (d->vmode) == 16)
49365 {
49366 unsigned HOST_WIDE_INT h1, h2, h3, h4;
49367
49368 /* Split the two input vectors into 4 halves. */
49369 h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
49370 h2 = h1 << nelt2;
49371 h3 = h2 << nelt2;
49372 h4 = h3 << nelt2;
49373
49374 /* If the elements are from the low halves, use interleave low; similarly,
49375 use interleave high for the high halves. If the elements are from
49376 mis-matched halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
49377 if ((contents & (h1 | h3)) == contents)
49378 {
49379 /* punpckl* */
49380 for (i = 0; i < nelt2; ++i)
49381 {
49382 remap[i] = i * 2;
49383 remap[i + nelt] = i * 2 + 1;
49384 dremap.perm[i * 2] = i;
49385 dremap.perm[i * 2 + 1] = i + nelt;
49386 }
49387 if (!TARGET_SSE2 && d->vmode == V4SImode)
49388 dremap.vmode = V4SFmode;
49389 }
49390 else if ((contents & (h2 | h4)) == contents)
49391 {
49392 /* punpckh* */
49393 for (i = 0; i < nelt2; ++i)
49394 {
49395 remap[i + nelt2] = i * 2;
49396 remap[i + nelt + nelt2] = i * 2 + 1;
49397 dremap.perm[i * 2] = i + nelt2;
49398 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
49399 }
49400 if (!TARGET_SSE2 && d->vmode == V4SImode)
49401 dremap.vmode = V4SFmode;
49402 }
49403 else if ((contents & (h1 | h4)) == contents)
49404 {
49405 /* shufps */
49406 for (i = 0; i < nelt2; ++i)
49407 {
49408 remap[i] = i;
49409 remap[i + nelt + nelt2] = i + nelt2;
49410 dremap.perm[i] = i;
49411 dremap.perm[i + nelt2] = i + nelt + nelt2;
49412 }
49413 if (nelt != 4)
49414 {
49415 /* shufpd */
49416 dremap.vmode = V2DImode;
49417 dremap.nelt = 2;
49418 dremap.perm[0] = 0;
49419 dremap.perm[1] = 3;
49420 }
49421 }
49422 else if ((contents & (h2 | h3)) == contents)
49423 {
49424 /* shufps */
49425 for (i = 0; i < nelt2; ++i)
49426 {
49427 remap[i + nelt2] = i;
49428 remap[i + nelt] = i + nelt2;
49429 dremap.perm[i] = i + nelt2;
49430 dremap.perm[i + nelt2] = i + nelt;
49431 }
49432 if (nelt != 4)
49433 {
49434 /* shufpd */
49435 dremap.vmode = V2DImode;
49436 dremap.nelt = 2;
49437 dremap.perm[0] = 1;
49438 dremap.perm[1] = 2;
49439 }
49440 }
49441 else
49442 return false;
49443 }
49444 else
49445 {
49446 unsigned int nelt4 = nelt / 4, nzcnt = 0;
49447 unsigned HOST_WIDE_INT q[8];
49448 unsigned int nonzero_halves[4];
49449
49450 /* Split the two input vectors into 8 quarters. */
49451 q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
49452 for (i = 1; i < 8; ++i)
49453 q[i] = q[0] << (nelt4 * i);
49454 for (i = 0; i < 4; ++i)
49455 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
49456 {
49457 nonzero_halves[nzcnt] = i;
49458 ++nzcnt;
49459 }
49460
49461 if (nzcnt == 1)
49462 {
49463 gcc_assert (d->one_operand_p);
49464 nonzero_halves[1] = nonzero_halves[0];
49465 same_halves = true;
49466 }
49467 else if (d->one_operand_p)
49468 {
49469 gcc_assert (nonzero_halves[0] == 0);
49470 gcc_assert (nonzero_halves[1] == 1);
49471 }
49472
49473 if (nzcnt <= 2)
49474 {
49475 if (d->perm[0] / nelt2 == nonzero_halves[1])
49476 {
49477 /* Attempt to increase the likelihood that dfinal
49478 shuffle will be intra-lane. */
49479 std::swap (nonzero_halves[0], nonzero_halves[1]);
49480 }
49481
49482 /* vperm2f128 or vperm2i128. */
49483 for (i = 0; i < nelt2; ++i)
49484 {
49485 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
49486 remap[i + nonzero_halves[0] * nelt2] = i;
49487 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
49488 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
49489 }
49490
49491 if (d->vmode != V8SFmode
49492 && d->vmode != V4DFmode
49493 && d->vmode != V8SImode)
49494 {
49495 dremap.vmode = V8SImode;
49496 dremap.nelt = 8;
49497 for (i = 0; i < 4; ++i)
49498 {
49499 dremap.perm[i] = i + nonzero_halves[0] * 4;
49500 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
49501 }
49502 }
49503 }
49504 else if (d->one_operand_p)
49505 return false;
49506 else if (TARGET_AVX2
49507 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
49508 {
49509 /* vpunpckl* */
49510 for (i = 0; i < nelt4; ++i)
49511 {
49512 remap[i] = i * 2;
49513 remap[i + nelt] = i * 2 + 1;
49514 remap[i + nelt2] = i * 2 + nelt2;
49515 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
49516 dremap.perm[i * 2] = i;
49517 dremap.perm[i * 2 + 1] = i + nelt;
49518 dremap.perm[i * 2 + nelt2] = i + nelt2;
49519 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
49520 }
49521 }
49522 else if (TARGET_AVX2
49523 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
49524 {
49525 /* vpunpckh* */
49526 for (i = 0; i < nelt4; ++i)
49527 {
49528 remap[i + nelt4] = i * 2;
49529 remap[i + nelt + nelt4] = i * 2 + 1;
49530 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
49531 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
49532 dremap.perm[i * 2] = i + nelt4;
49533 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
49534 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
49535 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
49536 }
49537 }
49538 else
49539 return false;
49540 }
49541
49542 /* Use the remapping array set up above to move the elements from their
49543 swizzled locations into their final destinations. */
49544 dfinal = *d;
49545 for (i = 0; i < nelt; ++i)
49546 {
49547 unsigned e = remap[d->perm[i]];
49548 gcc_assert (e < nelt);
49549 /* If same_halves is true, both halves of the remapped vector are the
49550 same. Avoid cross-lane accesses if possible. */
49551 if (same_halves && i >= nelt2)
49552 {
49553 gcc_assert (e < nelt2);
49554 dfinal.perm[i] = e + nelt2;
49555 }
49556 else
49557 dfinal.perm[i] = e;
49558 }
49559 if (!d->testing_p)
49560 {
49561 dremap.target = gen_reg_rtx (dremap.vmode);
49562 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
49563 }
49564 dfinal.op1 = dfinal.op0;
49565 dfinal.one_operand_p = true;
49566
49567 /* Test if the final remap can be done with a single insn. For V4SFmode or
49568 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
49569 start_sequence ();
49570 ok = expand_vec_perm_1 (&dfinal);
49571 seq = get_insns ();
49572 end_sequence ();
49573
49574 if (!ok)
49575 return false;
49576
49577 if (d->testing_p)
49578 return true;
49579
49580 if (dremap.vmode != dfinal.vmode)
49581 {
49582 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
49583 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
49584 }
49585
49586 ok = expand_vec_perm_1 (&dremap);
49587 gcc_assert (ok);
49588
49589 emit_insn (seq);
49590 return true;
49591 }
49592
49593 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
49594 a single vector cross-lane permutation into vpermq followed
49595 by any of the single insn permutations. */
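/* For example, a V32QImode permutation whose low result half reads only
   from 64-bit quarters 0 and 2 of the input and whose high half reads
   only from quarters 1 and 3 first gathers those quarters with vpermq
   and then finishes with an in-lane shuffle. */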
49596
49597 static bool
49598 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
49599 {
49600 struct expand_vec_perm_d dremap, dfinal;
49601 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
49602 unsigned contents[2];
49603 bool ok;
49604
49605 if (!(TARGET_AVX2
49606 && (d->vmode == V32QImode || d->vmode == V16HImode)
49607 && d->one_operand_p))
49608 return false;
49609
49610 contents[0] = 0;
49611 contents[1] = 0;
49612 for (i = 0; i < nelt2; ++i)
49613 {
49614 contents[0] |= 1u << (d->perm[i] / nelt4);
49615 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
49616 }
49617
49618 for (i = 0; i < 2; ++i)
49619 {
49620 unsigned int cnt = 0;
49621 for (j = 0; j < 4; ++j)
49622 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
49623 return false;
49624 }
49625
49626 if (d->testing_p)
49627 return true;
49628
49629 dremap = *d;
49630 dremap.vmode = V4DImode;
49631 dremap.nelt = 4;
49632 dremap.target = gen_reg_rtx (V4DImode);
49633 dremap.op0 = gen_lowpart (V4DImode, d->op0);
49634 dremap.op1 = dremap.op0;
49635 dremap.one_operand_p = true;
49636 for (i = 0; i < 2; ++i)
49637 {
49638 unsigned int cnt = 0;
49639 for (j = 0; j < 4; ++j)
49640 if ((contents[i] & (1u << j)) != 0)
49641 dremap.perm[2 * i + cnt++] = j;
49642 for (; cnt < 2; ++cnt)
49643 dremap.perm[2 * i + cnt] = 0;
49644 }
49645
49646 dfinal = *d;
49647 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
49648 dfinal.op1 = dfinal.op0;
49649 dfinal.one_operand_p = true;
49650 for (i = 0, j = 0; i < nelt; ++i)
49651 {
49652 if (i == nelt2)
49653 j = 2;
49654 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
49655 if ((d->perm[i] / nelt4) == dremap.perm[j])
49656 ;
49657 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
49658 dfinal.perm[i] |= nelt4;
49659 else
49660 gcc_unreachable ();
49661 }
49662
49663 ok = expand_vec_perm_1 (&dremap);
49664 gcc_assert (ok);
49665
49666 ok = expand_vec_perm_1 (&dfinal);
49667 gcc_assert (ok);
49668
49669 return true;
49670 }
49671
49672 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
49673 a vector permutation using two instructions, vperm2f128 resp.
49674 vperm2i128 followed by any single in-lane permutation. */
49675
49676 static bool
49677 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
49678 {
49679 struct expand_vec_perm_d dfirst, dsecond;
49680 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
49681 bool ok;
49682
49683 if (!TARGET_AVX
49684 || GET_MODE_SIZE (d->vmode) != 32
49685 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
49686 return false;
49687
49688 dsecond = *d;
49689 dsecond.one_operand_p = false;
49690 dsecond.testing_p = true;
49691
49692 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
49693 immediate. For perm < 16 the second permutation uses
49694 d->op0 as first operand, for perm >= 16 it uses d->op1
49695 as first operand. The second operand is the result of
49696 vperm2[fi]128. */
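/* For instance (illustration of the encoding above): for V4DFmode with
   operands { 0 1 2 3 } and { 4 5 6 7 }, perm == 6 stands for the lane
   pair { 4 5 } { 2 3 }; ((6 << 2) | 6) & 0x33 == 0x12 is the vperm2f128
   immediate selecting source lane 2 for the low result lane and source
   lane 1 for the high result lane. */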
49697 for (perm = 0; perm < 32; perm++)
49698 {
49699 /* Ignore permutations which do not move anything cross-lane. */
49700 if (perm < 16)
49701 {
49702 /* The second shuffle for e.g. V4DFmode has
49703 0123 and ABCD operands.
49704 Ignore AB23, as 23 is already in the second lane
49705 of the first operand. */
49706 if ((perm & 0xc) == (1 << 2)) continue;
49707 /* And 01CD, as 01 is in the first lane of the first
49708 operand. */
49709 if ((perm & 3) == 0) continue;
49710 /* And 4567, as then the vperm2[fi]128 doesn't change
49711 anything on the original 4567 second operand. */
49712 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
49713 }
49714 else
49715 {
49716 /* The second shuffle for e.g. V4DFmode has
49717 4567 and ABCD operands.
49718 Ignore AB67, as 67 is already in the second lane
49719 of the first operand. */
49720 if ((perm & 0xc) == (3 << 2)) continue;
49721 /* And 45CD, as 45 is in the first lane of the first
49722 operand. */
49723 if ((perm & 3) == 2) continue;
49724 /* And 0123, as then the vperm2[fi]128 doesn't change
49725 anything on the original 0123 first operand. */
49726 if ((perm & 0xf) == (1 << 2)) continue;
49727 }
49728
49729 for (i = 0; i < nelt; i++)
49730 {
49731 j = d->perm[i] / nelt2;
49732 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
49733 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
49734 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
49735 dsecond.perm[i] = d->perm[i] & (nelt - 1);
49736 else
49737 break;
49738 }
49739
49740 if (i == nelt)
49741 {
49742 start_sequence ();
49743 ok = expand_vec_perm_1 (&dsecond);
49744 end_sequence ();
49745 }
49746 else
49747 ok = false;
49748
49749 if (ok)
49750 {
49751 if (d->testing_p)
49752 return true;
49753
49754 /* Found a usable second shuffle. dfirst will be
49755 vperm2f128 on d->op0 and d->op1. */
49756 dsecond.testing_p = false;
49757 dfirst = *d;
49758 dfirst.target = gen_reg_rtx (d->vmode);
49759 for (i = 0; i < nelt; i++)
49760 dfirst.perm[i] = (i & (nelt2 - 1))
49761 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
49762
49763 canonicalize_perm (&dfirst);
49764 ok = expand_vec_perm_1 (&dfirst);
49765 gcc_assert (ok);
49766
49767 /* And dsecond is some single insn shuffle, taking
49768 d->op0 and result of vperm2f128 (if perm < 16) or
49769 d->op1 and result of vperm2f128 (otherwise). */
49770 if (perm >= 16)
49771 dsecond.op0 = dsecond.op1;
49772 dsecond.op1 = dfirst.target;
49773
49774 ok = expand_vec_perm_1 (&dsecond);
49775 gcc_assert (ok);
49776
49777 return true;
49778 }
49779
49780 /* For one operand, the only useful vperm2f128 permutation is 0x01
49781 i.e. swapping the two 128-bit lanes. */
49782 if (d->one_operand_p)
49783 return false;
49784 }
49785
49786 return false;
49787 }
49788
49789 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
49790 a two vector permutation using 2 intra-lane interleave insns
49791 and cross-lane shuffle for 32-byte vectors. */
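/* For example, the V8SImode permutation { 4, 12, 5, 13, 6, 14, 7, 15 }
   interleaves the high halves of the two operands and is emitted as
   gen_vec_interleave_highv8si. */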
49792
49793 static bool
49794 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
49795 {
49796 unsigned i, nelt;
49797 rtx (*gen) (rtx, rtx, rtx);
49798
49799 if (d->one_operand_p)
49800 return false;
49801 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
49802 ;
49803 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
49804 ;
49805 else
49806 return false;
49807
49808 nelt = d->nelt;
49809 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
49810 return false;
49811 for (i = 0; i < nelt; i += 2)
49812 if (d->perm[i] != d->perm[0] + i / 2
49813 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
49814 return false;
49815
49816 if (d->testing_p)
49817 return true;
49818
49819 switch (d->vmode)
49820 {
49821 case V32QImode:
49822 if (d->perm[0])
49823 gen = gen_vec_interleave_highv32qi;
49824 else
49825 gen = gen_vec_interleave_lowv32qi;
49826 break;
49827 case V16HImode:
49828 if (d->perm[0])
49829 gen = gen_vec_interleave_highv16hi;
49830 else
49831 gen = gen_vec_interleave_lowv16hi;
49832 break;
49833 case V8SImode:
49834 if (d->perm[0])
49835 gen = gen_vec_interleave_highv8si;
49836 else
49837 gen = gen_vec_interleave_lowv8si;
49838 break;
49839 case V4DImode:
49840 if (d->perm[0])
49841 gen = gen_vec_interleave_highv4di;
49842 else
49843 gen = gen_vec_interleave_lowv4di;
49844 break;
49845 case V8SFmode:
49846 if (d->perm[0])
49847 gen = gen_vec_interleave_highv8sf;
49848 else
49849 gen = gen_vec_interleave_lowv8sf;
49850 break;
49851 case V4DFmode:
49852 if (d->perm[0])
49853 gen = gen_vec_interleave_highv4df;
49854 else
49855 gen = gen_vec_interleave_lowv4df;
49856 break;
49857 default:
49858 gcc_unreachable ();
49859 }
49860
49861 emit_insn (gen (d->target, d->op0, d->op1));
49862 return true;
49863 }
49864
49865 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
49866 a single vector permutation using a single intra-lane vector
49867 permutation, vperm2f128 swapping the lanes and vblend* insn blending
49868 the non-swapped and swapped vectors together. */
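/* For example, for the V4DFmode permutation { 3, 1, 0, 2 }: dfirst performs
   the intra-lane shuffle { 0, 1, 3, 2 }, dsecond swaps the two 128-bit
   lanes of that result, and a blendpd with mask 0x5 takes elements 0 and
   2 from the swapped copy. */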
49869
49870 static bool
49871 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
49872 {
49873 struct expand_vec_perm_d dfirst, dsecond;
49874 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
49875 rtx_insn *seq;
49876 bool ok;
49877 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
49878
49879 if (!TARGET_AVX
49880 || TARGET_AVX2
49881 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
49882 || !d->one_operand_p)
49883 return false;
49884
49885 dfirst = *d;
49886 for (i = 0; i < nelt; i++)
49887 dfirst.perm[i] = 0xff;
49888 for (i = 0, msk = 0; i < nelt; i++)
49889 {
49890 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
49891 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
49892 return false;
49893 dfirst.perm[j] = d->perm[i];
49894 if (j != i)
49895 msk |= (1 << i);
49896 }
49897 for (i = 0; i < nelt; i++)
49898 if (dfirst.perm[i] == 0xff)
49899 dfirst.perm[i] = i;
49900
49901 if (!d->testing_p)
49902 dfirst.target = gen_reg_rtx (dfirst.vmode);
49903
49904 start_sequence ();
49905 ok = expand_vec_perm_1 (&dfirst);
49906 seq = get_insns ();
49907 end_sequence ();
49908
49909 if (!ok)
49910 return false;
49911
49912 if (d->testing_p)
49913 return true;
49914
49915 emit_insn (seq);
49916
49917 dsecond = *d;
49918 dsecond.op0 = dfirst.target;
49919 dsecond.op1 = dfirst.target;
49920 dsecond.one_operand_p = true;
49921 dsecond.target = gen_reg_rtx (dsecond.vmode);
49922 for (i = 0; i < nelt; i++)
49923 dsecond.perm[i] = i ^ nelt2;
49924
49925 ok = expand_vec_perm_1 (&dsecond);
49926 gcc_assert (ok);
49927
49928 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
49929 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
49930 return true;
49931 }
49932
49933 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
49934 permutation using two vperm2f128, followed by a vshufpd insn blending
49935 the two vectors together. */
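/* For example, for the V4DFmode permutation { 2, 5, 1, 6 } the first
   vperm2f128 builds { 2 3 0 1 }, the second builds { 4 5 6 7 }, and the
   final vshufpd selects { 0, 5, 3, 6 } from those two intermediates. */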
49936
49937 static bool
49938 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
49939 {
49940 struct expand_vec_perm_d dfirst, dsecond, dthird;
49941 bool ok;
49942
49943 if (!TARGET_AVX || (d->vmode != V4DFmode))
49944 return false;
49945
49946 if (d->testing_p)
49947 return true;
49948
49949 dfirst = *d;
49950 dsecond = *d;
49951 dthird = *d;
49952
49953 dfirst.perm[0] = (d->perm[0] & ~1);
49954 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
49955 dfirst.perm[2] = (d->perm[2] & ~1);
49956 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
49957 dsecond.perm[0] = (d->perm[1] & ~1);
49958 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
49959 dsecond.perm[2] = (d->perm[3] & ~1);
49960 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
49961 dthird.perm[0] = (d->perm[0] % 2);
49962 dthird.perm[1] = (d->perm[1] % 2) + 4;
49963 dthird.perm[2] = (d->perm[2] % 2) + 2;
49964 dthird.perm[3] = (d->perm[3] % 2) + 6;
49965
49966 dfirst.target = gen_reg_rtx (dfirst.vmode);
49967 dsecond.target = gen_reg_rtx (dsecond.vmode);
49968 dthird.op0 = dfirst.target;
49969 dthird.op1 = dsecond.target;
49970 dthird.one_operand_p = false;
49971
49972 canonicalize_perm (&dfirst);
49973 canonicalize_perm (&dsecond);
49974
49975 ok = expand_vec_perm_1 (&dfirst)
49976 && expand_vec_perm_1 (&dsecond)
49977 && expand_vec_perm_1 (&dthird);
49978
49979 gcc_assert (ok);
49980
49981 return true;
49982 }
49983
49984 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
49985 permutation with two pshufb insns and an ior. We should have already
49986 failed all two instruction sequences. */
49987
49988 static bool
49989 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
49990 {
49991 rtx rperm[2][16], vperm, l, h, op, m128;
49992 unsigned int i, nelt, eltsz;
49993
49994 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
49995 return false;
49996 gcc_assert (!d->one_operand_p);
49997
49998 if (d->testing_p)
49999 return true;
50000
50001 nelt = d->nelt;
50002 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
50003
50004 /* Generate two permutation masks. If the required element is within
50005 the given vector it is shuffled into the proper lane. If the required
50006 element is in the other vector, force a zero into the lane by setting
50007 bit 7 in the permutation mask. */
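/* For example, if d->perm[0] selects element 18 of a V16QImode pair
   (i.e. byte 2 of d->op1), the mask applied to op1 gets 2 in byte 0
   while the mask applied to op0 gets -128 there, so the ior of the two
   pshufb results yields the requested byte. */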
50008 m128 = GEN_INT (-128);
50009 for (i = 0; i < nelt; ++i)
50010 {
50011 unsigned j, e = d->perm[i];
50012 unsigned which = (e >= nelt);
50013 if (e >= nelt)
50014 e -= nelt;
50015
50016 for (j = 0; j < eltsz; ++j)
50017 {
50018 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
50019 rperm[1-which][i*eltsz + j] = m128;
50020 }
50021 }
50022
50023 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
50024 vperm = force_reg (V16QImode, vperm);
50025
50026 l = gen_reg_rtx (V16QImode);
50027 op = gen_lowpart (V16QImode, d->op0);
50028 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
50029
50030 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
50031 vperm = force_reg (V16QImode, vperm);
50032
50033 h = gen_reg_rtx (V16QImode);
50034 op = gen_lowpart (V16QImode, d->op1);
50035 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
50036
50037 op = d->target;
50038 if (d->vmode != V16QImode)
50039 op = gen_reg_rtx (V16QImode);
50040 emit_insn (gen_iorv16qi3 (op, l, h));
50041 if (op != d->target)
50042 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
50043
50044 return true;
50045 }
50046
50047 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
50048 with two vpshufb insns, vpermq and vpor. We should have already failed
50049 all two or three instruction sequences. */
50050
50051 static bool
50052 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
50053 {
50054 rtx rperm[2][32], vperm, l, h, hp, op, m128;
50055 unsigned int i, nelt, eltsz;
50056
50057 if (!TARGET_AVX2
50058 || !d->one_operand_p
50059 || (d->vmode != V32QImode && d->vmode != V16HImode))
50060 return false;
50061
50062 if (d->testing_p)
50063 return true;
50064
50065 nelt = d->nelt;
50066 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
50067
50068 /* Generate two permutation masks. If the required element is within
50069 the same lane, it is shuffled in. If the required element is from the
50070 other lane, force a zero by setting bit 7 in the permutation mask.
50071 The other mask has non-negative elements wherever an element is
50072 requested from the other lane, but such an element is also moved to
50073 the other lane, so that the result of vpshufb can have its two
50074 V2TImode halves swapped. */
50075 m128 = GEN_INT (-128);
50076 for (i = 0; i < nelt; ++i)
50077 {
50078 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
50079 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
50080
50081 for (j = 0; j < eltsz; ++j)
50082 {
50083 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
50084 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
50085 }
50086 }
50087
50088 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
50089 vperm = force_reg (V32QImode, vperm);
50090
50091 h = gen_reg_rtx (V32QImode);
50092 op = gen_lowpart (V32QImode, d->op0);
50093 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
50094
50095 /* Swap the 128-bit lanes of h into hp. */
50096 hp = gen_reg_rtx (V4DImode);
50097 op = gen_lowpart (V4DImode, h);
50098 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
50099 const1_rtx));
50100
50101 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
50102 vperm = force_reg (V32QImode, vperm);
50103
50104 l = gen_reg_rtx (V32QImode);
50105 op = gen_lowpart (V32QImode, d->op0);
50106 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
50107
50108 op = d->target;
50109 if (d->vmode != V32QImode)
50110 op = gen_reg_rtx (V32QImode);
50111 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
50112 if (op != d->target)
50113 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
50114
50115 return true;
50116 }
50117
50118 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
50119 and extract-odd permutations of two V32QImode or V16HImode operands
50120 with two vpshufb insns, vpor and vpermq. We should have already
50121 failed all two or three instruction sequences. */
50122
50123 static bool
50124 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
50125 {
50126 rtx rperm[2][32], vperm, l, h, ior, op, m128;
50127 unsigned int i, nelt, eltsz;
50128
50129 if (!TARGET_AVX2
50130 || d->one_operand_p
50131 || (d->vmode != V32QImode && d->vmode != V16HImode))
50132 return false;
50133
50134 for (i = 0; i < d->nelt; ++i)
50135 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
50136 return false;
50137
50138 if (d->testing_p)
50139 return true;
50140
50141 nelt = d->nelt;
50142 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
50143
50144 /* Generate two permutation masks. In the first permutation mask
50145 the first quarter will contain indexes for the first half
50146 of the op0, the second quarter will contain bit 7 set, third quarter
50147 will contain indexes for the second half of the op0 and the
50148 last quarter bit 7 set. In the second permutation mask
50149 the first quarter will contain bit 7 set, the second quarter
50150 indexes for the first half of the op1, the third quarter bit 7 set
50151 and last quarter indexes for the second half of the op1.
50152 I.e. the first mask e.g. for V32QImode extract even will be:
50153 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
50154 (all values masked with 0xf except for -128) and second mask
50155 for extract even will be
50156 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
50157 m128 = GEN_INT (-128);
50158 for (i = 0; i < nelt; ++i)
50159 {
50160 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
50161 unsigned which = d->perm[i] >= nelt;
50162 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
50163
50164 for (j = 0; j < eltsz; ++j)
50165 {
50166 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
50167 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
50168 }
50169 }
50170
50171 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
50172 vperm = force_reg (V32QImode, vperm);
50173
50174 l = gen_reg_rtx (V32QImode);
50175 op = gen_lowpart (V32QImode, d->op0);
50176 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
50177
50178 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
50179 vperm = force_reg (V32QImode, vperm);
50180
50181 h = gen_reg_rtx (V32QImode);
50182 op = gen_lowpart (V32QImode, d->op1);
50183 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
50184
50185 ior = gen_reg_rtx (V32QImode);
50186 emit_insn (gen_iorv32qi3 (ior, l, h));
50187
50188 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
50189 op = gen_reg_rtx (V4DImode);
50190 ior = gen_lowpart (V4DImode, ior);
50191 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
50192 const1_rtx, GEN_INT (3)));
50193 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
50194
50195 return true;
50196 }
50197
50198 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
50199 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
50200 with two "and" and "pack" or two "shift" and "pack" insns. We should
50201 have already failed all two instruction sequences. */
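/* For instance, for V16QImode extract-even both operands are viewed as
   V8HImode, each word is masked with 0x00ff and packuswb packs the
   sixteen low bytes into the result; for extract-odd each word is first
   shifted right by 8 so the odd bytes become the low bytes. */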
50202
50203 static bool
50204 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
50205 {
50206 rtx op, dop0, dop1, t, rperm[16];
50207 unsigned i, odd, c, s, nelt = d->nelt;
50208 bool end_perm = false;
50209 machine_mode half_mode;
50210 rtx (*gen_and) (rtx, rtx, rtx);
50211 rtx (*gen_pack) (rtx, rtx, rtx);
50212 rtx (*gen_shift) (rtx, rtx, rtx);
50213
50214 if (d->one_operand_p)
50215 return false;
50216
50217 switch (d->vmode)
50218 {
50219 case V8HImode:
50220 /* Required for "pack". */
50221 if (!TARGET_SSE4_1)
50222 return false;
50223 c = 0xffff;
50224 s = 16;
50225 half_mode = V4SImode;
50226 gen_and = gen_andv4si3;
50227 gen_pack = gen_sse4_1_packusdw;
50228 gen_shift = gen_lshrv4si3;
50229 break;
50230 case V16QImode:
50231 /* No check as all instructions are SSE2. */
50232 c = 0xff;
50233 s = 8;
50234 half_mode = V8HImode;
50235 gen_and = gen_andv8hi3;
50236 gen_pack = gen_sse2_packuswb;
50237 gen_shift = gen_lshrv8hi3;
50238 break;
50239 case V16HImode:
50240 if (!TARGET_AVX2)
50241 return false;
50242 c = 0xffff;
50243 s = 16;
50244 half_mode = V8SImode;
50245 gen_and = gen_andv8si3;
50246 gen_pack = gen_avx2_packusdw;
50247 gen_shift = gen_lshrv8si3;
50248 end_perm = true;
50249 break;
50250 case V32QImode:
50251 if (!TARGET_AVX2)
50252 return false;
50253 c = 0xff;
50254 s = 8;
50255 half_mode = V16HImode;
50256 gen_and = gen_andv16hi3;
50257 gen_pack = gen_avx2_packuswb;
50258 gen_shift = gen_lshrv16hi3;
50259 end_perm = true;
50260 break;
50261 default:
50262 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
50263 general shuffles. */
50264 return false;
50265 }
50266
50267 /* Check that permutation is even or odd. */
50268 odd = d->perm[0];
50269 if (odd > 1)
50270 return false;
50271
50272 for (i = 1; i < nelt; ++i)
50273 if (d->perm[i] != 2 * i + odd)
50274 return false;
50275
50276 if (d->testing_p)
50277 return true;
50278
50279 dop0 = gen_reg_rtx (half_mode);
50280 dop1 = gen_reg_rtx (half_mode);
50281 if (odd == 0)
50282 {
50283 for (i = 0; i < nelt / 2; i++)
50284 rperm[i] = GEN_INT (c);
50285 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
50286 t = force_reg (half_mode, t);
50287 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
50288 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
50289 }
50290 else
50291 {
50292 emit_insn (gen_shift (dop0,
50293 gen_lowpart (half_mode, d->op0),
50294 GEN_INT (s)));
50295 emit_insn (gen_shift (dop1,
50296 gen_lowpart (half_mode, d->op1),
50297 GEN_INT (s)));
50298 }
50299 /* In the AVX2 256-bit case we need to permute the pack result. */
50300 if (TARGET_AVX2 && end_perm)
50301 {
50302 op = gen_reg_rtx (d->vmode);
50303 t = gen_reg_rtx (V4DImode);
50304 emit_insn (gen_pack (op, dop0, dop1));
50305 emit_insn (gen_avx2_permv4di_1 (t,
50306 gen_lowpart (V4DImode, op),
50307 const0_rtx,
50308 const2_rtx,
50309 const1_rtx,
50310 GEN_INT (3)));
50311 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
50312 }
50313 else
50314 emit_insn (gen_pack (d->target, dop0, dop1));
50315
50316 return true;
50317 }
50318
50319 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
50320 and extract-odd permutations of two V64QI operands
50321 with two "shifts", two "truncs" and one "concat" insns for "odd"
50322 and two "truncs" and one "concat" insn for "even".
50323 We should have already failed all two instruction sequences. */
50324
50325 static bool
50326 expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d)
50327 {
50328 rtx t1, t2, t3, t4;
50329 unsigned i, odd, nelt = d->nelt;
50330
50331 if (!TARGET_AVX512BW
50332 || d->one_operand_p
50333 || d->vmode != V64QImode)
50334 return false;
50335
50336 /* Check that permutation is even or odd. */
50337 odd = d->perm[0];
50338 if (odd > 1)
50339 return false;
50340
50341 for (i = 1; i < nelt; ++i)
50342 if (d->perm[i] != 2 * i + odd)
50343 return false;
50344
50345 if (d->testing_p)
50346 return true;
50347
50348
50349 if (odd)
50350 {
50351 t1 = gen_reg_rtx (V32HImode);
50352 t2 = gen_reg_rtx (V32HImode);
50353 emit_insn (gen_lshrv32hi3 (t1,
50354 gen_lowpart (V32HImode, d->op0),
50355 GEN_INT (8)));
50356 emit_insn (gen_lshrv32hi3 (t2,
50357 gen_lowpart (V32HImode, d->op1),
50358 GEN_INT (8)));
50359 }
50360 else
50361 {
50362 t1 = gen_lowpart (V32HImode, d->op0);
50363 t2 = gen_lowpart (V32HImode, d->op1);
50364 }
50365
50366 t3 = gen_reg_rtx (V32QImode);
50367 t4 = gen_reg_rtx (V32QImode);
50368 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1));
50369 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2));
50370 emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4));
50371
50372 return true;
50373 }
50374
50375 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
50376 and extract-odd permutations. */
50377
50378 static bool
50379 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
50380 {
50381 rtx t1, t2, t3, t4, t5;
50382
50383 switch (d->vmode)
50384 {
50385 case V4DFmode:
50386 if (d->testing_p)
50387 break;
50388 t1 = gen_reg_rtx (V4DFmode);
50389 t2 = gen_reg_rtx (V4DFmode);
50390
50391 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
50392 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
50393 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
50394
50395 /* Now an unpck[lh]pd will produce the result required. */
50396 if (odd)
50397 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
50398 else
50399 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
50400 emit_insn (t3);
50401 break;
50402
50403 case V8SFmode:
50404 {
50405 int mask = odd ? 0xdd : 0x88;
50406
50407 if (d->testing_p)
50408 break;
50409 t1 = gen_reg_rtx (V8SFmode);
50410 t2 = gen_reg_rtx (V8SFmode);
50411 t3 = gen_reg_rtx (V8SFmode);
50412
50413 /* Shuffle within the 128-bit lanes to produce:
50414 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
50415 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
50416 GEN_INT (mask)));
50417
50418 /* Shuffle the lanes around to produce:
50419 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
50420 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
50421 GEN_INT (0x3)));
50422
50423 /* Shuffle within the 128-bit lanes to produce:
50424 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
50425 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
50426
50427 /* Shuffle within the 128-bit lanes to produce:
50428 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
50429 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
50430
50431 /* Shuffle the lanes around to produce:
50432 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
50433 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
50434 GEN_INT (0x20)));
50435 }
50436 break;
50437
50438 case V2DFmode:
50439 case V4SFmode:
50440 case V2DImode:
50441 case V4SImode:
50442 /* These are always directly implementable by expand_vec_perm_1. */
50443 gcc_unreachable ();
50444
50445 case V8HImode:
50446 if (TARGET_SSE4_1)
50447 return expand_vec_perm_even_odd_pack (d);
50448 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
50449 return expand_vec_perm_pshufb2 (d);
50450 else
50451 {
50452 if (d->testing_p)
50453 break;
50454 /* We need 2*log2(N)-1 operations to achieve odd/even
50455 with interleave. */
50456 t1 = gen_reg_rtx (V8HImode);
50457 t2 = gen_reg_rtx (V8HImode);
50458 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
50459 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
50460 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
50461 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
50462 if (odd)
50463 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
50464 else
50465 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
50466 emit_insn (t3);
50467 }
50468 break;
50469
50470 case V16QImode:
50471 return expand_vec_perm_even_odd_pack (d);
50472
50473 case V16HImode:
50474 case V32QImode:
50475 return expand_vec_perm_even_odd_pack (d);
50476
50477 case V64QImode:
50478 return expand_vec_perm_even_odd_trunc (d);
50479
50480 case V4DImode:
50481 if (!TARGET_AVX2)
50482 {
50483 struct expand_vec_perm_d d_copy = *d;
50484 d_copy.vmode = V4DFmode;
50485 if (d->testing_p)
50486 d_copy.target = gen_lowpart (V4DFmode, d->target);
50487 else
50488 d_copy.target = gen_reg_rtx (V4DFmode);
50489 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
50490 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
50491 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
50492 {
50493 if (!d->testing_p)
50494 emit_move_insn (d->target,
50495 gen_lowpart (V4DImode, d_copy.target));
50496 return true;
50497 }
50498 return false;
50499 }
50500
50501 if (d->testing_p)
50502 break;
50503
50504 t1 = gen_reg_rtx (V4DImode);
50505 t2 = gen_reg_rtx (V4DImode);
50506
50507 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
50508 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
50509 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
50510
50511 /* Now a vpunpck[lh]qdq will produce the result required. */
50512 if (odd)
50513 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
50514 else
50515 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
50516 emit_insn (t3);
50517 break;
50518
50519 case V8SImode:
50520 if (!TARGET_AVX2)
50521 {
50522 struct expand_vec_perm_d d_copy = *d;
50523 d_copy.vmode = V8SFmode;
50524 if (d->testing_p)
50525 d_copy.target = gen_lowpart (V8SFmode, d->target);
50526 else
50527 d_copy.target = gen_reg_rtx (V8SFmode);
50528 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
50529 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
50530 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
50531 {
50532 if (!d->testing_p)
50533 emit_move_insn (d->target,
50534 gen_lowpart (V8SImode, d_copy.target));
50535 return true;
50536 }
50537 return false;
50538 }
50539
50540 if (d->testing_p)
50541 break;
50542
50543 t1 = gen_reg_rtx (V8SImode);
50544 t2 = gen_reg_rtx (V8SImode);
50545 t3 = gen_reg_rtx (V4DImode);
50546 t4 = gen_reg_rtx (V4DImode);
50547 t5 = gen_reg_rtx (V4DImode);
50548
50549 /* Shuffle the lanes around into
50550 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
50551 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
50552 gen_lowpart (V4DImode, d->op1),
50553 GEN_INT (0x20)));
50554 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
50555 gen_lowpart (V4DImode, d->op1),
50556 GEN_INT (0x31)));
50557
50558 /* Swap the 2nd and 3rd position in each lane into
50559 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
50560 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
50561 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
50562 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
50563 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
50564
50565 /* Now a vpunpck[lh]qdq will produce
50566 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
50567 if (odd)
50568 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
50569 gen_lowpart (V4DImode, t2));
50570 else
50571 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
50572 gen_lowpart (V4DImode, t2));
50573 emit_insn (t3);
50574 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
50575 break;
50576
50577 default:
50578 gcc_unreachable ();
50579 }
50580
50581 return true;
50582 }
50583
50584 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
50585 extract-even and extract-odd permutations. */
50586
50587 static bool
50588 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
50589 {
50590 unsigned i, odd, nelt = d->nelt;
50591
50592 odd = d->perm[0];
50593 if (odd != 0 && odd != 1)
50594 return false;
50595
50596 for (i = 1; i < nelt; ++i)
50597 if (d->perm[i] != 2 * i + odd)
50598 return false;
50599
50600 return expand_vec_perm_even_odd_1 (d, odd);
50601 }
50602
50603 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
50604 permutations. We assume that expand_vec_perm_1 has already failed. */
50605
50606 static bool
50607 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
50608 {
50609 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
50610 machine_mode vmode = d->vmode;
50611 unsigned char perm2[4];
50612 rtx op0 = d->op0, dest;
50613 bool ok;
50614
50615 switch (vmode)
50616 {
50617 case V4DFmode:
50618 case V8SFmode:
50619 /* These are special-cased in sse.md so that we can optionally
50620 use the vbroadcast instruction. They expand to two insns
50621 if the input happens to be in a register. */
50622 gcc_unreachable ();
50623
50624 case V2DFmode:
50625 case V2DImode:
50626 case V4SFmode:
50627 case V4SImode:
50628 /* These are always implementable using standard shuffle patterns. */
50629 gcc_unreachable ();
50630
50631 case V8HImode:
50632 case V16QImode:
50633 /* These can be implemented via interleave. We save one insn by
50634 stopping once we have promoted to V4SImode and then use pshufd. */
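/* For example, to broadcast byte 5 of a V16QImode vector: a low byte
   interleave duplicates it into word 5, a high word interleave then
   duplicates that word into dword 1, and the final pshufd with
   { 1, 1, 1, 1 } broadcasts dword 1 to all positions. */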
50635 if (d->testing_p)
50636 return true;
50637 do
50638 {
50639 rtx dest;
50640 rtx (*gen) (rtx, rtx, rtx)
50641 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
50642 : gen_vec_interleave_lowv8hi;
50643
50644 if (elt >= nelt2)
50645 {
50646 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
50647 : gen_vec_interleave_highv8hi;
50648 elt -= nelt2;
50649 }
50650 nelt2 /= 2;
50651
50652 dest = gen_reg_rtx (vmode);
50653 emit_insn (gen (dest, op0, op0));
50654 vmode = get_mode_wider_vector (vmode);
50655 op0 = gen_lowpart (vmode, dest);
50656 }
50657 while (vmode != V4SImode);
50658
50659 memset (perm2, elt, 4);
50660 dest = gen_reg_rtx (V4SImode);
50661 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
50662 gcc_assert (ok);
50663 if (!d->testing_p)
50664 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
50665 return true;
50666
50667 case V64QImode:
50668 case V32QImode:
50669 case V16HImode:
50670 case V8SImode:
50671 case V4DImode:
50672 /* For AVX2 broadcasts of the first element vpbroadcast* or
50673 vpermq should be used by expand_vec_perm_1. */
50674 gcc_assert (!TARGET_AVX2 || d->perm[0]);
50675 return false;
50676
50677 default:
50678 gcc_unreachable ();
50679 }
50680 }
50681
50682 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
50683 broadcast permutations. */
50684
50685 static bool
50686 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
50687 {
50688 unsigned i, elt, nelt = d->nelt;
50689
50690 if (!d->one_operand_p)
50691 return false;
50692
50693 elt = d->perm[0];
50694 for (i = 1; i < nelt; ++i)
50695 if (d->perm[i] != elt)
50696 return false;
50697
50698 return expand_vec_perm_broadcast_1 (d);
50699 }
50700
50701 /* Implement arbitrary permutations of two V64QImode operands
50702 with 2 vpermi2w, 2 vpshufb and one vpor instruction. */
50703 static bool
50704 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
50705 {
50706 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
50707 return false;
50708
50709 if (d->testing_p)
50710 return true;
50711
50712 struct expand_vec_perm_d ds[2];
50713 rtx rperm[128], vperm, target0, target1;
50714 unsigned int i, nelt;
50715 machine_mode vmode;
50716
50717 nelt = d->nelt;
50718 vmode = V64QImode;
50719
50720 for (i = 0; i < 2; i++)
50721 {
50722 ds[i] = *d;
50723 ds[i].vmode = V32HImode;
50724 ds[i].nelt = 32;
50725 ds[i].target = gen_reg_rtx (V32HImode);
50726 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
50727 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
50728 }
50729
50730 /* Prepare permutations such that the first one takes care of
50731 putting the even bytes into the right positions or one position
50732 higher (ds[0]), and the second one takes care of
50733 putting the odd bytes into the right positions or one position
50734 lower (ds[1]). */
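/* For instance, if d->perm[0] == 5, then ds[0].perm[0] == 2 makes the
   vpermi2w bring word 2 (bytes 4 and 5) into word 0, and the pshufb mask
   byte (rperm[0]) is 1, which then picks byte 5 out of that word. */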
50735
50736 for (i = 0; i < nelt; i++)
50737 {
50738 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
50739 if (i & 1)
50740 {
50741 rperm[i] = constm1_rtx;
50742 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
50743 }
50744 else
50745 {
50746 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
50747 rperm[i + 64] = constm1_rtx;
50748 }
50749 }
50750
50751 bool ok = expand_vec_perm_1 (&ds[0]);
50752 gcc_assert (ok);
50753 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
50754
50755 ok = expand_vec_perm_1 (&ds[1]);
50756 gcc_assert (ok);
50757 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
50758
50759 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
50760 vperm = force_reg (vmode, vperm);
50761 target0 = gen_reg_rtx (V64QImode);
50762 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
50763
50764 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
50765 vperm = force_reg (vmode, vperm);
50766 target1 = gen_reg_rtx (V64QImode);
50767 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
50768
50769 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
50770 return true;
50771 }
50772
50773 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
50774 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
50775 all the shorter instruction sequences. */
50776
50777 static bool
50778 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
50779 {
50780 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
50781 unsigned int i, nelt, eltsz;
50782 bool used[4];
50783
50784 if (!TARGET_AVX2
50785 || d->one_operand_p
50786 || (d->vmode != V32QImode && d->vmode != V16HImode))
50787 return false;
50788
50789 if (d->testing_p)
50790 return true;
50791
50792 nelt = d->nelt;
50793 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
50794
50795 /* Generate 4 permutation masks. If the required element is within
50796 the same lane, it is shuffled in. If the required element is from the
50797 other lane, force a zero by setting bit 7 in the permutation mask.
50798 In the cross-lane masks, elements requested from the other lane
50799 are non-negative, but also moved to the other lane, so that the
50800 result of vpshufb can have its two V2TImode halves
50801 swapped. */
50802 m128 = GEN_INT (-128);
50803 for (i = 0; i < 32; ++i)
50804 {
50805 rperm[0][i] = m128;
50806 rperm[1][i] = m128;
50807 rperm[2][i] = m128;
50808 rperm[3][i] = m128;
50809 }
50810 used[0] = false;
50811 used[1] = false;
50812 used[2] = false;
50813 used[3] = false;
50814 for (i = 0; i < nelt; ++i)
50815 {
50816 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
50817 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
50818 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
50819
50820 for (j = 0; j < eltsz; ++j)
50821 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
50822 used[which] = true;
50823 }
50824
50825 for (i = 0; i < 2; ++i)
50826 {
50827 if (!used[2 * i + 1])
50828 {
50829 h[i] = NULL_RTX;
50830 continue;
50831 }
50832 vperm = gen_rtx_CONST_VECTOR (V32QImode,
50833 gen_rtvec_v (32, rperm[2 * i + 1]));
50834 vperm = force_reg (V32QImode, vperm);
50835 h[i] = gen_reg_rtx (V32QImode);
50836 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
50837 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
50838 }
50839
50840 /* Swap the 128-bit lanes of h[X]. */
50841 for (i = 0; i < 2; ++i)
50842 {
50843 if (h[i] == NULL_RTX)
50844 continue;
50845 op = gen_reg_rtx (V4DImode);
50846 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
50847 const2_rtx, GEN_INT (3), const0_rtx,
50848 const1_rtx));
50849 h[i] = gen_lowpart (V32QImode, op);
50850 }
50851
50852 for (i = 0; i < 2; ++i)
50853 {
50854 if (!used[2 * i])
50855 {
50856 l[i] = NULL_RTX;
50857 continue;
50858 }
50859 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
50860 vperm = force_reg (V32QImode, vperm);
50861 l[i] = gen_reg_rtx (V32QImode);
50862 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
50863 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
50864 }
50865
50866 for (i = 0; i < 2; ++i)
50867 {
50868 if (h[i] && l[i])
50869 {
50870 op = gen_reg_rtx (V32QImode);
50871 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
50872 l[i] = op;
50873 }
50874 else if (h[i])
50875 l[i] = h[i];
50876 }
50877
50878 gcc_assert (l[0] && l[1]);
50879 op = d->target;
50880 if (d->vmode != V32QImode)
50881 op = gen_reg_rtx (V32QImode);
50882 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
50883 if (op != d->target)
50884 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
50885 return true;
50886 }
50887
50888 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
50889 With all of the interface bits taken care of, perform the expansion
50890 in D and return true on success. */
50891
50892 static bool
50893 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
50894 {
50895 /* Try a single instruction expansion. */
50896 if (expand_vec_perm_1 (d))
50897 return true;
50898
50899 /* Try sequences of two instructions. */
50900
50901 if (expand_vec_perm_pshuflw_pshufhw (d))
50902 return true;
50903
50904 if (expand_vec_perm_palignr (d, false))
50905 return true;
50906
50907 if (expand_vec_perm_interleave2 (d))
50908 return true;
50909
50910 if (expand_vec_perm_broadcast (d))
50911 return true;
50912
50913 if (expand_vec_perm_vpermq_perm_1 (d))
50914 return true;
50915
50916 if (expand_vec_perm_vperm2f128 (d))
50917 return true;
50918
50919 if (expand_vec_perm_pblendv (d))
50920 return true;
50921
50922 /* Try sequences of three instructions. */
50923
50924 if (expand_vec_perm_even_odd_pack (d))
50925 return true;
50926
50927 if (expand_vec_perm_2vperm2f128_vshuf (d))
50928 return true;
50929
50930 if (expand_vec_perm_pshufb2 (d))
50931 return true;
50932
50933 if (expand_vec_perm_interleave3 (d))
50934 return true;
50935
50936 if (expand_vec_perm_vperm2f128_vblend (d))
50937 return true;
50938
50939 /* Try sequences of four instructions. */
50940
50941 if (expand_vec_perm_even_odd_trunc (d))
50942 return true;
50943 if (expand_vec_perm_vpshufb2_vpermq (d))
50944 return true;
50945
50946 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
50947 return true;
50948
50949 if (expand_vec_perm_vpermi2_vpshub2 (d))
50950 return true;
50951
50952 /* ??? Look for narrow permutations whose element orderings would
50953 allow the promotion to a wider mode. */
50954
50955 /* ??? Look for sequences of interleave or a wider permute that place
50956 the data into the correct lanes for a half-vector shuffle like
50957 pshuf[lh]w or vpermilps. */
50958
50959 /* ??? Look for sequences of interleave that produce the desired results.
50960 The combinatorics of punpck[lh] get pretty ugly... */
50961
50962 if (expand_vec_perm_even_odd (d))
50963 return true;
50964
50965 /* Even longer sequences. */
50966 if (expand_vec_perm_vpshufb4_vpermq2 (d))
50967 return true;
50968
50969 return false;
50970 }
50971
50972 /* If a permutation only uses one operand, make it clear. Returns true
50973 if the permutation references both operands. */
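/* For example, the V4SImode selector { 4, 5, 6, 7 } references only the
   second operand; it is folded to { 0, 1, 2, 3 }, op0 is replaced by op1
   and false is returned, since only one operand is referenced. */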
50974
50975 static bool
50976 canonicalize_perm (struct expand_vec_perm_d *d)
50977 {
50978 int i, which, nelt = d->nelt;
50979
50980 for (i = which = 0; i < nelt; ++i)
50981 which |= (d->perm[i] < nelt ? 1 : 2);
50982
50983 d->one_operand_p = true;
50984 switch (which)
50985 {
50986 default:
50987 gcc_unreachable();
50988
50989 case 3:
50990 if (!rtx_equal_p (d->op0, d->op1))
50991 {
50992 d->one_operand_p = false;
50993 break;
50994 }
50995 /* The elements of PERM do not suggest that only the first operand
50996 is used, but both operands are identical. Allow easier matching
50997 of the permutation by folding the permutation into the single
50998 input vector. */
50999 /* FALLTHRU */
51000
51001 case 2:
51002 for (i = 0; i < nelt; ++i)
51003 d->perm[i] &= nelt - 1;
51004 d->op0 = d->op1;
51005 break;
51006
51007 case 1:
51008 d->op1 = d->op0;
51009 break;
51010 }
51011
51012 return (which == 3);
51013 }
51014
51015 bool
51016 ix86_expand_vec_perm_const (rtx operands[4])
51017 {
51018 struct expand_vec_perm_d d;
51019 unsigned char perm[MAX_VECT_LEN];
51020 int i, nelt;
51021 bool two_args;
51022 rtx sel;
51023
51024 d.target = operands[0];
51025 d.op0 = operands[1];
51026 d.op1 = operands[2];
51027 sel = operands[3];
51028
51029 d.vmode = GET_MODE (d.target);
51030 gcc_assert (VECTOR_MODE_P (d.vmode));
51031 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
51032 d.testing_p = false;
51033
51034 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
51035 gcc_assert (XVECLEN (sel, 0) == nelt);
51036 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
51037
51038 for (i = 0; i < nelt; ++i)
51039 {
51040 rtx e = XVECEXP (sel, 0, i);
51041 int ei = INTVAL (e) & (2 * nelt - 1);
51042 d.perm[i] = ei;
51043 perm[i] = ei;
51044 }
51045
51046 two_args = canonicalize_perm (&d);
51047
51048 if (ix86_expand_vec_perm_const_1 (&d))
51049 return true;
51050
51051 /* If the selector says both arguments are needed, but the operands are the
51052 same, the above tried to expand with one_operand_p and a flattened selector.
51053 If that didn't work, retry without one_operand_p; we succeeded with that
51054 during testing. */
51055 if (two_args && d.one_operand_p)
51056 {
51057 d.one_operand_p = false;
51058 memcpy (d.perm, perm, sizeof (perm));
51059 return ix86_expand_vec_perm_const_1 (&d);
51060 }
51061
51062 return false;
51063 }
51064
51065 /* Implement targetm.vectorize.vec_perm_const_ok. */
51066
51067 static bool
51068 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
51069 const unsigned char *sel)
51070 {
51071 struct expand_vec_perm_d d;
51072 unsigned int i, nelt, which;
51073 bool ret;
51074
51075 d.vmode = vmode;
51076 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
51077 d.testing_p = true;
51078
51079 /* Given sufficient ISA support we can just return true here
51080 for selected vector modes. */
51081 switch (d.vmode)
51082 {
51083 case V16SFmode:
51084 case V16SImode:
51085 case V8DImode:
51086 case V8DFmode:
51087 if (TARGET_AVX512F)
51088 /* All implementable with a single vpermi2 insn. */
51089 return true;
51090 break;
51091 case V32HImode:
51092 if (TARGET_AVX512BW)
51093 /* All implementable with a single vpermi2 insn. */
51094 return true;
51095 break;
51096 case V64QImode:
51097 if (TARGET_AVX512BW)
51098 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
51099 return true;
51100 break;
51101 case V8SImode:
51102 case V8SFmode:
51103 case V4DFmode:
51104 case V4DImode:
51105 if (TARGET_AVX512VL)
51106 /* All implementable with a single vpermi2 insn. */
51107 return true;
51108 break;
51109 case V16HImode:
51110 if (TARGET_AVX2)
51111 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
51112 return true;
51113 break;
51114 case V32QImode:
51115 if (TARGET_AVX2)
51116 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
51117 return true;
51118 break;
51119 case V4SImode:
51120 case V4SFmode:
51121 case V8HImode:
51122 case V16QImode:
51123 /* All implementable with a single vpperm insn. */
51124 if (TARGET_XOP)
51125 return true;
51126 /* All implementable with 2 pshufb + 1 ior. */
51127 if (TARGET_SSSE3)
51128 return true;
51129 break;
51130 case V2DImode:
51131 case V2DFmode:
51132 /* All implementable with shufpd or unpck[lh]pd. */
51133 return true;
51134 default:
51135 return false;
51136 }
51137
51138 /* Extract the values from the vector CST into the permutation
51139 array in D. */
51140 memcpy (d.perm, sel, nelt);
51141 for (i = which = 0; i < nelt; ++i)
51142 {
51143 unsigned char e = d.perm[i];
51144 gcc_assert (e < 2 * nelt);
51145 which |= (e < nelt ? 1 : 2);
51146 }
51147
51148 /* If all elements are from the second vector, fold them to the first. */
51149 if (which == 2)
51150 for (i = 0; i < nelt; ++i)
51151 d.perm[i] -= nelt;
51152
51153 /* Check whether the mask can be applied to the vector type. */
51154 d.one_operand_p = (which != 3);
51155
51156 /* Implementable with shufps or pshufd. */
51157 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
51158 return true;
51159
51160 /* Otherwise we have to go through the motions and see if we can
51161 figure out how to generate the requested permutation. */
51162 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
51163 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
51164 if (!d.one_operand_p)
51165 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
51166
51167 start_sequence ();
51168 ret = ix86_expand_vec_perm_const_1 (&d);
51169 end_sequence ();
51170
51171 return ret;
51172 }
51173
51174 void
51175 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
51176 {
51177 struct expand_vec_perm_d d;
51178 unsigned i, nelt;
51179
51180 d.target = targ;
51181 d.op0 = op0;
51182 d.op1 = op1;
51183 d.vmode = GET_MODE (targ);
51184 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
51185 d.one_operand_p = false;
51186 d.testing_p = false;
51187
51188 for (i = 0; i < nelt; ++i)
51189 d.perm[i] = i * 2 + odd;
51190
51191 /* We'll either be able to implement the permutation directly... */
51192 if (expand_vec_perm_1 (&d))
51193 return;
51194
51195 /* ... or we use the special-case patterns. */
51196 expand_vec_perm_even_odd_1 (&d, odd);
51197 }
51198
51199 static void
51200 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
51201 {
51202 struct expand_vec_perm_d d;
51203 unsigned i, nelt, base;
51204 bool ok;
51205
51206 d.target = targ;
51207 d.op0 = op0;
51208 d.op1 = op1;
51209 d.vmode = GET_MODE (targ);
51210 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
51211 d.one_operand_p = false;
51212 d.testing_p = false;
51213
51214 base = high_p ? nelt / 2 : 0;
51215 for (i = 0; i < nelt / 2; ++i)
51216 {
51217 d.perm[i * 2] = i + base;
51218 d.perm[i * 2 + 1] = i + base + nelt;
51219 }
51220
51221 /* Note that for AVX this isn't one instruction. */
51222 ok = ix86_expand_vec_perm_const_1 (&d);
51223 gcc_assert (ok);
51224 }
51225
51226
51227 /* Expand a vector operation CODE for a V*QImode in terms of the
51228 same operation on V*HImode. */
51229
51230 void
51231 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
51232 {
51233 machine_mode qimode = GET_MODE (dest);
51234 machine_mode himode;
51235 rtx (*gen_il) (rtx, rtx, rtx);
51236 rtx (*gen_ih) (rtx, rtx, rtx);
51237 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
51238 struct expand_vec_perm_d d;
51239 bool ok, full_interleave;
51240 bool uns_p = false;
51241 int i;
51242
51243 switch (qimode)
51244 {
51245 case V16QImode:
51246 himode = V8HImode;
51247 gen_il = gen_vec_interleave_lowv16qi;
51248 gen_ih = gen_vec_interleave_highv16qi;
51249 break;
51250 case V32QImode:
51251 himode = V16HImode;
51252 gen_il = gen_avx2_interleave_lowv32qi;
51253 gen_ih = gen_avx2_interleave_highv32qi;
51254 break;
51255 case V64QImode:
51256 himode = V32HImode;
51257 gen_il = gen_avx512bw_interleave_lowv64qi;
51258 gen_ih = gen_avx512bw_interleave_highv64qi;
51259 break;
51260 default:
51261 gcc_unreachable ();
51262 }
51263
51264 op2_l = op2_h = op2;
51265 switch (code)
51266 {
51267 case MULT:
51268 /* Unpack data such that we've got a source byte in each low byte of
51269 each word. We don't care what goes into the high byte of each word.
51270 Rather than trying to get zero in there, most convenient is to let
51271 it be a copy of the low byte. */
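/* A sketch of why this works for MULT: after the interleave each word of
   op1_l/op2_l holds a source byte in both of its halves, and the low byte
   of each word product equals the low byte of the byte product, which is
   what the final permutation extracts. */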
51272 op2_l = gen_reg_rtx (qimode);
51273 op2_h = gen_reg_rtx (qimode);
51274 emit_insn (gen_il (op2_l, op2, op2));
51275 emit_insn (gen_ih (op2_h, op2, op2));
51276 /* FALLTHRU */
51277
51278 op1_l = gen_reg_rtx (qimode);
51279 op1_h = gen_reg_rtx (qimode);
51280 emit_insn (gen_il (op1_l, op1, op1));
51281 emit_insn (gen_ih (op1_h, op1, op1));
51282 full_interleave = qimode == V16QImode;
51283 break;
51284
51285 case ASHIFT:
51286 case LSHIFTRT:
51287 uns_p = true;
51288 /* FALLTHRU */
51289 case ASHIFTRT:
51290 op1_l = gen_reg_rtx (himode);
51291 op1_h = gen_reg_rtx (himode);
51292 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
51293 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
51294 full_interleave = true;
51295 break;
51296 default:
51297 gcc_unreachable ();
51298 }
51299
51300 /* Perform the operation. */
51301 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
51302 1, OPTAB_DIRECT);
51303 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
51304 1, OPTAB_DIRECT);
51305 gcc_assert (res_l && res_h);
51306
51307 /* Merge the data back into the right place. */
51308 d.target = dest;
51309 d.op0 = gen_lowpart (qimode, res_l);
51310 d.op1 = gen_lowpart (qimode, res_h);
51311 d.vmode = qimode;
51312 d.nelt = GET_MODE_NUNITS (qimode);
51313 d.one_operand_p = false;
51314 d.testing_p = false;
51315
51316 if (full_interleave)
51317 {
51318 /* For SSE2, we used a full interleave, so the desired
51319 results are in the even elements. */
51320 for (i = 0; i < 64; ++i)
51321 d.perm[i] = i * 2;
51322 }
51323 else
51324 {
51325 /* For AVX, the interleave used above was not cross-lane. So the
51326 extraction is evens but with the second and third quarter swapped.
51327 Happily, that is even one insn shorter than even extraction. */
51328 for (i = 0; i < 64; ++i)
51329 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
51330 }
51331
51332 ok = ix86_expand_vec_perm_const_1 (&d);
51333 gcc_assert (ok);
51334
51335 set_unique_reg_note (get_last_insn (), REG_EQUAL,
51336 gen_rtx_fmt_ee (code, qimode, op1, op2));
51337 }
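
/* Worked example, for illustration only: for a V16QImode operation the two
   V8HImode partial results hold the 16 desired bytes in the low byte of
   each word, i.e. at byte positions 0, 2, 4, ..., 30 of the concatenation
   of RES_L and RES_H, so the even-extraction permutation d.perm[i] = i * 2
   built above selects exactly those bytes in order.  */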
51338
51339 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
51340 if op is CONST_VECTOR with all odd elements equal to their
51341 preceding element. */
51342
51343 static bool
51344 const_vector_equal_evenodd_p (rtx op)
51345 {
51346 machine_mode mode = GET_MODE (op);
51347 int i, nunits = GET_MODE_NUNITS (mode);
51348 if (GET_CODE (op) != CONST_VECTOR
51349 || nunits != CONST_VECTOR_NUNITS (op))
51350 return false;
51351 for (i = 0; i < nunits; i += 2)
51352 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
51353 return false;
51354 return true;
51355 }
51356
51357 void
51358 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
51359 bool uns_p, bool odd_p)
51360 {
51361 machine_mode mode = GET_MODE (op1);
51362 machine_mode wmode = GET_MODE (dest);
51363 rtx x;
51364 rtx orig_op1 = op1, orig_op2 = op2;
51365
51366 if (!nonimmediate_operand (op1, mode))
51367 op1 = force_reg (mode, op1);
51368 if (!nonimmediate_operand (op2, mode))
51369 op2 = force_reg (mode, op2);
51370
51371 /* We only play even/odd games with vectors of SImode. */
51372 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
51373
51374 /* If we're looking for the odd results, shift those members down to
51375 the even slots. For some cpus this is faster than a PSHUFD. */
51376 if (odd_p)
51377 {
51378 /* For XOP use vpmacsdqh, but only for smult, as it is only
51379 signed. */
51380 if (TARGET_XOP && mode == V4SImode && !uns_p)
51381 {
51382 x = force_reg (wmode, CONST0_RTX (wmode));
51383 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
51384 return;
51385 }
51386
51387 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
51388 if (!const_vector_equal_evenodd_p (orig_op1))
51389 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
51390 x, NULL, 1, OPTAB_DIRECT);
51391 if (!const_vector_equal_evenodd_p (orig_op2))
51392 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
51393 x, NULL, 1, OPTAB_DIRECT);
51394 op1 = gen_lowpart (mode, op1);
51395 op2 = gen_lowpart (mode, op2);
51396 }
51397
51398 if (mode == V16SImode)
51399 {
51400 if (uns_p)
51401 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
51402 else
51403 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
51404 }
51405 else if (mode == V8SImode)
51406 {
51407 if (uns_p)
51408 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
51409 else
51410 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
51411 }
51412 else if (uns_p)
51413 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
51414 else if (TARGET_SSE4_1)
51415 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
51416 else
51417 {
51418 rtx s1, s2, t0, t1, t2;
51419
51420 /* The easiest way to implement this without PMULDQ is to go through
51421 the motions as if we were performing a full 64-bit multiply, except
51422 that we need to do less shuffling of the elements. */
51423
51424 /* Compute the sign-extension, aka highparts, of the two operands. */
51425 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
51426 op1, pc_rtx, pc_rtx);
51427 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
51428 op2, pc_rtx, pc_rtx);
51429
51430 /* Multiply LO(A) * HI(B), and vice-versa. */
51431 t1 = gen_reg_rtx (wmode);
51432 t2 = gen_reg_rtx (wmode);
51433 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
51434 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
51435
51436 /* Multiply LO(A) * LO(B). */
51437 t0 = gen_reg_rtx (wmode);
51438 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
51439
51440 /* Combine and shift the highparts into place. */
51441 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
51442 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
51443 1, OPTAB_DIRECT);
51444
51445 /* Combine high and low parts. */
51446 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
51447 return;
51448 }
51449 emit_insn (x);
51450 }
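
/* A scalar sketch of the PMULDQ-less fallback above, for one element pair.
   This is an illustration only; A and B are 32-bit signed values and the
   arithmetic is modulo 2^64:

     s1 = (A < 0) ? 0xffffffff : 0          -- sign mask of A
     s2 = (B < 0) ? 0xffffffff : 0          -- sign mask of B
     t1 = (uint64_t) s1 * (uint32_t) B      -- low 32 bits are -B when A < 0
     t2 = (uint64_t) s2 * (uint32_t) A      -- low 32 bits are -A when B < 0
     t0 = (uint64_t) (uint32_t) A * (uint32_t) B
     result = t0 + ((t1 + t2) << 32)

   Only the low 32 bits of t1 + t2 survive the shift, so the result equals
   the signed 64-bit product A * B.  */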
51451
51452 void
51453 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
51454 bool uns_p, bool high_p)
51455 {
51456 machine_mode wmode = GET_MODE (dest);
51457 machine_mode mode = GET_MODE (op1);
51458 rtx t1, t2, t3, t4, mask;
51459
51460 switch (mode)
51461 {
51462 case V4SImode:
51463 t1 = gen_reg_rtx (mode);
51464 t2 = gen_reg_rtx (mode);
51465 if (TARGET_XOP && !uns_p)
51466 {
51467 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
51468 shuffle the elements once so that all elements are in the right
51469 place for immediate use: { A C B D }. */
51470 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
51471 const1_rtx, GEN_INT (3)));
51472 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
51473 const1_rtx, GEN_INT (3)));
51474 }
51475 else
51476 {
51477 /* Put the elements into place for the multiply. */
51478 ix86_expand_vec_interleave (t1, op1, op1, high_p);
51479 ix86_expand_vec_interleave (t2, op2, op2, high_p);
51480 high_p = false;
51481 }
51482 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
51483 break;
51484
51485 case V8SImode:
51486 /* Shuffle the elements between the lanes. After this we
51487 have { A B E F | C D G H } for each operand. */
51488 t1 = gen_reg_rtx (V4DImode);
51489 t2 = gen_reg_rtx (V4DImode);
51490 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
51491 const0_rtx, const2_rtx,
51492 const1_rtx, GEN_INT (3)));
51493 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
51494 const0_rtx, const2_rtx,
51495 const1_rtx, GEN_INT (3)));
51496
51497 /* Shuffle the elements within the lanes. After this we
51498 have { A A B B | C C D D } or { E E F F | G G H H }. */
51499 t3 = gen_reg_rtx (V8SImode);
51500 t4 = gen_reg_rtx (V8SImode);
51501 mask = GEN_INT (high_p
51502 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
51503 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
51504 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
51505 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
51506
51507 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
51508 break;
51509
51510 case V8HImode:
51511 case V16HImode:
51512 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
51513 uns_p, OPTAB_DIRECT);
51514 t2 = expand_binop (mode,
51515 uns_p ? umul_highpart_optab : smul_highpart_optab,
51516 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
51517 gcc_assert (t1 && t2);
51518
51519 t3 = gen_reg_rtx (mode);
51520 ix86_expand_vec_interleave (t3, t1, t2, high_p);
51521 emit_move_insn (dest, gen_lowpart (wmode, t3));
51522 break;
51523
51524 case V16QImode:
51525 case V32QImode:
51526 case V32HImode:
51527 case V16SImode:
51528 case V64QImode:
51529 t1 = gen_reg_rtx (wmode);
51530 t2 = gen_reg_rtx (wmode);
51531 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
51532 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
51533
51534 emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2)));
51535 break;
51536
51537 default:
51538 gcc_unreachable ();
51539 }
51540 }
51541
51542 void
51543 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
51544 {
51545 rtx res_1, res_2, res_3, res_4;
51546
51547 res_1 = gen_reg_rtx (V4SImode);
51548 res_2 = gen_reg_rtx (V4SImode);
51549 res_3 = gen_reg_rtx (V2DImode);
51550 res_4 = gen_reg_rtx (V2DImode);
51551 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
51552 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
51553
51554 /* Move the results in element 2 down to element 1; we don't care
51555 what goes in elements 2 and 3. Then we can merge the parts
51556 back together with an interleave.
51557
51558 Note that two other sequences were tried:
51559 (1) Use interleaves at the start instead of psrldq, which allows
51560 us to use a single shufps to merge things back at the end.
51561 (2) Use shufps here to combine the two vectors, then pshufd to
51562 put the elements in the correct order.
51563 In both cases the cost of the reformatting stall was too high
51564 and the overall sequence slower. */
51565
51566 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
51567 const0_rtx, const2_rtx,
51568 const0_rtx, const0_rtx));
51569 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
51570 const0_rtx, const2_rtx,
51571 const0_rtx, const0_rtx));
51572 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
51573
51574 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
51575 }
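
/* Element-level illustration, not part of the implementation: with
   OP1 = { a0 a1 a2 a3 } and OP2 = { b0 b1 b2 b3 },

     res_3 = { a0*b0, a2*b2 }    -- even products, as V2DImode
     res_4 = { a1*b1, a3*b3 }    -- odd products, as V2DImode

   The two pshufd insns move the low 32 bits of each product into elements
   0 and 1, and the final punpckldq interleaves them back into
   { a0*b0, a1*b1, a2*b2, a3*b3 }, each truncated to 32 bits.  */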
51576
51577 void
51578 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
51579 {
51580 machine_mode mode = GET_MODE (op0);
51581 rtx t1, t2, t3, t4, t5, t6;
51582
51583 if (TARGET_AVX512DQ && mode == V8DImode)
51584 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
51585 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
51586 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
51587 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
51588 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
51589 else if (TARGET_XOP && mode == V2DImode)
51590 {
51591 /* op1: A,B,C,D, op2: E,F,G,H */
51592 op1 = gen_lowpart (V4SImode, op1);
51593 op2 = gen_lowpart (V4SImode, op2);
51594
51595 t1 = gen_reg_rtx (V4SImode);
51596 t2 = gen_reg_rtx (V4SImode);
51597 t3 = gen_reg_rtx (V2DImode);
51598 t4 = gen_reg_rtx (V2DImode);
51599
51600 /* t1: B,A,D,C */
51601 emit_insn (gen_sse2_pshufd_1 (t1, op1,
51602 GEN_INT (1),
51603 GEN_INT (0),
51604 GEN_INT (3),
51605 GEN_INT (2)));
51606
51607 /* t2: (B*E),(A*F),(D*G),(C*H) */
51608 emit_insn (gen_mulv4si3 (t2, t1, op2));
51609
51610 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
51611 emit_insn (gen_xop_phadddq (t3, t2));
51612
51613 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
51614 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
51615
51616 /* Multiply lower parts and add all. */
51617 t5 = gen_reg_rtx (V2DImode);
51618 emit_insn (gen_vec_widen_umult_even_v4si (t5,
51619 gen_lowpart (V4SImode, op1),
51620 gen_lowpart (V4SImode, op2)));
51621 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
51622
51623 }
51624 else
51625 {
51626 machine_mode nmode;
51627 rtx (*umul) (rtx, rtx, rtx);
51628
51629 if (mode == V2DImode)
51630 {
51631 umul = gen_vec_widen_umult_even_v4si;
51632 nmode = V4SImode;
51633 }
51634 else if (mode == V4DImode)
51635 {
51636 umul = gen_vec_widen_umult_even_v8si;
51637 nmode = V8SImode;
51638 }
51639 else if (mode == V8DImode)
51640 {
51641 umul = gen_vec_widen_umult_even_v16si;
51642 nmode = V16SImode;
51643 }
51644 else
51645 gcc_unreachable ();
51646
51647
51648 /* Multiply low parts. */
51649 t1 = gen_reg_rtx (mode);
51650 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
51651
51652 /* Shift input vectors right 32 bits so we can multiply high parts. */
51653 t6 = GEN_INT (32);
51654 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
51655 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
51656
51657 /* Multiply high parts by low parts. */
51658 t4 = gen_reg_rtx (mode);
51659 t5 = gen_reg_rtx (mode);
51660 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
51661 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
51662
51663 /* Combine and shift the highparts back. */
51664 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
51665 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
51666
51667 /* Combine high and low parts. */
51668 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
51669 }
51670
51671 set_unique_reg_note (get_last_insn (), REG_EQUAL,
51672 gen_rtx_MULT (mode, op1, op2));
51673 }
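
/* The generic (non-XOP, non-AVX512DQ) path above is the schoolbook
   decomposition, sketched here per element for illustration only, with
   arithmetic modulo 2^64:

     A * B = lo(A)*lo(B) + ((hi(A)*lo(B) + hi(B)*lo(A)) << 32)

   where lo() and hi() are the low and high 32-bit halves; the
   hi(A)*hi(B) term is scaled by 2^64 and therefore vanishes.  */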
51674
51675 /* Return 1 if control transfer instruction INSN
51676 should be encoded with bnd prefix.
51677 If insn is NULL then return 1 when control
51678 transfer instructions should be prefixed with
51679 bnd by default for current function. */
51680
51681 bool
51682 ix86_bnd_prefixed_insn_p (rtx insn)
51683 {
51684 /* For call insns check special flag. */
51685 if (insn && CALL_P (insn))
51686 {
51687 rtx call = get_call_rtx_from (insn);
51688 if (call)
51689 return CALL_EXPR_WITH_BOUNDS_P (call);
51690 }
51691
51692 /* All other insns are prefixed only if function is instrumented. */
51693 return chkp_function_instrumented_p (current_function_decl);
51694 }
51695
51696 /* Calculate integer abs() using only SSE2 instructions. */
51697
51698 void
51699 ix86_expand_sse2_abs (rtx target, rtx input)
51700 {
51701 machine_mode mode = GET_MODE (target);
51702 rtx tmp0, tmp1, x;
51703
51704 switch (mode)
51705 {
51706 /* For 32-bit signed integer X, the best way to calculate the absolute
51707 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
51708 case V4SImode:
51709 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
51710 GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
51711 NULL, 0, OPTAB_DIRECT);
51712 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
51713 NULL, 0, OPTAB_DIRECT);
51714 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
51715 target, 0, OPTAB_DIRECT);
51716 break;
51717
51718 /* For 16-bit signed integer X, the best way to calculate the absolute
51719 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
51720 case V8HImode:
51721 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
51722
51723 x = expand_simple_binop (mode, SMAX, tmp0, input,
51724 target, 0, OPTAB_DIRECT);
51725 break;
51726
51727 /* For 8-bit signed integer X, the best way to calculate the absolute
51728 value of X is min ((unsigned char) X, (unsigned char) (-X)),
51729 as SSE2 provides the PMINUB insn. */
51730 case V16QImode:
51731 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
51732
51733 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
51734 target, 0, OPTAB_DIRECT);
51735 break;
51736
51737 default:
51738 gcc_unreachable ();
51739 }
51740
51741 if (x != target)
51742 emit_move_insn (target, x);
51743 }
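
/* Quick sanity check of the V4SImode identity above, for illustration
   only, with W = 32 and X = -5:

     X >> 31          = -1    (0xffffffff)
     (X >> 31) ^ X    =  4
     4 - (-1)         =  5    == abs (-5)

   For non-negative X the arithmetic shift yields 0 and both steps are
   no-ops.  */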
51744
51745 /* Expand an extract from a vector register through pextr insn.
51746 Return true if successful. */
51747
51748 bool
51749 ix86_expand_pextr (rtx *operands)
51750 {
51751 rtx dst = operands[0];
51752 rtx src = operands[1];
51753
51754 unsigned int size = INTVAL (operands[2]);
51755 unsigned int pos = INTVAL (operands[3]);
51756
51757 if (SUBREG_P (dst))
51758 {
51759 /* Reject non-lowpart subregs. */
51760 if (SUBREG_BYTE (dst) > 0)
51761 return false;
51762 dst = SUBREG_REG (dst);
51763 }
51764
51765 if (SUBREG_P (src))
51766 {
51767 pos += SUBREG_BYTE (src) * BITS_PER_UNIT;
51768 src = SUBREG_REG (src);
51769 }
51770
51771 switch (GET_MODE (src))
51772 {
51773 case V16QImode:
51774 case V8HImode:
51775 case V4SImode:
51776 case V2DImode:
51777 case V1TImode:
51778 case TImode:
51779 {
51780 machine_mode srcmode, dstmode;
51781 rtx d, pat;
51782
51783 dstmode = mode_for_size (size, MODE_INT, 0);
51784
51785 switch (dstmode)
51786 {
51787 case QImode:
51788 if (!TARGET_SSE4_1)
51789 return false;
51790 srcmode = V16QImode;
51791 break;
51792
51793 case HImode:
51794 if (!TARGET_SSE2)
51795 return false;
51796 srcmode = V8HImode;
51797 break;
51798
51799 case SImode:
51800 if (!TARGET_SSE4_1)
51801 return false;
51802 srcmode = V4SImode;
51803 break;
51804
51805 case DImode:
51806 gcc_assert (TARGET_64BIT);
51807 if (!TARGET_SSE4_1)
51808 return false;
51809 srcmode = V2DImode;
51810 break;
51811
51812 default:
51813 return false;
51814 }
51815
51816 /* Reject extractions from misaligned positions. */
51817 if (pos & (size-1))
51818 return false;
51819
51820 if (GET_MODE (dst) == dstmode)
51821 d = dst;
51822 else
51823 d = gen_reg_rtx (dstmode);
51824
51825 /* Construct insn pattern. */
51826 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size)));
51827 pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat);
51828
51829 /* Let the rtl optimizers know about the zero extension performed. */
51830 if (dstmode == QImode || dstmode == HImode)
51831 {
51832 pat = gen_rtx_ZERO_EXTEND (SImode, pat);
51833 d = gen_lowpart (SImode, d);
51834 }
51835
51836 emit_insn (gen_rtx_SET (d, pat));
51837
51838 if (d != dst)
51839 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
51840 return true;
51841 }
51842
51843 default:
51844 return false;
51845 }
51846 }
51847
51848 /* Expand an insert into a vector register through pinsr insn.
51849 Return true if successful. */
51850
51851 bool
51852 ix86_expand_pinsr (rtx *operands)
51853 {
51854 rtx dst = operands[0];
51855 rtx src = operands[3];
51856
51857 unsigned int size = INTVAL (operands[1]);
51858 unsigned int pos = INTVAL (operands[2]);
51859
51860 if (SUBREG_P (dst))
51861 {
51862 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
51863 dst = SUBREG_REG (dst);
51864 }
51865
51866 switch (GET_MODE (dst))
51867 {
51868 case V16QImode:
51869 case V8HImode:
51870 case V4SImode:
51871 case V2DImode:
51872 case V1TImode:
51873 case TImode:
51874 {
51875 machine_mode srcmode, dstmode;
51876 rtx (*pinsr)(rtx, rtx, rtx, rtx);
51877 rtx d;
51878
51879 srcmode = mode_for_size (size, MODE_INT, 0);
51880
51881 switch (srcmode)
51882 {
51883 case QImode:
51884 if (!TARGET_SSE4_1)
51885 return false;
51886 dstmode = V16QImode;
51887 pinsr = gen_sse4_1_pinsrb;
51888 break;
51889
51890 case HImode:
51891 if (!TARGET_SSE2)
51892 return false;
51893 dstmode = V8HImode;
51894 pinsr = gen_sse2_pinsrw;
51895 break;
51896
51897 case SImode:
51898 if (!TARGET_SSE4_1)
51899 return false;
51900 dstmode = V4SImode;
51901 pinsr = gen_sse4_1_pinsrd;
51902 break;
51903
51904 case DImode:
51905 gcc_assert (TARGET_64BIT);
51906 if (!TARGET_SSE4_1)
51907 return false;
51908 dstmode = V2DImode;
51909 pinsr = gen_sse4_1_pinsrq;
51910 break;
51911
51912 default:
51913 return false;
51914 }
51915
51916 /* Reject insertions to misaligned positions. */
51917 if (pos & (size-1))
51918 return false;
51919
51920 if (SUBREG_P (src))
51921 {
51922 unsigned int srcpos = SUBREG_BYTE (src);
51923
51924 if (srcpos > 0)
51925 {
51926 rtx extr_ops[4];
51927
51928 extr_ops[0] = gen_reg_rtx (srcmode);
51929 extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src));
51930 extr_ops[2] = GEN_INT (size);
51931 extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT);
51932
51933 if (!ix86_expand_pextr (extr_ops))
51934 return false;
51935
51936 src = extr_ops[0];
51937 }
51938 else
51939 src = gen_lowpart (srcmode, SUBREG_REG (src));
51940 }
51941
51942 if (GET_MODE (dst) == dstmode)
51943 d = dst;
51944 else
51945 d = gen_reg_rtx (dstmode);
51946
51947 emit_insn (pinsr (d, gen_lowpart (dstmode, dst),
51948 gen_lowpart (srcmode, src),
51949 GEN_INT (1 << (pos / size))));
51950 if (d != dst)
51951 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
51952 return true;
51953 }
51954
51955 default:
51956 return false;
51957 }
51958 }
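
/* Note on the selectors used above, for illustration only: the pextr
   expansion addresses the source element by index, GEN_INT (pos / size),
   while the pinsr patterns take a vec_merge-style one-hot mask,
   GEN_INT (1 << (pos / size)).  E.g. inserting a HImode value at bit
   position 48 of a V8HImode destination uses mask 1 << 3 = 8.  */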
51959 \f
51960 /* This function returns the calling abi specific va_list type node.
51961 It returns the FNDECL specific va_list type. */
51962
51963 static tree
51964 ix86_fn_abi_va_list (tree fndecl)
51965 {
51966 if (!TARGET_64BIT)
51967 return va_list_type_node;
51968 gcc_assert (fndecl != NULL_TREE);
51969
51970 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
51971 return ms_va_list_type_node;
51972 else
51973 return sysv_va_list_type_node;
51974 }
51975
51976 /* Returns the canonical va_list type specified by TYPE. If there
51977 is no valid TYPE provided, it returns NULL_TREE. */
51978
51979 static tree
51980 ix86_canonical_va_list_type (tree type)
51981 {
51982 tree wtype, htype;
51983
51984 /* Resolve references and pointers to va_list type. */
51985 if (TREE_CODE (type) == MEM_REF)
51986 type = TREE_TYPE (type);
51987 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
51988 type = TREE_TYPE (type);
51989 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
51990 type = TREE_TYPE (type);
51991
51992 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
51993 {
51994 wtype = va_list_type_node;
51995 gcc_assert (wtype != NULL_TREE);
51996 htype = type;
51997 if (TREE_CODE (wtype) == ARRAY_TYPE)
51998 {
51999 /* If va_list is an array type, the argument may have decayed
52000 to a pointer type, e.g. by being passed to another function.
52001 In that case, unwrap both types so that we can compare the
52002 underlying records. */
52003 if (TREE_CODE (htype) == ARRAY_TYPE
52004 || POINTER_TYPE_P (htype))
52005 {
52006 wtype = TREE_TYPE (wtype);
52007 htype = TREE_TYPE (htype);
52008 }
52009 }
52010 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
52011 return va_list_type_node;
52012 wtype = sysv_va_list_type_node;
52013 gcc_assert (wtype != NULL_TREE);
52014 htype = type;
52015 if (TREE_CODE (wtype) == ARRAY_TYPE)
52016 {
52017 /* If va_list is an array type, the argument may have decayed
52018 to a pointer type, e.g. by being passed to another function.
52019 In that case, unwrap both types so that we can compare the
52020 underlying records. */
52021 if (TREE_CODE (htype) == ARRAY_TYPE
52022 || POINTER_TYPE_P (htype))
52023 {
52024 wtype = TREE_TYPE (wtype);
52025 htype = TREE_TYPE (htype);
52026 }
52027 }
52028 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
52029 return sysv_va_list_type_node;
52030 wtype = ms_va_list_type_node;
52031 gcc_assert (wtype != NULL_TREE);
52032 htype = type;
52033 if (TREE_CODE (wtype) == ARRAY_TYPE)
52034 {
52035 /* If va_list is an array type, the argument may have decayed
52036 to a pointer type, e.g. by being passed to another function.
52037 In that case, unwrap both types so that we can compare the
52038 underlying records. */
52039 if (TREE_CODE (htype) == ARRAY_TYPE
52040 || POINTER_TYPE_P (htype))
52041 {
52042 wtype = TREE_TYPE (wtype);
52043 htype = TREE_TYPE (htype);
52044 }
52045 }
52046 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
52047 return ms_va_list_type_node;
52048 return NULL_TREE;
52049 }
52050 return std_canonical_va_list_type (type);
52051 }
52052
52053 /* Iterate through the target-specific builtin types for va_list.
52054 IDX denotes the iterator, *PTREE is set to the result type of
52055 the va_list builtin, and *PNAME to its internal type.
52056 Returns zero if there is no element for this index, otherwise
52057 IDX should be increased upon the next call.
52058 Note, do not iterate a base builtin's name like __builtin_va_list.
52059 Used from c_common_nodes_and_builtins. */
52060
52061 static int
52062 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
52063 {
52064 if (TARGET_64BIT)
52065 {
52066 switch (idx)
52067 {
52068 default:
52069 break;
52070
52071 case 0:
52072 *ptree = ms_va_list_type_node;
52073 *pname = "__builtin_ms_va_list";
52074 return 1;
52075
52076 case 1:
52077 *ptree = sysv_va_list_type_node;
52078 *pname = "__builtin_sysv_va_list";
52079 return 1;
52080 }
52081 }
52082
52083 return 0;
52084 }
52085
52086 #undef TARGET_SCHED_DISPATCH
52087 #define TARGET_SCHED_DISPATCH has_dispatch
52088 #undef TARGET_SCHED_DISPATCH_DO
52089 #define TARGET_SCHED_DISPATCH_DO do_dispatch
52090 #undef TARGET_SCHED_REASSOCIATION_WIDTH
52091 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
52092 #undef TARGET_SCHED_REORDER
52093 #define TARGET_SCHED_REORDER ix86_sched_reorder
52094 #undef TARGET_SCHED_ADJUST_PRIORITY
52095 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
52096 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
52097 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
52098 ix86_dependencies_evaluation_hook
52099
52100 /* The size of the dispatch window is the total number of bytes of
52101 object code allowed in a window. */
52102 #define DISPATCH_WINDOW_SIZE 16
52103
52104 /* Number of dispatch windows considered for scheduling. */
52105 #define MAX_DISPATCH_WINDOWS 3
52106
52107 /* Maximum number of instructions in a window. */
52108 #define MAX_INSN 4
52109
52110 /* Maximum number of immediate operands in a window. */
52111 #define MAX_IMM 4
52112
52113 /* Maximum number of immediate bits allowed in a window. */
52114 #define MAX_IMM_SIZE 128
52115
52116 /* Maximum number of 32 bit immediates allowed in a window. */
52117 #define MAX_IMM_32 4
52118
52119 /* Maximum number of 64 bit immediates allowed in a window. */
52120 #define MAX_IMM_64 2
52121
52122 /* Maximum total of loads or prefetches allowed in a window. */
52123 #define MAX_LOAD 2
52124
52125 /* Maximum total of stores allowed in a window. */
52126 #define MAX_STORE 1
52127
52128 #undef BIG
52129 #define BIG 100
52130
52131
52132 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
52133 enum dispatch_group {
52134 disp_no_group = 0,
52135 disp_load,
52136 disp_store,
52137 disp_load_store,
52138 disp_prefetch,
52139 disp_imm,
52140 disp_imm_32,
52141 disp_imm_64,
52142 disp_branch,
52143 disp_cmp,
52144 disp_jcc,
52145 disp_last
52146 };
52147
52148 /* Number of allowable groups in a dispatch window. It is an array
52149 indexed by dispatch_group enum. 100 is used as a big number,
52150 because the number of these kind of operations does not have any
52151 effect in dispatch window, but we need them for other reasons in
52152 the table. */
52153 static unsigned int num_allowable_groups[disp_last] = {
52154 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
52155 };
52156
52157 char group_name[disp_last + 1][16] = {
52158 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
52159 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
52160 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
52161 };
52162
52163 /* Instruction path. */
52164 enum insn_path {
52165 no_path = 0,
52166 path_single, /* Single micro op. */
52167 path_double, /* Double micro op. */
52168 path_multi, /* Instructions with more than 2 micro ops. */
52169 last_path
52170 };
52171
52172 /* sched_insn_info defines a window to the instructions scheduled in
52173 the basic block. It contains a pointer to the insn_info table and
52174 the instruction scheduled.
52175
52176 Windows are allocated for each basic block and are linked
52177 together. */
52178 typedef struct sched_insn_info_s {
52179 rtx insn;
52180 enum dispatch_group group;
52181 enum insn_path path;
52182 int byte_len;
52183 int imm_bytes;
52184 } sched_insn_info;
52185
52186 /* Linked list of dispatch windows. This is a two-way list of
52187 dispatch windows of a basic block. It contains information about
52188 the number of uops in the window and the total number of
52189 instructions and of bytes in the object code for this dispatch
52190 window. */
52191 typedef struct dispatch_windows_s {
52192 int num_insn; /* Number of insn in the window. */
52193 int num_uops; /* Number of uops in the window. */
52194 int window_size; /* Number of bytes in the window. */
52195 int window_num; /* Window number, either 0 or 1. */
52196 int num_imm; /* Number of immediates in an insn. */
52197 int num_imm_32; /* Number of 32 bit immediates in an insn. */
52198 int num_imm_64; /* Number of 64 bit immediates in an insn. */
52199 int imm_size; /* Total immediates in the window. */
52200 int num_loads; /* Total memory loads in the window. */
52201 int num_stores; /* Total memory stores in the window. */
52202 int violation; /* Violation exists in window. */
52203 sched_insn_info *window; /* Pointer to the window. */
52204 struct dispatch_windows_s *next;
52205 struct dispatch_windows_s *prev;
52206 } dispatch_windows;
52207
52208 /* Immediate values used in an insn. */
52209 typedef struct imm_info_s
52210 {
52211 int imm;
52212 int imm32;
52213 int imm64;
52214 } imm_info;
52215
52216 static dispatch_windows *dispatch_window_list;
52217 static dispatch_windows *dispatch_window_list1;
52218
52219 /* Get dispatch group of insn. */
52220
52221 static enum dispatch_group
52222 get_mem_group (rtx_insn *insn)
52223 {
52224 enum attr_memory memory;
52225
52226 if (INSN_CODE (insn) < 0)
52227 return disp_no_group;
52228 memory = get_attr_memory (insn);
52229 if (memory == MEMORY_STORE)
52230 return disp_store;
52231
52232 if (memory == MEMORY_LOAD)
52233 return disp_load;
52234
52235 if (memory == MEMORY_BOTH)
52236 return disp_load_store;
52237
52238 return disp_no_group;
52239 }
52240
52241 /* Return true if insn is a compare instruction. */
52242
52243 static bool
52244 is_cmp (rtx_insn *insn)
52245 {
52246 enum attr_type type;
52247
52248 type = get_attr_type (insn);
52249 return (type == TYPE_TEST
52250 || type == TYPE_ICMP
52251 || type == TYPE_FCMP
52252 || GET_CODE (PATTERN (insn)) == COMPARE);
52253 }
52254
52255 /* Return true if a dispatch violation was encountered. */
52256
52257 static bool
52258 dispatch_violation (void)
52259 {
52260 if (dispatch_window_list->next)
52261 return dispatch_window_list->next->violation;
52262 return dispatch_window_list->violation;
52263 }
52264
52265 /* Return true if insn is a branch instruction. */
52266
52267 static bool
52268 is_branch (rtx_insn *insn)
52269 {
52270 return (CALL_P (insn) || JUMP_P (insn));
52271 }
52272
52273 /* Return true if insn is a prefetch instruction. */
52274
52275 static bool
52276 is_prefetch (rtx_insn *insn)
52277 {
52278 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
52279 }
52280
52281 /* This function initializes a dispatch window and the list container holding a
52282 pointer to the window. */
52283
52284 static void
52285 init_window (int window_num)
52286 {
52287 int i;
52288 dispatch_windows *new_list;
52289
52290 if (window_num == 0)
52291 new_list = dispatch_window_list;
52292 else
52293 new_list = dispatch_window_list1;
52294
52295 new_list->num_insn = 0;
52296 new_list->num_uops = 0;
52297 new_list->window_size = 0;
52298 new_list->next = NULL;
52299 new_list->prev = NULL;
52300 new_list->window_num = window_num;
52301 new_list->num_imm = 0;
52302 new_list->num_imm_32 = 0;
52303 new_list->num_imm_64 = 0;
52304 new_list->imm_size = 0;
52305 new_list->num_loads = 0;
52306 new_list->num_stores = 0;
52307 new_list->violation = false;
52308
52309 for (i = 0; i < MAX_INSN; i++)
52310 {
52311 new_list->window[i].insn = NULL;
52312 new_list->window[i].group = disp_no_group;
52313 new_list->window[i].path = no_path;
52314 new_list->window[i].byte_len = 0;
52315 new_list->window[i].imm_bytes = 0;
52316 }
52317 return;
52318 }
52319
52320 /* This function allocates and initializes a dispatch window and the
52321 list container holding a pointer to the window. */
52322
52323 static dispatch_windows *
52324 allocate_window (void)
52325 {
52326 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
52327 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
52328
52329 return new_list;
52330 }
52331
52332 /* This routine initializes the dispatch scheduling information. It
52333 initiates building dispatch scheduler tables and constructs the
52334 first dispatch window. */
52335
52336 static void
52337 init_dispatch_sched (void)
52338 {
52339 /* Allocate a dispatch list and a window. */
52340 dispatch_window_list = allocate_window ();
52341 dispatch_window_list1 = allocate_window ();
52342 init_window (0);
52343 init_window (1);
52344 }
52345
52346 /* This function returns true if a branch is detected. End of a basic block
52347 does not have to be a branch, but here we assume only branches end a
52348 window. */
52349
52350 static bool
52351 is_end_basic_block (enum dispatch_group group)
52352 {
52353 return group == disp_branch;
52354 }
52355
52356 /* This function is called when the end of a window processing is reached. */
52357
52358 static void
52359 process_end_window (void)
52360 {
52361 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
52362 if (dispatch_window_list->next)
52363 {
52364 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
52365 gcc_assert (dispatch_window_list->window_size
52366 + dispatch_window_list1->window_size <= 48);
52367 init_window (1);
52368 }
52369 init_window (0);
52370 }
52371
52372 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
52373 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
52374 for 48 bytes of instructions. Note that these windows are not dispatch
52375 windows whose sizes are DISPATCH_WINDOW_SIZE. */
52376
52377 static dispatch_windows *
52378 allocate_next_window (int window_num)
52379 {
52380 if (window_num == 0)
52381 {
52382 if (dispatch_window_list->next)
52383 init_window (1);
52384 init_window (0);
52385 return dispatch_window_list;
52386 }
52387
52388 dispatch_window_list->next = dispatch_window_list1;
52389 dispatch_window_list1->prev = dispatch_window_list;
52390
52391 return dispatch_window_list1;
52392 }
52393
52394 /* Compute number of immediate operands of an instruction. */
52395
52396 static void
52397 find_constant (rtx in_rtx, imm_info *imm_values)
52398 {
52399 if (INSN_P (in_rtx))
52400 in_rtx = PATTERN (in_rtx);
52401 subrtx_iterator::array_type array;
52402 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
52403 if (const_rtx x = *iter)
52404 switch (GET_CODE (x))
52405 {
52406 case CONST:
52407 case SYMBOL_REF:
52408 case CONST_INT:
52409 (imm_values->imm)++;
52410 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
52411 (imm_values->imm32)++;
52412 else
52413 (imm_values->imm64)++;
52414 break;
52415
52416 case CONST_DOUBLE:
52417 case CONST_WIDE_INT:
52418 (imm_values->imm)++;
52419 (imm_values->imm64)++;
52420 break;
52421
52422 case CODE_LABEL:
52423 if (LABEL_KIND (x) == LABEL_NORMAL)
52424 {
52425 (imm_values->imm)++;
52426 (imm_values->imm32)++;
52427 }
52428 break;
52429
52430 default:
52431 break;
52432 }
52433 }
52434
52435 /* Return total size of immediate operands of an instruction along with number
52436 of corresponding immediate operands. It initializes its parameters to zero
52437 before calling FIND_CONSTANT.
52438 INSN is the input instruction. IMM is the total of immediates.
52439 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
52440 bit immediates. */
52441
52442 static int
52443 get_num_immediates (rtx_insn *insn, int *imm, int *imm32, int *imm64)
52444 {
52445 imm_info imm_values = {0, 0, 0};
52446
52447 find_constant (insn, &imm_values);
52448 *imm = imm_values.imm;
52449 *imm32 = imm_values.imm32;
52450 *imm64 = imm_values.imm64;
52451 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
52452 }
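
/* Worked example, for illustration only: an insn carrying the single
   immediate 0x123456789 has one constant that does not satisfy
   x86_64_immediate_operand in SImode, so *IMM = 1, *IMM32 = 0,
   *IMM64 = 1 and the returned size is 0 * 4 + 1 * 8 = 8 bytes.  */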
52453
52454 /* This function indicates whether an instruction has any immediate
52455 operand. */
52456
52457 static bool
52458 has_immediate (rtx_insn *insn)
52459 {
52460 int num_imm_operand;
52461 int num_imm32_operand;
52462 int num_imm64_operand;
52463
52464 if (insn)
52465 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
52466 &num_imm64_operand);
52467 return false;
52468 }
52469
52470 /* Return single or double path for instructions. */
52471
52472 static enum insn_path
52473 get_insn_path (rtx_insn *insn)
52474 {
52475 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
52476
52477 if ((int)path == 0)
52478 return path_single;
52479
52480 if ((int)path == 1)
52481 return path_double;
52482
52483 return path_multi;
52484 }
52485
52486 /* Return insn dispatch group. */
52487
52488 static enum dispatch_group
52489 get_insn_group (rtx_insn *insn)
52490 {
52491 enum dispatch_group group = get_mem_group (insn);
52492 if (group)
52493 return group;
52494
52495 if (is_branch (insn))
52496 return disp_branch;
52497
52498 if (is_cmp (insn))
52499 return disp_cmp;
52500
52501 if (has_immediate (insn))
52502 return disp_imm;
52503
52504 if (is_prefetch (insn))
52505 return disp_prefetch;
52506
52507 return disp_no_group;
52508 }
52509
52510 /* Count number of GROUP restricted instructions in a dispatch
52511 window WINDOW_LIST. */
52512
52513 static int
52514 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
52515 {
52516 enum dispatch_group group = get_insn_group (insn);
52517 int imm_size;
52518 int num_imm_operand;
52519 int num_imm32_operand;
52520 int num_imm64_operand;
52521
52522 if (group == disp_no_group)
52523 return 0;
52524
52525 if (group == disp_imm)
52526 {
52527 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
52528 &num_imm64_operand);
52529 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
52530 || num_imm_operand + window_list->num_imm > MAX_IMM
52531 || (num_imm32_operand > 0
52532 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
52533 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
52534 || (num_imm64_operand > 0
52535 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
52536 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
52537 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
52538 && num_imm64_operand > 0
52539 && ((window_list->num_imm_64 > 0
52540 && window_list->num_insn >= 2)
52541 || window_list->num_insn >= 3)))
52542 return BIG;
52543
52544 return 1;
52545 }
52546
52547 if ((group == disp_load_store
52548 && (window_list->num_loads >= MAX_LOAD
52549 || window_list->num_stores >= MAX_STORE))
52550 || ((group == disp_load
52551 || group == disp_prefetch)
52552 && window_list->num_loads >= MAX_LOAD)
52553 || (group == disp_store
52554 && window_list->num_stores >= MAX_STORE))
52555 return BIG;
52556
52557 return 1;
52558 }
52559
52560 /* This function returns true if insn satisfies dispatch rules on the
52561 last window scheduled. */
52562
52563 static bool
52564 fits_dispatch_window (rtx_insn *insn)
52565 {
52566 dispatch_windows *window_list = dispatch_window_list;
52567 dispatch_windows *window_list_next = dispatch_window_list->next;
52568 unsigned int num_restrict;
52569 enum dispatch_group group = get_insn_group (insn);
52570 enum insn_path path = get_insn_path (insn);
52571 int sum;
52572
52573 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
52574 instructions should be given the lowest priority in the
52575 scheduling process in Haifa scheduler to make sure they will be
52576 scheduled in the same dispatch window as the reference to them. */
52577 if (group == disp_jcc || group == disp_cmp)
52578 return false;
52579
52580 /* Check nonrestricted. */
52581 if (group == disp_no_group || group == disp_branch)
52582 return true;
52583
52584 /* Get last dispatch window. */
52585 if (window_list_next)
52586 window_list = window_list_next;
52587
52588 if (window_list->window_num == 1)
52589 {
52590 sum = window_list->prev->window_size + window_list->window_size;
52591
52592 if (sum == 32
52593 || (min_insn_size (insn) + sum) >= 48)
52594 /* Window 1 is full. Go for next window. */
52595 return true;
52596 }
52597
52598 num_restrict = count_num_restricted (insn, window_list);
52599
52600 if (num_restrict > num_allowable_groups[group])
52601 return false;
52602
52603 /* See if it fits in the first window. */
52604 if (window_list->window_num == 0)
52605 {
52606 /* The first window should have only single and double path
52607 uops. */
52608 if (path == path_double
52609 && (window_list->num_uops + 2) > MAX_INSN)
52610 return false;
52611 else if (path != path_single)
52612 return false;
52613 }
52614 return true;
52615 }
52616
52617 /* Add an instruction INSN with NUM_UOPS micro-operations to the
52618 dispatch window WINDOW_LIST. */
52619
52620 static void
52621 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
52622 {
52623 int byte_len = min_insn_size (insn);
52624 int num_insn = window_list->num_insn;
52625 int imm_size;
52626 sched_insn_info *window = window_list->window;
52627 enum dispatch_group group = get_insn_group (insn);
52628 enum insn_path path = get_insn_path (insn);
52629 int num_imm_operand;
52630 int num_imm32_operand;
52631 int num_imm64_operand;
52632
52633 if (!window_list->violation && group != disp_cmp
52634 && !fits_dispatch_window (insn))
52635 window_list->violation = true;
52636
52637 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
52638 &num_imm64_operand);
52639
52640 /* Initialize window with new instruction. */
52641 window[num_insn].insn = insn;
52642 window[num_insn].byte_len = byte_len;
52643 window[num_insn].group = group;
52644 window[num_insn].path = path;
52645 window[num_insn].imm_bytes = imm_size;
52646
52647 window_list->window_size += byte_len;
52648 window_list->num_insn = num_insn + 1;
52649 window_list->num_uops = window_list->num_uops + num_uops;
52650 window_list->imm_size += imm_size;
52651 window_list->num_imm += num_imm_operand;
52652 window_list->num_imm_32 += num_imm32_operand;
52653 window_list->num_imm_64 += num_imm64_operand;
52654
52655 if (group == disp_store)
52656 window_list->num_stores += 1;
52657 else if (group == disp_load
52658 || group == disp_prefetch)
52659 window_list->num_loads += 1;
52660 else if (group == disp_load_store)
52661 {
52662 window_list->num_stores += 1;
52663 window_list->num_loads += 1;
52664 }
52665 }
52666
52667 /* Adds a scheduled instruction, INSN, to the current dispatch window.
52668 If the total bytes of instructions or the number of instructions in
52669 the window exceeds the allowed limit, it allocates a new window.
52670
52671 static void
52672 add_to_dispatch_window (rtx_insn *insn)
52673 {
52674 int byte_len;
52675 dispatch_windows *window_list;
52676 dispatch_windows *next_list;
52677 dispatch_windows *window0_list;
52678 enum insn_path path;
52679 enum dispatch_group insn_group;
52680 bool insn_fits;
52681 int num_insn;
52682 int num_uops;
52683 int window_num;
52684 int insn_num_uops;
52685 int sum;
52686
52687 if (INSN_CODE (insn) < 0)
52688 return;
52689
52690 byte_len = min_insn_size (insn);
52691 window_list = dispatch_window_list;
52692 next_list = window_list->next;
52693 path = get_insn_path (insn);
52694 insn_group = get_insn_group (insn);
52695
52696 /* Get the last dispatch window. */
52697 if (next_list)
52698 window_list = dispatch_window_list->next;
52699
52700 if (path == path_single)
52701 insn_num_uops = 1;
52702 else if (path == path_double)
52703 insn_num_uops = 2;
52704 else
52705 insn_num_uops = (int) path;
52706
52707 /* If current window is full, get a new window.
52708 Window number zero is full if MAX_INSN uops are scheduled in it.
52709 Window number one is full if window zero's bytes plus window
52710 one's bytes equal 32, or if adding the bytes of the new instruction
52711 to the total makes it 48 or more, or if it already has MAX_INSN
52712 instructions in it. */
52713 num_insn = window_list->num_insn;
52714 num_uops = window_list->num_uops;
52715 window_num = window_list->window_num;
52716 insn_fits = fits_dispatch_window (insn);
52717
52718 if (num_insn >= MAX_INSN
52719 || num_uops + insn_num_uops > MAX_INSN
52720 || !(insn_fits))
52721 {
52722 window_num = ~window_num & 1;
52723 window_list = allocate_next_window (window_num);
52724 }
52725
52726 if (window_num == 0)
52727 {
52728 add_insn_window (insn, window_list, insn_num_uops);
52729 if (window_list->num_insn >= MAX_INSN
52730 && insn_group == disp_branch)
52731 {
52732 process_end_window ();
52733 return;
52734 }
52735 }
52736 else if (window_num == 1)
52737 {
52738 window0_list = window_list->prev;
52739 sum = window0_list->window_size + window_list->window_size;
52740 if (sum == 32
52741 || (byte_len + sum) >= 48)
52742 {
52743 process_end_window ();
52744 window_list = dispatch_window_list;
52745 }
52746
52747 add_insn_window (insn, window_list, insn_num_uops);
52748 }
52749 else
52750 gcc_unreachable ();
52751
52752 if (is_end_basic_block (insn_group))
52753 {
52754 /* End of basic block is reached; do end-basic-block processing. */
52755 process_end_window ();
52756 return;
52757 }
52758 }
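
/* Sizing illustration, derived only from the constants above: the pair
   of windows tracked here covers at most 48 bytes in total.  Window 1 is
   treated as full once window 0 plus window 1 reach 32 bytes, or once
   adding the next instruction would reach 48 bytes, matching the checks
   in fits_dispatch_window and add_to_dispatch_window.  */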
52759
52760 /* Print the dispatch window, WINDOW_NUM, to FILE. */
52761
52762 DEBUG_FUNCTION static void
52763 debug_dispatch_window_file (FILE *file, int window_num)
52764 {
52765 dispatch_windows *list;
52766 int i;
52767
52768 if (window_num == 0)
52769 list = dispatch_window_list;
52770 else
52771 list = dispatch_window_list1;
52772
52773 fprintf (file, "Window #%d:\n", list->window_num);
52774 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
52775 list->num_insn, list->num_uops, list->window_size);
52776 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
52777 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
52778
52779 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
52780 list->num_stores);
52781 fprintf (file, " insn info:\n");
52782
52783 for (i = 0; i < MAX_INSN; i++)
52784 {
52785 if (!list->window[i].insn)
52786 break;
52787 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
52788 i, group_name[list->window[i].group],
52789 i, (void *)list->window[i].insn,
52790 i, list->window[i].path,
52791 i, list->window[i].byte_len,
52792 i, list->window[i].imm_bytes);
52793 }
52794 }
52795
52796 /* Print to stdout a dispatch window. */
52797
52798 DEBUG_FUNCTION void
52799 debug_dispatch_window (int window_num)
52800 {
52801 debug_dispatch_window_file (stdout, window_num);
52802 }
52803
52804 /* Print INSN dispatch information to FILE. */
52805
52806 DEBUG_FUNCTION static void
52807 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
52808 {
52809 int byte_len;
52810 enum insn_path path;
52811 enum dispatch_group group;
52812 int imm_size;
52813 int num_imm_operand;
52814 int num_imm32_operand;
52815 int num_imm64_operand;
52816
52817 if (INSN_CODE (insn) < 0)
52818 return;
52819
52820 byte_len = min_insn_size (insn);
52821 path = get_insn_path (insn);
52822 group = get_insn_group (insn);
52823 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
52824 &num_imm64_operand);
52825
52826 fprintf (file, " insn info:\n");
52827 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
52828 group_name[group], path, byte_len);
52829 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
52830 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
52831 }
52832
52833 /* Print to STDERR the status of the ready list with respect to
52834 dispatch windows. */
52835
52836 DEBUG_FUNCTION void
52837 debug_ready_dispatch (void)
52838 {
52839 int i;
52840 int no_ready = number_in_ready ();
52841
52842 fprintf (stdout, "Number of ready: %d\n", no_ready);
52843
52844 for (i = 0; i < no_ready; i++)
52845 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
52846 }
52847
52848 /* This routine is the driver of the dispatch scheduler. */
52849
52850 static void
52851 do_dispatch (rtx_insn *insn, int mode)
52852 {
52853 if (mode == DISPATCH_INIT)
52854 init_dispatch_sched ();
52855 else if (mode == ADD_TO_DISPATCH_WINDOW)
52856 add_to_dispatch_window (insn);
52857 }
52858
52859 /* Return TRUE if Dispatch Scheduling is supported. */
52860
52861 static bool
52862 has_dispatch (rtx_insn *insn, int action)
52863 {
52864 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3
52865 || TARGET_BDVER4 || TARGET_ZNVER1) && flag_dispatch_scheduler)
52866 switch (action)
52867 {
52868 default:
52869 return false;
52870
52871 case IS_DISPATCH_ON:
52872 return true;
52873 break;
52874
52875 case IS_CMP:
52876 return is_cmp (insn);
52877
52878 case DISPATCH_VIOLATION:
52879 return dispatch_violation ();
52880
52881 case FITS_DISPATCH_WINDOW:
52882 return fits_dispatch_window (insn);
52883 }
52884
52885 return false;
52886 }
52887
52888 /* Implementation of reassociation_width target hook used by
52889 reassoc phase to identify parallelism level in reassociated
52890 tree. Statements tree_code is passed in OPC. Arguments type
52891 is passed in MODE.
52892
52893 Currently parallel reassociation is enabled for Atom
52894 processors only and we set reassociation width to be 2
52895 because Atom may issue up to 2 instructions per cycle.
52896
52897 Return value should be fixed if parallel reassociation is
52898 enabled for other processors. */
52899
52900 static int
52901 ix86_reassociation_width (unsigned int, machine_mode mode)
52902 {
52903 /* Vector part. */
52904 if (VECTOR_MODE_P (mode))
52905 {
52906 if (TARGET_VECTOR_PARALLEL_EXECUTION)
52907 return 2;
52908 else
52909 return 1;
52910 }
52911
52912 /* Scalar part. */
52913 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
52914 return 2;
52915 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
52916 return ((TARGET_64BIT && ix86_tune == PROCESSOR_HASWELL)? 4 : 2);
52917 else
52918 return 1;
52919 }
52920
52921 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
52922 place emms and femms instructions. */
52923
52924 static machine_mode
52925 ix86_preferred_simd_mode (machine_mode mode)
52926 {
52927 if (!TARGET_SSE)
52928 return word_mode;
52929
52930 switch (mode)
52931 {
52932 case QImode:
52933 return TARGET_AVX512BW ? V64QImode :
52934 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
52935 case HImode:
52936 return TARGET_AVX512BW ? V32HImode :
52937 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
52938 case SImode:
52939 return TARGET_AVX512F ? V16SImode :
52940 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
52941 case DImode:
52942 return TARGET_AVX512F ? V8DImode :
52943 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
52944
52945 case SFmode:
52946 if (TARGET_AVX512F)
52947 return V16SFmode;
52948 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
52949 return V8SFmode;
52950 else
52951 return V4SFmode;
52952
52953 case DFmode:
52954 if (!TARGET_VECTORIZE_DOUBLE)
52955 return word_mode;
52956 else if (TARGET_AVX512F)
52957 return V8DFmode;
52958 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
52959 return V4DFmode;
52960 else if (TARGET_SSE2)
52961 return V2DFmode;
52962 /* FALLTHRU */
52963
52964 default:
52965 return word_mode;
52966 }
52967 }
52968
52969 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
52970 vectors. If AVX512F is enabled then try vectorizing with 512bit,
52971 256bit and 128bit vectors. */
52972
52973 static unsigned int
52974 ix86_autovectorize_vector_sizes (void)
52975 {
52976 return TARGET_AVX512F ? 64 | 32 | 16 :
52977 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
52978 }
52979
52980 \f
52981
52982 /* Return class of registers which could be used for pseudo of MODE
52983 and of class RCLASS for spilling instead of memory. Return NO_REGS
52984 if it is not possible or non-profitable. */
52985 static reg_class_t
52986 ix86_spill_class (reg_class_t rclass, machine_mode mode)
52987 {
52988 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
52989 && (mode == SImode || (TARGET_64BIT && mode == DImode))
52990 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
52991 return ALL_SSE_REGS;
52992 return NO_REGS;
52993 }
52994
52995 /* Implement targetm.vectorize.init_cost. */
52996
52997 static void *
52998 ix86_init_cost (struct loop *)
52999 {
53000 unsigned *cost = XNEWVEC (unsigned, 3);
53001 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
53002 return cost;
53003 }
53004
53005 /* Implement targetm.vectorize.add_stmt_cost. */
53006
53007 static unsigned
53008 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
53009 struct _stmt_vec_info *stmt_info, int misalign,
53010 enum vect_cost_model_location where)
53011 {
53012 unsigned *cost = (unsigned *) data;
53013 unsigned retval = 0;
53014
53015 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
53016 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
53017
53018 /* Statements in an inner loop relative to the loop being
53019 vectorized are weighted more heavily. The value here is
53020 arbitrary and could potentially be improved with analysis. */
53021 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
53022 count *= 50; /* FIXME. */
53023
53024 retval = (unsigned) (count * stmt_cost);
53025
53026 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
53027 for Silvermont, as it has an out-of-order integer pipeline and can execute
53028 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
53029 if (TARGET_SILVERMONT || TARGET_INTEL)
53030 if (stmt_info && stmt_info->stmt)
53031 {
53032 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
53033 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
53034 retval = (retval * 17) / 10;
53035 }
53036
53037 cost[where] += retval;
53038
53039 return retval;
53040 }
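
/* Cost illustration, not taken from any profiling data: a statement with
   COUNT == 4 and stmt_cost == 3 contributes 12 to the chosen bucket; on
   Silvermont, if it produces an integer result, this is scaled to
   12 * 17 / 10 = 20 by the 1.7 adjustment above.  */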
53041
53042 /* Implement targetm.vectorize.finish_cost. */
53043
53044 static void
53045 ix86_finish_cost (void *data, unsigned *prologue_cost,
53046 unsigned *body_cost, unsigned *epilogue_cost)
53047 {
53048 unsigned *cost = (unsigned *) data;
53049 *prologue_cost = cost[vect_prologue];
53050 *body_cost = cost[vect_body];
53051 *epilogue_cost = cost[vect_epilogue];
53052 }
53053
53054 /* Implement targetm.vectorize.destroy_cost_data. */
53055
53056 static void
53057 ix86_destroy_cost_data (void *data)
53058 {
53059 free (data);
53060 }
53061
53062 /* Validate target specific memory model bits in VAL. */
53063
53064 static unsigned HOST_WIDE_INT
53065 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
53066 {
53067 enum memmodel model = memmodel_from_int (val);
53068 bool strong;
53069
53070 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
53071 |MEMMODEL_MASK)
53072 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
53073 {
53074 warning (OPT_Winvalid_memory_model,
53075 "Unknown architecture specific memory model");
53076 return MEMMODEL_SEQ_CST;
53077 }
53078 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
53079 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
53080 {
53081 warning (OPT_Winvalid_memory_model,
53082 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
53083 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
53084 }
53085 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
53086 {
53087 warning (OPT_Winvalid_memory_model,
53088 "HLE_RELEASE not used with RELEASE or stronger memory model");
53089 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
53090 }
53091 return val;
53092 }
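
/* Usage sketch, for illustration only (see the GCC manual for the exact
   macro names): user code combines an HLE hint with a standard model, e.g.

     __atomic_exchange_n (&lock, 1, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);

   The checks above reject e.g. HLE_ACQUIRE combined with a model that is
   not ACQUIRE or stronger, falling back to MEMMODEL_SEQ_CST.  */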
53093
53094 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
53095 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
53096 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
53097 or number of vecsize_mangle variants that should be emitted. */
53098
53099 static int
53100 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
53101 struct cgraph_simd_clone *clonei,
53102 tree base_type, int num)
53103 {
53104 int ret = 1;
53105
53106 if (clonei->simdlen
53107 && (clonei->simdlen < 2
53108 || clonei->simdlen > 16
53109 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
53110 {
53111 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
53112 "unsupported simdlen %d", clonei->simdlen);
53113 return 0;
53114 }
53115
53116 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
53117 if (TREE_CODE (ret_type) != VOID_TYPE)
53118 switch (TYPE_MODE (ret_type))
53119 {
53120 case QImode:
53121 case HImode:
53122 case SImode:
53123 case DImode:
53124 case SFmode:
53125 case DFmode:
53126 /* case SCmode: */
53127 /* case DCmode: */
53128 break;
53129 default:
53130 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
53131 "unsupported return type %qT for simd\n", ret_type);
53132 return 0;
53133 }
53134
53135 tree t;
53136 int i;
53137
53138 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
53139 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
53140 switch (TYPE_MODE (TREE_TYPE (t)))
53141 {
53142 case QImode:
53143 case HImode:
53144 case SImode:
53145 case DImode:
53146 case SFmode:
53147 case DFmode:
53148 /* case SCmode: */
53149 /* case DCmode: */
53150 break;
53151 default:
53152 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
53153 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
53154 return 0;
53155 }
53156
53157 if (clonei->cilk_elemental)
53158 {
53159 /* Parse the processor clause here. If not present, default to 'b'. */
53160 clonei->vecsize_mangle = 'b';
53161 }
53162 else if (!TREE_PUBLIC (node->decl))
53163 {
53164 /* If the function isn't exported, we can pick just one ISA
53165 for the clones. */
53166 if (TARGET_AVX2)
53167 clonei->vecsize_mangle = 'd';
53168 else if (TARGET_AVX)
53169 clonei->vecsize_mangle = 'c';
53170 else
53171 clonei->vecsize_mangle = 'b';
53172 ret = 1;
53173 }
53174 else
53175 {
53176 clonei->vecsize_mangle = "bcd"[num];
53177 ret = 3;
53178 }
53179 switch (clonei->vecsize_mangle)
53180 {
53181 case 'b':
53182 clonei->vecsize_int = 128;
53183 clonei->vecsize_float = 128;
53184 break;
53185 case 'c':
53186 clonei->vecsize_int = 128;
53187 clonei->vecsize_float = 256;
53188 break;
53189 case 'd':
53190 clonei->vecsize_int = 256;
53191 clonei->vecsize_float = 256;
53192 break;
53193 }
53194 if (clonei->simdlen == 0)
53195 {
53196 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
53197 clonei->simdlen = clonei->vecsize_int;
53198 else
53199 clonei->simdlen = clonei->vecsize_float;
53200 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
53201 if (clonei->simdlen > 16)
53202 clonei->simdlen = 16;
53203 }
53204 return ret;
53205 }
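/* Rough worked example of the computation above: for an exported

     #pragma omp declare simd
     double f (double x);

   (compiled with -fopenmp or -fopenmp-simd) three clones are requested
   (ret == 3), with vecsize_mangle 'b', 'c' and 'd'.  With base_type double
   (64 bits) the default simdlen comes out as 128/64 == 2 for 'b' and
   256/64 == 4 for 'c' and 'd'; for a 32-bit int base type it would be 4, 4
   and 8 respectively.  The mangle letter ends up in the vector-ABI clone
   name, roughly _ZGVbN2_f, _ZGVcN4_f and _ZGVdN4_f for the case above.  */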
53206
53207 /* Add target attribute to SIMD clone NODE if needed. */
53208
53209 static void
53210 ix86_simd_clone_adjust (struct cgraph_node *node)
53211 {
53212 const char *str = NULL;
53213 gcc_assert (node->decl == cfun->decl);
53214 switch (node->simdclone->vecsize_mangle)
53215 {
53216 case 'b':
53217 if (!TARGET_SSE2)
53218 str = "sse2";
53219 break;
53220 case 'c':
53221 if (!TARGET_AVX)
53222 str = "avx";
53223 break;
53224 case 'd':
53225 if (!TARGET_AVX2)
53226 str = "avx2";
53227 break;
53228 default:
53229 gcc_unreachable ();
53230 }
53231 if (str == NULL)
53232 return;
53233 push_cfun (NULL);
53234 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
53235 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
53236 gcc_assert (ok);
53237 pop_cfun ();
53238 ix86_reset_previous_fndecl ();
53239 ix86_set_current_function (node->decl);
53240 }
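/* In effect, when the translation unit is not already compiled with the ISA
   a clone needs, the clone body is handled roughly as if the user had
   written e.g.

     __attribute__ ((target ("avx2")))

   on it: the target-attribute machinery invoked above switches the
   per-function options before the clone is expanded.  */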
53241
53242 /* If SIMD clone NODE can't be used in a vectorized loop
53243 in current function, return -1, otherwise return a badness of using it
53244 (0 if it is most desirable from vecsize_mangle point of view, 1
53245 slightly less desirable, etc.). */
53246
53247 static int
53248 ix86_simd_clone_usable (struct cgraph_node *node)
53249 {
53250 switch (node->simdclone->vecsize_mangle)
53251 {
53252 case 'b':
53253 if (!TARGET_SSE2)
53254 return -1;
53255 if (!TARGET_AVX)
53256 return 0;
53257 return TARGET_AVX2 ? 2 : 1;
53258 case 'c':
53259 if (!TARGET_AVX)
53260 return -1;
53261 return TARGET_AVX2 ? 1 : 0;
53263 case 'd':
53264 if (!TARGET_AVX2)
53265 return -1;
53266 return 0;
53267 default:
53268 gcc_unreachable ();
53269 }
53270 }
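/* Worked example: in a function compiled with -mavx (but not -mavx2) the
   'b' (SSE2) clone gets badness 1, the 'c' (AVX) clone badness 0 and the
   'd' (AVX2) clone is rejected with -1, so the vectorizer prefers the AVX
   variant when several clones are available.  */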
53271
53272 /* This function adjusts the unroll factor based on
53273 the hardware capabilities. For example, bdver3 has
53274 a loop buffer which makes unrolling of smaller
53275 loops less important. The unroll factor is chosen
53276 using the number of memory references in the loop body
53277 (capped at 32) as a heuristic. */
53278
53279 static unsigned
53280 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
53281 {
53282 basic_block *bbs;
53283 rtx_insn *insn;
53284 unsigned i;
53285 unsigned mem_count = 0;
53286
53287 if (!TARGET_ADJUST_UNROLL)
53288 return nunroll;
53289
53290 /* Count the number of memory references within the loop body.
53291 This value determines the unrolling factor for bdver3 and bdver4
53292 architectures. */
53293 subrtx_iterator::array_type array;
53294 bbs = get_loop_body (loop);
53295 for (i = 0; i < loop->num_nodes; i++)
53296 FOR_BB_INSNS (bbs[i], insn)
53297 if (NONDEBUG_INSN_P (insn))
53298 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
53299 if (const_rtx x = *iter)
53300 if (MEM_P (x))
53301 {
53302 machine_mode mode = GET_MODE (x);
53303 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
53304 if (n_words > 4)
53305 mem_count += 2;
53306 else
53307 mem_count += 1;
53308 }
53309 free (bbs);
53310
53311 if (mem_count && mem_count <= 32)
53312 return 32 / mem_count;
53313
53314 return nunroll;
53315 }
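/* Worked example of the heuristic above (only active when
   TARGET_ADJUST_UNROLL is set, i.e. for bdver3/bdver4 tuning): a loop body
   with eight word-sized memory references gives mem_count == 8 and an
   unroll factor of 32/8 == 4; an access wider than four words counts
   double; and once more than 32 references are seen, the requested NUNROLL
   is returned unchanged.  */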
53316
53317
53318 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
53319
53320 static bool
53321 ix86_float_exceptions_rounding_supported_p (void)
53322 {
53323 /* For x87 floating point with standard excess precision handling,
53324 there is no adddf3 pattern (since x87 floating point only has
53325 XFmode operations) so the default hook implementation gets this
53326 wrong. */
53327 return TARGET_80387 || TARGET_SSE_MATH;
53328 }
53329
53330 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
53331
53332 static void
53333 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
53334 {
53335 if (!TARGET_80387 && !TARGET_SSE_MATH)
53336 return;
53337 tree exceptions_var = create_tmp_var_raw (integer_type_node);
53338 if (TARGET_80387)
53339 {
53340 tree fenv_index_type = build_index_type (size_int (6));
53341 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
53342 tree fenv_var = create_tmp_var_raw (fenv_type);
53343 TREE_ADDRESSABLE (fenv_var) = 1;
53344 tree fenv_ptr = build_pointer_type (fenv_type);
53345 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
53346 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
53347 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
53348 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
53349 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
53350 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
53351 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
53352 tree hold_fnclex = build_call_expr (fnclex, 0);
53353 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
53354 NULL_TREE, NULL_TREE);
53355 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
53356 hold_fnclex);
53357 *clear = build_call_expr (fnclex, 0);
53358 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
53359 tree fnstsw_call = build_call_expr (fnstsw, 0);
53360 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
53361 sw_var, fnstsw_call);
53362 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
53363 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
53364 exceptions_var, exceptions_x87);
53365 *update = build2 (COMPOUND_EXPR, integer_type_node,
53366 sw_mod, update_mod);
53367 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
53368 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
53369 }
53370 if (TARGET_SSE_MATH)
53371 {
53372 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
53373 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
53374 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
53375 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
53376 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
53377 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
53378 mxcsr_orig_var, stmxcsr_hold_call);
53379 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
53380 mxcsr_orig_var,
53381 build_int_cst (unsigned_type_node, 0x1f80));
53382 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
53383 build_int_cst (unsigned_type_node, 0xffffffc0));
53384 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
53385 mxcsr_mod_var, hold_mod_val);
53386 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
53387 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
53388 hold_assign_orig, hold_assign_mod);
53389 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
53390 ldmxcsr_hold_call);
53391 if (*hold)
53392 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
53393 else
53394 *hold = hold_all;
53395 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
53396 if (*clear)
53397 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
53398 ldmxcsr_clear_call);
53399 else
53400 *clear = ldmxcsr_clear_call;
53401 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
53402 tree exceptions_sse = fold_convert (integer_type_node,
53403 stxmcsr_update_call);
53404 if (*update)
53405 {
53406 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
53407 exceptions_var, exceptions_sse);
53408 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
53409 exceptions_var, exceptions_mod);
53410 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
53411 exceptions_assign);
53412 }
53413 else
53414 *update = build2 (MODIFY_EXPR, integer_type_node,
53415 exceptions_var, exceptions_sse);
53416 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
53417 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
53418 ldmxcsr_update_call);
53419 }
53420 tree atomic_feraiseexcept
53421 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
53422 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
53423 1, exceptions_var);
53424 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
53425 atomic_feraiseexcept_call);
53426 }
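/* Sketch of what the SSE part above builds, in C-like terms (mxcsr_orig and
   mxcsr_mod mirror the temporaries created above):

     mxcsr_orig = __builtin_ia32_stmxcsr ();
     mxcsr_mod = (mxcsr_orig | 0x1f80) & 0xffffffc0;
     __builtin_ia32_ldmxcsr (mxcsr_mod);

   0x1f80 sets the six exception mask bits (bits 7-12) so no trap fires
   while the compound assignment is retried, and 0xffffffc0 clears the six
   exception flag bits (bits 0-5).  UPDATE later reads the accumulated flags
   with stmxcsr, restores the original MXCSR and hands the flags to
   __atomic_feraiseexcept, which is what gives e.g. "_Atomic double d;
   d += x;" the required FP-exception behaviour.  */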
53427
53428 /* Return the mode to be used for bounds, or VOIDmode
53429 if bounds are not supported. */
53430
53431 static enum machine_mode
53432 ix86_mpx_bound_mode ()
53433 {
53434 /* Do not support pointer checker if MPX
53435 is not enabled. */
53436 if (!TARGET_MPX)
53437 {
53438 if (flag_check_pointer_bounds)
53439 warning (0, "Pointer Checker requires MPX support on this target."
53440 " Use -mmpx options to enable MPX.");
53441 return VOIDmode;
53442 }
53443
53444 return BNDmode;
53445 }
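/* For reference, the checker is normally enabled with something like

     gcc -mmpx -fcheck-pointer-bounds ...

   in which case BNDmode (BND32mode or BND64mode, depending on the pointer
   size) is used for bound values; otherwise the warning above fires and
   bounds are disabled via VOIDmode.  */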
53446
53447 /* Return a constant used to statically initialize constant bounds.
53448
53449 This function is used to create special bound values. For now
53450 only INIT bounds and NONE bounds are expected. More special
53451 values may be added later. */
53452
53453 static tree
53454 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
53455 {
53456 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
53457 : build_zero_cst (pointer_sized_int_node);
53458 tree high = ub ? build_zero_cst (pointer_sized_int_node)
53459 : build_minus_one_cst (pointer_sized_int_node);
53460
53461 /* This function is supposed to be used to create INIT and
53462 NONE bounds only. */
53463 gcc_assert ((lb == 0 && ub == -1)
53464 || (lb == -1 && ub == 0));
53465
53466 return build_complex (NULL, low, high);
53467 }
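/* Worked example: INIT bounds (lb == 0, ub == -1, i.e. the whole address
   space) become the complex constant {0, 0} and NONE bounds (lb == -1,
   ub == 0) become {-1, -1}, because the high part holds the one's
   complement of the upper bound (~-1 == 0, ~0 == -1), matching the store
   done by ix86_initialize_bounds below.  */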
53468
53469 /* Generate a list of statements STMTS to initialize pointer bounds
53470 variable VAR with bounds LB and UB. Return the number of generated
53471 statements. */
53472
53473 static int
53474 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
53475 {
53476 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
53477 tree lhs, modify, var_p;
53478
53479 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
53480 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
53481
53482 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
53483 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
53484 append_to_statement_list (modify, stmts);
53485
53486 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
53487 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
53488 TYPE_SIZE_UNIT (pointer_sized_int_node)));
53489 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
53490 append_to_statement_list (modify, stmts);
53491
53492 return 2;
53493 }
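/* The two statements appended above amount to, roughly,

     ((uintptr_t *) &var)[0] = lb;
     ((uintptr_t *) &var)[1] = ~ub;

   i.e. the lower bound followed by the complemented upper bound, which is
   the in-memory layout the bound constants above assume.  */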
53494
53495 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
53496 /* For i386, a common symbol is local only in non-PIE binaries. For
53497 x86-64, a common symbol is local only in non-PIE binaries, or when
53498 the linker supports copy relocations in PIE binaries. */
53499
53500 static bool
53501 ix86_binds_local_p (const_tree exp)
53502 {
53503 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
53504 (!flag_pic
53505 || (TARGET_64BIT
53506 && HAVE_LD_PIE_COPYRELOC != 0)));
53507 }
53508 #endif
53509
53510 /* If MEM is in the form of [base+offset], extract the two parts
53511 of the address into BASE and OFFSET and return true; otherwise return false. */
53512
53513 static bool
53514 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
53515 {
53516 rtx addr;
53517
53518 gcc_assert (MEM_P (mem));
53519
53520 addr = XEXP (mem, 0);
53521
53522 if (GET_CODE (addr) == CONST)
53523 addr = XEXP (addr, 0);
53524
53525 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
53526 {
53527 *base = addr;
53528 *offset = const0_rtx;
53529 return true;
53530 }
53531
53532 if (GET_CODE (addr) == PLUS
53533 && (REG_P (XEXP (addr, 0))
53534 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
53535 && CONST_INT_P (XEXP (addr, 1)))
53536 {
53537 *base = XEXP (addr, 0);
53538 *offset = XEXP (addr, 1);
53539 return true;
53540 }
53541
53542 return false;
53543 }
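/* Examples of addresses this accepts (R stands for any hard or pseudo
   register):

     (mem:DI (reg:DI R))                           base = R,   offset = 0
     (mem:DI (plus:DI (reg:DI R) (const_int 8)))   base = R,   offset = 8
     (mem:DI (symbol_ref "x"))                     base = "x", offset = 0

   anything else (index registers, scaling, non-constant offsets) makes it
   return false.  */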
53544
53545 /* Given OPERANDS of consecutive load/store instructions, check if we
53546 can merge them into a single move-multiple. LOAD is true if they
53547 are load instructions. MODE is the mode of the memory operands. */
53548
53549 bool
53550 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
53551 enum machine_mode mode)
53552 {
53553 HOST_WIDE_INT offval_1, offval_2, msize;
53554 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
53555
53556 if (load)
53557 {
53558 mem_1 = operands[1];
53559 mem_2 = operands[3];
53560 reg_1 = operands[0];
53561 reg_2 = operands[2];
53562 }
53563 else
53564 {
53565 mem_1 = operands[0];
53566 mem_2 = operands[2];
53567 reg_1 = operands[1];
53568 reg_2 = operands[3];
53569 }
53570
53571 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
53572
53573 if (REGNO (reg_1) != REGNO (reg_2))
53574 return false;
53575
53576 /* Check if the addresses are in the form of [base+offset]. */
53577 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
53578 return false;
53579 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
53580 return false;
53581
53582 /* Check if the bases are the same. */
53583 if (!rtx_equal_p (base_1, base_2))
53584 return false;
53585
53586 offval_1 = INTVAL (offset_1);
53587 offval_2 = INTVAL (offset_2);
53588 msize = GET_MODE_SIZE (mode);
53589 /* Check if mem_1 is adjacent to mem_2 and mem_1 has the lower address. */
53590 if (offval_1 + msize != offval_2)
53591 return false;
53592
53593 return true;
53594 }
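/* Worked example: for a store pair in DImode (msize == 8) with OPERANDS
   describing [R+0] = reg and [R+8] = reg, both bases are R, the offsets
   differ by exactly msize and both register operands are the same register,
   so the pair may be merged; [R+0]/[R+16], different bases, or different
   registers all make this return false.  */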
53595
53596 /* Initialize the GCC target structure. */
53597 #undef TARGET_RETURN_IN_MEMORY
53598 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
53599
53600 #undef TARGET_LEGITIMIZE_ADDRESS
53601 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
53602
53603 #undef TARGET_ATTRIBUTE_TABLE
53604 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
53605 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
53606 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
53607 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
53608 # undef TARGET_MERGE_DECL_ATTRIBUTES
53609 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
53610 #endif
53611
53612 #undef TARGET_COMP_TYPE_ATTRIBUTES
53613 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
53614
53615 #undef TARGET_INIT_BUILTINS
53616 #define TARGET_INIT_BUILTINS ix86_init_builtins
53617 #undef TARGET_BUILTIN_DECL
53618 #define TARGET_BUILTIN_DECL ix86_builtin_decl
53619 #undef TARGET_EXPAND_BUILTIN
53620 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
53621
53622 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
53623 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
53624 ix86_builtin_vectorized_function
53625
53626 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
53627 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
53628
53629 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
53630 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
53631
53632 #undef TARGET_VECTORIZE_BUILTIN_GATHER
53633 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
53634
53635 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
53636 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
53637
53638 #undef TARGET_BUILTIN_RECIPROCAL
53639 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
53640
53641 #undef TARGET_ASM_FUNCTION_EPILOGUE
53642 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
53643
53644 #undef TARGET_ENCODE_SECTION_INFO
53645 #ifndef SUBTARGET_ENCODE_SECTION_INFO
53646 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
53647 #else
53648 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
53649 #endif
53650
53651 #undef TARGET_ASM_OPEN_PAREN
53652 #define TARGET_ASM_OPEN_PAREN ""
53653 #undef TARGET_ASM_CLOSE_PAREN
53654 #define TARGET_ASM_CLOSE_PAREN ""
53655
53656 #undef TARGET_ASM_BYTE_OP
53657 #define TARGET_ASM_BYTE_OP ASM_BYTE
53658
53659 #undef TARGET_ASM_ALIGNED_HI_OP
53660 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
53661 #undef TARGET_ASM_ALIGNED_SI_OP
53662 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
53663 #ifdef ASM_QUAD
53664 #undef TARGET_ASM_ALIGNED_DI_OP
53665 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
53666 #endif
53667
53668 #undef TARGET_PROFILE_BEFORE_PROLOGUE
53669 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
53670
53671 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
53672 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
53673
53674 #undef TARGET_ASM_UNALIGNED_HI_OP
53675 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
53676 #undef TARGET_ASM_UNALIGNED_SI_OP
53677 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
53678 #undef TARGET_ASM_UNALIGNED_DI_OP
53679 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
53680
53681 #undef TARGET_PRINT_OPERAND
53682 #define TARGET_PRINT_OPERAND ix86_print_operand
53683 #undef TARGET_PRINT_OPERAND_ADDRESS
53684 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
53685 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
53686 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
53687 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
53688 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
53689
53690 #undef TARGET_SCHED_INIT_GLOBAL
53691 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
53692 #undef TARGET_SCHED_ADJUST_COST
53693 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
53694 #undef TARGET_SCHED_ISSUE_RATE
53695 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
53696 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
53697 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
53698 ia32_multipass_dfa_lookahead
53699 #undef TARGET_SCHED_MACRO_FUSION_P
53700 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
53701 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
53702 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
53703
53704 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
53705 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
53706
53707 #undef TARGET_MEMMODEL_CHECK
53708 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
53709
53710 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
53711 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
53712
53713 #ifdef HAVE_AS_TLS
53714 #undef TARGET_HAVE_TLS
53715 #define TARGET_HAVE_TLS true
53716 #endif
53717 #undef TARGET_CANNOT_FORCE_CONST_MEM
53718 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
53719 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
53720 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
53721
53722 #undef TARGET_DELEGITIMIZE_ADDRESS
53723 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
53724
53725 #undef TARGET_MS_BITFIELD_LAYOUT_P
53726 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
53727
53728 #if TARGET_MACHO
53729 #undef TARGET_BINDS_LOCAL_P
53730 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
53731 #else
53732 #undef TARGET_BINDS_LOCAL_P
53733 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
53734 #endif
53735 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
53736 #undef TARGET_BINDS_LOCAL_P
53737 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
53738 #endif
53739
53740 #undef TARGET_ASM_OUTPUT_MI_THUNK
53741 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
53742 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
53743 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
53744
53745 #undef TARGET_ASM_FILE_START
53746 #define TARGET_ASM_FILE_START x86_file_start
53747
53748 #undef TARGET_OPTION_OVERRIDE
53749 #define TARGET_OPTION_OVERRIDE ix86_option_override
53750
53751 #undef TARGET_REGISTER_MOVE_COST
53752 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
53753 #undef TARGET_MEMORY_MOVE_COST
53754 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
53755 #undef TARGET_RTX_COSTS
53756 #define TARGET_RTX_COSTS ix86_rtx_costs
53757 #undef TARGET_ADDRESS_COST
53758 #define TARGET_ADDRESS_COST ix86_address_cost
53759
53760 #undef TARGET_FIXED_CONDITION_CODE_REGS
53761 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
53762 #undef TARGET_CC_MODES_COMPATIBLE
53763 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
53764
53765 #undef TARGET_MACHINE_DEPENDENT_REORG
53766 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
53767
53768 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
53769 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
53770
53771 #undef TARGET_BUILD_BUILTIN_VA_LIST
53772 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
53773
53774 #undef TARGET_FOLD_BUILTIN
53775 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
53776
53777 #undef TARGET_COMPARE_VERSION_PRIORITY
53778 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
53779
53780 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
53781 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
53782 ix86_generate_version_dispatcher_body
53783
53784 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
53785 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
53786 ix86_get_function_versions_dispatcher
53787
53788 #undef TARGET_ENUM_VA_LIST_P
53789 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
53790
53791 #undef TARGET_FN_ABI_VA_LIST
53792 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
53793
53794 #undef TARGET_CANONICAL_VA_LIST_TYPE
53795 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
53796
53797 #undef TARGET_EXPAND_BUILTIN_VA_START
53798 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
53799
53800 #undef TARGET_MD_ASM_ADJUST
53801 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
53802
53803 #undef TARGET_PROMOTE_PROTOTYPES
53804 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
53805 #undef TARGET_SETUP_INCOMING_VARARGS
53806 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
53807 #undef TARGET_MUST_PASS_IN_STACK
53808 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
53809 #undef TARGET_FUNCTION_ARG_ADVANCE
53810 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
53811 #undef TARGET_FUNCTION_ARG
53812 #define TARGET_FUNCTION_ARG ix86_function_arg
53813 #undef TARGET_INIT_PIC_REG
53814 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
53815 #undef TARGET_USE_PSEUDO_PIC_REG
53816 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
53817 #undef TARGET_FUNCTION_ARG_BOUNDARY
53818 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
53819 #undef TARGET_PASS_BY_REFERENCE
53820 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
53821 #undef TARGET_INTERNAL_ARG_POINTER
53822 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
53823 #undef TARGET_UPDATE_STACK_BOUNDARY
53824 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
53825 #undef TARGET_GET_DRAP_RTX
53826 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
53827 #undef TARGET_STRICT_ARGUMENT_NAMING
53828 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
53829 #undef TARGET_STATIC_CHAIN
53830 #define TARGET_STATIC_CHAIN ix86_static_chain
53831 #undef TARGET_TRAMPOLINE_INIT
53832 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
53833 #undef TARGET_RETURN_POPS_ARGS
53834 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
53835
53836 #undef TARGET_LEGITIMATE_COMBINED_INSN
53837 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
53838
53839 #undef TARGET_ASAN_SHADOW_OFFSET
53840 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
53841
53842 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
53843 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
53844
53845 #undef TARGET_SCALAR_MODE_SUPPORTED_P
53846 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
53847
53848 #undef TARGET_VECTOR_MODE_SUPPORTED_P
53849 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
53850
53851 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
53852 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
53853 ix86_libgcc_floating_mode_supported_p
53854
53855 #undef TARGET_C_MODE_FOR_SUFFIX
53856 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
53857
53858 #ifdef HAVE_AS_TLS
53859 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
53860 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
53861 #endif
53862
53863 #ifdef SUBTARGET_INSERT_ATTRIBUTES
53864 #undef TARGET_INSERT_ATTRIBUTES
53865 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
53866 #endif
53867
53868 #undef TARGET_MANGLE_TYPE
53869 #define TARGET_MANGLE_TYPE ix86_mangle_type
53870
53871 #if !TARGET_MACHO
53872 #undef TARGET_STACK_PROTECT_FAIL
53873 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
53874 #endif
53875
53876 #undef TARGET_FUNCTION_VALUE
53877 #define TARGET_FUNCTION_VALUE ix86_function_value
53878
53879 #undef TARGET_FUNCTION_VALUE_REGNO_P
53880 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
53881
53882 #undef TARGET_PROMOTE_FUNCTION_MODE
53883 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
53884
53885 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
53886 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
53887
53888 #undef TARGET_MEMBER_TYPE_FORCES_BLK
53889 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
53890
53891 #undef TARGET_INSTANTIATE_DECLS
53892 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
53893
53894 #undef TARGET_SECONDARY_RELOAD
53895 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
53896
53897 #undef TARGET_CLASS_MAX_NREGS
53898 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
53899
53900 #undef TARGET_PREFERRED_RELOAD_CLASS
53901 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
53902 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
53903 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
53904 #undef TARGET_CLASS_LIKELY_SPILLED_P
53905 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
53906
53907 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
53908 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
53909 ix86_builtin_vectorization_cost
53910 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
53911 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
53912 ix86_vectorize_vec_perm_const_ok
53913 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
53914 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
53915 ix86_preferred_simd_mode
53916 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
53917 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
53918 ix86_autovectorize_vector_sizes
53919 #undef TARGET_VECTORIZE_INIT_COST
53920 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
53921 #undef TARGET_VECTORIZE_ADD_STMT_COST
53922 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
53923 #undef TARGET_VECTORIZE_FINISH_COST
53924 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
53925 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
53926 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
53927
53928 #undef TARGET_SET_CURRENT_FUNCTION
53929 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
53930
53931 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
53932 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
53933
53934 #undef TARGET_OPTION_SAVE
53935 #define TARGET_OPTION_SAVE ix86_function_specific_save
53936
53937 #undef TARGET_OPTION_RESTORE
53938 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
53939
53940 #undef TARGET_OPTION_POST_STREAM_IN
53941 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
53942
53943 #undef TARGET_OPTION_PRINT
53944 #define TARGET_OPTION_PRINT ix86_function_specific_print
53945
53946 #undef TARGET_OPTION_FUNCTION_VERSIONS
53947 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
53948
53949 #undef TARGET_CAN_INLINE_P
53950 #define TARGET_CAN_INLINE_P ix86_can_inline_p
53951
53952 #undef TARGET_LEGITIMATE_ADDRESS_P
53953 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
53954
53955 #undef TARGET_LRA_P
53956 #define TARGET_LRA_P hook_bool_void_true
53957
53958 #undef TARGET_REGISTER_PRIORITY
53959 #define TARGET_REGISTER_PRIORITY ix86_register_priority
53960
53961 #undef TARGET_REGISTER_USAGE_LEVELING_P
53962 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
53963
53964 #undef TARGET_LEGITIMATE_CONSTANT_P
53965 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
53966
53967 #undef TARGET_FRAME_POINTER_REQUIRED
53968 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
53969
53970 #undef TARGET_CAN_ELIMINATE
53971 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
53972
53973 #undef TARGET_EXTRA_LIVE_ON_ENTRY
53974 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
53975
53976 #undef TARGET_ASM_CODE_END
53977 #define TARGET_ASM_CODE_END ix86_code_end
53978
53979 #undef TARGET_CONDITIONAL_REGISTER_USAGE
53980 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
53981
53982 #if TARGET_MACHO
53983 #undef TARGET_INIT_LIBFUNCS
53984 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
53985 #endif
53986
53987 #undef TARGET_LOOP_UNROLL_ADJUST
53988 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
53989
53990 #undef TARGET_SPILL_CLASS
53991 #define TARGET_SPILL_CLASS ix86_spill_class
53992
53993 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
53994 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
53995 ix86_simd_clone_compute_vecsize_and_simdlen
53996
53997 #undef TARGET_SIMD_CLONE_ADJUST
53998 #define TARGET_SIMD_CLONE_ADJUST \
53999 ix86_simd_clone_adjust
54000
54001 #undef TARGET_SIMD_CLONE_USABLE
54002 #define TARGET_SIMD_CLONE_USABLE \
54003 ix86_simd_clone_usable
54004
54005 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
54006 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
54007 ix86_float_exceptions_rounding_supported_p
54008
54009 #undef TARGET_MODE_EMIT
54010 #define TARGET_MODE_EMIT ix86_emit_mode_set
54011
54012 #undef TARGET_MODE_NEEDED
54013 #define TARGET_MODE_NEEDED ix86_mode_needed
54014
54015 #undef TARGET_MODE_AFTER
54016 #define TARGET_MODE_AFTER ix86_mode_after
54017
54018 #undef TARGET_MODE_ENTRY
54019 #define TARGET_MODE_ENTRY ix86_mode_entry
54020
54021 #undef TARGET_MODE_EXIT
54022 #define TARGET_MODE_EXIT ix86_mode_exit
54023
54024 #undef TARGET_MODE_PRIORITY
54025 #define TARGET_MODE_PRIORITY ix86_mode_priority
54026
54027 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
54028 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
54029
54030 #undef TARGET_LOAD_BOUNDS_FOR_ARG
54031 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
54032
54033 #undef TARGET_STORE_BOUNDS_FOR_ARG
54034 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
54035
54036 #undef TARGET_LOAD_RETURNED_BOUNDS
54037 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
54038
54039 #undef TARGET_STORE_RETURNED_BOUNDS
54040 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
54041
54042 #undef TARGET_CHKP_BOUND_MODE
54043 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
54044
54045 #undef TARGET_BUILTIN_CHKP_FUNCTION
54046 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
54047
54048 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
54049 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
54050
54051 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
54052 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
54053
54054 #undef TARGET_CHKP_INITIALIZE_BOUNDS
54055 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
54056
54057 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
54058 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
54059
54060 #undef TARGET_OFFLOAD_OPTIONS
54061 #define TARGET_OFFLOAD_OPTIONS \
54062 ix86_offload_options
54063
54064 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
54065 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
54066
54067 struct gcc_target targetm = TARGET_INITIALIZER;
54068 \f
54069 #include "gt-i386.h"