2442f3464dca722effb8361acf4c1b4b4dbedc28
gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "hashtab.h"
42 #include "hash-set.h"
43 #include "vec.h"
44 #include "machmode.h"
45 #include "input.h"
46 #include "function.h"
47 #include "recog.h"
48 #include "expr.h"
49 #include "optabs.h"
50 #include "diagnostic-core.h"
51 #include "toplev.h"
52 #include "predict.h"
53 #include "dominance.h"
54 #include "cfg.h"
55 #include "cfgrtl.h"
56 #include "cfganal.h"
57 #include "lcm.h"
58 #include "cfgbuild.h"
59 #include "cfgcleanup.h"
60 #include "basic-block.h"
61 #include "ggc.h"
62 #include "target.h"
63 #include "target-def.h"
64 #include "common/common-target.h"
65 #include "langhooks.h"
66 #include "reload.h"
67 #include "hash-map.h"
68 #include "is-a.h"
69 #include "plugin-api.h"
70 #include "ipa-ref.h"
71 #include "cgraph.h"
72 #include "hash-table.h"
73 #include "tree-ssa-alias.h"
74 #include "internal-fn.h"
75 #include "gimple-fold.h"
76 #include "tree-eh.h"
77 #include "gimple-expr.h"
78 #include "gimple.h"
79 #include "gimplify.h"
80 #include "cfgloop.h"
81 #include "dwarf2.h"
82 #include "df.h"
83 #include "tm-constrs.h"
84 #include "params.h"
85 #include "cselib.h"
86 #include "debug.h"
87 #include "sched-int.h"
88 #include "sbitmap.h"
89 #include "fibheap.h"
90 #include "opts.h"
91 #include "diagnostic.h"
92 #include "dumpfile.h"
93 #include "tree-pass.h"
94 #include "wide-int.h"
95 #include "context.h"
96 #include "pass_manager.h"
97 #include "target-globals.h"
98 #include "tree-vectorizer.h"
99 #include "shrink-wrap.h"
100 #include "builtins.h"
101 #include "rtl-iter.h"
102
103 static rtx legitimize_dllimport_symbol (rtx, bool);
104 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
105 static rtx legitimize_pe_coff_symbol (rtx, bool);
106
107 #ifndef CHECK_STACK_LIMIT
108 #define CHECK_STACK_LIMIT (-1)
109 #endif
110
111 /* Return the index of the given mode in the mult and division cost tables. */
112 #define MODE_INDEX(mode) \
113 ((mode) == QImode ? 0 \
114 : (mode) == HImode ? 1 \
115 : (mode) == SImode ? 2 \
116 : (mode) == DImode ? 3 \
117 : 4)
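/* For example, MODE_INDEX (SImode) evaluates to 2 and selects the SI entry
   of the mult and divide cost arrays below; any mode other than QImode,
   HImode, SImode or DImode falls through to index 4, the "other" slot.  */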
118
119 /* Processor costs (relative to an add) */
120 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
121 #define COSTS_N_BYTES(N) ((N) * 2)
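/* Illustration of the scale above: with COSTS_N_INSNS (N) assumed to be
   (N) * 4 and an add taking 2 bytes, COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1),
   i.e. a two-byte instruction in the size-tuning table below carries the same
   weight as a single-insn cost does in the speed-tuning tables.  */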
122
123 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
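/* A rough guide to reading the stringop_algs initializers that follow (based
   on the structure layout in i386.h): the first field is the algorithm used
   when the block size is unknown at compile time; each {max, alg, noalign}
   triple then selects the algorithm for known sizes up to MAX bytes, with -1
   meaning "all larger sizes".  Element [0] of each pair of tables is used for
   32-bit code and element [1] for 64-bit code; DUMMY_STRINGOP_ALGS fills a
   64-bit entry that is not expected to be used.  */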
124
125 static stringop_algs ix86_size_memcpy[2] = {
126 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
128 static stringop_algs ix86_size_memset[2] = {
129 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
130 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
131
132 const
133 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
134 COSTS_N_BYTES (2), /* cost of an add instruction */
135 COSTS_N_BYTES (3), /* cost of a lea instruction */
136 COSTS_N_BYTES (2), /* variable shift costs */
137 COSTS_N_BYTES (3), /* constant shift costs */
138 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
139 COSTS_N_BYTES (3), /* HI */
140 COSTS_N_BYTES (3), /* SI */
141 COSTS_N_BYTES (3), /* DI */
142 COSTS_N_BYTES (5)}, /* other */
143 0, /* cost of multiply per each bit set */
144 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
145 COSTS_N_BYTES (3), /* HI */
146 COSTS_N_BYTES (3), /* SI */
147 COSTS_N_BYTES (3), /* DI */
148 COSTS_N_BYTES (5)}, /* other */
149 COSTS_N_BYTES (3), /* cost of movsx */
150 COSTS_N_BYTES (3), /* cost of movzx */
151 0, /* "large" insn */
152 2, /* MOVE_RATIO */
153 2, /* cost for loading QImode using movzbl */
154 {2, 2, 2}, /* cost of loading integer registers
155 in QImode, HImode and SImode.
156 Relative to reg-reg move (2). */
157 {2, 2, 2}, /* cost of storing integer registers */
158 2, /* cost of reg,reg fld/fst */
159 {2, 2, 2}, /* cost of loading fp registers
160 in SFmode, DFmode and XFmode */
161 {2, 2, 2}, /* cost of storing fp registers
162 in SFmode, DFmode and XFmode */
163 3, /* cost of moving MMX register */
164 {3, 3}, /* cost of loading MMX registers
165 in SImode and DImode */
166 {3, 3}, /* cost of storing MMX registers
167 in SImode and DImode */
168 3, /* cost of moving SSE register */
169 {3, 3, 3}, /* cost of loading SSE registers
170 in SImode, DImode and TImode */
171 {3, 3, 3}, /* cost of storing SSE registers
172 in SImode, DImode and TImode */
173 3, /* MMX or SSE register to integer */
174 0, /* size of l1 cache */
175 0, /* size of l2 cache */
176 0, /* size of prefetch block */
177 0, /* number of parallel prefetches */
178 2, /* Branch cost */
179 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
180 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
181 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
182 COSTS_N_BYTES (2), /* cost of FABS instruction. */
183 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
184 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
185 ix86_size_memcpy,
186 ix86_size_memset,
187 1, /* scalar_stmt_cost. */
188 1, /* scalar load_cost. */
189 1, /* scalar_store_cost. */
190 1, /* vec_stmt_cost. */
191 1, /* vec_to_scalar_cost. */
192 1, /* scalar_to_vec_cost. */
193 1, /* vec_align_load_cost. */
194 1, /* vec_unalign_load_cost. */
195 1, /* vec_store_cost. */
196 1, /* cond_taken_branch_cost. */
197 1, /* cond_not_taken_branch_cost. */
198 };
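/* Note: a processor_costs table such as the one above is consulted through
   the ix86_cost pointer (roughly, option handling points it at ix86_size_cost
   when optimizing for size and at the table of the selected tuning target
   otherwise), so the fields are relative weights rather than cycle counts.  */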
199
200 /* Processor costs (relative to an add) */
201 static stringop_algs i386_memcpy[2] = {
202 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
203 DUMMY_STRINGOP_ALGS};
204 static stringop_algs i386_memset[2] = {
205 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
206 DUMMY_STRINGOP_ALGS};
207
208 static const
209 struct processor_costs i386_cost = { /* 386 specific costs */
210 COSTS_N_INSNS (1), /* cost of an add instruction */
211 COSTS_N_INSNS (1), /* cost of a lea instruction */
212 COSTS_N_INSNS (3), /* variable shift costs */
213 COSTS_N_INSNS (2), /* constant shift costs */
214 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
215 COSTS_N_INSNS (6), /* HI */
216 COSTS_N_INSNS (6), /* SI */
217 COSTS_N_INSNS (6), /* DI */
218 COSTS_N_INSNS (6)}, /* other */
219 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
220 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
221 COSTS_N_INSNS (23), /* HI */
222 COSTS_N_INSNS (23), /* SI */
223 COSTS_N_INSNS (23), /* DI */
224 COSTS_N_INSNS (23)}, /* other */
225 COSTS_N_INSNS (3), /* cost of movsx */
226 COSTS_N_INSNS (2), /* cost of movzx */
227 15, /* "large" insn */
228 3, /* MOVE_RATIO */
229 4, /* cost for loading QImode using movzbl */
230 {2, 4, 2}, /* cost of loading integer registers
231 in QImode, HImode and SImode.
232 Relative to reg-reg move (2). */
233 {2, 4, 2}, /* cost of storing integer registers */
234 2, /* cost of reg,reg fld/fst */
235 {8, 8, 8}, /* cost of loading fp registers
236 in SFmode, DFmode and XFmode */
237 {8, 8, 8}, /* cost of storing fp registers
238 in SFmode, DFmode and XFmode */
239 2, /* cost of moving MMX register */
240 {4, 8}, /* cost of loading MMX registers
241 in SImode and DImode */
242 {4, 8}, /* cost of storing MMX registers
243 in SImode and DImode */
244 2, /* cost of moving SSE register */
245 {4, 8, 16}, /* cost of loading SSE registers
246 in SImode, DImode and TImode */
247 {4, 8, 16}, /* cost of storing SSE registers
248 in SImode, DImode and TImode */
249 3, /* MMX or SSE register to integer */
250 0, /* size of l1 cache */
251 0, /* size of l2 cache */
252 0, /* size of prefetch block */
253 0, /* number of parallel prefetches */
254 1, /* Branch cost */
255 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
256 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
257 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
258 COSTS_N_INSNS (22), /* cost of FABS instruction. */
259 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
260 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
261 i386_memcpy,
262 i386_memset,
263 1, /* scalar_stmt_cost. */
264 1, /* scalar load_cost. */
265 1, /* scalar_store_cost. */
266 1, /* vec_stmt_cost. */
267 1, /* vec_to_scalar_cost. */
268 1, /* scalar_to_vec_cost. */
269 1, /* vec_align_load_cost. */
270 2, /* vec_unalign_load_cost. */
271 1, /* vec_store_cost. */
272 3, /* cond_taken_branch_cost. */
273 1, /* cond_not_taken_branch_cost. */
274 };
275
276 static stringop_algs i486_memcpy[2] = {
277 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
278 DUMMY_STRINGOP_ALGS};
279 static stringop_algs i486_memset[2] = {
280 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
281 DUMMY_STRINGOP_ALGS};
282
283 static const
284 struct processor_costs i486_cost = { /* 486 specific costs */
285 COSTS_N_INSNS (1), /* cost of an add instruction */
286 COSTS_N_INSNS (1), /* cost of a lea instruction */
287 COSTS_N_INSNS (3), /* variable shift costs */
288 COSTS_N_INSNS (2), /* constant shift costs */
289 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
290 COSTS_N_INSNS (12), /* HI */
291 COSTS_N_INSNS (12), /* SI */
292 COSTS_N_INSNS (12), /* DI */
293 COSTS_N_INSNS (12)}, /* other */
294 1, /* cost of multiply per each bit set */
295 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
296 COSTS_N_INSNS (40), /* HI */
297 COSTS_N_INSNS (40), /* SI */
298 COSTS_N_INSNS (40), /* DI */
299 COSTS_N_INSNS (40)}, /* other */
300 COSTS_N_INSNS (3), /* cost of movsx */
301 COSTS_N_INSNS (2), /* cost of movzx */
302 15, /* "large" insn */
303 3, /* MOVE_RATIO */
304 4, /* cost for loading QImode using movzbl */
305 {2, 4, 2}, /* cost of loading integer registers
306 in QImode, HImode and SImode.
307 Relative to reg-reg move (2). */
308 {2, 4, 2}, /* cost of storing integer registers */
309 2, /* cost of reg,reg fld/fst */
310 {8, 8, 8}, /* cost of loading fp registers
311 in SFmode, DFmode and XFmode */
312 {8, 8, 8}, /* cost of storing fp registers
313 in SFmode, DFmode and XFmode */
314 2, /* cost of moving MMX register */
315 {4, 8}, /* cost of loading MMX registers
316 in SImode and DImode */
317 {4, 8}, /* cost of storing MMX registers
318 in SImode and DImode */
319 2, /* cost of moving SSE register */
320 {4, 8, 16}, /* cost of loading SSE registers
321 in SImode, DImode and TImode */
322 {4, 8, 16}, /* cost of storing SSE registers
323 in SImode, DImode and TImode */
324 3, /* MMX or SSE register to integer */
325 4, /* size of l1 cache. The 486 has an 8kB cache
326 shared between code and data, so 4kB is
327 not really precise. */
328 4, /* size of l2 cache */
329 0, /* size of prefetch block */
330 0, /* number of parallel prefetches */
331 1, /* Branch cost */
332 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
333 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
334 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
335 COSTS_N_INSNS (3), /* cost of FABS instruction. */
336 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
337 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
338 i486_memcpy,
339 i486_memset,
340 1, /* scalar_stmt_cost. */
341 1, /* scalar load_cost. */
342 1, /* scalar_store_cost. */
343 1, /* vec_stmt_cost. */
344 1, /* vec_to_scalar_cost. */
345 1, /* scalar_to_vec_cost. */
346 1, /* vec_align_load_cost. */
347 2, /* vec_unalign_load_cost. */
348 1, /* vec_store_cost. */
349 3, /* cond_taken_branch_cost. */
350 1, /* cond_not_taken_branch_cost. */
351 };
352
353 static stringop_algs pentium_memcpy[2] = {
354 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
355 DUMMY_STRINGOP_ALGS};
356 static stringop_algs pentium_memset[2] = {
357 {libcall, {{-1, rep_prefix_4_byte, false}}},
358 DUMMY_STRINGOP_ALGS};
359
360 static const
361 struct processor_costs pentium_cost = {
362 COSTS_N_INSNS (1), /* cost of an add instruction */
363 COSTS_N_INSNS (1), /* cost of a lea instruction */
364 COSTS_N_INSNS (4), /* variable shift costs */
365 COSTS_N_INSNS (1), /* constant shift costs */
366 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
367 COSTS_N_INSNS (11), /* HI */
368 COSTS_N_INSNS (11), /* SI */
369 COSTS_N_INSNS (11), /* DI */
370 COSTS_N_INSNS (11)}, /* other */
371 0, /* cost of multiply per each bit set */
372 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
373 COSTS_N_INSNS (25), /* HI */
374 COSTS_N_INSNS (25), /* SI */
375 COSTS_N_INSNS (25), /* DI */
376 COSTS_N_INSNS (25)}, /* other */
377 COSTS_N_INSNS (3), /* cost of movsx */
378 COSTS_N_INSNS (2), /* cost of movzx */
379 8, /* "large" insn */
380 6, /* MOVE_RATIO */
381 6, /* cost for loading QImode using movzbl */
382 {2, 4, 2}, /* cost of loading integer registers
383 in QImode, HImode and SImode.
384 Relative to reg-reg move (2). */
385 {2, 4, 2}, /* cost of storing integer registers */
386 2, /* cost of reg,reg fld/fst */
387 {2, 2, 6}, /* cost of loading fp registers
388 in SFmode, DFmode and XFmode */
389 {4, 4, 6}, /* cost of storing fp registers
390 in SFmode, DFmode and XFmode */
391 8, /* cost of moving MMX register */
392 {8, 8}, /* cost of loading MMX registers
393 in SImode and DImode */
394 {8, 8}, /* cost of storing MMX registers
395 in SImode and DImode */
396 2, /* cost of moving SSE register */
397 {4, 8, 16}, /* cost of loading SSE registers
398 in SImode, DImode and TImode */
399 {4, 8, 16}, /* cost of storing SSE registers
400 in SImode, DImode and TImode */
401 3, /* MMX or SSE register to integer */
402 8, /* size of l1 cache. */
403 8, /* size of l2 cache */
404 0, /* size of prefetch block */
405 0, /* number of parallel prefetches */
406 2, /* Branch cost */
407 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
408 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
409 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
410 COSTS_N_INSNS (1), /* cost of FABS instruction. */
411 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
412 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
413 pentium_memcpy,
414 pentium_memset,
415 1, /* scalar_stmt_cost. */
416 1, /* scalar load_cost. */
417 1, /* scalar_store_cost. */
418 1, /* vec_stmt_cost. */
419 1, /* vec_to_scalar_cost. */
420 1, /* scalar_to_vec_cost. */
421 1, /* vec_align_load_cost. */
422 2, /* vec_unalign_load_cost. */
423 1, /* vec_store_cost. */
424 3, /* cond_taken_branch_cost. */
425 1, /* cond_not_taken_branch_cost. */
426 };
427
428 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
429 (we ensure the alignment). For small blocks an inline loop is still a
430 noticeable win; for bigger blocks either rep movsl or rep movsb is the
431 way to go. Rep movsb apparently has a more expensive startup time in the
432 CPU, but after 4K the difference is down in the noise. */
433 static stringop_algs pentiumpro_memcpy[2] = {
434 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
435 {8192, rep_prefix_4_byte, false},
436 {-1, rep_prefix_1_byte, false}}},
437 DUMMY_STRINGOP_ALGS};
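/* Reading pentiumpro_memcpy above in those terms: known copies of up to 128
   bytes use an inline loop, up to 1024 bytes an unrolled loop, up to 8192
   bytes rep movsl (rep_prefix_4_byte), and anything larger rep movsb
   (rep_prefix_1_byte); unknown sizes default to rep_prefix_4_byte.  */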
438 static stringop_algs pentiumpro_memset[2] = {
439 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
440 {8192, rep_prefix_4_byte, false},
441 {-1, libcall, false}}},
442 DUMMY_STRINGOP_ALGS};
443 static const
444 struct processor_costs pentiumpro_cost = {
445 COSTS_N_INSNS (1), /* cost of an add instruction */
446 COSTS_N_INSNS (1), /* cost of a lea instruction */
447 COSTS_N_INSNS (1), /* variable shift costs */
448 COSTS_N_INSNS (1), /* constant shift costs */
449 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
450 COSTS_N_INSNS (4), /* HI */
451 COSTS_N_INSNS (4), /* SI */
452 COSTS_N_INSNS (4), /* DI */
453 COSTS_N_INSNS (4)}, /* other */
454 0, /* cost of multiply per each bit set */
455 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
456 COSTS_N_INSNS (17), /* HI */
457 COSTS_N_INSNS (17), /* SI */
458 COSTS_N_INSNS (17), /* DI */
459 COSTS_N_INSNS (17)}, /* other */
460 COSTS_N_INSNS (1), /* cost of movsx */
461 COSTS_N_INSNS (1), /* cost of movzx */
462 8, /* "large" insn */
463 6, /* MOVE_RATIO */
464 2, /* cost for loading QImode using movzbl */
465 {4, 4, 4}, /* cost of loading integer registers
466 in QImode, HImode and SImode.
467 Relative to reg-reg move (2). */
468 {2, 2, 2}, /* cost of storing integer registers */
469 2, /* cost of reg,reg fld/fst */
470 {2, 2, 6}, /* cost of loading fp registers
471 in SFmode, DFmode and XFmode */
472 {4, 4, 6}, /* cost of storing fp registers
473 in SFmode, DFmode and XFmode */
474 2, /* cost of moving MMX register */
475 {2, 2}, /* cost of loading MMX registers
476 in SImode and DImode */
477 {2, 2}, /* cost of storing MMX registers
478 in SImode and DImode */
479 2, /* cost of moving SSE register */
480 {2, 2, 8}, /* cost of loading SSE registers
481 in SImode, DImode and TImode */
482 {2, 2, 8}, /* cost of storing SSE registers
483 in SImode, DImode and TImode */
484 3, /* MMX or SSE register to integer */
485 8, /* size of l1 cache. */
486 256, /* size of l2 cache */
487 32, /* size of prefetch block */
488 6, /* number of parallel prefetches */
489 2, /* Branch cost */
490 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
491 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
492 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
493 COSTS_N_INSNS (2), /* cost of FABS instruction. */
494 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
495 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
496 pentiumpro_memcpy,
497 pentiumpro_memset,
498 1, /* scalar_stmt_cost. */
499 1, /* scalar load_cost. */
500 1, /* scalar_store_cost. */
501 1, /* vec_stmt_cost. */
502 1, /* vec_to_scalar_cost. */
503 1, /* scalar_to_vec_cost. */
504 1, /* vec_align_load_cost. */
505 2, /* vec_unalign_load_cost. */
506 1, /* vec_store_cost. */
507 3, /* cond_taken_branch_cost. */
508 1, /* cond_not_taken_branch_cost. */
509 };
510
511 static stringop_algs geode_memcpy[2] = {
512 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
513 DUMMY_STRINGOP_ALGS};
514 static stringop_algs geode_memset[2] = {
515 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
516 DUMMY_STRINGOP_ALGS};
517 static const
518 struct processor_costs geode_cost = {
519 COSTS_N_INSNS (1), /* cost of an add instruction */
520 COSTS_N_INSNS (1), /* cost of a lea instruction */
521 COSTS_N_INSNS (2), /* variable shift costs */
522 COSTS_N_INSNS (1), /* constant shift costs */
523 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
524 COSTS_N_INSNS (4), /* HI */
525 COSTS_N_INSNS (7), /* SI */
526 COSTS_N_INSNS (7), /* DI */
527 COSTS_N_INSNS (7)}, /* other */
528 0, /* cost of multiply per each bit set */
529 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
530 COSTS_N_INSNS (23), /* HI */
531 COSTS_N_INSNS (39), /* SI */
532 COSTS_N_INSNS (39), /* DI */
533 COSTS_N_INSNS (39)}, /* other */
534 COSTS_N_INSNS (1), /* cost of movsx */
535 COSTS_N_INSNS (1), /* cost of movzx */
536 8, /* "large" insn */
537 4, /* MOVE_RATIO */
538 1, /* cost for loading QImode using movzbl */
539 {1, 1, 1}, /* cost of loading integer registers
540 in QImode, HImode and SImode.
541 Relative to reg-reg move (2). */
542 {1, 1, 1}, /* cost of storing integer registers */
543 1, /* cost of reg,reg fld/fst */
544 {1, 1, 1}, /* cost of loading fp registers
545 in SFmode, DFmode and XFmode */
546 {4, 6, 6}, /* cost of storing fp registers
547 in SFmode, DFmode and XFmode */
548
549 1, /* cost of moving MMX register */
550 {1, 1}, /* cost of loading MMX registers
551 in SImode and DImode */
552 {1, 1}, /* cost of storing MMX registers
553 in SImode and DImode */
554 1, /* cost of moving SSE register */
555 {1, 1, 1}, /* cost of loading SSE registers
556 in SImode, DImode and TImode */
557 {1, 1, 1}, /* cost of storing SSE registers
558 in SImode, DImode and TImode */
559 1, /* MMX or SSE register to integer */
560 64, /* size of l1 cache. */
561 128, /* size of l2 cache. */
562 32, /* size of prefetch block */
563 1, /* number of parallel prefetches */
564 1, /* Branch cost */
565 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
566 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
567 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
568 COSTS_N_INSNS (1), /* cost of FABS instruction. */
569 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
570 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
571 geode_memcpy,
572 geode_memset,
573 1, /* scalar_stmt_cost. */
574 1, /* scalar load_cost. */
575 1, /* scalar_store_cost. */
576 1, /* vec_stmt_cost. */
577 1, /* vec_to_scalar_cost. */
578 1, /* scalar_to_vec_cost. */
579 1, /* vec_align_load_cost. */
580 2, /* vec_unalign_load_cost. */
581 1, /* vec_store_cost. */
582 3, /* cond_taken_branch_cost. */
583 1, /* cond_not_taken_branch_cost. */
584 };
585
586 static stringop_algs k6_memcpy[2] = {
587 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
588 DUMMY_STRINGOP_ALGS};
589 static stringop_algs k6_memset[2] = {
590 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
591 DUMMY_STRINGOP_ALGS};
592 static const
593 struct processor_costs k6_cost = {
594 COSTS_N_INSNS (1), /* cost of an add instruction */
595 COSTS_N_INSNS (2), /* cost of a lea instruction */
596 COSTS_N_INSNS (1), /* variable shift costs */
597 COSTS_N_INSNS (1), /* constant shift costs */
598 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
599 COSTS_N_INSNS (3), /* HI */
600 COSTS_N_INSNS (3), /* SI */
601 COSTS_N_INSNS (3), /* DI */
602 COSTS_N_INSNS (3)}, /* other */
603 0, /* cost of multiply per each bit set */
604 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
605 COSTS_N_INSNS (18), /* HI */
606 COSTS_N_INSNS (18), /* SI */
607 COSTS_N_INSNS (18), /* DI */
608 COSTS_N_INSNS (18)}, /* other */
609 COSTS_N_INSNS (2), /* cost of movsx */
610 COSTS_N_INSNS (2), /* cost of movzx */
611 8, /* "large" insn */
612 4, /* MOVE_RATIO */
613 3, /* cost for loading QImode using movzbl */
614 {4, 5, 4}, /* cost of loading integer registers
615 in QImode, HImode and SImode.
616 Relative to reg-reg move (2). */
617 {2, 3, 2}, /* cost of storing integer registers */
618 4, /* cost of reg,reg fld/fst */
619 {6, 6, 6}, /* cost of loading fp registers
620 in SFmode, DFmode and XFmode */
621 {4, 4, 4}, /* cost of storing fp registers
622 in SFmode, DFmode and XFmode */
623 2, /* cost of moving MMX register */
624 {2, 2}, /* cost of loading MMX registers
625 in SImode and DImode */
626 {2, 2}, /* cost of storing MMX registers
627 in SImode and DImode */
628 2, /* cost of moving SSE register */
629 {2, 2, 8}, /* cost of loading SSE registers
630 in SImode, DImode and TImode */
631 {2, 2, 8}, /* cost of storing SSE registers
632 in SImode, DImode and TImode */
633 6, /* MMX or SSE register to integer */
634 32, /* size of l1 cache. */
635 32, /* size of l2 cache. Some models
636 have an integrated l2 cache, but
637 optimizing for k6 is not important
638 enough to worry about that. */
639 32, /* size of prefetch block */
640 1, /* number of parallel prefetches */
641 1, /* Branch cost */
642 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
643 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
644 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
645 COSTS_N_INSNS (2), /* cost of FABS instruction. */
646 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
647 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
648 k6_memcpy,
649 k6_memset,
650 1, /* scalar_stmt_cost. */
651 1, /* scalar load_cost. */
652 1, /* scalar_store_cost. */
653 1, /* vec_stmt_cost. */
654 1, /* vec_to_scalar_cost. */
655 1, /* scalar_to_vec_cost. */
656 1, /* vec_align_load_cost. */
657 2, /* vec_unalign_load_cost. */
658 1, /* vec_store_cost. */
659 3, /* cond_taken_branch_cost. */
660 1, /* cond_not_taken_branch_cost. */
661 };
662
663 /* For some reason, Athlon deals better with the REP prefix (relative to
664 loops) than K8 does. Alignment becomes important after 8 bytes for memcpy
665 and 128 bytes for memset. */
666 static stringop_algs athlon_memcpy[2] = {
667 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
668 DUMMY_STRINGOP_ALGS};
669 static stringop_algs athlon_memset[2] = {
670 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
671 DUMMY_STRINGOP_ALGS};
672 static const
673 struct processor_costs athlon_cost = {
674 COSTS_N_INSNS (1), /* cost of an add instruction */
675 COSTS_N_INSNS (2), /* cost of a lea instruction */
676 COSTS_N_INSNS (1), /* variable shift costs */
677 COSTS_N_INSNS (1), /* constant shift costs */
678 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
679 COSTS_N_INSNS (5), /* HI */
680 COSTS_N_INSNS (5), /* SI */
681 COSTS_N_INSNS (5), /* DI */
682 COSTS_N_INSNS (5)}, /* other */
683 0, /* cost of multiply per each bit set */
684 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
685 COSTS_N_INSNS (26), /* HI */
686 COSTS_N_INSNS (42), /* SI */
687 COSTS_N_INSNS (74), /* DI */
688 COSTS_N_INSNS (74)}, /* other */
689 COSTS_N_INSNS (1), /* cost of movsx */
690 COSTS_N_INSNS (1), /* cost of movzx */
691 8, /* "large" insn */
692 9, /* MOVE_RATIO */
693 4, /* cost for loading QImode using movzbl */
694 {3, 4, 3}, /* cost of loading integer registers
695 in QImode, HImode and SImode.
696 Relative to reg-reg move (2). */
697 {3, 4, 3}, /* cost of storing integer registers */
698 4, /* cost of reg,reg fld/fst */
699 {4, 4, 12}, /* cost of loading fp registers
700 in SFmode, DFmode and XFmode */
701 {6, 6, 8}, /* cost of storing fp registers
702 in SFmode, DFmode and XFmode */
703 2, /* cost of moving MMX register */
704 {4, 4}, /* cost of loading MMX registers
705 in SImode and DImode */
706 {4, 4}, /* cost of storing MMX registers
707 in SImode and DImode */
708 2, /* cost of moving SSE register */
709 {4, 4, 6}, /* cost of loading SSE registers
710 in SImode, DImode and TImode */
711 {4, 4, 5}, /* cost of storing SSE registers
712 in SImode, DImode and TImode */
713 5, /* MMX or SSE register to integer */
714 64, /* size of l1 cache. */
715 256, /* size of l2 cache. */
716 64, /* size of prefetch block */
717 6, /* number of parallel prefetches */
718 5, /* Branch cost */
719 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
720 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
721 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
722 COSTS_N_INSNS (2), /* cost of FABS instruction. */
723 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
724 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
725 athlon_memcpy,
726 athlon_memset,
727 1, /* scalar_stmt_cost. */
728 1, /* scalar load_cost. */
729 1, /* scalar_store_cost. */
730 1, /* vec_stmt_cost. */
731 1, /* vec_to_scalar_cost. */
732 1, /* scalar_to_vec_cost. */
733 1, /* vec_align_load_cost. */
734 2, /* vec_unalign_load_cost. */
735 1, /* vec_store_cost. */
736 3, /* cond_taken_branch_cost. */
737 1, /* cond_not_taken_branch_cost. */
738 };
739
740 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
741 small blocks it is better to use a loop. For large blocks, a libcall can
742 use non-temporal accesses and beat inlined code considerably. */
743 static stringop_algs k8_memcpy[2] = {
744 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
745 {-1, rep_prefix_4_byte, false}}},
746 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
747 {-1, libcall, false}}}};
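/* In 64-bit code the second k8_memcpy entry applies: an inline loop for up to
   16 bytes, rep movsq (rep_prefix_8_byte) for up to 8192 bytes, and a library
   call beyond that, matching the note above about large blocks.  */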
748 static stringop_algs k8_memset[2] = {
749 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
750 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
751 {libcall, {{48, unrolled_loop, false},
752 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
753 static const
754 struct processor_costs k8_cost = {
755 COSTS_N_INSNS (1), /* cost of an add instruction */
756 COSTS_N_INSNS (2), /* cost of a lea instruction */
757 COSTS_N_INSNS (1), /* variable shift costs */
758 COSTS_N_INSNS (1), /* constant shift costs */
759 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
760 COSTS_N_INSNS (4), /* HI */
761 COSTS_N_INSNS (3), /* SI */
762 COSTS_N_INSNS (4), /* DI */
763 COSTS_N_INSNS (5)}, /* other */
764 0, /* cost of multiply per each bit set */
765 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
766 COSTS_N_INSNS (26), /* HI */
767 COSTS_N_INSNS (42), /* SI */
768 COSTS_N_INSNS (74), /* DI */
769 COSTS_N_INSNS (74)}, /* other */
770 COSTS_N_INSNS (1), /* cost of movsx */
771 COSTS_N_INSNS (1), /* cost of movzx */
772 8, /* "large" insn */
773 9, /* MOVE_RATIO */
774 4, /* cost for loading QImode using movzbl */
775 {3, 4, 3}, /* cost of loading integer registers
776 in QImode, HImode and SImode.
777 Relative to reg-reg move (2). */
778 {3, 4, 3}, /* cost of storing integer registers */
779 4, /* cost of reg,reg fld/fst */
780 {4, 4, 12}, /* cost of loading fp registers
781 in SFmode, DFmode and XFmode */
782 {6, 6, 8}, /* cost of storing fp registers
783 in SFmode, DFmode and XFmode */
784 2, /* cost of moving MMX register */
785 {3, 3}, /* cost of loading MMX registers
786 in SImode and DImode */
787 {4, 4}, /* cost of storing MMX registers
788 in SImode and DImode */
789 2, /* cost of moving SSE register */
790 {4, 3, 6}, /* cost of loading SSE registers
791 in SImode, DImode and TImode */
792 {4, 4, 5}, /* cost of storing SSE registers
793 in SImode, DImode and TImode */
794 5, /* MMX or SSE register to integer */
795 64, /* size of l1 cache. */
796 512, /* size of l2 cache. */
797 64, /* size of prefetch block */
798 /* New AMD processors never drop prefetches; if they cannot be performed
799 immediately, they are queued. We set the number of simultaneous prefetches
800 to a large constant to reflect this (leaving the number of prefetches
801 completely unlimited is probably not a good idea either, as their execution
802 also takes some time). */
803 100, /* number of parallel prefetches */
804 3, /* Branch cost */
805 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
806 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
807 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
808 COSTS_N_INSNS (2), /* cost of FABS instruction. */
809 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
810 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
811
812 k8_memcpy,
813 k8_memset,
814 4, /* scalar_stmt_cost. */
815 2, /* scalar load_cost. */
816 2, /* scalar_store_cost. */
817 5, /* vec_stmt_cost. */
818 0, /* vec_to_scalar_cost. */
819 2, /* scalar_to_vec_cost. */
820 2, /* vec_align_load_cost. */
821 3, /* vec_unalign_load_cost. */
822 3, /* vec_store_cost. */
823 3, /* cond_taken_branch_cost. */
824 2, /* cond_not_taken_branch_cost. */
825 };
826
827 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
828 very small blocks it is better to use a loop. For large blocks, a libcall can
829 use non-temporal accesses and beat inlined code considerably. */
830 static stringop_algs amdfam10_memcpy[2] = {
831 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
832 {-1, rep_prefix_4_byte, false}}},
833 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
834 {-1, libcall, false}}}};
835 static stringop_algs amdfam10_memset[2] = {
836 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
837 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
838 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
839 {-1, libcall, false}}}};
840 struct processor_costs amdfam10_cost = {
841 COSTS_N_INSNS (1), /* cost of an add instruction */
842 COSTS_N_INSNS (2), /* cost of a lea instruction */
843 COSTS_N_INSNS (1), /* variable shift costs */
844 COSTS_N_INSNS (1), /* constant shift costs */
845 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
846 COSTS_N_INSNS (4), /* HI */
847 COSTS_N_INSNS (3), /* SI */
848 COSTS_N_INSNS (4), /* DI */
849 COSTS_N_INSNS (5)}, /* other */
850 0, /* cost of multiply per each bit set */
851 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
852 COSTS_N_INSNS (35), /* HI */
853 COSTS_N_INSNS (51), /* SI */
854 COSTS_N_INSNS (83), /* DI */
855 COSTS_N_INSNS (83)}, /* other */
856 COSTS_N_INSNS (1), /* cost of movsx */
857 COSTS_N_INSNS (1), /* cost of movzx */
858 8, /* "large" insn */
859 9, /* MOVE_RATIO */
860 4, /* cost for loading QImode using movzbl */
861 {3, 4, 3}, /* cost of loading integer registers
862 in QImode, HImode and SImode.
863 Relative to reg-reg move (2). */
864 {3, 4, 3}, /* cost of storing integer registers */
865 4, /* cost of reg,reg fld/fst */
866 {4, 4, 12}, /* cost of loading fp registers
867 in SFmode, DFmode and XFmode */
868 {6, 6, 8}, /* cost of storing fp registers
869 in SFmode, DFmode and XFmode */
870 2, /* cost of moving MMX register */
871 {3, 3}, /* cost of loading MMX registers
872 in SImode and DImode */
873 {4, 4}, /* cost of storing MMX registers
874 in SImode and DImode */
875 2, /* cost of moving SSE register */
876 {4, 4, 3}, /* cost of loading SSE registers
877 in SImode, DImode and TImode */
878 {4, 4, 5}, /* cost of storing SSE registers
879 in SImode, DImode and TImode */
880 3, /* MMX or SSE register to integer */
881 /* On K8:
882 MOVD reg64, xmmreg Double FSTORE 4
883 MOVD reg32, xmmreg Double FSTORE 4
884 On AMDFAM10:
885 MOVD reg64, xmmreg Double FADD 3
886 1/1 1/1
887 MOVD reg32, xmmreg Double FADD 3
888 1/1 1/1 */
889 64, /* size of l1 cache. */
890 512, /* size of l2 cache. */
891 64, /* size of prefetch block */
892 /* New AMD processors never drop prefetches; if they cannot be performed
893 immediately, they are queued. We set the number of simultaneous prefetches
894 to a large constant to reflect this (leaving the number of prefetches
895 completely unlimited is probably not a good idea either, as their execution
896 also takes some time). */
897 100, /* number of parallel prefetches */
898 2, /* Branch cost */
899 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
900 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
901 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
902 COSTS_N_INSNS (2), /* cost of FABS instruction. */
903 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
904 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
905
906 amdfam10_memcpy,
907 amdfam10_memset,
908 4, /* scalar_stmt_cost. */
909 2, /* scalar load_cost. */
910 2, /* scalar_store_cost. */
911 6, /* vec_stmt_cost. */
912 0, /* vec_to_scalar_cost. */
913 2, /* scalar_to_vec_cost. */
914 2, /* vec_align_load_cost. */
915 2, /* vec_unalign_load_cost. */
916 2, /* vec_store_cost. */
917 2, /* cond_taken_branch_cost. */
918 1, /* cond_not_taken_branch_cost. */
919 };
920
921 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
922 very small blocks it is better to use a loop. For large blocks, a libcall
923 can use non-temporal accesses and beat inlined code considerably. */
924 static stringop_algs bdver1_memcpy[2] = {
925 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
926 {-1, rep_prefix_4_byte, false}}},
927 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
928 {-1, libcall, false}}}};
929 static stringop_algs bdver1_memset[2] = {
930 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
931 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
932 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
933 {-1, libcall, false}}}};
934
935 const struct processor_costs bdver1_cost = {
936 COSTS_N_INSNS (1), /* cost of an add instruction */
937 COSTS_N_INSNS (1), /* cost of a lea instruction */
938 COSTS_N_INSNS (1), /* variable shift costs */
939 COSTS_N_INSNS (1), /* constant shift costs */
940 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
941 COSTS_N_INSNS (4), /* HI */
942 COSTS_N_INSNS (4), /* SI */
943 COSTS_N_INSNS (6), /* DI */
944 COSTS_N_INSNS (6)}, /* other */
945 0, /* cost of multiply per each bit set */
946 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
947 COSTS_N_INSNS (35), /* HI */
948 COSTS_N_INSNS (51), /* SI */
949 COSTS_N_INSNS (83), /* DI */
950 COSTS_N_INSNS (83)}, /* other */
951 COSTS_N_INSNS (1), /* cost of movsx */
952 COSTS_N_INSNS (1), /* cost of movzx */
953 8, /* "large" insn */
954 9, /* MOVE_RATIO */
955 4, /* cost for loading QImode using movzbl */
956 {5, 5, 4}, /* cost of loading integer registers
957 in QImode, HImode and SImode.
958 Relative to reg-reg move (2). */
959 {4, 4, 4}, /* cost of storing integer registers */
960 2, /* cost of reg,reg fld/fst */
961 {5, 5, 12}, /* cost of loading fp registers
962 in SFmode, DFmode and XFmode */
963 {4, 4, 8}, /* cost of storing fp registers
964 in SFmode, DFmode and XFmode */
965 2, /* cost of moving MMX register */
966 {4, 4}, /* cost of loading MMX registers
967 in SImode and DImode */
968 {4, 4}, /* cost of storing MMX registers
969 in SImode and DImode */
970 2, /* cost of moving SSE register */
971 {4, 4, 4}, /* cost of loading SSE registers
972 in SImode, DImode and TImode */
973 {4, 4, 4}, /* cost of storing SSE registers
974 in SImode, DImode and TImode */
975 2, /* MMX or SSE register to integer */
976 /* On K8:
977 MOVD reg64, xmmreg Double FSTORE 4
978 MOVD reg32, xmmreg Double FSTORE 4
979 On AMDFAM10:
980 MOVD reg64, xmmreg Double FADD 3
981 1/1 1/1
982 MOVD reg32, xmmreg Double FADD 3
983 1/1 1/1 */
984 16, /* size of l1 cache. */
985 2048, /* size of l2 cache. */
986 64, /* size of prefetch block */
987 /* New AMD processors never drop prefetches; if they cannot be performed
988 immediately, they are queued. We set the number of simultaneous prefetches
989 to a large constant to reflect this (leaving the number of prefetches
990 completely unlimited is probably not a good idea either, as their execution
991 also takes some time). */
992 100, /* number of parallel prefetches */
993 2, /* Branch cost */
994 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
995 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
996 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
997 COSTS_N_INSNS (2), /* cost of FABS instruction. */
998 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
999 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1000
1001 bdver1_memcpy,
1002 bdver1_memset,
1003 6, /* scalar_stmt_cost. */
1004 4, /* scalar load_cost. */
1005 4, /* scalar_store_cost. */
1006 6, /* vec_stmt_cost. */
1007 0, /* vec_to_scalar_cost. */
1008 2, /* scalar_to_vec_cost. */
1009 4, /* vec_align_load_cost. */
1010 4, /* vec_unalign_load_cost. */
1011 4, /* vec_store_cost. */
1012 2, /* cond_taken_branch_cost. */
1013 1, /* cond_not_taken_branch_cost. */
1014 };
1015
1016 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1017 very small blocks it is better to use a loop. For large blocks, a libcall
1018 can use non-temporal accesses and beat inlined code considerably. */
1019
1020 static stringop_algs bdver2_memcpy[2] = {
1021 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1022 {-1, rep_prefix_4_byte, false}}},
1023 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1024 {-1, libcall, false}}}};
1025 static stringop_algs bdver2_memset[2] = {
1026 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1027 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1028 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1029 {-1, libcall, false}}}};
1030
1031 const struct processor_costs bdver2_cost = {
1032 COSTS_N_INSNS (1), /* cost of an add instruction */
1033 COSTS_N_INSNS (1), /* cost of a lea instruction */
1034 COSTS_N_INSNS (1), /* variable shift costs */
1035 COSTS_N_INSNS (1), /* constant shift costs */
1036 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1037 COSTS_N_INSNS (4), /* HI */
1038 COSTS_N_INSNS (4), /* SI */
1039 COSTS_N_INSNS (6), /* DI */
1040 COSTS_N_INSNS (6)}, /* other */
1041 0, /* cost of multiply per each bit set */
1042 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1043 COSTS_N_INSNS (35), /* HI */
1044 COSTS_N_INSNS (51), /* SI */
1045 COSTS_N_INSNS (83), /* DI */
1046 COSTS_N_INSNS (83)}, /* other */
1047 COSTS_N_INSNS (1), /* cost of movsx */
1048 COSTS_N_INSNS (1), /* cost of movzx */
1049 8, /* "large" insn */
1050 9, /* MOVE_RATIO */
1051 4, /* cost for loading QImode using movzbl */
1052 {5, 5, 4}, /* cost of loading integer registers
1053 in QImode, HImode and SImode.
1054 Relative to reg-reg move (2). */
1055 {4, 4, 4}, /* cost of storing integer registers */
1056 2, /* cost of reg,reg fld/fst */
1057 {5, 5, 12}, /* cost of loading fp registers
1058 in SFmode, DFmode and XFmode */
1059 {4, 4, 8}, /* cost of storing fp registers
1060 in SFmode, DFmode and XFmode */
1061 2, /* cost of moving MMX register */
1062 {4, 4}, /* cost of loading MMX registers
1063 in SImode and DImode */
1064 {4, 4}, /* cost of storing MMX registers
1065 in SImode and DImode */
1066 2, /* cost of moving SSE register */
1067 {4, 4, 4}, /* cost of loading SSE registers
1068 in SImode, DImode and TImode */
1069 {4, 4, 4}, /* cost of storing SSE registers
1070 in SImode, DImode and TImode */
1071 2, /* MMX or SSE register to integer */
1072 /* On K8:
1073 MOVD reg64, xmmreg Double FSTORE 4
1074 MOVD reg32, xmmreg Double FSTORE 4
1075 On AMDFAM10:
1076 MOVD reg64, xmmreg Double FADD 3
1077 1/1 1/1
1078 MOVD reg32, xmmreg Double FADD 3
1079 1/1 1/1 */
1080 16, /* size of l1 cache. */
1081 2048, /* size of l2 cache. */
1082 64, /* size of prefetch block */
1083 /* New AMD processors never drop prefetches; if they cannot be performed
1084 immediately, they are queued. We set the number of simultaneous prefetches
1085 to a large constant to reflect this (leaving the number of prefetches
1086 completely unlimited is probably not a good idea either, as their execution
1087 also takes some time). */
1088 100, /* number of parallel prefetches */
1089 2, /* Branch cost */
1090 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1091 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1092 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1093 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1094 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1095 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1096
1097 bdver2_memcpy,
1098 bdver2_memset,
1099 6, /* scalar_stmt_cost. */
1100 4, /* scalar load_cost. */
1101 4, /* scalar_store_cost. */
1102 6, /* vec_stmt_cost. */
1103 0, /* vec_to_scalar_cost. */
1104 2, /* scalar_to_vec_cost. */
1105 4, /* vec_align_load_cost. */
1106 4, /* vec_unalign_load_cost. */
1107 4, /* vec_store_cost. */
1108 2, /* cond_taken_branch_cost. */
1109 1, /* cond_not_taken_branch_cost. */
1110 };
1111
1112
1113 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1114 very small blocks it is better to use a loop. For large blocks, a libcall
1115 can use non-temporal accesses and beat inlined code considerably. */
1116 static stringop_algs bdver3_memcpy[2] = {
1117 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1118 {-1, rep_prefix_4_byte, false}}},
1119 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1120 {-1, libcall, false}}}};
1121 static stringop_algs bdver3_memset[2] = {
1122 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1123 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1124 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1125 {-1, libcall, false}}}};
1126 struct processor_costs bdver3_cost = {
1127 COSTS_N_INSNS (1), /* cost of an add instruction */
1128 COSTS_N_INSNS (1), /* cost of a lea instruction */
1129 COSTS_N_INSNS (1), /* variable shift costs */
1130 COSTS_N_INSNS (1), /* constant shift costs */
1131 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1132 COSTS_N_INSNS (4), /* HI */
1133 COSTS_N_INSNS (4), /* SI */
1134 COSTS_N_INSNS (6), /* DI */
1135 COSTS_N_INSNS (6)}, /* other */
1136 0, /* cost of multiply per each bit set */
1137 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1138 COSTS_N_INSNS (35), /* HI */
1139 COSTS_N_INSNS (51), /* SI */
1140 COSTS_N_INSNS (83), /* DI */
1141 COSTS_N_INSNS (83)}, /* other */
1142 COSTS_N_INSNS (1), /* cost of movsx */
1143 COSTS_N_INSNS (1), /* cost of movzx */
1144 8, /* "large" insn */
1145 9, /* MOVE_RATIO */
1146 4, /* cost for loading QImode using movzbl */
1147 {5, 5, 4}, /* cost of loading integer registers
1148 in QImode, HImode and SImode.
1149 Relative to reg-reg move (2). */
1150 {4, 4, 4}, /* cost of storing integer registers */
1151 2, /* cost of reg,reg fld/fst */
1152 {5, 5, 12}, /* cost of loading fp registers
1153 in SFmode, DFmode and XFmode */
1154 {4, 4, 8}, /* cost of storing fp registers
1155 in SFmode, DFmode and XFmode */
1156 2, /* cost of moving MMX register */
1157 {4, 4}, /* cost of loading MMX registers
1158 in SImode and DImode */
1159 {4, 4}, /* cost of storing MMX registers
1160 in SImode and DImode */
1161 2, /* cost of moving SSE register */
1162 {4, 4, 4}, /* cost of loading SSE registers
1163 in SImode, DImode and TImode */
1164 {4, 4, 4}, /* cost of storing SSE registers
1165 in SImode, DImode and TImode */
1166 2, /* MMX or SSE register to integer */
1167 16, /* size of l1 cache. */
1168 2048, /* size of l2 cache. */
1169 64, /* size of prefetch block */
1170 /* New AMD processors never drop prefetches; if they cannot be performed
1171 immediately, they are queued. We set the number of simultaneous prefetches
1172 to a large constant to reflect this (leaving the number of prefetches
1173 completely unlimited is probably not a good idea either, as their execution
1174 also takes some time). */
1175 100, /* number of parallel prefetches */
1176 2, /* Branch cost */
1177 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1178 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1179 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1180 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1181 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1182 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1183
1184 bdver3_memcpy,
1185 bdver3_memset,
1186 6, /* scalar_stmt_cost. */
1187 4, /* scalar load_cost. */
1188 4, /* scalar_store_cost. */
1189 6, /* vec_stmt_cost. */
1190 0, /* vec_to_scalar_cost. */
1191 2, /* scalar_to_vec_cost. */
1192 4, /* vec_align_load_cost. */
1193 4, /* vec_unalign_load_cost. */
1194 4, /* vec_store_cost. */
1195 2, /* cond_taken_branch_cost. */
1196 1, /* cond_not_taken_branch_cost. */
1197 };
1198
1199 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1200 very small blocks it is better to use a loop. For large blocks, a libcall
1201 can use non-temporal accesses and beat inlined code considerably. */
1202 static stringop_algs bdver4_memcpy[2] = {
1203 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1204 {-1, rep_prefix_4_byte, false}}},
1205 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1206 {-1, libcall, false}}}};
1207 static stringop_algs bdver4_memset[2] = {
1208 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1209 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1210 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1211 {-1, libcall, false}}}};
1212 struct processor_costs bdver4_cost = {
1213 COSTS_N_INSNS (1), /* cost of an add instruction */
1214 COSTS_N_INSNS (1), /* cost of a lea instruction */
1215 COSTS_N_INSNS (1), /* variable shift costs */
1216 COSTS_N_INSNS (1), /* constant shift costs */
1217 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1218 COSTS_N_INSNS (4), /* HI */
1219 COSTS_N_INSNS (4), /* SI */
1220 COSTS_N_INSNS (6), /* DI */
1221 COSTS_N_INSNS (6)}, /* other */
1222 0, /* cost of multiply per each bit set */
1223 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1224 COSTS_N_INSNS (35), /* HI */
1225 COSTS_N_INSNS (51), /* SI */
1226 COSTS_N_INSNS (83), /* DI */
1227 COSTS_N_INSNS (83)}, /* other */
1228 COSTS_N_INSNS (1), /* cost of movsx */
1229 COSTS_N_INSNS (1), /* cost of movzx */
1230 8, /* "large" insn */
1231 9, /* MOVE_RATIO */
1232 4, /* cost for loading QImode using movzbl */
1233 {5, 5, 4}, /* cost of loading integer registers
1234 in QImode, HImode and SImode.
1235 Relative to reg-reg move (2). */
1236 {4, 4, 4}, /* cost of storing integer registers */
1237 2, /* cost of reg,reg fld/fst */
1238 {5, 5, 12}, /* cost of loading fp registers
1239 in SFmode, DFmode and XFmode */
1240 {4, 4, 8}, /* cost of storing fp registers
1241 in SFmode, DFmode and XFmode */
1242 2, /* cost of moving MMX register */
1243 {4, 4}, /* cost of loading MMX registers
1244 in SImode and DImode */
1245 {4, 4}, /* cost of storing MMX registers
1246 in SImode and DImode */
1247 2, /* cost of moving SSE register */
1248 {4, 4, 4}, /* cost of loading SSE registers
1249 in SImode, DImode and TImode */
1250 {4, 4, 4}, /* cost of storing SSE registers
1251 in SImode, DImode and TImode */
1252 2, /* MMX or SSE register to integer */
1253 16, /* size of l1 cache. */
1254 2048, /* size of l2 cache. */
1255 64, /* size of prefetch block */
1256 /* New AMD processors never drop prefetches; if they cannot be performed
1257 immediately, they are queued. We set the number of simultaneous prefetches
1258 to a large constant to reflect this (leaving the number of prefetches
1259 completely unlimited is probably not a good idea either, as their execution
1260 also takes some time). */
1261 100, /* number of parallel prefetches */
1262 2, /* Branch cost */
1263 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1264 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1265 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1266 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1267 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1268 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1269
1270 bdver4_memcpy,
1271 bdver4_memset,
1272 6, /* scalar_stmt_cost. */
1273 4, /* scalar load_cost. */
1274 4, /* scalar_store_cost. */
1275 6, /* vec_stmt_cost. */
1276 0, /* vec_to_scalar_cost. */
1277 2, /* scalar_to_vec_cost. */
1278 4, /* vec_align_load_cost. */
1279 4, /* vec_unalign_load_cost. */
1280 4, /* vec_store_cost. */
1281 2, /* cond_taken_branch_cost. */
1282 1, /* cond_not_taken_branch_cost. */
1283 };
1284
1285 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1286 very small blocks it is better to use a loop. For large blocks, a libcall can
1287 use non-temporal accesses and beat inlined code considerably. */
1288 static stringop_algs btver1_memcpy[2] = {
1289 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1290 {-1, rep_prefix_4_byte, false}}},
1291 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1292 {-1, libcall, false}}}};
1293 static stringop_algs btver1_memset[2] = {
1294 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1295 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1296 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1297 {-1, libcall, false}}}};
1298 const struct processor_costs btver1_cost = {
1299 COSTS_N_INSNS (1), /* cost of an add instruction */
1300 COSTS_N_INSNS (2), /* cost of a lea instruction */
1301 COSTS_N_INSNS (1), /* variable shift costs */
1302 COSTS_N_INSNS (1), /* constant shift costs */
1303 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1304 COSTS_N_INSNS (4), /* HI */
1305 COSTS_N_INSNS (3), /* SI */
1306 COSTS_N_INSNS (4), /* DI */
1307 COSTS_N_INSNS (5)}, /* other */
1308 0, /* cost of multiply per each bit set */
1309 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1310 COSTS_N_INSNS (35), /* HI */
1311 COSTS_N_INSNS (51), /* SI */
1312 COSTS_N_INSNS (83), /* DI */
1313 COSTS_N_INSNS (83)}, /* other */
1314 COSTS_N_INSNS (1), /* cost of movsx */
1315 COSTS_N_INSNS (1), /* cost of movzx */
1316 8, /* "large" insn */
1317 9, /* MOVE_RATIO */
1318 4, /* cost for loading QImode using movzbl */
1319 {3, 4, 3}, /* cost of loading integer registers
1320 in QImode, HImode and SImode.
1321 Relative to reg-reg move (2). */
1322 {3, 4, 3}, /* cost of storing integer registers */
1323 4, /* cost of reg,reg fld/fst */
1324 {4, 4, 12}, /* cost of loading fp registers
1325 in SFmode, DFmode and XFmode */
1326 {6, 6, 8}, /* cost of storing fp registers
1327 in SFmode, DFmode and XFmode */
1328 2, /* cost of moving MMX register */
1329 {3, 3}, /* cost of loading MMX registers
1330 in SImode and DImode */
1331 {4, 4}, /* cost of storing MMX registers
1332 in SImode and DImode */
1333 2, /* cost of moving SSE register */
1334 {4, 4, 3}, /* cost of loading SSE registers
1335 in SImode, DImode and TImode */
1336 {4, 4, 5}, /* cost of storing SSE registers
1337 in SImode, DImode and TImode */
1338 3, /* MMX or SSE register to integer */
1339 /* On K8:
1340 MOVD reg64, xmmreg Double FSTORE 4
1341 MOVD reg32, xmmreg Double FSTORE 4
1342 On AMDFAM10:
1343 MOVD reg64, xmmreg Double FADD 3
1344 1/1 1/1
1345 MOVD reg32, xmmreg Double FADD 3
1346 1/1 1/1 */
1347 32, /* size of l1 cache. */
1348 512, /* size of l2 cache. */
1349 64, /* size of prefetch block */
1350 100, /* number of parallel prefetches */
1351 2, /* Branch cost */
1352 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1353 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1354 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1355 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1356 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1357 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1358
1359 btver1_memcpy,
1360 btver1_memset,
1361 4, /* scalar_stmt_cost. */
1362 2, /* scalar load_cost. */
1363 2, /* scalar_store_cost. */
1364 6, /* vec_stmt_cost. */
1365 0, /* vec_to_scalar_cost. */
1366 2, /* scalar_to_vec_cost. */
1367 2, /* vec_align_load_cost. */
1368 2, /* vec_unalign_load_cost. */
1369 2, /* vec_store_cost. */
1370 2, /* cond_taken_branch_cost. */
1371 1, /* cond_not_taken_branch_cost. */
1372 };
1373
1374 static stringop_algs btver2_memcpy[2] = {
1375 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1376 {-1, rep_prefix_4_byte, false}}},
1377 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1378 {-1, libcall, false}}}};
1379 static stringop_algs btver2_memset[2] = {
1380 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1381 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1382 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1383 {-1, libcall, false}}}};
1384 const struct processor_costs btver2_cost = {
1385 COSTS_N_INSNS (1), /* cost of an add instruction */
1386 COSTS_N_INSNS (2), /* cost of a lea instruction */
1387 COSTS_N_INSNS (1), /* variable shift costs */
1388 COSTS_N_INSNS (1), /* constant shift costs */
1389 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1390 COSTS_N_INSNS (4), /* HI */
1391 COSTS_N_INSNS (3), /* SI */
1392 COSTS_N_INSNS (4), /* DI */
1393 COSTS_N_INSNS (5)}, /* other */
1394 0, /* cost of multiply per each bit set */
1395 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1396 COSTS_N_INSNS (35), /* HI */
1397 COSTS_N_INSNS (51), /* SI */
1398 COSTS_N_INSNS (83), /* DI */
1399 COSTS_N_INSNS (83)}, /* other */
1400 COSTS_N_INSNS (1), /* cost of movsx */
1401 COSTS_N_INSNS (1), /* cost of movzx */
1402 8, /* "large" insn */
1403 9, /* MOVE_RATIO */
1404 4, /* cost for loading QImode using movzbl */
1405 {3, 4, 3}, /* cost of loading integer registers
1406 in QImode, HImode and SImode.
1407 Relative to reg-reg move (2). */
1408 {3, 4, 3}, /* cost of storing integer registers */
1409 4, /* cost of reg,reg fld/fst */
1410 {4, 4, 12}, /* cost of loading fp registers
1411 in SFmode, DFmode and XFmode */
1412 {6, 6, 8}, /* cost of storing fp registers
1413 in SFmode, DFmode and XFmode */
1414 2, /* cost of moving MMX register */
1415 {3, 3}, /* cost of loading MMX registers
1416 in SImode and DImode */
1417 {4, 4}, /* cost of storing MMX registers
1418 in SImode and DImode */
1419 2, /* cost of moving SSE register */
1420 {4, 4, 3}, /* cost of loading SSE registers
1421 in SImode, DImode and TImode */
1422 {4, 4, 5}, /* cost of storing SSE registers
1423 in SImode, DImode and TImode */
1424 3, /* MMX or SSE register to integer */
1425 /* On K8:
1426 MOVD reg64, xmmreg Double FSTORE 4
1427 MOVD reg32, xmmreg Double FSTORE 4
1428 On AMDFAM10:
1429 MOVD reg64, xmmreg Double FADD 3
1430 1/1 1/1
1431 MOVD reg32, xmmreg Double FADD 3
1432 1/1 1/1 */
1433 32, /* size of l1 cache. */
1434 2048, /* size of l2 cache. */
1435 64, /* size of prefetch block */
1436 100, /* number of parallel prefetches */
1437 2, /* Branch cost */
1438 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1439 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1440 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1441 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1442 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1443 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1444 btver2_memcpy,
1445 btver2_memset,
1446 4, /* scalar_stmt_cost. */
1447   2,					 /* scalar_load_cost.  */
1448 2, /* scalar_store_cost. */
1449 6, /* vec_stmt_cost. */
1450 0, /* vec_to_scalar_cost. */
1451 2, /* scalar_to_vec_cost. */
1452 2, /* vec_align_load_cost. */
1453 2, /* vec_unalign_load_cost. */
1454 2, /* vec_store_cost. */
1455 2, /* cond_taken_branch_cost. */
1456 1, /* cond_not_taken_branch_cost. */
1457 };
1458
1459 static stringop_algs pentium4_memcpy[2] = {
1460 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1461 DUMMY_STRINGOP_ALGS};
1462 static stringop_algs pentium4_memset[2] = {
1463 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1464 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1465 DUMMY_STRINGOP_ALGS};
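/* Illustrative note (a sketch, not part of the original tables): each
   stringop_algs pair holds the 32-bit strategy in element [0] and the
   64-bit strategy in element [1]; the option code below picks between them
   with ix86_cost->memcpy[TARGET_64BIT != 0].  Every {max, alg, noalign}
   triple covers block sizes up to max, and the final entry uses -1 for
   everything larger, so on a 32-bit target pentium4_memcpy above copies
   blocks of at most 12 bytes with loop_1_byte and larger blocks with
   rep_prefix_4_byte.  */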
1466
1467 static const
1468 struct processor_costs pentium4_cost = {
1469 COSTS_N_INSNS (1), /* cost of an add instruction */
1470 COSTS_N_INSNS (3), /* cost of a lea instruction */
1471 COSTS_N_INSNS (4), /* variable shift costs */
1472 COSTS_N_INSNS (4), /* constant shift costs */
1473 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1474 COSTS_N_INSNS (15), /* HI */
1475 COSTS_N_INSNS (15), /* SI */
1476 COSTS_N_INSNS (15), /* DI */
1477 COSTS_N_INSNS (15)}, /* other */
1478 0, /* cost of multiply per each bit set */
1479 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1480 COSTS_N_INSNS (56), /* HI */
1481 COSTS_N_INSNS (56), /* SI */
1482 COSTS_N_INSNS (56), /* DI */
1483 COSTS_N_INSNS (56)}, /* other */
1484 COSTS_N_INSNS (1), /* cost of movsx */
1485 COSTS_N_INSNS (1), /* cost of movzx */
1486 16, /* "large" insn */
1487 6, /* MOVE_RATIO */
1488 2, /* cost for loading QImode using movzbl */
1489 {4, 5, 4}, /* cost of loading integer registers
1490 in QImode, HImode and SImode.
1491 Relative to reg-reg move (2). */
1492 {2, 3, 2}, /* cost of storing integer registers */
1493 2, /* cost of reg,reg fld/fst */
1494 {2, 2, 6}, /* cost of loading fp registers
1495 in SFmode, DFmode and XFmode */
1496 {4, 4, 6}, /* cost of storing fp registers
1497 in SFmode, DFmode and XFmode */
1498 2, /* cost of moving MMX register */
1499 {2, 2}, /* cost of loading MMX registers
1500 in SImode and DImode */
1501 {2, 2}, /* cost of storing MMX registers
1502 in SImode and DImode */
1503 12, /* cost of moving SSE register */
1504 {12, 12, 12}, /* cost of loading SSE registers
1505 in SImode, DImode and TImode */
1506 {2, 2, 8}, /* cost of storing SSE registers
1507 in SImode, DImode and TImode */
1508 10, /* MMX or SSE register to integer */
1509 8, /* size of l1 cache. */
1510 256, /* size of l2 cache. */
1511 64, /* size of prefetch block */
1512 6, /* number of parallel prefetches */
1513 2, /* Branch cost */
1514 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1515 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1516 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1517 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1518 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1519 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1520 pentium4_memcpy,
1521 pentium4_memset,
1522 1, /* scalar_stmt_cost. */
1523   1,					 /* scalar_load_cost.  */
1524 1, /* scalar_store_cost. */
1525 1, /* vec_stmt_cost. */
1526 1, /* vec_to_scalar_cost. */
1527 1, /* scalar_to_vec_cost. */
1528 1, /* vec_align_load_cost. */
1529 2, /* vec_unalign_load_cost. */
1530 1, /* vec_store_cost. */
1531 3, /* cond_taken_branch_cost. */
1532 1, /* cond_not_taken_branch_cost. */
1533 };
1534
1535 static stringop_algs nocona_memcpy[2] = {
1536 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1537 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1538 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1539
1540 static stringop_algs nocona_memset[2] = {
1541 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1542 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1543 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1544 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1545
1546 static const
1547 struct processor_costs nocona_cost = {
1548 COSTS_N_INSNS (1), /* cost of an add instruction */
1549 COSTS_N_INSNS (1), /* cost of a lea instruction */
1550 COSTS_N_INSNS (1), /* variable shift costs */
1551 COSTS_N_INSNS (1), /* constant shift costs */
1552 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1553 COSTS_N_INSNS (10), /* HI */
1554 COSTS_N_INSNS (10), /* SI */
1555 COSTS_N_INSNS (10), /* DI */
1556 COSTS_N_INSNS (10)}, /* other */
1557 0, /* cost of multiply per each bit set */
1558 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1559 COSTS_N_INSNS (66), /* HI */
1560 COSTS_N_INSNS (66), /* SI */
1561 COSTS_N_INSNS (66), /* DI */
1562 COSTS_N_INSNS (66)}, /* other */
1563 COSTS_N_INSNS (1), /* cost of movsx */
1564 COSTS_N_INSNS (1), /* cost of movzx */
1565 16, /* "large" insn */
1566 17, /* MOVE_RATIO */
1567 4, /* cost for loading QImode using movzbl */
1568 {4, 4, 4}, /* cost of loading integer registers
1569 in QImode, HImode and SImode.
1570 Relative to reg-reg move (2). */
1571 {4, 4, 4}, /* cost of storing integer registers */
1572 3, /* cost of reg,reg fld/fst */
1573 {12, 12, 12}, /* cost of loading fp registers
1574 in SFmode, DFmode and XFmode */
1575 {4, 4, 4}, /* cost of storing fp registers
1576 in SFmode, DFmode and XFmode */
1577 6, /* cost of moving MMX register */
1578 {12, 12}, /* cost of loading MMX registers
1579 in SImode and DImode */
1580 {12, 12}, /* cost of storing MMX registers
1581 in SImode and DImode */
1582 6, /* cost of moving SSE register */
1583 {12, 12, 12}, /* cost of loading SSE registers
1584 in SImode, DImode and TImode */
1585 {12, 12, 12}, /* cost of storing SSE registers
1586 in SImode, DImode and TImode */
1587 8, /* MMX or SSE register to integer */
1588 8, /* size of l1 cache. */
1589 1024, /* size of l2 cache. */
1590 64, /* size of prefetch block */
1591 8, /* number of parallel prefetches */
1592 1, /* Branch cost */
1593 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1594 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1595 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1596 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1597 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1598 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1599 nocona_memcpy,
1600 nocona_memset,
1601 1, /* scalar_stmt_cost. */
1602   1,					 /* scalar_load_cost.  */
1603 1, /* scalar_store_cost. */
1604 1, /* vec_stmt_cost. */
1605 1, /* vec_to_scalar_cost. */
1606 1, /* scalar_to_vec_cost. */
1607 1, /* vec_align_load_cost. */
1608 2, /* vec_unalign_load_cost. */
1609 1, /* vec_store_cost. */
1610 3, /* cond_taken_branch_cost. */
1611 1, /* cond_not_taken_branch_cost. */
1612 };
1613
1614 static stringop_algs atom_memcpy[2] = {
1615 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1616 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1617 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1618 static stringop_algs atom_memset[2] = {
1619 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1620 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1621 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1622 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1623 static const
1624 struct processor_costs atom_cost = {
1625 COSTS_N_INSNS (1), /* cost of an add instruction */
1626 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1627 COSTS_N_INSNS (1), /* variable shift costs */
1628 COSTS_N_INSNS (1), /* constant shift costs */
1629 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1630 COSTS_N_INSNS (4), /* HI */
1631 COSTS_N_INSNS (3), /* SI */
1632 COSTS_N_INSNS (4), /* DI */
1633 COSTS_N_INSNS (2)}, /* other */
1634 0, /* cost of multiply per each bit set */
1635 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1636 COSTS_N_INSNS (26), /* HI */
1637 COSTS_N_INSNS (42), /* SI */
1638 COSTS_N_INSNS (74), /* DI */
1639 COSTS_N_INSNS (74)}, /* other */
1640 COSTS_N_INSNS (1), /* cost of movsx */
1641 COSTS_N_INSNS (1), /* cost of movzx */
1642 8, /* "large" insn */
1643 17, /* MOVE_RATIO */
1644 4, /* cost for loading QImode using movzbl */
1645 {4, 4, 4}, /* cost of loading integer registers
1646 in QImode, HImode and SImode.
1647 Relative to reg-reg move (2). */
1648 {4, 4, 4}, /* cost of storing integer registers */
1649 4, /* cost of reg,reg fld/fst */
1650 {12, 12, 12}, /* cost of loading fp registers
1651 in SFmode, DFmode and XFmode */
1652 {6, 6, 8}, /* cost of storing fp registers
1653 in SFmode, DFmode and XFmode */
1654 2, /* cost of moving MMX register */
1655 {8, 8}, /* cost of loading MMX registers
1656 in SImode and DImode */
1657 {8, 8}, /* cost of storing MMX registers
1658 in SImode and DImode */
1659 2, /* cost of moving SSE register */
1660 {8, 8, 8}, /* cost of loading SSE registers
1661 in SImode, DImode and TImode */
1662 {8, 8, 8}, /* cost of storing SSE registers
1663 in SImode, DImode and TImode */
1664 5, /* MMX or SSE register to integer */
1665 32, /* size of l1 cache. */
1666 256, /* size of l2 cache. */
1667 64, /* size of prefetch block */
1668 6, /* number of parallel prefetches */
1669 3, /* Branch cost */
1670 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1671 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1672 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1673 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1674 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1675 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1676 atom_memcpy,
1677 atom_memset,
1678 1, /* scalar_stmt_cost. */
1679   1,					 /* scalar_load_cost.  */
1680 1, /* scalar_store_cost. */
1681 1, /* vec_stmt_cost. */
1682 1, /* vec_to_scalar_cost. */
1683 1, /* scalar_to_vec_cost. */
1684 1, /* vec_align_load_cost. */
1685 2, /* vec_unalign_load_cost. */
1686 1, /* vec_store_cost. */
1687 3, /* cond_taken_branch_cost. */
1688 1, /* cond_not_taken_branch_cost. */
1689 };
1690
1691 static stringop_algs slm_memcpy[2] = {
1692 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1693 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1694 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1695 static stringop_algs slm_memset[2] = {
1696 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1697 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1698 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1699 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1700 static const
1701 struct processor_costs slm_cost = {
1702 COSTS_N_INSNS (1), /* cost of an add instruction */
1703 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1704 COSTS_N_INSNS (1), /* variable shift costs */
1705 COSTS_N_INSNS (1), /* constant shift costs */
1706 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1707 COSTS_N_INSNS (3), /* HI */
1708 COSTS_N_INSNS (3), /* SI */
1709 COSTS_N_INSNS (4), /* DI */
1710 COSTS_N_INSNS (2)}, /* other */
1711 0, /* cost of multiply per each bit set */
1712 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1713 COSTS_N_INSNS (26), /* HI */
1714 COSTS_N_INSNS (42), /* SI */
1715 COSTS_N_INSNS (74), /* DI */
1716 COSTS_N_INSNS (74)}, /* other */
1717 COSTS_N_INSNS (1), /* cost of movsx */
1718 COSTS_N_INSNS (1), /* cost of movzx */
1719 8, /* "large" insn */
1720 17, /* MOVE_RATIO */
1721 4, /* cost for loading QImode using movzbl */
1722 {4, 4, 4}, /* cost of loading integer registers
1723 in QImode, HImode and SImode.
1724 Relative to reg-reg move (2). */
1725 {4, 4, 4}, /* cost of storing integer registers */
1726 4, /* cost of reg,reg fld/fst */
1727 {12, 12, 12}, /* cost of loading fp registers
1728 in SFmode, DFmode and XFmode */
1729 {6, 6, 8}, /* cost of storing fp registers
1730 in SFmode, DFmode and XFmode */
1731 2, /* cost of moving MMX register */
1732 {8, 8}, /* cost of loading MMX registers
1733 in SImode and DImode */
1734 {8, 8}, /* cost of storing MMX registers
1735 in SImode and DImode */
1736 2, /* cost of moving SSE register */
1737 {8, 8, 8}, /* cost of loading SSE registers
1738 in SImode, DImode and TImode */
1739 {8, 8, 8}, /* cost of storing SSE registers
1740 in SImode, DImode and TImode */
1741 5, /* MMX or SSE register to integer */
1742 32, /* size of l1 cache. */
1743 256, /* size of l2 cache. */
1744 64, /* size of prefetch block */
1745 6, /* number of parallel prefetches */
1746 3, /* Branch cost */
1747 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1748 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1749 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1750 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1751 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1752 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1753 slm_memcpy,
1754 slm_memset,
1755 1, /* scalar_stmt_cost. */
1756   1,					 /* scalar_load_cost.  */
1757 1, /* scalar_store_cost. */
1758 1, /* vec_stmt_cost. */
1759 4, /* vec_to_scalar_cost. */
1760 1, /* scalar_to_vec_cost. */
1761 1, /* vec_align_load_cost. */
1762 2, /* vec_unalign_load_cost. */
1763 1, /* vec_store_cost. */
1764 3, /* cond_taken_branch_cost. */
1765 1, /* cond_not_taken_branch_cost. */
1766 };
1767
1768 static stringop_algs intel_memcpy[2] = {
1769 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1770 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1771 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1772 static stringop_algs intel_memset[2] = {
1773 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1774 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1775 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1776 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1777 static const
1778 struct processor_costs intel_cost = {
1779 COSTS_N_INSNS (1), /* cost of an add instruction */
1780 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1781 COSTS_N_INSNS (1), /* variable shift costs */
1782 COSTS_N_INSNS (1), /* constant shift costs */
1783 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1784 COSTS_N_INSNS (3), /* HI */
1785 COSTS_N_INSNS (3), /* SI */
1786 COSTS_N_INSNS (4), /* DI */
1787 COSTS_N_INSNS (2)}, /* other */
1788 0, /* cost of multiply per each bit set */
1789 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1790 COSTS_N_INSNS (26), /* HI */
1791 COSTS_N_INSNS (42), /* SI */
1792 COSTS_N_INSNS (74), /* DI */
1793 COSTS_N_INSNS (74)}, /* other */
1794 COSTS_N_INSNS (1), /* cost of movsx */
1795 COSTS_N_INSNS (1), /* cost of movzx */
1796 8, /* "large" insn */
1797 17, /* MOVE_RATIO */
1798 4, /* cost for loading QImode using movzbl */
1799 {4, 4, 4}, /* cost of loading integer registers
1800 in QImode, HImode and SImode.
1801 Relative to reg-reg move (2). */
1802 {4, 4, 4}, /* cost of storing integer registers */
1803 4, /* cost of reg,reg fld/fst */
1804 {12, 12, 12}, /* cost of loading fp registers
1805 in SFmode, DFmode and XFmode */
1806 {6, 6, 8}, /* cost of storing fp registers
1807 in SFmode, DFmode and XFmode */
1808 2, /* cost of moving MMX register */
1809 {8, 8}, /* cost of loading MMX registers
1810 in SImode and DImode */
1811 {8, 8}, /* cost of storing MMX registers
1812 in SImode and DImode */
1813 2, /* cost of moving SSE register */
1814 {8, 8, 8}, /* cost of loading SSE registers
1815 in SImode, DImode and TImode */
1816 {8, 8, 8}, /* cost of storing SSE registers
1817 in SImode, DImode and TImode */
1818 5, /* MMX or SSE register to integer */
1819 32, /* size of l1 cache. */
1820 256, /* size of l2 cache. */
1821 64, /* size of prefetch block */
1822 6, /* number of parallel prefetches */
1823 3, /* Branch cost */
1824 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1825 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1826 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1827 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1828 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1829 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1830 intel_memcpy,
1831 intel_memset,
1832 1, /* scalar_stmt_cost. */
1833   1,					 /* scalar_load_cost.  */
1834 1, /* scalar_store_cost. */
1835 1, /* vec_stmt_cost. */
1836 4, /* vec_to_scalar_cost. */
1837 1, /* scalar_to_vec_cost. */
1838 1, /* vec_align_load_cost. */
1839 2, /* vec_unalign_load_cost. */
1840 1, /* vec_store_cost. */
1841 3, /* cond_taken_branch_cost. */
1842 1, /* cond_not_taken_branch_cost. */
1843 };
1844
1845 /* Generic should produce code tuned for Core-i7 (and newer chips)
1846 and btver1 (and newer chips). */
1847
1848 static stringop_algs generic_memcpy[2] = {
1849 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1850 {-1, libcall, false}}},
1851 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1852 {-1, libcall, false}}}};
1853 static stringop_algs generic_memset[2] = {
1854 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1855 {-1, libcall, false}}},
1856 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1857 {-1, libcall, false}}}};
1858 static const
1859 struct processor_costs generic_cost = {
1860 COSTS_N_INSNS (1), /* cost of an add instruction */
1861   /* On all chips taken into consideration, lea is 2 cycles or more.  With
1862      this cost, however, our current implementation of synth_mult results in
1863      the use of unnecessary temporary registers, causing regressions on
1864      several SPECfp benchmarks.  */
1865 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1866 COSTS_N_INSNS (1), /* variable shift costs */
1867 COSTS_N_INSNS (1), /* constant shift costs */
1868 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1869 COSTS_N_INSNS (4), /* HI */
1870 COSTS_N_INSNS (3), /* SI */
1871 COSTS_N_INSNS (4), /* DI */
1872 COSTS_N_INSNS (2)}, /* other */
1873 0, /* cost of multiply per each bit set */
1874 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1875 COSTS_N_INSNS (26), /* HI */
1876 COSTS_N_INSNS (42), /* SI */
1877 COSTS_N_INSNS (74), /* DI */
1878 COSTS_N_INSNS (74)}, /* other */
1879 COSTS_N_INSNS (1), /* cost of movsx */
1880 COSTS_N_INSNS (1), /* cost of movzx */
1881 8, /* "large" insn */
1882 17, /* MOVE_RATIO */
1883 4, /* cost for loading QImode using movzbl */
1884 {4, 4, 4}, /* cost of loading integer registers
1885 in QImode, HImode and SImode.
1886 Relative to reg-reg move (2). */
1887 {4, 4, 4}, /* cost of storing integer registers */
1888 4, /* cost of reg,reg fld/fst */
1889 {12, 12, 12}, /* cost of loading fp registers
1890 in SFmode, DFmode and XFmode */
1891 {6, 6, 8}, /* cost of storing fp registers
1892 in SFmode, DFmode and XFmode */
1893 2, /* cost of moving MMX register */
1894 {8, 8}, /* cost of loading MMX registers
1895 in SImode and DImode */
1896 {8, 8}, /* cost of storing MMX registers
1897 in SImode and DImode */
1898 2, /* cost of moving SSE register */
1899 {8, 8, 8}, /* cost of loading SSE registers
1900 in SImode, DImode and TImode */
1901 {8, 8, 8}, /* cost of storing SSE registers
1902 in SImode, DImode and TImode */
1903 5, /* MMX or SSE register to integer */
1904 32, /* size of l1 cache. */
1905 512, /* size of l2 cache. */
1906 64, /* size of prefetch block */
1907 6, /* number of parallel prefetches */
1908   /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1909      value is increased to the perhaps more appropriate value of 5.  */
1910 3, /* Branch cost */
1911 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1912 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1913 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1914 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1915 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1916 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1917 generic_memcpy,
1918 generic_memset,
1919 1, /* scalar_stmt_cost. */
1920   1,					 /* scalar_load_cost.  */
1921 1, /* scalar_store_cost. */
1922 1, /* vec_stmt_cost. */
1923 1, /* vec_to_scalar_cost. */
1924 1, /* scalar_to_vec_cost. */
1925 1, /* vec_align_load_cost. */
1926 2, /* vec_unalign_load_cost. */
1927 1, /* vec_store_cost. */
1928 3, /* cond_taken_branch_cost. */
1929 1, /* cond_not_taken_branch_cost. */
1930 };
1931
1932 /* core_cost should produce code tuned for the Core family of CPUs.  */
1933 static stringop_algs core_memcpy[2] = {
1934 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1935 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1936 {-1, libcall, false}}}};
1937 static stringop_algs core_memset[2] = {
1938 {libcall, {{6, loop_1_byte, true},
1939 {24, loop, true},
1940 {8192, rep_prefix_4_byte, true},
1941 {-1, libcall, false}}},
1942 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1943 {-1, libcall, false}}}};
1944
1945 static const
1946 struct processor_costs core_cost = {
1947 COSTS_N_INSNS (1), /* cost of an add instruction */
1948   /* On all chips taken into consideration, lea is 2 cycles or more.  With
1949      this cost, however, our current implementation of synth_mult results in
1950      the use of unnecessary temporary registers, causing regressions on
1951      several SPECfp benchmarks.  */
1952 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1953 COSTS_N_INSNS (1), /* variable shift costs */
1954 COSTS_N_INSNS (1), /* constant shift costs */
1955 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1956 COSTS_N_INSNS (4), /* HI */
1957 COSTS_N_INSNS (3), /* SI */
1958 COSTS_N_INSNS (4), /* DI */
1959 COSTS_N_INSNS (2)}, /* other */
1960 0, /* cost of multiply per each bit set */
1961 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1962 COSTS_N_INSNS (26), /* HI */
1963 COSTS_N_INSNS (42), /* SI */
1964 COSTS_N_INSNS (74), /* DI */
1965 COSTS_N_INSNS (74)}, /* other */
1966 COSTS_N_INSNS (1), /* cost of movsx */
1967 COSTS_N_INSNS (1), /* cost of movzx */
1968 8, /* "large" insn */
1969 17, /* MOVE_RATIO */
1970 4, /* cost for loading QImode using movzbl */
1971 {4, 4, 4}, /* cost of loading integer registers
1972 in QImode, HImode and SImode.
1973 Relative to reg-reg move (2). */
1974 {4, 4, 4}, /* cost of storing integer registers */
1975 4, /* cost of reg,reg fld/fst */
1976 {12, 12, 12}, /* cost of loading fp registers
1977 in SFmode, DFmode and XFmode */
1978 {6, 6, 8}, /* cost of storing fp registers
1979 in SFmode, DFmode and XFmode */
1980 2, /* cost of moving MMX register */
1981 {8, 8}, /* cost of loading MMX registers
1982 in SImode and DImode */
1983 {8, 8}, /* cost of storing MMX registers
1984 in SImode and DImode */
1985 2, /* cost of moving SSE register */
1986 {8, 8, 8}, /* cost of loading SSE registers
1987 in SImode, DImode and TImode */
1988 {8, 8, 8}, /* cost of storing SSE registers
1989 in SImode, DImode and TImode */
1990 5, /* MMX or SSE register to integer */
1991 64, /* size of l1 cache. */
1992 512, /* size of l2 cache. */
1993 64, /* size of prefetch block */
1994 6, /* number of parallel prefetches */
1995   /* FIXME: perhaps a more appropriate value is 5.  */
1996 3, /* Branch cost */
1997 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1998 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1999 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2000 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2001 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2002 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2003 core_memcpy,
2004 core_memset,
2005 1, /* scalar_stmt_cost. */
2006   1,					 /* scalar_load_cost.  */
2007 1, /* scalar_store_cost. */
2008 1, /* vec_stmt_cost. */
2009 1, /* vec_to_scalar_cost. */
2010 1, /* scalar_to_vec_cost. */
2011 1, /* vec_align_load_cost. */
2012 2, /* vec_unalign_load_cost. */
2013 1, /* vec_store_cost. */
2014 3, /* cond_taken_branch_cost. */
2015 1, /* cond_not_taken_branch_cost. */
2016 };
2017
2018
2019 /* Set by -mtune. */
2020 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2021
2022 /* Set by -mtune or -Os. */
2023 const struct processor_costs *ix86_cost = &pentium_cost;
2024
2025 /* Processor feature/optimization bitmasks. */
2026 #define m_386 (1<<PROCESSOR_I386)
2027 #define m_486 (1<<PROCESSOR_I486)
2028 #define m_PENT (1<<PROCESSOR_PENTIUM)
2029 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2030 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2031 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2032 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2033 #define m_CORE2 (1<<PROCESSOR_CORE2)
2034 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2035 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2036 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2037 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2038 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2039 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2040 #define m_INTEL (1<<PROCESSOR_INTEL)
2041
2042 #define m_GEODE (1<<PROCESSOR_GEODE)
2043 #define m_K6 (1<<PROCESSOR_K6)
2044 #define m_K6_GEODE (m_K6 | m_GEODE)
2045 #define m_K8 (1<<PROCESSOR_K8)
2046 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2047 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2048 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2049 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2050 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2051 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2052 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2053 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2054 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2055 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2056 #define m_BTVER (m_BTVER1 | m_BTVER2)
2057 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2058
2059 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2060
2061 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2062 #undef DEF_TUNE
2063 #define DEF_TUNE(tune, name, selector) name,
2064 #include "x86-tune.def"
2065 #undef DEF_TUNE
2066 };
2067
2068 /* Feature tests against the various tunings. */
2069 unsigned char ix86_tune_features[X86_TUNE_LAST];
2070
2071 /* Feature tests against the various tunings used to create ix86_tune_features
2072 based on the processor mask. */
2073 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2074 #undef DEF_TUNE
2075 #define DEF_TUNE(tune, name, selector) selector,
2076 #include "x86-tune.def"
2077 #undef DEF_TUNE
2078 };
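/* Illustrative expansion (the entry shown is hypothetical, not quoted from
   x86-tune.def): a line such as

     DEF_TUNE (X86_TUNE_EXAMPLE, "example_feature", m_CORE_ALL | m_GENERIC)

   contributes the string "example_feature" to ix86_tune_feature_names and
   the selector mask (m_CORE_ALL | m_GENERIC) to initial_ix86_tune_features
   at the same index, since both tables are generated from the same
   x86-tune.def through the DEF_TUNE redefinitions above.  */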
2079
2080 /* Feature tests against the various architecture variations. */
2081 unsigned char ix86_arch_features[X86_ARCH_LAST];
2082
2083 /* Feature tests against the various architecture variations, used to create
2084 ix86_arch_features based on the processor mask. */
2085 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2086 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2087 ~(m_386 | m_486 | m_PENT | m_K6),
2088
2089 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2090 ~m_386,
2091
2092 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2093 ~(m_386 | m_486),
2094
2095 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2096 ~m_386,
2097
2098 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2099 ~m_386,
2100 };
2101
2102 /* If the average insn count for a single function invocation is
2103    lower than this constant, emit fast (but longer) prologue and
2104    epilogue code.  */
2105 #define FAST_PROLOGUE_INSN_COUNT 20
2106
2107 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively.  */
2108 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2109 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2110 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2111
2112 /* Array of the smallest class containing reg number REGNO, indexed by
2113 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2114
2115 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2116 {
2117 /* ax, dx, cx, bx */
2118 AREG, DREG, CREG, BREG,
2119 /* si, di, bp, sp */
2120 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2121 /* FP registers */
2122 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2123 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2124 /* arg pointer */
2125 NON_Q_REGS,
2126 /* flags, fpsr, fpcr, frame */
2127 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2128 /* SSE registers */
2129 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2130 SSE_REGS, SSE_REGS,
2131 /* MMX registers */
2132 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2133 MMX_REGS, MMX_REGS,
2134 /* REX registers */
2135 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2136 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2137 /* SSE REX registers */
2138 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2139 SSE_REGS, SSE_REGS,
2140 /* AVX-512 SSE registers */
2141 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2142 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2143 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2144 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2145 /* Mask registers. */
2146 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2147 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2148 };
2149
2150 /* The "default" register map used in 32bit mode. */
2151
2152 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2153 {
2154 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2155 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2156 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2157 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2158 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2159 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2160 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2161 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2162 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2163 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2164 };
2165
2166 /* The "default" register map used in 64bit mode. */
2167
2168 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2169 {
2170 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2171 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2172 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2173 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2174 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2175 8,9,10,11,12,13,14,15, /* extended integer registers */
2176 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2177 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2178 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2179 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2180 };
2181
2182 /* Define the register numbers to be used in Dwarf debugging information.
2183 The SVR4 reference port C compiler uses the following register numbers
2184 in its Dwarf output code:
2185 0 for %eax (gcc regno = 0)
2186 1 for %ecx (gcc regno = 2)
2187 2 for %edx (gcc regno = 1)
2188 3 for %ebx (gcc regno = 3)
2189 4 for %esp (gcc regno = 7)
2190 5 for %ebp (gcc regno = 6)
2191 6 for %esi (gcc regno = 4)
2192 7 for %edi (gcc regno = 5)
2193 The following three DWARF register numbers are never generated by
2194 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2195 believes these numbers have these meanings.
2196 8 for %eip (no gcc equivalent)
2197 9 for %eflags (gcc regno = 17)
2198 10 for %trapno (no gcc equivalent)
2199 It is not at all clear how we should number the FP stack registers
2200 for the x86 architecture. If the version of SDB on x86/svr4 were
2201 a bit less brain dead with respect to floating-point then we would
2202 have a precedent to follow with respect to DWARF register numbers
2203 for x86 FP registers, but the SDB on x86/svr4 is so completely
2204 broken with respect to FP registers that it is hardly worth thinking
2205 of it as something to strive for compatibility with.
2206 The version of x86/svr4 SDB I have at the moment does (partially)
2207 seem to believe that DWARF register number 11 is associated with
2208 the x86 register %st(0), but that's about all. Higher DWARF
2209 register numbers don't seem to be associated with anything in
2210 particular, and even for DWARF regno 11, SDB only seems to under-
2211 stand that it should say that a variable lives in %st(0) (when
2212 asked via an `=' command) if we said it was in DWARF regno 11,
2213 but SDB still prints garbage when asked for the value of the
2214 variable in question (via a `/' command).
2215 (Also note that the labels SDB prints for various FP stack regs
2216 when doing an `x' command are all wrong.)
2217 Note that these problems generally don't affect the native SVR4
2218 C compiler because it doesn't allow the use of -O with -g and
2219 because when it is *not* optimizing, it allocates a memory
2220 location for each floating-point variable, and the memory
2221 location is what gets described in the DWARF AT_location
2222 attribute for the variable in question.
2223 Regardless of the severe mental illness of the x86/svr4 SDB, we
2224 do something sensible here and we use the following DWARF
2225 register numbers. Note that these are all stack-top-relative
2226 numbers.
2227 11 for %st(0) (gcc regno = 8)
2228 12 for %st(1) (gcc regno = 9)
2229 13 for %st(2) (gcc regno = 10)
2230 14 for %st(3) (gcc regno = 11)
2231 15 for %st(4) (gcc regno = 12)
2232 16 for %st(5) (gcc regno = 13)
2233 17 for %st(6) (gcc regno = 14)
2234 18 for %st(7) (gcc regno = 15)
2235 */
2236 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2237 {
2238 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2239 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2240 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2241 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2242 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2243 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2244 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2245 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2246 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2247 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2248 };
2249
2250 /* Define parameter passing and return registers. */
2251
2252 static int const x86_64_int_parameter_registers[6] =
2253 {
2254 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2255 };
2256
2257 static int const x86_64_ms_abi_int_parameter_registers[4] =
2258 {
2259 CX_REG, DX_REG, R8_REG, R9_REG
2260 };
2261
2262 static int const x86_64_int_return_registers[4] =
2263 {
2264 AX_REG, DX_REG, DI_REG, SI_REG
2265 };
2266
2267 /* Additional registers that are clobbered by SYSV calls. */
2268
2269 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2270 {
2271 SI_REG, DI_REG,
2272 XMM6_REG, XMM7_REG,
2273 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2274 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2275 };
2276
2277 /* Define the structure for the machine field in struct function. */
2278
2279 struct GTY(()) stack_local_entry {
2280 unsigned short mode;
2281 unsigned short n;
2282 rtx rtl;
2283 struct stack_local_entry *next;
2284 };
2285
2286 /* Structure describing stack frame layout.
2287 Stack grows downward:
2288
2289 [arguments]
2290 <- ARG_POINTER
2291 saved pc
2292
2293 saved static chain if ix86_static_chain_on_stack
2294
2295 saved frame pointer if frame_pointer_needed
2296 <- HARD_FRAME_POINTER
2297 [saved regs]
2298 <- regs_save_offset
2299 [padding0]
2300
2301 [saved SSE regs]
2302 <- sse_regs_save_offset
2303 [padding1] |
2304 | <- FRAME_POINTER
2305 [va_arg registers] |
2306 |
2307 [frame] |
2308 |
2309 [padding2] | = to_allocate
2310 <- STACK_POINTER
2311 */
2312 struct ix86_frame
2313 {
2314 int nsseregs;
2315 int nregs;
2316 int va_arg_size;
2317 int red_zone_size;
2318 int outgoing_arguments_size;
2319
2320 /* The offsets relative to ARG_POINTER. */
2321 HOST_WIDE_INT frame_pointer_offset;
2322 HOST_WIDE_INT hard_frame_pointer_offset;
2323 HOST_WIDE_INT stack_pointer_offset;
2324 HOST_WIDE_INT hfp_save_offset;
2325 HOST_WIDE_INT reg_save_offset;
2326 HOST_WIDE_INT sse_reg_save_offset;
2327
2328 /* When save_regs_using_mov is set, emit prologue using
2329 move instead of push instructions. */
2330 bool save_regs_using_mov;
2331 };
2332
2333 /* Which cpu are we scheduling for. */
2334 enum attr_cpu ix86_schedule;
2335
2336 /* Which cpu are we optimizing for. */
2337 enum processor_type ix86_tune;
2338
2339 /* Which instruction set architecture to use. */
2340 enum processor_type ix86_arch;
2341
2342 /* True if processor has SSE prefetch instruction. */
2343 unsigned char x86_prefetch_sse;
2344
2345 /* -mstackrealign option */
2346 static const char ix86_force_align_arg_pointer_string[]
2347 = "force_align_arg_pointer";
2348
2349 static rtx (*ix86_gen_leave) (void);
2350 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2351 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2352 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2353 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2354 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2355 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2356 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2357 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2358 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2359 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2360 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2361
2362 /* Preferred alignment for stack boundary in bits. */
2363 unsigned int ix86_preferred_stack_boundary;
2364
2365 /* Alignment for incoming stack boundary in bits specified at
2366 command line. */
2367 static unsigned int ix86_user_incoming_stack_boundary;
2368
2369 /* Default alignment for incoming stack boundary in bits. */
2370 static unsigned int ix86_default_incoming_stack_boundary;
2371
2372 /* Alignment for incoming stack boundary in bits. */
2373 unsigned int ix86_incoming_stack_boundary;
2374
2375 /* Calling abi specific va_list type nodes. */
2376 static GTY(()) tree sysv_va_list_type_node;
2377 static GTY(()) tree ms_va_list_type_node;
2378
2379 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2380 char internal_label_prefix[16];
2381 int internal_label_prefix_len;
2382
2383 /* Fence to use after loop using movnt. */
2384 tree x86_mfence;
2385
2386 /* Register class used for passing a given 64-bit part of the argument.
2387    These represent the classes documented by the psABI, except that the
2388    SSESF and SSEDF classes are basically the SSE class; gcc just uses an
2389    SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2390 
2391    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2392    whenever possible (the upper half then contains only padding).  */
2393 enum x86_64_reg_class
2394 {
2395 X86_64_NO_CLASS,
2396 X86_64_INTEGER_CLASS,
2397 X86_64_INTEGERSI_CLASS,
2398 X86_64_SSE_CLASS,
2399 X86_64_SSESF_CLASS,
2400 X86_64_SSEDF_CLASS,
2401 X86_64_SSEUP_CLASS,
2402 X86_64_X87_CLASS,
2403 X86_64_X87UP_CLASS,
2404 X86_64_COMPLEX_X87_CLASS,
2405 X86_64_MEMORY_CLASS
2406 };
2407
2408 #define MAX_CLASSES 8
2409
2410 /* Table of constants used by fldpi, fldln2, etc.  */
2411 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2412 static bool ext_80387_constants_init = 0;
2413
2414 \f
2415 static struct machine_function * ix86_init_machine_status (void);
2416 static rtx ix86_function_value (const_tree, const_tree, bool);
2417 static bool ix86_function_value_regno_p (const unsigned int);
2418 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2419 const_tree);
2420 static rtx ix86_static_chain (const_tree, bool);
2421 static int ix86_function_regparm (const_tree, const_tree);
2422 static void ix86_compute_frame_layout (struct ix86_frame *);
2423 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2424 rtx, rtx, int);
2425 static void ix86_add_new_builtins (HOST_WIDE_INT);
2426 static tree ix86_canonical_va_list_type (tree);
2427 static void predict_jump (int);
2428 static unsigned int split_stack_prologue_scratch_regno (void);
2429 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2430
2431 enum ix86_function_specific_strings
2432 {
2433 IX86_FUNCTION_SPECIFIC_ARCH,
2434 IX86_FUNCTION_SPECIFIC_TUNE,
2435 IX86_FUNCTION_SPECIFIC_MAX
2436 };
2437
2438 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2439 const char *, enum fpmath_unit, bool);
2440 static void ix86_function_specific_save (struct cl_target_option *,
2441 struct gcc_options *opts);
2442 static void ix86_function_specific_restore (struct gcc_options *opts,
2443 struct cl_target_option *);
2444 static void ix86_function_specific_print (FILE *, int,
2445 struct cl_target_option *);
2446 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2447 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2448 struct gcc_options *,
2449 struct gcc_options *,
2450 struct gcc_options *);
2451 static bool ix86_can_inline_p (tree, tree);
2452 static void ix86_set_current_function (tree);
2453 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2454
2455 static enum calling_abi ix86_function_abi (const_tree);
2456
2457 \f
2458 #ifndef SUBTARGET32_DEFAULT_CPU
2459 #define SUBTARGET32_DEFAULT_CPU "i386"
2460 #endif
2461
2462 /* Whether -mtune= or -march= were specified */
2463 static int ix86_tune_defaulted;
2464 static int ix86_arch_specified;
2465
2466 /* Vectorization library interface and handlers. */
2467 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2468
2469 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2470 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2471
2472 /* Processor target table, indexed by processor number */
2473 struct ptt
2474 {
2475 const char *const name; /* processor name */
2476 const struct processor_costs *cost; /* Processor costs */
2477 const int align_loop; /* Default alignments. */
2478 const int align_loop_max_skip;
2479 const int align_jump;
2480 const int align_jump_max_skip;
2481 const int align_func;
2482 };
2483
2484 /* This table must be in sync with enum processor_type in i386.h. */
2485 static const struct ptt processor_target_table[PROCESSOR_max] =
2486 {
2487 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2488 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2489 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2490 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2491 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2492 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2493 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2494 {"core2", &core_cost, 16, 10, 16, 10, 16},
2495 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2496 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2497 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2498 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2499 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2500 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2501 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2502 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2503 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2504 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2505 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2506 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2507 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2508 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2509 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2510 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2511 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2512 };
2513 \f
2514 static unsigned int
2515 rest_of_handle_insert_vzeroupper (void)
2516 {
2517 int i;
2518
2519   /* vzeroupper instructions are inserted immediately after reload to
2520      account for possible spills from 256-bit registers.  The pass
2521      reuses the mode switching infrastructure by re-running the mode
2522      insertion pass, so disable entities that have already been processed.  */
2523 for (i = 0; i < MAX_386_ENTITIES; i++)
2524 ix86_optimize_mode_switching[i] = 0;
2525
2526 ix86_optimize_mode_switching[AVX_U128] = 1;
2527
2528 /* Call optimize_mode_switching. */
2529 g->get_passes ()->execute_pass_mode_switching ();
2530 return 0;
2531 }
2532
2533 namespace {
2534
2535 const pass_data pass_data_insert_vzeroupper =
2536 {
2537 RTL_PASS, /* type */
2538 "vzeroupper", /* name */
2539 OPTGROUP_NONE, /* optinfo_flags */
2540 TV_NONE, /* tv_id */
2541 0, /* properties_required */
2542 0, /* properties_provided */
2543 0, /* properties_destroyed */
2544 0, /* todo_flags_start */
2545 TODO_df_finish, /* todo_flags_finish */
2546 };
2547
2548 class pass_insert_vzeroupper : public rtl_opt_pass
2549 {
2550 public:
2551 pass_insert_vzeroupper(gcc::context *ctxt)
2552 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2553 {}
2554
2555 /* opt_pass methods: */
2556 virtual bool gate (function *)
2557 {
2558 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2559 }
2560
2561 virtual unsigned int execute (function *)
2562 {
2563 return rest_of_handle_insert_vzeroupper ();
2564 }
2565
2566 }; // class pass_insert_vzeroupper
2567
2568 } // anon namespace
2569
2570 rtl_opt_pass *
2571 make_pass_insert_vzeroupper (gcc::context *ctxt)
2572 {
2573 return new pass_insert_vzeroupper (ctxt);
2574 }
2575
2576 /* Return true if a red-zone is in use. */
2577
2578 static inline bool
2579 ix86_using_red_zone (void)
2580 {
2581 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2582 }
2583 \f
2584 /* Return a string that documents the current -m options. The caller is
2585 responsible for freeing the string. */
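/* For illustration only (a hypothetical result; the exact set depends on the
   active ISA and flag masks): a call such as

     char *s = ix86_target_string (ix86_isa_flags, target_flags,
				   "x86-64", "generic", FPMATH_SSE, true);

   might return a string along the lines of
   "-march=x86-64 -mtune=generic -m64 -msse2 -msse -mmmx -mfpmath=sse",
   which the caller releases with free ().  */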
2586
2587 static char *
2588 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2589 const char *tune, enum fpmath_unit fpmath,
2590 bool add_nl_p)
2591 {
2592 struct ix86_target_opts
2593 {
2594 const char *option; /* option string */
2595 HOST_WIDE_INT mask; /* isa mask options */
2596 };
2597
2598   /* This table is ordered so that options like -msse4.2 that imply
2599      other options are matched before the options they imply.  */
2600 static struct ix86_target_opts isa_opts[] =
2601 {
2602 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2603 { "-mfma", OPTION_MASK_ISA_FMA },
2604 { "-mxop", OPTION_MASK_ISA_XOP },
2605 { "-mlwp", OPTION_MASK_ISA_LWP },
2606 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2607 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2608 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2609 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2610 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2611 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2612 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2613 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2614 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2615 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2616 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2617 { "-msse3", OPTION_MASK_ISA_SSE3 },
2618 { "-msse2", OPTION_MASK_ISA_SSE2 },
2619 { "-msse", OPTION_MASK_ISA_SSE },
2620 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2621 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2622 { "-mmmx", OPTION_MASK_ISA_MMX },
2623 { "-mabm", OPTION_MASK_ISA_ABM },
2624 { "-mbmi", OPTION_MASK_ISA_BMI },
2625 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2626 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2627 { "-mhle", OPTION_MASK_ISA_HLE },
2628 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2629 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2630 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2631 { "-madx", OPTION_MASK_ISA_ADX },
2632 { "-mtbm", OPTION_MASK_ISA_TBM },
2633 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2634 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2635 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2636 { "-maes", OPTION_MASK_ISA_AES },
2637 { "-msha", OPTION_MASK_ISA_SHA },
2638 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2639 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2640 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2641 { "-mf16c", OPTION_MASK_ISA_F16C },
2642 { "-mrtm", OPTION_MASK_ISA_RTM },
2643 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2644 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2645 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2646 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2647 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2648 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2649 };
2650
2651 /* Flag options. */
2652 static struct ix86_target_opts flag_opts[] =
2653 {
2654 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2655 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2656 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2657 { "-m80387", MASK_80387 },
2658 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2659 { "-malign-double", MASK_ALIGN_DOUBLE },
2660 { "-mcld", MASK_CLD },
2661 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2662 { "-mieee-fp", MASK_IEEE_FP },
2663 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2664 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2665 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2666 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2667 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2668 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2669 { "-mno-red-zone", MASK_NO_RED_ZONE },
2670 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2671 { "-mrecip", MASK_RECIP },
2672 { "-mrtd", MASK_RTD },
2673 { "-msseregparm", MASK_SSEREGPARM },
2674 { "-mstack-arg-probe", MASK_STACK_PROBE },
2675 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2676 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2677 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2678 { "-mvzeroupper", MASK_VZEROUPPER },
2679 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2680 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2681 { "-mprefer-avx128", MASK_PREFER_AVX128},
2682 };
2683
2684 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
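  /* (Descriptive note: the "+ 6" reserves slots for the -march=, -mtune=,
     ABI, "other isa", "other flags" and -mfpmath= entries added below, in
     addition to one slot per isa_opts/flag_opts entry.)  */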
2685
2686 char isa_other[40];
2687 char target_other[40];
2688 unsigned num = 0;
2689 unsigned i, j;
2690 char *ret;
2691 char *ptr;
2692 size_t len;
2693 size_t line_len;
2694 size_t sep_len;
2695 const char *abi;
2696
2697 memset (opts, '\0', sizeof (opts));
2698
2699 /* Add -march= option. */
2700 if (arch)
2701 {
2702 opts[num][0] = "-march=";
2703 opts[num++][1] = arch;
2704 }
2705
2706 /* Add -mtune= option. */
2707 if (tune)
2708 {
2709 opts[num][0] = "-mtune=";
2710 opts[num++][1] = tune;
2711 }
2712
2713 /* Add -m32/-m64/-mx32. */
2714 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2715 {
2716 if ((isa & OPTION_MASK_ABI_64) != 0)
2717 abi = "-m64";
2718 else
2719 abi = "-mx32";
2720 isa &= ~ (OPTION_MASK_ISA_64BIT
2721 | OPTION_MASK_ABI_64
2722 | OPTION_MASK_ABI_X32);
2723 }
2724 else
2725 abi = "-m32";
2726 opts[num++][0] = abi;
2727
2728 /* Pick out the options in isa options. */
2729 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2730 {
2731 if ((isa & isa_opts[i].mask) != 0)
2732 {
2733 opts[num++][0] = isa_opts[i].option;
2734 isa &= ~ isa_opts[i].mask;
2735 }
2736 }
2737
2738 if (isa && add_nl_p)
2739 {
2740 opts[num++][0] = isa_other;
2741 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2742 isa);
2743 }
2744
2745 /* Add flag options. */
2746 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2747 {
2748 if ((flags & flag_opts[i].mask) != 0)
2749 {
2750 opts[num++][0] = flag_opts[i].option;
2751 flags &= ~ flag_opts[i].mask;
2752 }
2753 }
2754
2755 if (flags && add_nl_p)
2756 {
2757 opts[num++][0] = target_other;
2758 sprintf (target_other, "(other flags: %#x)", flags);
2759 }
2760
2761 /* Add -fpmath= option. */
2762 if (fpmath)
2763 {
2764 opts[num][0] = "-mfpmath=";
2765 switch ((int) fpmath)
2766 {
2767 case FPMATH_387:
2768 opts[num++][1] = "387";
2769 break;
2770
2771 case FPMATH_SSE:
2772 opts[num++][1] = "sse";
2773 break;
2774
2775 case FPMATH_387 | FPMATH_SSE:
2776 opts[num++][1] = "sse+387";
2777 break;
2778
2779 default:
2780 gcc_unreachable ();
2781 }
2782 }
2783
2784 /* Any options? */
2785 if (num == 0)
2786 return NULL;
2787
2788 gcc_assert (num < ARRAY_SIZE (opts));
2789
2790 /* Size the string. */
2791 len = 0;
2792 sep_len = (add_nl_p) ? 3 : 1;
2793 for (i = 0; i < num; i++)
2794 {
2795 len += sep_len;
2796 for (j = 0; j < 2; j++)
2797 if (opts[i][j])
2798 len += strlen (opts[i][j]);
2799 }
2800
2801 /* Build the string. */
2802 ret = ptr = (char *) xmalloc (len);
2803 line_len = 0;
2804
2805 for (i = 0; i < num; i++)
2806 {
2807 size_t len2[2];
2808
2809 for (j = 0; j < 2; j++)
2810 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2811
2812 if (i != 0)
2813 {
2814 *ptr++ = ' ';
2815 line_len++;
2816
2817 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2818 {
2819 *ptr++ = '\\';
2820 *ptr++ = '\n';
2821 line_len = 0;
2822 }
2823 }
2824
2825 for (j = 0; j < 2; j++)
2826 if (opts[i][j])
2827 {
2828 memcpy (ptr, opts[i][j], len2[j]);
2829 ptr += len2[j];
2830 line_len += len2[j];
2831 }
2832 }
2833
2834 *ptr = '\0';
2835 gcc_assert (ret + len >= ptr);
2836
2837 return ret;
2838 }
2839
2840 /* Return true if profiling code should be emitted before the
2841    prologue, and false otherwise.  Note: on x86 this is the case
2842    only when -mfentry is in use (e.g. for "hotfix" patching).  */
2843 static bool
2844 ix86_profile_before_prologue (void)
2845 {
2846 return flag_fentry != 0;
2847 }
2848
2849 /* Function that is callable from the debugger to print the current
2850 options. */
2851 void ATTRIBUTE_UNUSED
2852 ix86_debug_options (void)
2853 {
2854 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2855 ix86_arch_string, ix86_tune_string,
2856 ix86_fpmath, true);
2857
2858 if (opts)
2859 {
2860 fprintf (stderr, "%s\n\n", opts);
2861 free (opts);
2862 }
2863 else
2864 fputs ("<no options>\n\n", stderr);
2865
2866 return;
2867 }
2868
2869 static const char *stringop_alg_names[] = {
2870 #define DEF_ENUM
2871 #define DEF_ALG(alg, name) #name,
2872 #include "stringop.def"
2873 #undef DEF_ENUM
2874 #undef DEF_ALG
2875 };
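/* Illustrative expansion (hypothetical entry, not quoted from stringop.def):
   a definition such as

     DEF_ALG (rep_prefix_8_byte, rep_8byte)

   would contribute the user-visible name "rep_8byte" to stringop_alg_names;
   these are the spellings accepted by -mmemcpy-strategy= and
   -mmemset-strategy= and matched against alg_name in the parser below.  */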
2876
2877 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2878    The string has the following form (or is a comma-separated list of such entries):
2879
2880 strategy_alg:max_size:[align|noalign]
2881
2882 where the full size range for the strategy is either [0, max_size] or
2883 [min_size, max_size], in which min_size is the max_size + 1 of the
2884 preceding range. The last size range must have max_size == -1.
2885
2886 Examples:
2887
2888 1.
2889 -mmemcpy-strategy=libcall:-1:noalign
2890
2891 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2892
2893
2894 2.
2895 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2896
2897 This is to tell the compiler to use the following strategy for memset
2898 1) when the expected size is between [1, 16], use rep_8byte strategy;
2899 2) when the size is between [17, 2048], use vector_loop;
2900 3) when the size is > 2048, use libcall. */
2901
2902 struct stringop_size_range
2903 {
2904 int max;
2905 stringop_alg alg;
2906 bool noalign;
2907 };
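/* A worked example (a sketch following the syntax described above, not
   compiler output): for
     -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
   ix86_parse_stringop_strategy_string records three ranges, roughly

     { .max =   16, .alg = <the alg named "rep_8byte">,   .noalign = true  }
     { .max = 2048, .alg = <the alg named "vector_loop">, .noalign = false }
     { .max =   -1, .alg = <the alg named "libcall">,     .noalign = true  }

   and then copies them over the default memset size table.  */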
2908
2909 static void
2910 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2911 {
2912 const struct stringop_algs *default_algs;
2913 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2914 char *curr_range_str, *next_range_str;
2915 int i = 0, n = 0;
2916
2917 if (is_memset)
2918 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2919 else
2920 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2921
2922 curr_range_str = strategy_str;
2923
2924 do
2925 {
2926 int maxs;
2927 char alg_name[128];
2928 char align[16];
2929 next_range_str = strchr (curr_range_str, ',');
2930 if (next_range_str)
2931 *next_range_str++ = '\0';
2932
2933 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2934 alg_name, &maxs, align))
2935 {
2936 error ("wrong arg %s to option %s", curr_range_str,
2937 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2938 return;
2939 }
2940
2941 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2942 {
2943 error ("size ranges of option %s should be increasing",
2944 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2945 return;
2946 }
2947
2948 for (i = 0; i < last_alg; i++)
2949 if (!strcmp (alg_name, stringop_alg_names[i]))
2950 break;
2951
2952 if (i == last_alg)
2953 {
2954 error ("wrong stringop strategy name %s specified for option %s",
2955 alg_name,
2956 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2957 return;
2958 }
2959
2960 input_ranges[n].max = maxs;
2961 input_ranges[n].alg = (stringop_alg) i;
2962 if (!strcmp (align, "align"))
2963 input_ranges[n].noalign = false;
2964 else if (!strcmp (align, "noalign"))
2965 input_ranges[n].noalign = true;
2966 else
2967 {
2968 error ("unknown alignment %s specified for option %s",
2969 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2970 return;
2971 }
2972 n++;
2973 curr_range_str = next_range_str;
2974 }
2975 while (curr_range_str);
2976
2977 if (input_ranges[n - 1].max != -1)
2978 {
2979 error ("the max value for the last size range should be -1"
2980 " for option %s",
2981 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2982 return;
2983 }
2984
2985 if (n > MAX_STRINGOP_ALGS)
2986 {
2987 error ("too many size ranges specified in option %s",
2988 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2989 return;
2990 }
2991
2992 /* Now override the default algs array. */
2993 for (i = 0; i < n; i++)
2994 {
2995 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2996 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2997 = input_ranges[i].alg;
2998 *const_cast<int *>(&default_algs->size[i].noalign)
2999 = input_ranges[i].noalign;
3000 }
3001 }
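/* Worked example for the memset strategy string quoted in the comment above
   ("rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign"):
   the loop fills input_ranges[] with {16, rep_8byte, noalign=true},
   {2048, vector_loop, noalign=false} and {-1, libcall, noalign=true}, and
   the final loop copies those three entries over the head of the default
   cost table, leaving any remaining entries at their built-in values.  */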
3002
3003 \f
3004 /* Parse the -mtune-ctrl= option.  When DUMP is true,
3005    print the features that are explicitly set.  */
3006
3007 static void
3008 parse_mtune_ctrl_str (bool dump)
3009 {
3010 if (!ix86_tune_ctrl_string)
3011 return;
3012
3013 char *next_feature_string = NULL;
3014 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3015 char *orig = curr_feature_string;
3016 int i;
3017 do
3018 {
3019 bool clear = false;
3020
3021 next_feature_string = strchr (curr_feature_string, ',');
3022 if (next_feature_string)
3023 *next_feature_string++ = '\0';
3024 if (*curr_feature_string == '^')
3025 {
3026 curr_feature_string++;
3027 clear = true;
3028 }
3029 for (i = 0; i < X86_TUNE_LAST; i++)
3030 {
3031 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3032 {
3033 ix86_tune_features[i] = !clear;
3034 if (dump)
3035 fprintf (stderr, "Explicitly %s feature %s\n",
3036 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3037 break;
3038 }
3039 }
3040 if (i == X86_TUNE_LAST)
3041 	error ("unknown parameter to option -mtune-ctrl: %s",
3042 clear ? curr_feature_string - 1 : curr_feature_string);
3043 curr_feature_string = next_feature_string;
3044 }
3045 while (curr_feature_string);
3046 free (orig);
3047 }
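/* Usage sketch (feature names are illustrative): -mtune-ctrl=foo,^bar turns
   the tuning feature named "foo" on and the one named "bar" off in
   ix86_tune_features[], overriding the per-processor defaults chosen by
   set_ix86_tune_features below; an unrecognized name is diagnosed by the
   error above.  */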
3048
3049 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3050 processor type. */
3051
3052 static void
3053 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3054 {
3055 unsigned int ix86_tune_mask = 1u << ix86_tune;
3056 int i;
3057
3058 for (i = 0; i < X86_TUNE_LAST; ++i)
3059 {
3060 if (ix86_tune_no_default)
3061 ix86_tune_features[i] = 0;
3062 else
3063 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3064 }
3065
3066 if (dump)
3067 {
3068 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3069 for (i = 0; i < X86_TUNE_LAST; i++)
3070 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3071 ix86_tune_features[i] ? "on" : "off");
3072 }
3073
3074 parse_mtune_ctrl_str (dump);
3075 }
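/* Worked example: if IX86_TUNE is PROCESSOR_NEHALEM, ix86_tune_mask has only
   that processor's bit set, so a feature is enabled exactly when its
   initializer in initial_ix86_tune_features includes that bit (typically via
   one of the m_* processor masks); -mtune-ctrl= may then flip individual
   features via parse_mtune_ctrl_str.  */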
3076
3077
3078 /* Override various settings based on options. If MAIN_ARGS_P, the
3079 options are from the command line, otherwise they are from
3080 attributes. */
3081
3082 static void
3083 ix86_option_override_internal (bool main_args_p,
3084 struct gcc_options *opts,
3085 struct gcc_options *opts_set)
3086 {
3087 int i;
3088 unsigned int ix86_arch_mask;
3089 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3090 const char *prefix;
3091 const char *suffix;
3092 const char *sw;
3093
3094 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3095 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3096 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3097 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3098 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3099 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3100 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3101 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3102 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3103 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3104 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3105 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3106 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3107 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3108 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3109 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3110 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3111 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3112 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3113 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3114 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3115 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3116 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3117 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3118 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3119 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3120 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3121 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3122 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3123 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3124 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3125 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3126 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3127 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3128 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3129 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3130 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3131 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3132 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3133 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3134 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3135 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3136 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3137 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3138 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3139 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3140 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3141 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3142 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3143 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3144 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3145 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3146
3147 #define PTA_CORE2 \
3148 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3149 | PTA_CX16 | PTA_FXSR)
3150 #define PTA_NEHALEM \
3151 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3152 #define PTA_WESTMERE \
3153 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3154 #define PTA_SANDYBRIDGE \
3155 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3156 #define PTA_IVYBRIDGE \
3157 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3158 #define PTA_HASWELL \
3159 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3160 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3161 #define PTA_BROADWELL \
3162 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3163 #define PTA_BONNELL \
3164 (PTA_CORE2 | PTA_MOVBE)
3165 #define PTA_SILVERMONT \
3166 (PTA_WESTMERE | PTA_MOVBE)
3167
3168 /* If this reaches 64, the flags field of struct pta below needs to be widened.  */
3169
3170 static struct pta
3171 {
3172 const char *const name; /* processor name or nickname. */
3173 const enum processor_type processor;
3174 const enum attr_cpu schedule;
3175 const unsigned HOST_WIDE_INT flags;
3176 }
3177 const processor_alias_table[] =
3178 {
3179 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3180 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3181 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3182 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3183 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3184 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3185 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3186 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3187 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3188 PTA_MMX | PTA_SSE | PTA_FXSR},
3189 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3190 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3191 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3192 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3193 PTA_MMX | PTA_SSE | PTA_FXSR},
3194 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3195 PTA_MMX | PTA_SSE | PTA_FXSR},
3196 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3197 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3198 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3199 	 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3200 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3201 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3202 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3203 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3204 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3205 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3206 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3207 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3208 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3209 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3210 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3211 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3212 PTA_SANDYBRIDGE},
3213 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3214 PTA_SANDYBRIDGE},
3215 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3216 PTA_IVYBRIDGE},
3217 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3218 PTA_IVYBRIDGE},
3219 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3220 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3221 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3222 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3223 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3224 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3225 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3226 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3227 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3228 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3229 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3230 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3231 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3232 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3233 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3234 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3235 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3236 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3237 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3238 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3239 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3240 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3241 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3242 {"x86-64", PROCESSOR_K8, CPU_K8,
3243 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3244 {"k8", PROCESSOR_K8, CPU_K8,
3245 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3246 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3247 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3248 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3249 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3250 {"opteron", PROCESSOR_K8, CPU_K8,
3251 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3252 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3253 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3254 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3255 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3256 {"athlon64", PROCESSOR_K8, CPU_K8,
3257 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3258 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3259 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3260 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3261 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3262 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3263 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3264 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3265 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3266 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3267 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3268 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3269 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3270 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3271 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3272 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3273 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3274 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3275 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3276 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3277 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3278 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3279 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3280 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3281 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3282 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3283 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3284 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3285 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3286 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3287 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3288 | PTA_XSAVEOPT | PTA_FSGSBASE},
3289 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3290 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3291 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3292 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3293 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3294 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3295 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3296 | PTA_MOVBE},
3297 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3298 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3299 	| PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3300 | PTA_FXSR | PTA_XSAVE},
3301 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3302 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3303 	| PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3304 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3305 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3306 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3307
3308 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3309 PTA_64BIT
3310 | PTA_HLE /* flags are only used for -march switch. */ },
3311 };
3312
3313 /* -mrecip options. */
3314 static struct
3315 {
3316 const char *string; /* option name */
3317 unsigned int mask; /* mask bits to set */
3318 }
3319 const recip_options[] =
3320 {
3321 { "all", RECIP_MASK_ALL },
3322 { "none", RECIP_MASK_NONE },
3323 { "div", RECIP_MASK_DIV },
3324 { "sqrt", RECIP_MASK_SQRT },
3325 { "vec-div", RECIP_MASK_VEC_DIV },
3326 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3327 };
3328
3329 int const pta_size = ARRAY_SIZE (processor_alias_table);
3330
3331 /* Set up prefix/suffix so the error messages refer to either the command
3332 line argument, or the attribute(target). */
3333 if (main_args_p)
3334 {
3335 prefix = "-m";
3336 suffix = "";
3337 sw = "switch";
3338 }
3339 else
3340 {
3341 prefix = "option(\"";
3342 suffix = "\")";
3343 sw = "attribute";
3344 }
3345
3346 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3347 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3348 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3349 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3350 #ifdef TARGET_BI_ARCH
3351 else
3352 {
3353 #if TARGET_BI_ARCH == 1
3354 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3355 is on and OPTION_MASK_ABI_X32 is off. We turn off
3356 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3357 -mx32. */
3358 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3359 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3360 #else
3361 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3362 on and OPTION_MASK_ABI_64 is off. We turn off
3363 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3364 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3365 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3366 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3367 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3368 #endif
3369 }
3370 #endif
3371
3372 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3373 {
3374 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3375 OPTION_MASK_ABI_64 for TARGET_X32. */
3376 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3377 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3378 }
3379 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3380 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3381 | OPTION_MASK_ABI_X32
3382 | OPTION_MASK_ABI_64);
3383 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3384 {
3385 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3386 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3387 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3388 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3389 }
3390
3391 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3392 SUBTARGET_OVERRIDE_OPTIONS;
3393 #endif
3394
3395 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3396 SUBSUBTARGET_OVERRIDE_OPTIONS;
3397 #endif
3398
3399   /* -fPIC is the default for 64-bit Mach-O (Darwin).  */
3400 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3401 opts->x_flag_pic = 2;
3402
3403 /* Need to check -mtune=generic first. */
3404 if (opts->x_ix86_tune_string)
3405 {
3406 /* As special support for cross compilers we read -mtune=native
3407 as -mtune=generic. With native compilers we won't see the
3408 -mtune=native, as it was changed by the driver. */
3409 if (!strcmp (opts->x_ix86_tune_string, "native"))
3410 {
3411 opts->x_ix86_tune_string = "generic";
3412 }
3413 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3414 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3415 "%stune=k8%s or %stune=generic%s instead as appropriate",
3416 prefix, suffix, prefix, suffix, prefix, suffix);
3417 }
3418 else
3419 {
3420 if (opts->x_ix86_arch_string)
3421 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3422 if (!opts->x_ix86_tune_string)
3423 {
3424 opts->x_ix86_tune_string
3425 = processor_target_table[TARGET_CPU_DEFAULT].name;
3426 ix86_tune_defaulted = 1;
3427 }
3428
3429 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3430 or defaulted. We need to use a sensible tune option. */
3431 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3432 {
3433 opts->x_ix86_tune_string = "generic";
3434 }
3435 }
3436
3437 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3438 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3439 {
3440 /* rep; movq isn't available in 32-bit code. */
3441 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3442 opts->x_ix86_stringop_alg = no_stringop;
3443 }
3444
3445 if (!opts->x_ix86_arch_string)
3446 opts->x_ix86_arch_string
3447 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3448 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3449 else
3450 ix86_arch_specified = 1;
3451
3452 if (opts_set->x_ix86_pmode)
3453 {
3454 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3455 && opts->x_ix86_pmode == PMODE_SI)
3456 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3457 && opts->x_ix86_pmode == PMODE_DI))
3458 error ("address mode %qs not supported in the %s bit mode",
3459 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3460 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3461 }
3462 else
3463 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3464 ? PMODE_DI : PMODE_SI;
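  /* Defaulting sketch: -m64 (LP64) selects PMODE_DI here, while -m32 and
     -mx32 select PMODE_SI, i.e. x32 uses 32-bit pointers by default even
     though it executes in 64-bit mode; an explicitly requested address mode
     is sanity-checked above instead.  */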
3465
3466 if (!opts_set->x_ix86_abi)
3467 opts->x_ix86_abi = DEFAULT_ABI;
3468
3469   /* For targets using the MS ABI, enable MS extensions if they have not
3470      been explicitly turned off.  For non-MS-ABI targets we turn this
3471      option off.  */
3472 if (!opts_set->x_flag_ms_extensions)
3473 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3474
3475 if (opts_set->x_ix86_cmodel)
3476 {
3477 switch (opts->x_ix86_cmodel)
3478 {
3479 case CM_SMALL:
3480 case CM_SMALL_PIC:
3481 if (opts->x_flag_pic)
3482 opts->x_ix86_cmodel = CM_SMALL_PIC;
3483 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3484 error ("code model %qs not supported in the %s bit mode",
3485 "small", "32");
3486 break;
3487
3488 case CM_MEDIUM:
3489 case CM_MEDIUM_PIC:
3490 if (opts->x_flag_pic)
3491 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3492 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3493 error ("code model %qs not supported in the %s bit mode",
3494 "medium", "32");
3495 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3496 error ("code model %qs not supported in x32 mode",
3497 "medium");
3498 break;
3499
3500 case CM_LARGE:
3501 case CM_LARGE_PIC:
3502 if (opts->x_flag_pic)
3503 opts->x_ix86_cmodel = CM_LARGE_PIC;
3504 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3505 error ("code model %qs not supported in the %s bit mode",
3506 "large", "32");
3507 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3508 error ("code model %qs not supported in x32 mode",
3509 "large");
3510 break;
3511
3512 case CM_32:
3513 if (opts->x_flag_pic)
3514 error ("code model %s does not support PIC mode", "32");
3515 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3516 error ("code model %qs not supported in the %s bit mode",
3517 "32", "64");
3518 break;
3519
3520 case CM_KERNEL:
3521 if (opts->x_flag_pic)
3522 {
3523 error ("code model %s does not support PIC mode", "kernel");
3524 opts->x_ix86_cmodel = CM_32;
3525 }
3526 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3527 error ("code model %qs not supported in the %s bit mode",
3528 "kernel", "32");
3529 break;
3530
3531 default:
3532 gcc_unreachable ();
3533 }
3534 }
3535 else
3536 {
3537 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3538 use of rip-relative addressing. This eliminates fixups that
3539 would otherwise be needed if this object is to be placed in a
3540 DLL, and is essentially just as efficient as direct addressing. */
3541 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3542 && (TARGET_RDOS || TARGET_PECOFF))
3543 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3544 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3545 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3546 else
3547 opts->x_ix86_cmodel = CM_32;
3548 }
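  /* Example of the handling above: -mcmodel=medium together with -fPIC is
     promoted to CM_MEDIUM_PIC, and medium/large/kernel models are rejected
     for 32-bit (and, where noted, x32) code.  With no explicit -mcmodel=,
     64-bit code defaults to CM_SMALL or CM_SMALL_PIC (CM_MEDIUM_PIC for the
     RDOS/PE-COFF case) and 32-bit code to CM_32.  */
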
3549 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3550 {
3551 error ("-masm=intel not supported in this configuration");
3552 opts->x_ix86_asm_dialect = ASM_ATT;
3553 }
3554 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3555 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3556 sorry ("%i-bit mode not compiled in",
3557 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3558
3559 for (i = 0; i < pta_size; i++)
3560 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3561 {
3562 ix86_schedule = processor_alias_table[i].schedule;
3563 ix86_arch = processor_alias_table[i].processor;
3564 /* Default cpu tuning to the architecture. */
3565 ix86_tune = ix86_arch;
3566
3567 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3568 && !(processor_alias_table[i].flags & PTA_64BIT))
3569 error ("CPU you selected does not support x86-64 "
3570 "instruction set");
3571
3572 if (processor_alias_table[i].flags & PTA_MMX
3573 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3574 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3575 if (processor_alias_table[i].flags & PTA_3DNOW
3576 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3577 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3578 if (processor_alias_table[i].flags & PTA_3DNOW_A
3579 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3580 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3581 if (processor_alias_table[i].flags & PTA_SSE
3582 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3583 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3584 if (processor_alias_table[i].flags & PTA_SSE2
3585 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3586 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3587 if (processor_alias_table[i].flags & PTA_SSE3
3588 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3589 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3590 if (processor_alias_table[i].flags & PTA_SSSE3
3591 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3592 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3593 if (processor_alias_table[i].flags & PTA_SSE4_1
3594 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3595 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3596 if (processor_alias_table[i].flags & PTA_SSE4_2
3597 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3598 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3599 if (processor_alias_table[i].flags & PTA_AVX
3600 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3601 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3602 if (processor_alias_table[i].flags & PTA_AVX2
3603 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3604 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3605 if (processor_alias_table[i].flags & PTA_FMA
3606 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3607 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3608 if (processor_alias_table[i].flags & PTA_SSE4A
3609 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3610 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3611 if (processor_alias_table[i].flags & PTA_FMA4
3612 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3613 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3614 if (processor_alias_table[i].flags & PTA_XOP
3615 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3616 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3617 if (processor_alias_table[i].flags & PTA_LWP
3618 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3619 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3620 if (processor_alias_table[i].flags & PTA_ABM
3621 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3622 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3623 if (processor_alias_table[i].flags & PTA_BMI
3624 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3625 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3626 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3627 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3628 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3629 if (processor_alias_table[i].flags & PTA_TBM
3630 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3631 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3632 if (processor_alias_table[i].flags & PTA_BMI2
3633 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3634 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3635 if (processor_alias_table[i].flags & PTA_CX16
3636 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3637 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3638 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3639 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3640 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3641 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3642 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3643 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3644 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3645 if (processor_alias_table[i].flags & PTA_MOVBE
3646 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3647 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3648 if (processor_alias_table[i].flags & PTA_AES
3649 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3650 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3651 if (processor_alias_table[i].flags & PTA_SHA
3652 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3653 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3654 if (processor_alias_table[i].flags & PTA_PCLMUL
3655 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3656 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3657 if (processor_alias_table[i].flags & PTA_FSGSBASE
3658 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3659 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3660 if (processor_alias_table[i].flags & PTA_RDRND
3661 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3662 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3663 if (processor_alias_table[i].flags & PTA_F16C
3664 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3665 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3666 if (processor_alias_table[i].flags & PTA_RTM
3667 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3668 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3669 if (processor_alias_table[i].flags & PTA_HLE
3670 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3671 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3672 if (processor_alias_table[i].flags & PTA_PRFCHW
3673 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3674 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3675 if (processor_alias_table[i].flags & PTA_RDSEED
3676 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3677 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3678 if (processor_alias_table[i].flags & PTA_ADX
3679 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3680 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3681 if (processor_alias_table[i].flags & PTA_FXSR
3682 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3683 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3684 if (processor_alias_table[i].flags & PTA_XSAVE
3685 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3686 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3687 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3688 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3689 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3690 if (processor_alias_table[i].flags & PTA_AVX512F
3691 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3692 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3693 if (processor_alias_table[i].flags & PTA_AVX512ER
3694 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3695 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3696 if (processor_alias_table[i].flags & PTA_AVX512PF
3697 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3698 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3699 if (processor_alias_table[i].flags & PTA_AVX512CD
3700 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3701 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3702 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3703 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3704 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3705 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3706 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3707 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3708 if (processor_alias_table[i].flags & PTA_XSAVEC
3709 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3710 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3711 if (processor_alias_table[i].flags & PTA_XSAVES
3712 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3713 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3714 if (processor_alias_table[i].flags & PTA_AVX512DQ
3715 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3716 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3717 if (processor_alias_table[i].flags & PTA_AVX512BW
3718 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3719 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3720 if (processor_alias_table[i].flags & PTA_AVX512VL
3721 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3722 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3723 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3724 x86_prefetch_sse = true;
3725
3726 break;
3727 }
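  /* Lookup sketch: with -march=core-avx2 the "core-avx2" entry above matches,
     so ix86_arch (and, by default, ix86_tune) becomes PROCESSOR_HASWELL with
     CPU_NEHALEM scheduling, and each PTA_* bit in PTA_HASWELL turns on the
     matching OPTION_MASK_ISA_* flag unless the user already set that ISA
     flag explicitly.  */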
3728
3729 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3730 error ("generic CPU can be used only for %stune=%s %s",
3731 prefix, suffix, sw);
3732 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3733 error ("intel CPU can be used only for %stune=%s %s",
3734 prefix, suffix, sw);
3735 else if (i == pta_size)
3736 error ("bad value (%s) for %sarch=%s %s",
3737 opts->x_ix86_arch_string, prefix, suffix, sw);
3738
3739 ix86_arch_mask = 1u << ix86_arch;
3740 for (i = 0; i < X86_ARCH_LAST; ++i)
3741 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3742
3743 for (i = 0; i < pta_size; i++)
3744 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3745 {
3746 ix86_schedule = processor_alias_table[i].schedule;
3747 ix86_tune = processor_alias_table[i].processor;
3748 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3749 {
3750 if (!(processor_alias_table[i].flags & PTA_64BIT))
3751 {
3752 if (ix86_tune_defaulted)
3753 {
3754 opts->x_ix86_tune_string = "x86-64";
3755 for (i = 0; i < pta_size; i++)
3756 if (! strcmp (opts->x_ix86_tune_string,
3757 processor_alias_table[i].name))
3758 break;
3759 ix86_schedule = processor_alias_table[i].schedule;
3760 ix86_tune = processor_alias_table[i].processor;
3761 }
3762 else
3763 error ("CPU you selected does not support x86-64 "
3764 "instruction set");
3765 }
3766 }
3767 /* Intel CPUs have always interpreted SSE prefetch instructions as
3768 NOPs; so, we can enable SSE prefetch instructions even when
3769 -mtune (rather than -march) points us to a processor that has them.
3770 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3771 higher processors. */
3772 if (TARGET_CMOV
3773 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3774 x86_prefetch_sse = true;
3775 break;
3776 }
3777
3778 if (ix86_tune_specified && i == pta_size)
3779 error ("bad value (%s) for %stune=%s %s",
3780 opts->x_ix86_tune_string, prefix, suffix, sw);
3781
3782 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3783
3784 #ifndef USE_IX86_FRAME_POINTER
3785 #define USE_IX86_FRAME_POINTER 0
3786 #endif
3787
3788 #ifndef USE_X86_64_FRAME_POINTER
3789 #define USE_X86_64_FRAME_POINTER 0
3790 #endif
3791
3792 /* Set the default values for switches whose default depends on TARGET_64BIT
3793 in case they weren't overwritten by command line options. */
3794 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3795 {
3796 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3797 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3798 if (opts->x_flag_asynchronous_unwind_tables
3799 && !opts_set->x_flag_unwind_tables
3800 && TARGET_64BIT_MS_ABI)
3801 opts->x_flag_unwind_tables = 1;
3802 if (opts->x_flag_asynchronous_unwind_tables == 2)
3803 opts->x_flag_unwind_tables
3804 = opts->x_flag_asynchronous_unwind_tables = 1;
3805 if (opts->x_flag_pcc_struct_return == 2)
3806 opts->x_flag_pcc_struct_return = 0;
3807 }
3808 else
3809 {
3810 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3811 opts->x_flag_omit_frame_pointer
3812 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3813 if (opts->x_flag_asynchronous_unwind_tables == 2)
3814 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3815 if (opts->x_flag_pcc_struct_return == 2)
3816 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3817 }
3818
3819 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3820 if (opts->x_optimize_size)
3821 ix86_cost = &ix86_size_cost;
3822 else
3823 ix86_cost = ix86_tune_cost;
3824
3825 /* Arrange to set up i386_stack_locals for all functions. */
3826 init_machine_status = ix86_init_machine_status;
3827
3828 /* Validate -mregparm= value. */
3829 if (opts_set->x_ix86_regparm)
3830 {
3831 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3832 warning (0, "-mregparm is ignored in 64-bit mode");
3833 if (opts->x_ix86_regparm > REGPARM_MAX)
3834 {
3835 error ("-mregparm=%d is not between 0 and %d",
3836 opts->x_ix86_regparm, REGPARM_MAX);
3837 opts->x_ix86_regparm = 0;
3838 }
3839 }
3840 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3841 opts->x_ix86_regparm = REGPARM_MAX;
3842
3843 /* Default align_* from the processor table. */
3844 if (opts->x_align_loops == 0)
3845 {
3846 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3847 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3848 }
3849 if (opts->x_align_jumps == 0)
3850 {
3851 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3852 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3853 }
3854 if (opts->x_align_functions == 0)
3855 {
3856 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3857 }
3858
3859 /* Provide default for -mbranch-cost= value. */
3860 if (!opts_set->x_ix86_branch_cost)
3861 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3862
3863 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3864 {
3865 opts->x_target_flags
3866 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3867
3868 /* Enable by default the SSE and MMX builtins. Do allow the user to
3869 explicitly disable any of these. In particular, disabling SSE and
3870 MMX for kernel code is extremely useful. */
3871 if (!ix86_arch_specified)
3872 opts->x_ix86_isa_flags
3873 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3874 | TARGET_SUBTARGET64_ISA_DEFAULT)
3875 & ~opts->x_ix86_isa_flags_explicit);
3876
3877 if (TARGET_RTD_P (opts->x_target_flags))
3878 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3879 }
3880 else
3881 {
3882 opts->x_target_flags
3883 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3884
3885 if (!ix86_arch_specified)
3886 opts->x_ix86_isa_flags
3887 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3888
3889       /* The i386 ABI does not specify a red zone.  It still makes sense to use
3890          it when the programmer takes care to keep the stack from being destroyed.  */
3891 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3892 opts->x_target_flags |= MASK_NO_RED_ZONE;
3893 }
3894
3895 /* Keep nonleaf frame pointers. */
3896 if (opts->x_flag_omit_frame_pointer)
3897 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3898 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3899 opts->x_flag_omit_frame_pointer = 1;
3900
3901 /* If we're doing fast math, we don't care about comparison order
3902 wrt NaNs. This lets us use a shorter comparison sequence. */
3903 if (opts->x_flag_finite_math_only)
3904 opts->x_target_flags &= ~MASK_IEEE_FP;
3905
3906 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3907 since the insns won't need emulation. */
3908 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3909 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3910
3911 /* Likewise, if the target doesn't have a 387, or we've specified
3912 software floating point, don't use 387 inline intrinsics. */
3913 if (!TARGET_80387_P (opts->x_target_flags))
3914 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3915
3916 /* Turn on MMX builtins for -msse. */
3917 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3918 opts->x_ix86_isa_flags
3919 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3920
3921 /* Enable SSE prefetch. */
3922 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3923 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3924 x86_prefetch_sse = true;
3925
3926 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3927 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3928 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3929 opts->x_ix86_isa_flags
3930 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3931
3932 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3933 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3934 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3935 opts->x_ix86_isa_flags
3936 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3937
3938 /* Enable lzcnt instruction for -mabm. */
3939 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3940 opts->x_ix86_isa_flags
3941 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3942
3943 /* Validate -mpreferred-stack-boundary= value or default it to
3944 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3945 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3946 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3947 {
3948 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3949 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3950 int max = (TARGET_SEH ? 4 : 12);
3951
3952 if (opts->x_ix86_preferred_stack_boundary_arg < min
3953 || opts->x_ix86_preferred_stack_boundary_arg > max)
3954 {
3955 if (min == max)
3956 error ("-mpreferred-stack-boundary is not supported "
3957 "for this target");
3958 else
3959 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3960 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3961 }
3962 else
3963 ix86_preferred_stack_boundary
3964 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3965 }
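  /* Worked example: -mpreferred-stack-boundary=4 yields
     (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. a 16-byte aligned stack, so the
     64-bit minimum of 4 (3 without SSE) corresponds to 16 (8) bytes.  */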
3966
3967 /* Set the default value for -mstackrealign. */
3968 if (opts->x_ix86_force_align_arg_pointer == -1)
3969 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3970
3971 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3972
3973 /* Validate -mincoming-stack-boundary= value or default it to
3974 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3975 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3976 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3977 {
3978 if (opts->x_ix86_incoming_stack_boundary_arg
3979 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3980 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3981 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3982 opts->x_ix86_incoming_stack_boundary_arg,
3983 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3984 else
3985 {
3986 ix86_user_incoming_stack_boundary
3987 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3988 ix86_incoming_stack_boundary
3989 = ix86_user_incoming_stack_boundary;
3990 }
3991 }
3992
3993 #ifndef NO_PROFILE_COUNTERS
3994 if (flag_nop_mcount)
3995 error ("-mnop-mcount is not compatible with this target");
3996 #endif
3997 if (flag_nop_mcount && flag_pic)
3998 error ("-mnop-mcount is not implemented for -fPIC");
3999
4000 /* Accept -msseregparm only if at least SSE support is enabled. */
4001 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4002 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4003 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4004
4005 if (opts_set->x_ix86_fpmath)
4006 {
4007 if (opts->x_ix86_fpmath & FPMATH_SSE)
4008 {
4009 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4010 {
4011 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4012 opts->x_ix86_fpmath = FPMATH_387;
4013 }
4014 else if ((opts->x_ix86_fpmath & FPMATH_387)
4015 && !TARGET_80387_P (opts->x_target_flags))
4016 {
4017 warning (0, "387 instruction set disabled, using SSE arithmetics");
4018 opts->x_ix86_fpmath = FPMATH_SSE;
4019 }
4020 }
4021 }
4022   /* For all chips supporting SSE2, -mfpmath=sse performs better than
4023      -mfpmath=387.  The latter is nevertheless the default on many targets,
4024      since the extra 80-bit precision of temporaries is considered part of
4025      the ABI.  Overwrite the default at least for -ffast-math.
4026      TODO: -mfpmath=both seems to produce equally performing code with
4027      slightly smaller binaries.  It is however not clear whether register
4028      allocation is ready for this setting.
4029      Also, -mfpmath=387 is overall considerably more compact (about 4-5%)
4030      than SSE codegen.  We may switch to 387 with -ffast-math for
4031      size-optimized functions.  */
4032 else if (fast_math_flags_set_p (&global_options)
4033 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4034 opts->x_ix86_fpmath = FPMATH_SSE;
4035 else
4036 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
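  /* Example of the defaulting above: building with -ffast-math for a target
     with SSE2 enabled and no explicit -mfpmath= ends up with FPMATH_SSE;
     otherwise the TARGET_FPMATH_DEFAULT choice is used.  */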
4037
4038 /* If the i387 is disabled, then do not return values in it. */
4039 if (!TARGET_80387_P (opts->x_target_flags))
4040 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4041
4042   /* Use an external vectorized math library when vectorizing intrinsics.  */
4043 if (opts_set->x_ix86_veclibabi_type)
4044 switch (opts->x_ix86_veclibabi_type)
4045 {
4046 case ix86_veclibabi_type_svml:
4047 ix86_veclib_handler = ix86_veclibabi_svml;
4048 break;
4049
4050 case ix86_veclibabi_type_acml:
4051 ix86_veclib_handler = ix86_veclibabi_acml;
4052 break;
4053
4054 default:
4055 gcc_unreachable ();
4056 }
4057
4058 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4059 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4060 && !opts->x_optimize_size)
4061 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4062
4063 /* If stack probes are required, the space used for large function
4064 arguments on the stack must also be probed, so enable
4065 -maccumulate-outgoing-args so this happens in the prologue. */
4066 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4067 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4068 {
4069 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4070 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4071 "for correctness", prefix, suffix);
4072 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4073 }
4074
4075 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4076 {
4077 char *p;
4078 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4079 p = strchr (internal_label_prefix, 'X');
4080 internal_label_prefix_len = p - internal_label_prefix;
4081 *p = '\0';
4082 }
4083
4084   /* When the scheduling description is not available, disable the scheduler
4085      passes so they do not slow down compilation or make x87 code slower.  */
4086 if (!TARGET_SCHEDULE)
4087 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4088
4089 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4090 ix86_tune_cost->simultaneous_prefetches,
4091 opts->x_param_values,
4092 opts_set->x_param_values);
4093 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4094 ix86_tune_cost->prefetch_block,
4095 opts->x_param_values,
4096 opts_set->x_param_values);
4097 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4098 ix86_tune_cost->l1_cache_size,
4099 opts->x_param_values,
4100 opts_set->x_param_values);
4101 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4102 ix86_tune_cost->l2_cache_size,
4103 opts->x_param_values,
4104 opts_set->x_param_values);
4105
4106   /* Enable software prefetching at -O3 for CPUs where prefetching is helpful.  */
4107 if (opts->x_flag_prefetch_loop_arrays < 0
4108 && HAVE_prefetch
4109 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4110 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4111 opts->x_flag_prefetch_loop_arrays = 1;
4112
4113 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4114      can be optimized to ap = __builtin_next_arg (0).  */
4115 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4116 targetm.expand_builtin_va_start = NULL;
4117
4118 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4119 {
4120 ix86_gen_leave = gen_leave_rex64;
4121 if (Pmode == DImode)
4122 {
4123 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4124 ix86_gen_tls_local_dynamic_base_64
4125 = gen_tls_local_dynamic_base_64_di;
4126 }
4127 else
4128 {
4129 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4130 ix86_gen_tls_local_dynamic_base_64
4131 = gen_tls_local_dynamic_base_64_si;
4132 }
4133 }
4134 else
4135 ix86_gen_leave = gen_leave;
4136
4137 if (Pmode == DImode)
4138 {
4139 ix86_gen_add3 = gen_adddi3;
4140 ix86_gen_sub3 = gen_subdi3;
4141 ix86_gen_sub3_carry = gen_subdi3_carry;
4142 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4143 ix86_gen_andsp = gen_anddi3;
4144 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4145 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4146 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4147 ix86_gen_monitor = gen_sse3_monitor_di;
4148 }
4149 else
4150 {
4151 ix86_gen_add3 = gen_addsi3;
4152 ix86_gen_sub3 = gen_subsi3;
4153 ix86_gen_sub3_carry = gen_subsi3_carry;
4154 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4155 ix86_gen_andsp = gen_andsi3;
4156 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4157 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4158 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4159 ix86_gen_monitor = gen_sse3_monitor_si;
4160 }
4161
4162 #ifdef USE_IX86_CLD
4163 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4164 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4165 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4166 #endif
4167
4168 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4169 {
4170 if (opts->x_flag_fentry > 0)
4171 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4172 "with -fpic");
4173 opts->x_flag_fentry = 0;
4174 }
4175 else if (TARGET_SEH)
4176 {
4177 if (opts->x_flag_fentry == 0)
4178 sorry ("-mno-fentry isn%'t compatible with SEH");
4179 opts->x_flag_fentry = 1;
4180 }
4181 else if (opts->x_flag_fentry < 0)
4182 {
4183 #if defined(PROFILE_BEFORE_PROLOGUE)
4184 opts->x_flag_fentry = 1;
4185 #else
4186 opts->x_flag_fentry = 0;
4187 #endif
4188 }
4189
4190   /* When not optimizing for size, enable vzeroupper optimization for
4191 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4192 AVX unaligned load/store. */
4193 if (!opts->x_optimize_size)
4194 {
4195 if (flag_expensive_optimizations
4196 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4197 opts->x_target_flags |= MASK_VZEROUPPER;
4198 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4199 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4200 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4201 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4202 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4203 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4204 /* Enable 128-bit AVX instruction generation
4205 for the auto-vectorizer. */
4206 if (TARGET_AVX128_OPTIMAL
4207 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4208 opts->x_target_flags |= MASK_PREFER_AVX128;
4209 }
4210
4211 if (opts->x_ix86_recip_name)
4212 {
4213 char *p = ASTRDUP (opts->x_ix86_recip_name);
4214 char *q;
4215 unsigned int mask, i;
4216 bool invert;
4217
4218 while ((q = strtok (p, ",")) != NULL)
4219 {
4220 p = NULL;
4221 if (*q == '!')
4222 {
4223 invert = true;
4224 q++;
4225 }
4226 else
4227 invert = false;
4228
4229 if (!strcmp (q, "default"))
4230 mask = RECIP_MASK_ALL;
4231 else
4232 {
4233 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4234 if (!strcmp (q, recip_options[i].string))
4235 {
4236 mask = recip_options[i].mask;
4237 break;
4238 }
4239
4240 if (i == ARRAY_SIZE (recip_options))
4241 {
4242 error ("unknown option for -mrecip=%s", q);
4243 invert = false;
4244 mask = RECIP_MASK_NONE;
4245 }
4246 }
4247
4248 opts->x_recip_mask_explicit |= mask;
4249 if (invert)
4250 opts->x_recip_mask &= ~mask;
4251 else
4252 opts->x_recip_mask |= mask;
4253 }
4254 }
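  /* Parsing sketch: -mrecip=all,!sqrt first ORs RECIP_MASK_ALL into
     opts->x_recip_mask and then the "!sqrt" token clears RECIP_MASK_SQRT
     again, so reciprocal approximations stay enabled everywhere except for
     scalar square root; an unknown token is diagnosed and ignored.  */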
4255
4256 if (TARGET_RECIP_P (opts->x_target_flags))
4257 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4258 else if (opts_set->x_target_flags & MASK_RECIP)
4259 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4260
4261 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4262 for 64-bit Bionic. */
4263 if (TARGET_HAS_BIONIC
4264 && !(opts_set->x_target_flags
4265 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4266 opts->x_target_flags |= (TARGET_64BIT
4267 ? MASK_LONG_DOUBLE_128
4268 : MASK_LONG_DOUBLE_64);
4269
4270 /* Only one of them can be active. */
4271 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4272 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4273
4274 /* Save the initial options in case the user does function specific
4275 options. */
4276 if (main_args_p)
4277 target_option_default_node = target_option_current_node
4278 = build_target_option_node (opts);
4279
4280 /* Handle stack protector */
4281 if (!opts_set->x_ix86_stack_protector_guard)
4282 opts->x_ix86_stack_protector_guard
4283 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4284
4285 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4286 if (opts->x_ix86_tune_memcpy_strategy)
4287 {
4288 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4289 ix86_parse_stringop_strategy_string (str, false);
4290 free (str);
4291 }
4292
4293 if (opts->x_ix86_tune_memset_strategy)
4294 {
4295 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4296 ix86_parse_stringop_strategy_string (str, true);
4297 free (str);
4298 }
4299 }
4300
4301 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4302
4303 static void
4304 ix86_option_override (void)
4305 {
4306 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4307 static struct register_pass_info insert_vzeroupper_info
4308 = { pass_insert_vzeroupper, "reload",
4309 1, PASS_POS_INSERT_AFTER
4310 };
4311
4312 ix86_option_override_internal (true, &global_options, &global_options_set);
4313
4314
4315 /* This needs to be done at start up. It's convenient to do it here. */
4316 register_pass (&insert_vzeroupper_info);
4317 }
4318
4319 /* Update register usage after having seen the compiler flags. */
4320
4321 static void
4322 ix86_conditional_register_usage (void)
4323 {
4324 int i, c_mask;
4325 unsigned int j;
4326
4327 /* The PIC register, if it exists, is fixed. */
4328 j = PIC_OFFSET_TABLE_REGNUM;
4329 if (j != INVALID_REGNUM)
4330 fixed_regs[j] = call_used_regs[j] = 1;
4331
4332 /* For 32-bit targets, squash the REX registers. */
4333 if (! TARGET_64BIT)
4334 {
4335 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4336 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4337 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4338 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4339 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4340 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4341 }
4342
4343 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4344 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4345 : TARGET_64BIT ? (1 << 2)
4346 : (1 << 1));
4347
4348 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4349
4350 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4351 {
4352 /* Set/reset conditionally defined registers from
4353 CALL_USED_REGISTERS initializer. */
4354 if (call_used_regs[i] > 1)
4355 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4356
4357 /* Calculate registers of CLOBBERED_REGS register set
4358 as call used registers from GENERAL_REGS register set. */
4359 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4360 && call_used_regs[i])
4361 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4362 }
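  /* Sketch of the encoding (the value below is illustrative; see the real
     table in i386.h): a CALL_USED_REGISTERS entry of (1 << 2) | (1 << 3)
     marks a register call-used for both 64-bit ABIs but call-saved for
     32-bit code; the c_mask test above reduces such multi-ABI entries to a
     plain 0/1 for the ABI currently being compiled for.  */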
4363
4364 /* If MMX is disabled, squash the registers. */
4365 if (! TARGET_MMX)
4366 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4367 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4368 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4369
4370 /* If SSE is disabled, squash the registers. */
4371 if (! TARGET_SSE)
4372 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4373 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4374 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4375
4376 /* If the FPU is disabled, squash the registers. */
4377 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4378 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4379 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4380 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4381
4382 /* If AVX512F is disabled, squash the registers. */
4383 if (! TARGET_AVX512F)
4384 {
4385 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4386 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4387
4388 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4389 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4390 }
4391 }
4392
4393 \f
4394 /* Save the current options */
4395
4396 static void
4397 ix86_function_specific_save (struct cl_target_option *ptr,
4398 struct gcc_options *opts)
4399 {
4400 ptr->arch = ix86_arch;
4401 ptr->schedule = ix86_schedule;
4402 ptr->tune = ix86_tune;
4403 ptr->branch_cost = ix86_branch_cost;
4404 ptr->tune_defaulted = ix86_tune_defaulted;
4405 ptr->arch_specified = ix86_arch_specified;
4406 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4407 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4408 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4409 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4410 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4411 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4412 ptr->x_ix86_abi = opts->x_ix86_abi;
4413 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4414 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4415 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4416 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4417 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4418 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4419 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4420 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4421 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4422 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4423 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4424 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4425 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4426 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4427 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4428 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4429 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4430 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4431 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4432 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4433
4434 /* The fields are char but the variables are not; make sure the
4435 values fit in the fields. */
4436 gcc_assert (ptr->arch == ix86_arch);
4437 gcc_assert (ptr->schedule == ix86_schedule);
4438 gcc_assert (ptr->tune == ix86_tune);
4439 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4440 }
4441
4442 /* Restore the current options */
4443
4444 static void
4445 ix86_function_specific_restore (struct gcc_options *opts,
4446 struct cl_target_option *ptr)
4447 {
4448 enum processor_type old_tune = ix86_tune;
4449 enum processor_type old_arch = ix86_arch;
4450 unsigned int ix86_arch_mask;
4451 int i;
4452
4453 /* We don't change -fPIC. */
4454 opts->x_flag_pic = flag_pic;
4455
4456 ix86_arch = (enum processor_type) ptr->arch;
4457 ix86_schedule = (enum attr_cpu) ptr->schedule;
4458 ix86_tune = (enum processor_type) ptr->tune;
4459 opts->x_ix86_branch_cost = ptr->branch_cost;
4460 ix86_tune_defaulted = ptr->tune_defaulted;
4461 ix86_arch_specified = ptr->arch_specified;
4462 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4463 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4464 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4465 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4466 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4467 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4468 opts->x_ix86_abi = ptr->x_ix86_abi;
4469 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4470 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4471 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4472 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4473 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4474 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4475 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4476 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4477 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4478 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4479 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4480 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4481 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4482 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4483 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4484 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4485 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4486 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4487 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4488 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4489
4490 /* Recreate the arch feature tests if the arch changed */
4491 if (old_arch != ix86_arch)
4492 {
4493 ix86_arch_mask = 1u << ix86_arch;
4494 for (i = 0; i < X86_ARCH_LAST; ++i)
4495 ix86_arch_features[i]
4496 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4497 }
4498
4499 /* Recreate the tune optimization tests */
4500 if (old_tune != ix86_tune)
4501 set_ix86_tune_features (ix86_tune, false);
4502 }
4503
4504 /* Print the current options */
4505
4506 static void
4507 ix86_function_specific_print (FILE *file, int indent,
4508 struct cl_target_option *ptr)
4509 {
4510 char *target_string
4511 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4512 NULL, NULL, ptr->x_ix86_fpmath, false);
4513
4514 gcc_assert (ptr->arch < PROCESSOR_max);
4515 fprintf (file, "%*sarch = %d (%s)\n",
4516 indent, "",
4517 ptr->arch, processor_target_table[ptr->arch].name);
4518
4519 gcc_assert (ptr->tune < PROCESSOR_max);
4520 fprintf (file, "%*stune = %d (%s)\n",
4521 indent, "",
4522 ptr->tune, processor_target_table[ptr->tune].name);
4523
4524 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4525
4526 if (target_string)
4527 {
4528 fprintf (file, "%*s%s\n", indent, "", target_string);
4529 free (target_string);
4530 }
4531 }
4532
4533 \f
4534 /* Inner function to process the attribute((target(...))), take an argument and
4535 set the current options from the argument. If we have a list, recursively go
4536 over the list. */
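/* As a rough illustration (example strings only, not taken from this file):
   an attribute such as
     __attribute__((target ("avx2,no-fma,arch=core-avx2")))
   reaches this function either as a single STRING_CST or as a TREE_LIST of
   STRING_CSTs; each comma-separated item, optionally prefixed with "no-",
   is then matched against the attrs[] table below.  */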
4537
4538 static bool
4539 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4540 struct gcc_options *opts,
4541 struct gcc_options *opts_set,
4542 struct gcc_options *enum_opts_set)
4543 {
4544 char *next_optstr;
4545 bool ret = true;
4546
4547 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4548 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4549 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4550 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4551 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4552
4553 enum ix86_opt_type
4554 {
4555 ix86_opt_unknown,
4556 ix86_opt_yes,
4557 ix86_opt_no,
4558 ix86_opt_str,
4559 ix86_opt_enum,
4560 ix86_opt_isa
4561 };
4562
4563 static const struct
4564 {
4565 const char *string;
4566 size_t len;
4567 enum ix86_opt_type type;
4568 int opt;
4569 int mask;
4570 } attrs[] = {
4571 /* isa options */
4572 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4573 IX86_ATTR_ISA ("abm", OPT_mabm),
4574 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4575 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4576 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4577 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4578 IX86_ATTR_ISA ("aes", OPT_maes),
4579 IX86_ATTR_ISA ("sha", OPT_msha),
4580 IX86_ATTR_ISA ("avx", OPT_mavx),
4581 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4582 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4583 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4584 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4585 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4586 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4587 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4588 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4589 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4590 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4591 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4592 IX86_ATTR_ISA ("sse", OPT_msse),
4593 IX86_ATTR_ISA ("sse2", OPT_msse2),
4594 IX86_ATTR_ISA ("sse3", OPT_msse3),
4595 IX86_ATTR_ISA ("sse4", OPT_msse4),
4596 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4597 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4598 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4599 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4600 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4601 IX86_ATTR_ISA ("fma", OPT_mfma),
4602 IX86_ATTR_ISA ("xop", OPT_mxop),
4603 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4604 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4605 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4606 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4607 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4608 IX86_ATTR_ISA ("hle", OPT_mhle),
4609 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4610 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4611 IX86_ATTR_ISA ("adx", OPT_madx),
4612 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4613 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4614 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4615 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4616 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4617 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4618 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4619
4620 /* enum options */
4621 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4622
4623 /* string options */
4624 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4625 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4626
4627 /* flag options */
4628 IX86_ATTR_YES ("cld",
4629 OPT_mcld,
4630 MASK_CLD),
4631
4632 IX86_ATTR_NO ("fancy-math-387",
4633 OPT_mfancy_math_387,
4634 MASK_NO_FANCY_MATH_387),
4635
4636 IX86_ATTR_YES ("ieee-fp",
4637 OPT_mieee_fp,
4638 MASK_IEEE_FP),
4639
4640 IX86_ATTR_YES ("inline-all-stringops",
4641 OPT_minline_all_stringops,
4642 MASK_INLINE_ALL_STRINGOPS),
4643
4644 IX86_ATTR_YES ("inline-stringops-dynamically",
4645 OPT_minline_stringops_dynamically,
4646 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4647
4648 IX86_ATTR_NO ("align-stringops",
4649 OPT_mno_align_stringops,
4650 MASK_NO_ALIGN_STRINGOPS),
4651
4652 IX86_ATTR_YES ("recip",
4653 OPT_mrecip,
4654 MASK_RECIP),
4655
4656 };
4657
4658 /* If this is a list, recurse to get the options. */
4659 if (TREE_CODE (args) == TREE_LIST)
4660 {
4661 bool ret = true;
4662
4663 for (; args; args = TREE_CHAIN (args))
4664 if (TREE_VALUE (args)
4665 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4666 p_strings, opts, opts_set,
4667 enum_opts_set))
4668 ret = false;
4669
4670 return ret;
4671 }
4672
4673 else if (TREE_CODE (args) != STRING_CST)
4674 {
4675 error ("attribute %<target%> argument not a string");
4676 return false;
4677 }
4678
4679 /* Handle multiple arguments separated by commas. */
4680 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4681
4682 while (next_optstr && *next_optstr != '\0')
4683 {
4684 char *p = next_optstr;
4685 char *orig_p = p;
4686 char *comma = strchr (next_optstr, ',');
4687 const char *opt_string;
4688 size_t len, opt_len;
4689 int opt;
4690 bool opt_set_p;
4691 char ch;
4692 unsigned i;
4693 enum ix86_opt_type type = ix86_opt_unknown;
4694 int mask = 0;
4695
4696 if (comma)
4697 {
4698 *comma = '\0';
4699 len = comma - next_optstr;
4700 next_optstr = comma + 1;
4701 }
4702 else
4703 {
4704 len = strlen (p);
4705 next_optstr = NULL;
4706 }
4707
4708 /* Recognize no-xxx. */
4709 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4710 {
4711 opt_set_p = false;
4712 p += 3;
4713 len -= 3;
4714 }
4715 else
4716 opt_set_p = true;
4717
4718 /* Find the option. */
4719 ch = *p;
4720 opt = N_OPTS;
4721 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4722 {
4723 type = attrs[i].type;
4724 opt_len = attrs[i].len;
4725 if (ch == attrs[i].string[0]
4726 && ((type != ix86_opt_str && type != ix86_opt_enum)
4727 ? len == opt_len
4728 : len > opt_len)
4729 && memcmp (p, attrs[i].string, opt_len) == 0)
4730 {
4731 opt = attrs[i].opt;
4732 mask = attrs[i].mask;
4733 opt_string = attrs[i].string;
4734 break;
4735 }
4736 }
4737
4738 /* Process the option. */
4739 if (opt == N_OPTS)
4740 {
4741 error ("attribute(target(\"%s\")) is unknown", orig_p);
4742 ret = false;
4743 }
4744
4745 else if (type == ix86_opt_isa)
4746 {
4747 struct cl_decoded_option decoded;
4748
4749 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4750 ix86_handle_option (opts, opts_set,
4751 &decoded, input_location);
4752 }
4753
4754 else if (type == ix86_opt_yes || type == ix86_opt_no)
4755 {
4756 if (type == ix86_opt_no)
4757 opt_set_p = !opt_set_p;
4758
4759 if (opt_set_p)
4760 opts->x_target_flags |= mask;
4761 else
4762 opts->x_target_flags &= ~mask;
4763 }
4764
4765 else if (type == ix86_opt_str)
4766 {
4767 if (p_strings[opt])
4768 {
4769 error ("option(\"%s\") was already specified", opt_string);
4770 ret = false;
4771 }
4772 else
4773 p_strings[opt] = xstrdup (p + opt_len);
4774 }
4775
4776 else if (type == ix86_opt_enum)
4777 {
4778 bool arg_ok;
4779 int value;
4780
4781 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4782 if (arg_ok)
4783 set_option (opts, enum_opts_set, opt, value,
4784 p + opt_len, DK_UNSPECIFIED, input_location,
4785 global_dc);
4786 else
4787 {
4788 error ("attribute(target(\"%s\")) is unknown", orig_p);
4789 ret = false;
4790 }
4791 }
4792
4793 else
4794 gcc_unreachable ();
4795 }
4796
4797 return ret;
4798 }
4799
4800 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4801
4802 tree
4803 ix86_valid_target_attribute_tree (tree args,
4804 struct gcc_options *opts,
4805 struct gcc_options *opts_set)
4806 {
4807 const char *orig_arch_string = opts->x_ix86_arch_string;
4808 const char *orig_tune_string = opts->x_ix86_tune_string;
4809 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4810 int orig_tune_defaulted = ix86_tune_defaulted;
4811 int orig_arch_specified = ix86_arch_specified;
4812 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4813 tree t = NULL_TREE;
4814 int i;
4815 struct cl_target_option *def
4816 = TREE_TARGET_OPTION (target_option_default_node);
4817 struct gcc_options enum_opts_set;
4818
4819 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4820
4821 /* Process each of the options on the chain. */
4822 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4823 opts_set, &enum_opts_set))
4824 return error_mark_node;
4825
4826 /* If the changed options are different from the default, rerun
4827 ix86_option_override_internal, and then save the options away.
4828 The string options are attribute options, and will be undone
4829 when we copy the save structure. */
4830 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4831 || opts->x_target_flags != def->x_target_flags
4832 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4833 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4834 || enum_opts_set.x_ix86_fpmath)
4835 {
4836 /* If we are using the default tune= or arch=, undo the string assigned,
4837 and use the default. */
4838 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4839 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4840 else if (!orig_arch_specified)
4841 opts->x_ix86_arch_string = NULL;
4842
4843 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4844 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4845 else if (orig_tune_defaulted)
4846 opts->x_ix86_tune_string = NULL;
4847
4848 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4849 if (enum_opts_set.x_ix86_fpmath)
4850 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4851 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4852 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4853 {
4854 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4855 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4856 }
4857
4858 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4859 ix86_option_override_internal (false, opts, opts_set);
4860
4861 /* Add any builtin functions for the new ISA, if any. */
4862 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4863
4864 /* Save the current options unless we are validating options for
4865 #pragma. */
4866 t = build_target_option_node (opts);
4867
4868 opts->x_ix86_arch_string = orig_arch_string;
4869 opts->x_ix86_tune_string = orig_tune_string;
4870 opts_set->x_ix86_fpmath = orig_fpmath_set;
4871
4872 /* Free up memory allocated to hold the strings */
4873 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4874 free (option_strings[i]);
4875 }
4876
4877 return t;
4878 }
4879
4880 /* Hook to validate attribute((target("string"))). */
4881
4882 static bool
4883 ix86_valid_target_attribute_p (tree fndecl,
4884 tree ARG_UNUSED (name),
4885 tree args,
4886 int ARG_UNUSED (flags))
4887 {
4888 struct gcc_options func_options;
4889 tree new_target, new_optimize;
4890 bool ret = true;
4891
4892 /* attribute((target("default"))) does nothing, beyond
4893 affecting multi-versioning. */
4894 if (TREE_VALUE (args)
4895 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4896 && TREE_CHAIN (args) == NULL_TREE
4897 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4898 return true;
4899
4900 tree old_optimize = build_optimization_node (&global_options);
4901
4902 /* Get the optimization options of the current function. */
4903 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4904
4905 if (!func_optimize)
4906 func_optimize = old_optimize;
4907
4908 /* Init func_options. */
4909 memset (&func_options, 0, sizeof (func_options));
4910 init_options_struct (&func_options, NULL);
4911 lang_hooks.init_options_struct (&func_options);
4912
4913 cl_optimization_restore (&func_options,
4914 TREE_OPTIMIZATION (func_optimize));
4915
4916 /* Initialize func_options to the default before its target options can
4917 be set. */
4918 cl_target_option_restore (&func_options,
4919 TREE_TARGET_OPTION (target_option_default_node));
4920
4921 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4922 &global_options_set);
4923
4924 new_optimize = build_optimization_node (&func_options);
4925
4926 if (new_target == error_mark_node)
4927 ret = false;
4928
4929 else if (fndecl && new_target)
4930 {
4931 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4932
4933 if (old_optimize != new_optimize)
4934 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4935 }
4936
4937 return ret;
4938 }
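/* For reference, a hedged sketch of how this hook is typically exercised by
   function multi-versioning (the declarations are illustrative only):

     __attribute__((target ("default"))) int foo (void) { return 0; }
     __attribute__((target ("avx2")))    int foo (void) { return 1; }

   The "default" version is accepted above without changing any options,
   while the "avx2" version gets a DECL_FUNCTION_SPECIFIC_TARGET node built
   from its attribute string.  */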
4939
4940 \f
4941 /* Hook to determine if one function can safely inline another. */
4942
4943 static bool
4944 ix86_can_inline_p (tree caller, tree callee)
4945 {
4946 bool ret = false;
4947 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4948 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4949
4950 /* If callee has no option attributes, then it is ok to inline. */
4951 if (!callee_tree)
4952 ret = true;
4953
4954 /* If caller has no option attributes, but callee does, then it is not ok
4955 to inline. */
4956 else if (!caller_tree)
4957 ret = false;
4958
4959 else
4960 {
4961 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4962 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4963
4964 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
4965 function can inline an SSE2 function but an SSE2 function can't inline
4966 an SSE4 function. */
4967 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4968 != callee_opts->x_ix86_isa_flags)
4969 ret = false;
4970
4971 /* See if we have the same non-isa options. */
4972 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4973 ret = false;
4974
4975 /* See if arch, tune, etc. are the same. */
4976 else if (caller_opts->arch != callee_opts->arch)
4977 ret = false;
4978
4979 else if (caller_opts->tune != callee_opts->tune)
4980 ret = false;
4981
4982 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4983 ret = false;
4984
4985 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4986 ret = false;
4987
4988 else
4989 ret = true;
4990 }
4991
4992 return ret;
4993 }
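/* A minimal sketch of the subset rule above (illustrative declarations, not
   part of this file):

     static int __attribute__((target ("sse2")))   f (int x) { return x + 1; }
     int        __attribute__((target ("sse4.2"))) g (int x) { return f (x); }

   Here g may inline f because f's ISA flags are a subset of g's, whereas
   inlining g into f would be rejected by the check above.  */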
4994
4995 \f
4996 /* Remember the last target of ix86_set_current_function. */
4997 static GTY(()) tree ix86_previous_fndecl;
4998
4999 /* Invalidate ix86_previous_fndecl cache. */
5000 void
5001 ix86_reset_previous_fndecl (void)
5002 {
5003 ix86_previous_fndecl = NULL_TREE;
5004 }
5005
5006 /* Establish appropriate back-end context for processing the function
5007 FNDECL. The argument might be NULL to indicate processing at top
5008 level, outside of any function scope. */
5009 static void
5010 ix86_set_current_function (tree fndecl)
5011 {
5012 /* Only change the context if the function changes. This hook is called
5013 several times in the course of compiling a function, and we don't want to
5014 slow things down too much or call target_reinit when it isn't safe. */
5015 if (fndecl && fndecl != ix86_previous_fndecl)
5016 {
5017 tree old_tree = (ix86_previous_fndecl
5018 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
5019 : NULL_TREE);
5020
5021 tree new_tree = (fndecl
5022 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
5023 : NULL_TREE);
5024
5025 ix86_previous_fndecl = fndecl;
5026 if (old_tree == new_tree)
5027 ;
5028
5029 else if (new_tree)
5030 {
5031 cl_target_option_restore (&global_options,
5032 TREE_TARGET_OPTION (new_tree));
5033 if (TREE_TARGET_GLOBALS (new_tree))
5034 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5035 else
5036 TREE_TARGET_GLOBALS (new_tree)
5037 = save_target_globals_default_opts ();
5038 }
5039
5040 else if (old_tree)
5041 {
5042 new_tree = target_option_current_node;
5043 cl_target_option_restore (&global_options,
5044 TREE_TARGET_OPTION (new_tree));
5045 if (TREE_TARGET_GLOBALS (new_tree))
5046 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5047 else if (new_tree == target_option_default_node)
5048 restore_target_globals (&default_target_globals);
5049 else
5050 TREE_TARGET_GLOBALS (new_tree)
5051 = save_target_globals_default_opts ();
5052 }
5053 }
5054 }
5055
5056 \f
5057 /* Return true if this goes in large data/bss. */
5058
5059 static bool
5060 ix86_in_large_data_p (tree exp)
5061 {
5062 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5063 return false;
5064
5065 /* Functions are never large data. */
5066 if (TREE_CODE (exp) == FUNCTION_DECL)
5067 return false;
5068
5069 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5070 {
5071 const char *section = DECL_SECTION_NAME (exp);
5072 if (strcmp (section, ".ldata") == 0
5073 || strcmp (section, ".lbss") == 0)
5074 return true;
5075 return false;
5076 }
5077 else
5078 {
5079 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5080
5081 /* If this is an incomplete type with size 0, then we can't put it
5082 in data because it might be too big when completed. Also,
5083 int_size_in_bytes returns -1 if the size can vary or is larger than
5084 an integer, in which case it is also safer to assume that it goes in
5085 large data. */
5086 if (size <= 0 || size > ix86_section_threshold)
5087 return true;
5088 }
5089
5090 return false;
5091 }
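/* Note (an assumption based on the option machinery, not stated here):
   ix86_section_threshold is the value of -mlarge-data-threshold, so with
   -mcmodel=medium objects larger than that threshold are treated as large
   data and end up in .ldata/.lbss, e.g.

     gcc -mcmodel=medium -mlarge-data-threshold=65536 file.c  */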
5092
5093 /* Switch to the appropriate section for output of DECL.
5094 DECL is either a `VAR_DECL' node or a constant of some sort.
5095 RELOC indicates whether forming the initial value of DECL requires
5096 link-time relocations. */
5097
5098 ATTRIBUTE_UNUSED static section *
5099 x86_64_elf_select_section (tree decl, int reloc,
5100 unsigned HOST_WIDE_INT align)
5101 {
5102 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5103 && ix86_in_large_data_p (decl))
5104 {
5105 const char *sname = NULL;
5106 unsigned int flags = SECTION_WRITE;
5107 switch (categorize_decl_for_section (decl, reloc))
5108 {
5109 case SECCAT_DATA:
5110 sname = ".ldata";
5111 break;
5112 case SECCAT_DATA_REL:
5113 sname = ".ldata.rel";
5114 break;
5115 case SECCAT_DATA_REL_LOCAL:
5116 sname = ".ldata.rel.local";
5117 break;
5118 case SECCAT_DATA_REL_RO:
5119 sname = ".ldata.rel.ro";
5120 break;
5121 case SECCAT_DATA_REL_RO_LOCAL:
5122 sname = ".ldata.rel.ro.local";
5123 break;
5124 case SECCAT_BSS:
5125 sname = ".lbss";
5126 flags |= SECTION_BSS;
5127 break;
5128 case SECCAT_RODATA:
5129 case SECCAT_RODATA_MERGE_STR:
5130 case SECCAT_RODATA_MERGE_STR_INIT:
5131 case SECCAT_RODATA_MERGE_CONST:
5132 sname = ".lrodata";
5133 flags = 0;
5134 break;
5135 case SECCAT_SRODATA:
5136 case SECCAT_SDATA:
5137 case SECCAT_SBSS:
5138 gcc_unreachable ();
5139 case SECCAT_TEXT:
5140 case SECCAT_TDATA:
5141 case SECCAT_TBSS:
5142 /* We don't split these for the medium model. Place them into
5143 default sections and hope for the best. */
5144 break;
5145 }
5146 if (sname)
5147 {
5148 /* We might get called with string constants, but get_named_section
5149 doesn't like them as they are not DECLs. Also, we need to set
5150 flags in that case. */
5151 if (!DECL_P (decl))
5152 return get_section (sname, flags, NULL);
5153 return get_named_section (decl, sname, reloc);
5154 }
5155 }
5156 return default_elf_select_section (decl, reloc, align);
5157 }
5158
5159 /* Select a set of attributes for section NAME based on the properties
5160 of DECL and whether or not RELOC indicates that DECL's initializer
5161 might contain runtime relocations. */
5162
5163 static unsigned int ATTRIBUTE_UNUSED
5164 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5165 {
5166 unsigned int flags = default_section_type_flags (decl, name, reloc);
5167
5168 if (decl == NULL_TREE
5169 && (strcmp (name, ".ldata.rel.ro") == 0
5170 || strcmp (name, ".ldata.rel.ro.local") == 0))
5171 flags |= SECTION_RELRO;
5172
5173 if (strcmp (name, ".lbss") == 0
5174 || strncmp (name, ".lbss.", 6) == 0
5175 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5176 flags |= SECTION_BSS;
5177
5178 return flags;
5179 }
5180
5181 /* Build up a unique section name, expressed as a
5182 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5183 RELOC indicates whether the initial value of EXP requires
5184 link-time relocations. */
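/* Roughly, for a large-model variable "foo" this produces section names
   such as ".ldata.foo", or ".gnu.linkonce.lr.foo" for one-only read-only
   data, following the prefix table below ("foo" is an illustrative name).  */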
5185
5186 static void ATTRIBUTE_UNUSED
5187 x86_64_elf_unique_section (tree decl, int reloc)
5188 {
5189 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5190 && ix86_in_large_data_p (decl))
5191 {
5192 const char *prefix = NULL;
5193 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5194 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5195
5196 switch (categorize_decl_for_section (decl, reloc))
5197 {
5198 case SECCAT_DATA:
5199 case SECCAT_DATA_REL:
5200 case SECCAT_DATA_REL_LOCAL:
5201 case SECCAT_DATA_REL_RO:
5202 case SECCAT_DATA_REL_RO_LOCAL:
5203 prefix = one_only ? ".ld" : ".ldata";
5204 break;
5205 case SECCAT_BSS:
5206 prefix = one_only ? ".lb" : ".lbss";
5207 break;
5208 case SECCAT_RODATA:
5209 case SECCAT_RODATA_MERGE_STR:
5210 case SECCAT_RODATA_MERGE_STR_INIT:
5211 case SECCAT_RODATA_MERGE_CONST:
5212 prefix = one_only ? ".lr" : ".lrodata";
5213 break;
5214 case SECCAT_SRODATA:
5215 case SECCAT_SDATA:
5216 case SECCAT_SBSS:
5217 gcc_unreachable ();
5218 case SECCAT_TEXT:
5219 case SECCAT_TDATA:
5220 case SECCAT_TBSS:
5221 /* We don't split these for the medium model. Place them into
5222 default sections and hope for the best. */
5223 break;
5224 }
5225 if (prefix)
5226 {
5227 const char *name, *linkonce;
5228 char *string;
5229
5230 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5231 name = targetm.strip_name_encoding (name);
5232
5233 /* If we're using one_only, then there needs to be a .gnu.linkonce
5234 prefix to the section name. */
5235 linkonce = one_only ? ".gnu.linkonce" : "";
5236
5237 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5238
5239 set_decl_section_name (decl, string);
5240 return;
5241 }
5242 }
5243 default_unique_section (decl, reloc);
5244 }
5245
5246 #ifdef COMMON_ASM_OP
5247 /* This says how to output assembler code to declare an
5248 uninitialized external linkage data object.
5249
5250 For medium model x86-64 we need to use .largecomm opcode for
5251 large objects. */
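/* For instance, a sufficiently large common symbol under -mcmodel=medium
   would be emitted roughly as

     .largecomm  big_array,1048576,32

   while smaller objects keep the ordinary COMMON_ASM_OP form ("big_array"
   and the numbers are illustrative).  */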
5252 void
5253 x86_elf_aligned_common (FILE *file,
5254 const char *name, unsigned HOST_WIDE_INT size,
5255 int align)
5256 {
5257 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5258 && size > (unsigned int)ix86_section_threshold)
5259 fputs (".largecomm\t", file);
5260 else
5261 fputs (COMMON_ASM_OP, file);
5262 assemble_name (file, name);
5263 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5264 size, align / BITS_PER_UNIT);
5265 }
5266 #endif
5267
5268 /* Utility function for targets to use in implementing
5269 ASM_OUTPUT_ALIGNED_BSS. */
5270
5271 void
5272 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5273 unsigned HOST_WIDE_INT size, int align)
5274 {
5275 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5276 && size > (unsigned int)ix86_section_threshold)
5277 switch_to_section (get_named_section (decl, ".lbss", 0));
5278 else
5279 switch_to_section (bss_section);
5280 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5281 #ifdef ASM_DECLARE_OBJECT_NAME
5282 last_assemble_variable_decl = decl;
5283 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5284 #else
5285 /* The standard thing is just to output a label for the object. */
5286 ASM_OUTPUT_LABEL (file, name);
5287 #endif /* ASM_DECLARE_OBJECT_NAME */
5288 ASM_OUTPUT_SKIP (file, size ? size : 1);
5289 }
5290 \f
5291 /* Decide whether we must probe the stack before any space allocation
5292 on this target. It's essentially TARGET_STACK_PROBE except when
5293 -fstack-check causes the stack to be already probed differently. */
5294
5295 bool
5296 ix86_target_stack_probe (void)
5297 {
5298 /* Do not probe the stack twice if static stack checking is enabled. */
5299 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5300 return false;
5301
5302 return TARGET_STACK_PROBE;
5303 }
5304 \f
5305 /* Decide whether we can make a sibling call to a function. DECL is the
5306 declaration of the function being targeted by the call and EXP is the
5307 CALL_EXPR representing the call. */
5308
5309 static bool
5310 ix86_function_ok_for_sibcall (tree decl, tree exp)
5311 {
5312 tree type, decl_or_type;
5313 rtx a, b;
5314
5315 /* If we are generating position-independent code, we cannot sibcall
5316 optimize any indirect call, or a direct call to a global function,
5317 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5318 if (!TARGET_MACHO
5319 && !TARGET_64BIT
5320 && flag_pic
5321 && (!decl || !targetm.binds_local_p (decl)))
5322 return false;
5323
5324 /* If we need to align the outgoing stack, then sibcalling would
5325 unalign the stack, which may break the called function. */
5326 if (ix86_minimum_incoming_stack_boundary (true)
5327 < PREFERRED_STACK_BOUNDARY)
5328 return false;
5329
5330 if (decl)
5331 {
5332 decl_or_type = decl;
5333 type = TREE_TYPE (decl);
5334 }
5335 else
5336 {
5337 /* We're looking at the CALL_EXPR, we need the type of the function. */
5338 type = CALL_EXPR_FN (exp); /* pointer expression */
5339 type = TREE_TYPE (type); /* pointer type */
5340 type = TREE_TYPE (type); /* function type */
5341 decl_or_type = type;
5342 }
5343
5344 /* Check that the return value locations are the same. For example,
5345 if we are returning floats on the 80387 register stack, we cannot
5346 make a sibcall from a function that doesn't return a float to a
5347 function that does or, conversely, from a function that does return
5348 a float to a function that doesn't; the necessary stack adjustment
5349 would not be executed. This is also the place we notice
5350 differences in the return value ABI. Note that it is ok for one
5351 of the functions to have void return type as long as the return
5352 value of the other is passed in a register. */
5353 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5354 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5355 cfun->decl, false);
5356 if (STACK_REG_P (a) || STACK_REG_P (b))
5357 {
5358 if (!rtx_equal_p (a, b))
5359 return false;
5360 }
5361 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5362 ;
5363 else if (!rtx_equal_p (a, b))
5364 return false;
5365
5366 if (TARGET_64BIT)
5367 {
5368 /* The SYSV ABI has more call-clobbered registers;
5369 disallow sibcalls from MS to SYSV. */
5370 if (cfun->machine->call_abi == MS_ABI
5371 && ix86_function_type_abi (type) == SYSV_ABI)
5372 return false;
5373 }
5374 else
5375 {
5376 /* If this call is indirect, we'll need to be able to use a
5377 call-clobbered register for the address of the target function.
5378 Make sure that all such registers are not used for passing
5379 parameters. Note that DLLIMPORT functions are indirect. */
5380 if (!decl
5381 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5382 {
5383 if (ix86_function_regparm (type, NULL) >= 3)
5384 {
5385 /* ??? Need to count the actual number of registers to be used,
5386 not the possible number of registers. Fix later. */
5387 return false;
5388 }
5389 }
5390 }
5391
5392 /* Otherwise okay. That also includes certain types of indirect calls. */
5393 return true;
5394 }
5395
5396 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5397 and "sseregparm" calling convention attributes;
5398 arguments as in struct attribute_spec.handler. */
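/* Typical uses of these attributes look like the following illustrative
   prototypes (not from this file):

     int __attribute__((regparm (3))) f (int a, int b, int c);
     int __attribute__((fastcall))    g (int a, int b);
     int __attribute__((stdcall))     h (int a, int b);

   The handler below mainly diagnoses incompatible combinations of them.  */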
5399
5400 static tree
5401 ix86_handle_cconv_attribute (tree *node, tree name,
5402 tree args,
5403 int,
5404 bool *no_add_attrs)
5405 {
5406 if (TREE_CODE (*node) != FUNCTION_TYPE
5407 && TREE_CODE (*node) != METHOD_TYPE
5408 && TREE_CODE (*node) != FIELD_DECL
5409 && TREE_CODE (*node) != TYPE_DECL)
5410 {
5411 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5412 name);
5413 *no_add_attrs = true;
5414 return NULL_TREE;
5415 }
5416
5417 /* Can combine regparm with all attributes but fastcall and thiscall. */
5418 if (is_attribute_p ("regparm", name))
5419 {
5420 tree cst;
5421
5422 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5423 {
5424 error ("fastcall and regparm attributes are not compatible");
5425 }
5426
5427 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5428 {
5429 error ("regparm and thiscall attributes are not compatible");
5430 }
5431
5432 cst = TREE_VALUE (args);
5433 if (TREE_CODE (cst) != INTEGER_CST)
5434 {
5435 warning (OPT_Wattributes,
5436 "%qE attribute requires an integer constant argument",
5437 name);
5438 *no_add_attrs = true;
5439 }
5440 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5441 {
5442 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5443 name, REGPARM_MAX);
5444 *no_add_attrs = true;
5445 }
5446
5447 return NULL_TREE;
5448 }
5449
5450 if (TARGET_64BIT)
5451 {
5452 /* Do not warn when emulating the MS ABI. */
5453 if ((TREE_CODE (*node) != FUNCTION_TYPE
5454 && TREE_CODE (*node) != METHOD_TYPE)
5455 || ix86_function_type_abi (*node) != MS_ABI)
5456 warning (OPT_Wattributes, "%qE attribute ignored",
5457 name);
5458 *no_add_attrs = true;
5459 return NULL_TREE;
5460 }
5461
5462 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5463 if (is_attribute_p ("fastcall", name))
5464 {
5465 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5466 {
5467 error ("fastcall and cdecl attributes are not compatible");
5468 }
5469 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5470 {
5471 error ("fastcall and stdcall attributes are not compatible");
5472 }
5473 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5474 {
5475 error ("fastcall and regparm attributes are not compatible");
5476 }
5477 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5478 {
5479 error ("fastcall and thiscall attributes are not compatible");
5480 }
5481 }
5482
5483 /* Can combine stdcall with fastcall (redundant), regparm and
5484 sseregparm. */
5485 else if (is_attribute_p ("stdcall", name))
5486 {
5487 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5488 {
5489 error ("stdcall and cdecl attributes are not compatible");
5490 }
5491 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5492 {
5493 error ("stdcall and fastcall attributes are not compatible");
5494 }
5495 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5496 {
5497 error ("stdcall and thiscall attributes are not compatible");
5498 }
5499 }
5500
5501 /* Can combine cdecl with regparm and sseregparm. */
5502 else if (is_attribute_p ("cdecl", name))
5503 {
5504 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5505 {
5506 error ("stdcall and cdecl attributes are not compatible");
5507 }
5508 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5509 {
5510 error ("fastcall and cdecl attributes are not compatible");
5511 }
5512 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5513 {
5514 error ("cdecl and thiscall attributes are not compatible");
5515 }
5516 }
5517 else if (is_attribute_p ("thiscall", name))
5518 {
5519 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5520 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5521 name);
5522 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5523 {
5524 error ("stdcall and thiscall attributes are not compatible");
5525 }
5526 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5527 {
5528 error ("fastcall and thiscall attributes are not compatible");
5529 }
5530 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5531 {
5532 error ("cdecl and thiscall attributes are not compatible");
5533 }
5534 }
5535
5536 /* Can combine sseregparm with all attributes. */
5537
5538 return NULL_TREE;
5539 }
5540
5541 /* The transactional memory builtins are implicitly regparm or fastcall
5542 depending on the ABI. Override the generic do-nothing attribute that
5543 these builtins were declared with, and replace it with one of the two
5544 attributes that we expect elsewhere. */
5545
5546 static tree
5547 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5548 int flags, bool *no_add_attrs)
5549 {
5550 tree alt;
5551
5552 /* In no case do we want to add the placeholder attribute. */
5553 *no_add_attrs = true;
5554
5555 /* The 64-bit ABI is unchanged for transactional memory. */
5556 if (TARGET_64BIT)
5557 return NULL_TREE;
5558
5559 /* ??? Is there a better way to validate 32-bit windows? We have
5560 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5561 if (CHECK_STACK_LIMIT > 0)
5562 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5563 else
5564 {
5565 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5566 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5567 }
5568 decl_attributes (node, alt, flags);
5569
5570 return NULL_TREE;
5571 }
5572
5573 /* This function determines from TYPE the calling-convention. */
5574
5575 unsigned int
5576 ix86_get_callcvt (const_tree type)
5577 {
5578 unsigned int ret = 0;
5579 bool is_stdarg;
5580 tree attrs;
5581
5582 if (TARGET_64BIT)
5583 return IX86_CALLCVT_CDECL;
5584
5585 attrs = TYPE_ATTRIBUTES (type);
5586 if (attrs != NULL_TREE)
5587 {
5588 if (lookup_attribute ("cdecl", attrs))
5589 ret |= IX86_CALLCVT_CDECL;
5590 else if (lookup_attribute ("stdcall", attrs))
5591 ret |= IX86_CALLCVT_STDCALL;
5592 else if (lookup_attribute ("fastcall", attrs))
5593 ret |= IX86_CALLCVT_FASTCALL;
5594 else if (lookup_attribute ("thiscall", attrs))
5595 ret |= IX86_CALLCVT_THISCALL;
5596
5597 /* Regparm isn't allowed for thiscall and fastcall. */
5598 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5599 {
5600 if (lookup_attribute ("regparm", attrs))
5601 ret |= IX86_CALLCVT_REGPARM;
5602 if (lookup_attribute ("sseregparm", attrs))
5603 ret |= IX86_CALLCVT_SSEREGPARM;
5604 }
5605
5606 if (IX86_BASE_CALLCVT(ret) != 0)
5607 return ret;
5608 }
5609
5610 is_stdarg = stdarg_p (type);
5611 if (TARGET_RTD && !is_stdarg)
5612 return IX86_CALLCVT_STDCALL | ret;
5613
5614 if (ret != 0
5615 || is_stdarg
5616 || TREE_CODE (type) != METHOD_TYPE
5617 || ix86_function_type_abi (type) != MS_ABI)
5618 return IX86_CALLCVT_CDECL | ret;
5619
5620 return IX86_CALLCVT_THISCALL;
5621 }
5622
5623 /* Return 0 if the attributes for two types are incompatible, 1 if they
5624 are compatible, and 2 if they are nearly compatible (which causes a
5625 warning to be generated). */
5626
5627 static int
5628 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5629 {
5630 unsigned int ccvt1, ccvt2;
5631
5632 if (TREE_CODE (type1) != FUNCTION_TYPE
5633 && TREE_CODE (type1) != METHOD_TYPE)
5634 return 1;
5635
5636 ccvt1 = ix86_get_callcvt (type1);
5637 ccvt2 = ix86_get_callcvt (type2);
5638 if (ccvt1 != ccvt2)
5639 return 0;
5640 if (ix86_function_regparm (type1, NULL)
5641 != ix86_function_regparm (type2, NULL))
5642 return 0;
5643
5644 return 1;
5645 }
5646 \f
5647 /* Return the regparm value for a function with the indicated TYPE and DECL.
5648 DECL may be NULL when calling function indirectly
5649 or considering a libcall. */
5650
5651 static int
5652 ix86_function_regparm (const_tree type, const_tree decl)
5653 {
5654 tree attr;
5655 int regparm;
5656 unsigned int ccvt;
5657
5658 if (TARGET_64BIT)
5659 return (ix86_function_type_abi (type) == SYSV_ABI
5660 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5661 ccvt = ix86_get_callcvt (type);
5662 regparm = ix86_regparm;
5663
5664 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5665 {
5666 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5667 if (attr)
5668 {
5669 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5670 return regparm;
5671 }
5672 }
5673 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5674 return 2;
5675 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5676 return 1;
5677
5678 /* Use register calling convention for local functions when possible. */
5679 if (decl
5680 && TREE_CODE (decl) == FUNCTION_DECL
5681 /* Caller and callee must agree on the calling convention, so
5682 checking just the optimize flag here would mean that with
5683 __attribute__((optimize (...))) the caller could use the regparm
5684 convention and the callee not, or vice versa. Instead look at
5685 whether the callee is optimized or not. */
5686 && opt_for_fn (decl, optimize)
5687 && !(profile_flag && !flag_fentry))
5688 {
5689 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5690 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
5691 if (i && i->local && i->can_change_signature)
5692 {
5693 int local_regparm, globals = 0, regno;
5694
5695 /* Make sure no regparm register is taken by a
5696 fixed register variable. */
5697 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5698 if (fixed_regs[local_regparm])
5699 break;
5700
5701 /* We don't want to use regparm(3) for nested functions as
5702 these use a static chain pointer in the third argument. */
5703 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5704 local_regparm = 2;
5705
5706 /* In 32-bit mode save a register for the split stack. */
5707 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5708 local_regparm = 2;
5709
5710 /* Each fixed register usage increases register pressure,
5711 so fewer registers should be used for argument passing.
5712 This functionality can be overridden by an explicit
5713 regparm value. */
5714 for (regno = AX_REG; regno <= DI_REG; regno++)
5715 if (fixed_regs[regno])
5716 globals++;
5717
5718 local_regparm
5719 = globals < local_regparm ? local_regparm - globals : 0;
5720
5721 if (local_regparm > regparm)
5722 regparm = local_regparm;
5723 }
5724 }
5725
5726 return regparm;
5727 }
5728
5729 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5730 DFmode (2) arguments in SSE registers for a function with the
5731 indicated TYPE and DECL. DECL may be NULL when calling function
5732 indirectly or considering a libcall. Otherwise return 0. */
5733
5734 static int
5735 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5736 {
5737 gcc_assert (!TARGET_64BIT);
5738
5739 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5740 by the sseregparm attribute. */
5741 if (TARGET_SSEREGPARM
5742 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5743 {
5744 if (!TARGET_SSE)
5745 {
5746 if (warn)
5747 {
5748 if (decl)
5749 error ("calling %qD with attribute sseregparm without "
5750 "SSE/SSE2 enabled", decl);
5751 else
5752 error ("calling %qT with attribute sseregparm without "
5753 "SSE/SSE2 enabled", type);
5754 }
5755 return 0;
5756 }
5757
5758 return 2;
5759 }
5760
5761 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5762 (and DFmode for SSE2) arguments in SSE registers. */
5763 if (decl && TARGET_SSE_MATH && optimize
5764 && !(profile_flag && !flag_fentry))
5765 {
5766 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5767 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5768 if (i && i->local && i->can_change_signature)
5769 return TARGET_SSE2 ? 2 : 1;
5770 }
5771
5772 return 0;
5773 }
5774
5775 /* Return true if EAX is live at the start of the function. Used by
5776 ix86_expand_prologue to determine if we need special help before
5777 calling allocate_stack_worker. */
5778
5779 static bool
5780 ix86_eax_live_at_start_p (void)
5781 {
5782 /* Cheat. Don't bother working forward from ix86_function_regparm
5783 to the function type to whether an actual argument is located in
5784 eax. Instead just look at cfg info, which is still close enough
5785 to correct at this point. This gives false positives for broken
5786 functions that might use uninitialized data that happens to be
5787 allocated in eax, but who cares? */
5788 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5789 }
5790
5791 static bool
5792 ix86_keep_aggregate_return_pointer (tree fntype)
5793 {
5794 tree attr;
5795
5796 if (!TARGET_64BIT)
5797 {
5798 attr = lookup_attribute ("callee_pop_aggregate_return",
5799 TYPE_ATTRIBUTES (fntype));
5800 if (attr)
5801 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5802
5803 /* For 32-bit MS-ABI the default is to keep aggregate
5804 return pointer. */
5805 if (ix86_function_type_abi (fntype) == MS_ABI)
5806 return true;
5807 }
5808 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5809 }
5810
5811 /* Value is the number of bytes of arguments automatically
5812 popped when returning from a subroutine call.
5813 FUNDECL is the declaration node of the function (as a tree),
5814 FUNTYPE is the data type of the function (as a tree),
5815 or for a library call it is an identifier node for the subroutine name.
5816 SIZE is the number of bytes of arguments passed on the stack.
5817
5818 On the 80386, the RTD insn may be used to pop them if the number
5819 of args is fixed, but if the number is variable then the caller
5820 must pop them all. RTD can't be used for library calls now
5821 because the library is compiled with the Unix compiler.
5822 Use of RTD is a selectable option, since it is incompatible with
5823 standard Unix calling sequences. If the option is not selected,
5824 the caller must always pop the args.
5825
5826 The attribute stdcall is equivalent to RTD on a per module basis. */
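/* Concretely (a hedged example, not from this file): a 32-bit stdcall
   function taking two ints returns with "ret $8", popping its 8 bytes of
   stack arguments, whereas a cdecl function returns with a plain "ret"
   and leaves the popping to the caller; this hook reports the 8 vs. 0.  */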
5827
5828 static int
5829 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5830 {
5831 unsigned int ccvt;
5832
5833 /* None of the 64-bit ABIs pop arguments. */
5834 if (TARGET_64BIT)
5835 return 0;
5836
5837 ccvt = ix86_get_callcvt (funtype);
5838
5839 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5840 | IX86_CALLCVT_THISCALL)) != 0
5841 && ! stdarg_p (funtype))
5842 return size;
5843
5844 /* Lose any fake structure return argument if it is passed on the stack. */
5845 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5846 && !ix86_keep_aggregate_return_pointer (funtype))
5847 {
5848 int nregs = ix86_function_regparm (funtype, fundecl);
5849 if (nregs == 0)
5850 return GET_MODE_SIZE (Pmode);
5851 }
5852
5853 return 0;
5854 }
5855
5856 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5857
5858 static bool
5859 ix86_legitimate_combined_insn (rtx_insn *insn)
5860 {
5861 /* Check operand constraints in case hard registers were propagated
5862 into insn pattern. This check prevents combine pass from
5863 generating insn patterns with invalid hard register operands.
5864 These invalid insns can eventually confuse reload to error out
5865 with a spill failure. See also PRs 46829 and 46843. */
5866 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5867 {
5868 int i;
5869
5870 extract_insn (insn);
5871 preprocess_constraints (insn);
5872
5873 int n_operands = recog_data.n_operands;
5874 int n_alternatives = recog_data.n_alternatives;
5875 for (i = 0; i < n_operands; i++)
5876 {
5877 rtx op = recog_data.operand[i];
5878 enum machine_mode mode = GET_MODE (op);
5879 const operand_alternative *op_alt;
5880 int offset = 0;
5881 bool win;
5882 int j;
5883
5884 /* For pre-AVX disallow unaligned loads/stores where the
5885 instructions don't support it. */
5886 if (!TARGET_AVX
5887 && VECTOR_MODE_P (GET_MODE (op))
5888 && misaligned_operand (op, GET_MODE (op)))
5889 {
5890 int min_align = get_attr_ssememalign (insn);
5891 if (min_align == 0)
5892 return false;
5893 }
5894
5895 /* A unary operator may be accepted by the predicate, but it
5896 is irrelevant for matching constraints. */
5897 if (UNARY_P (op))
5898 op = XEXP (op, 0);
5899
5900 if (GET_CODE (op) == SUBREG)
5901 {
5902 if (REG_P (SUBREG_REG (op))
5903 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5904 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5905 GET_MODE (SUBREG_REG (op)),
5906 SUBREG_BYTE (op),
5907 GET_MODE (op));
5908 op = SUBREG_REG (op);
5909 }
5910
5911 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5912 continue;
5913
5914 op_alt = recog_op_alt;
5915
5916 /* Operand has no constraints, anything is OK. */
5917 win = !n_alternatives;
5918
5919 alternative_mask preferred = get_preferred_alternatives (insn);
5920 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
5921 {
5922 if (!TEST_BIT (preferred, j))
5923 continue;
5924 if (op_alt[i].anything_ok
5925 || (op_alt[i].matches != -1
5926 && operands_match_p
5927 (recog_data.operand[i],
5928 recog_data.operand[op_alt[i].matches]))
5929 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
5930 {
5931 win = true;
5932 break;
5933 }
5934 }
5935
5936 if (!win)
5937 return false;
5938 }
5939 }
5940
5941 return true;
5942 }
5943 \f
5944 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5945
5946 static unsigned HOST_WIDE_INT
5947 ix86_asan_shadow_offset (void)
5948 {
5949 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5950 : HOST_WIDE_INT_C (0x7fff8000))
5951 : (HOST_WIDE_INT_1 << 29);
5952 }
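/* For orientation (the AddressSanitizer convention, not specific to this
   file): the runtime maps an address to its shadow byte roughly as

     shadow = (addr >> 3) + ix86_asan_shadow_offset ()

   so the offsets above are 0x7fff8000 for LP64 Linux-style targets,
   1 << 44 for LP64 Mach-O, and 1 << 29 for 32-bit.  */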
5953 \f
5954 /* Argument support functions. */
5955
5956 /* Return true when register may be used to pass function parameters. */
5957 bool
5958 ix86_function_arg_regno_p (int regno)
5959 {
5960 int i;
5961 const int *parm_regs;
5962
5963 if (!TARGET_64BIT)
5964 {
5965 if (TARGET_MACHO)
5966 return (regno < REGPARM_MAX
5967 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5968 else
5969 return (regno < REGPARM_MAX
5970 || (TARGET_MMX && MMX_REGNO_P (regno)
5971 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5972 || (TARGET_SSE && SSE_REGNO_P (regno)
5973 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5974 }
5975
5976 if (TARGET_SSE && SSE_REGNO_P (regno)
5977 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5978 return true;
5979
5980 /* TODO: The function should depend on current function ABI but
5981 builtins.c would need updating then. Therefore we use the
5982 default ABI. */
5983
5984 /* RAX is used as hidden argument to va_arg functions. */
5985 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5986 return true;
5987
5988 if (ix86_abi == MS_ABI)
5989 parm_regs = x86_64_ms_abi_int_parameter_registers;
5990 else
5991 parm_regs = x86_64_int_parameter_registers;
5992 for (i = 0; i < (ix86_abi == MS_ABI
5993 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5994 if (regno == parm_regs[i])
5995 return true;
5996 return false;
5997 }
5998
5999 /* Return true if we do not know how to pass TYPE solely in registers. */
6000
6001 static bool
6002 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
6003 {
6004 if (must_pass_in_stack_var_size_or_pad (mode, type))
6005 return true;
6006
6007 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6008 The layout_type routine is crafty and tries to trick us into passing
6009 currently unsupported vector types on the stack by using TImode. */
6010 return (!TARGET_64BIT && mode == TImode
6011 && type && TREE_CODE (type) != VECTOR_TYPE);
6012 }
6013
6014 /* Return the size, in bytes, of the area reserved for arguments passed
6015 in registers for the function represented by FNDECL, depending on the
6016 ABI in use. */
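/* For the 64-bit MS ABI, the 32 bytes below correspond to the register
   parameter area ("shadow space") the caller reserves for RCX/RDX/R8/R9;
   the SYSV ABI reserves no such area. (Explanatory note based on the
   documented Windows x64 calling convention rather than on this file.)  */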
6017 int
6018 ix86_reg_parm_stack_space (const_tree fndecl)
6019 {
6020 enum calling_abi call_abi = SYSV_ABI;
6021 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6022 call_abi = ix86_function_abi (fndecl);
6023 else
6024 call_abi = ix86_function_type_abi (fndecl);
6025 if (TARGET_64BIT && call_abi == MS_ABI)
6026 return 32;
6027 return 0;
6028 }
6029
6030 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
6031 call ABI used. */
6032 enum calling_abi
6033 ix86_function_type_abi (const_tree fntype)
6034 {
6035 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6036 {
6037 enum calling_abi abi = ix86_abi;
6038 if (abi == SYSV_ABI)
6039 {
6040 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6041 abi = MS_ABI;
6042 }
6043 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6044 abi = SYSV_ABI;
6045 return abi;
6046 }
6047 return ix86_abi;
6048 }
6049
6050 /* We add this as a workaround in order to use the libc_has_function
6051 hook in i386.md. */
6052 bool
6053 ix86_libc_has_function (enum function_class fn_class)
6054 {
6055 return targetm.libc_has_function (fn_class);
6056 }
6057
6058 static bool
6059 ix86_function_ms_hook_prologue (const_tree fn)
6060 {
6061 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6062 {
6063 if (decl_function_context (fn) != NULL_TREE)
6064 error_at (DECL_SOURCE_LOCATION (fn),
6065 "ms_hook_prologue is not compatible with nested function");
6066 else
6067 return true;
6068 }
6069 return false;
6070 }
6071
6072 static enum calling_abi
6073 ix86_function_abi (const_tree fndecl)
6074 {
6075 if (! fndecl)
6076 return ix86_abi;
6077 return ix86_function_type_abi (TREE_TYPE (fndecl));
6078 }
6079
6080 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
6081 call ABI used. */
6082 enum calling_abi
6083 ix86_cfun_abi (void)
6084 {
6085 if (! cfun)
6086 return ix86_abi;
6087 return cfun->machine->call_abi;
6088 }
6089
6090 /* Write the extra assembler code needed to declare a function properly. */
6091
6092 void
6093 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6094 tree decl)
6095 {
6096 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6097
6098 if (is_ms_hook)
6099 {
6100 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6101 unsigned int filler_cc = 0xcccccccc;
6102
6103 for (i = 0; i < filler_count; i += 4)
6104 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6105 }
6106
6107 #ifdef SUBTARGET_ASM_UNWIND_INIT
6108 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6109 #endif
6110
6111 ASM_OUTPUT_LABEL (asm_out_file, fname);
6112
6113 /* Output magic byte marker, if hot-patch attribute is set. */
6114 if (is_ms_hook)
6115 {
6116 if (TARGET_64BIT)
6117 {
6118 /* leaq [%rsp + 0], %rsp */
6119 asm_fprintf (asm_out_file, ASM_BYTE
6120 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6121 }
6122 else
6123 {
6124 /* movl.s %edi, %edi
6125 push %ebp
6126 movl.s %esp, %ebp */
6127 asm_fprintf (asm_out_file, ASM_BYTE
6128 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6129 }
6130 }
6131 }
6132
6133 /* regclass.c */
6134 extern void init_regs (void);
6135
6136 /* Implementation of the call ABI switching target hook. For FNDECL,
6137 the appropriate call-used register sets are selected. See also
6138 ix86_conditional_register_usage for more details. */
6139 void
6140 ix86_call_abi_override (const_tree fndecl)
6141 {
6142 if (fndecl == NULL_TREE)
6143 cfun->machine->call_abi = ix86_abi;
6144 else
6145 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6146 }
6147
6148 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
6149 expensive re-initialization of init_regs each time we switch function context
6150 since this is needed only during RTL expansion. */
6151 static void
6152 ix86_maybe_switch_abi (void)
6153 {
6154 if (TARGET_64BIT
6155 && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6156 reinit_regs ();
6157 }
6158
6159 /* Return true if a pseudo register should be created and used to hold
6160 the GOT address for PIC code. */
6161 static bool
6162 ix86_use_pseudo_pic_reg (void)
6163 {
6164 if ((TARGET_64BIT
6165 && (ix86_cmodel == CM_SMALL_PIC
6166 || TARGET_PECOFF))
6167 || !flag_pic)
6168 return false;
6169 return true;
6170 }
6171
6172 /* Create and initialize PIC register if required. */
6173 static void
6174 ix86_init_pic_reg (void)
6175 {
6176 edge entry_edge;
6177 rtx_insn *seq;
6178
6179 if (!ix86_use_pseudo_pic_reg ())
6180 return;
6181
6182 start_sequence ();
6183
6184 if (TARGET_64BIT)
6185 {
6186 if (ix86_cmodel == CM_LARGE_PIC)
6187 {
6188 rtx_code_label *label;
6189 rtx tmp_reg;
6190
6191 gcc_assert (Pmode == DImode);
6192 label = gen_label_rtx ();
6193 emit_label (label);
6194 LABEL_PRESERVE_P (label) = 1;
6195 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
6196 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6197 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6198 label));
6199 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6200 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6201 pic_offset_table_rtx, tmp_reg));
6202 }
6203 else
6204 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6205 }
6206 else
6207 {
6208 rtx insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6209 RTX_FRAME_RELATED_P (insn) = 1;
6210 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6211 }
6212
6213 seq = get_insns ();
6214 end_sequence ();
6215
6216 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6217 insert_insn_on_edge (seq, entry_edge);
6218 commit_one_edge_insertion (entry_edge);
6219 }
6220
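/* For illustration only: for the 64-bit large PIC model the sequence built
   above is roughly

	.LPIC:	leaq	.LPIC(%rip), %reg	# set_rip_rex64
		movabsq	$_GLOBAL_OFFSET_TABLE_-.LPIC, %r11
		addq	%r11, %reg

   where %reg stands for the pseudo in pic_offset_table_rtx; the small PIC
   model uses a single set_got_rex64 pattern and 32-bit code uses set_got.
   The sequence is committed on the single successor edge of the entry block,
   as the code above shows.  */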
6221 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6222 for a call to a function whose data type is FNTYPE.
6223 For a library call, FNTYPE is 0. */
6224
6225 void
6226 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6227 tree fntype, /* tree ptr for function decl */
6228 rtx libname, /* SYMBOL_REF of library name or 0 */
6229 tree fndecl,
6230 int caller)
6231 {
6232 struct cgraph_local_info *i;
6233
6234 memset (cum, 0, sizeof (*cum));
6235
6236 if (fndecl)
6237 {
6238 i = cgraph_node::local_info (fndecl);
6239 cum->call_abi = ix86_function_abi (fndecl);
6240 }
6241 else
6242 {
6243 i = NULL;
6244 cum->call_abi = ix86_function_type_abi (fntype);
6245 }
6246
6247 cum->caller = caller;
6248
6249 /* Set up the number of registers to use for passing arguments. */
6250 cum->nregs = ix86_regparm;
6251 if (TARGET_64BIT)
6252 {
6253 cum->nregs = (cum->call_abi == SYSV_ABI
6254 ? X86_64_REGPARM_MAX
6255 : X86_64_MS_REGPARM_MAX);
6256 }
6257 if (TARGET_SSE)
6258 {
6259 cum->sse_nregs = SSE_REGPARM_MAX;
6260 if (TARGET_64BIT)
6261 {
6262 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6263 ? X86_64_SSE_REGPARM_MAX
6264 : X86_64_MS_SSE_REGPARM_MAX);
6265 }
6266 }
6267 if (TARGET_MMX)
6268 cum->mmx_nregs = MMX_REGPARM_MAX;
6269 cum->warn_avx512f = true;
6270 cum->warn_avx = true;
6271 cum->warn_sse = true;
6272 cum->warn_mmx = true;
6273
6274 /* Because the type might mismatch between caller and callee, we need to
6275 use the actual type of the function for local calls.
6276 FIXME: cgraph_analyze can be told to actually record whether a function
6277 uses va_start, so for local functions maybe_vaarg could be made more
6278 aggressive, helping K&R code.
6279 FIXME: once the type system is fixed, we won't need this code anymore. */
6280 if (i && i->local && i->can_change_signature)
6281 fntype = TREE_TYPE (fndecl);
6282 cum->maybe_vaarg = (fntype
6283 ? (!prototype_p (fntype) || stdarg_p (fntype))
6284 : !libname);
6285
6286 if (!TARGET_64BIT)
6287 {
6288 /* If there are variable arguments, then we won't pass anything
6289 in registers in 32-bit mode. */
6290 if (stdarg_p (fntype))
6291 {
6292 cum->nregs = 0;
6293 cum->sse_nregs = 0;
6294 cum->mmx_nregs = 0;
6295 cum->warn_avx512f = false;
6296 cum->warn_avx = false;
6297 cum->warn_sse = false;
6298 cum->warn_mmx = false;
6299 return;
6300 }
6301
6302 /* Use ecx and edx registers if function has fastcall attribute,
6303 else look for regparm information. */
6304 if (fntype)
6305 {
6306 unsigned int ccvt = ix86_get_callcvt (fntype);
6307 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6308 {
6309 cum->nregs = 1;
6310 cum->fastcall = 1; /* Same first register as in fastcall. */
6311 }
6312 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6313 {
6314 cum->nregs = 2;
6315 cum->fastcall = 1;
6316 }
6317 else
6318 cum->nregs = ix86_function_regparm (fntype, fndecl);
6319 }
6320
6321 /* Set up the number of SSE registers used for passing SFmode
6322 and DFmode arguments. Warn for mismatching ABI. */
6323 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6324 }
6325 }
6326
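/* For illustration only: after init_cumulative_args the register budgets
   are, e.g.,
     - 64-bit SYSV_ABI: nregs == 6 (rdi, rsi, rdx, rcx, r8, r9),
       sse_nregs == 8 (xmm0-xmm7);
     - 64-bit MS_ABI:   nregs == 4, sse_nregs == 4;
     - 32-bit fastcall: nregs == 2 (ecx, edx); thiscall: nregs == 1 (ecx).
   These follow directly from the *_REGPARM_MAX limits used above.  */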
6327 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6328 But in the case of vector types, it is some vector mode.
6329
6330 When we have only some of our vector isa extensions enabled, then there
6331 are some modes for which vector_mode_supported_p is false. For these
6332 modes, the generic vector support in gcc will choose some non-vector mode
6333 in order to implement the type. By computing the natural mode, we'll
6334 select the proper ABI location for the operand and not depend on whatever
6335 the middle-end decides to do with these vector types.
6336
6337 The middle-end can't deal with vector types > 16 bytes. In this
6338 case, we return the original mode and warn about the ABI change if
6339 CUM isn't NULL.
6340
6341 If IN_RETURN is true, warn about the ABI change if the vector mode
6342 isn't available for the function return value. */
6343
6344 static enum machine_mode
6345 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6346 bool in_return)
6347 {
6348 enum machine_mode mode = TYPE_MODE (type);
6349
6350 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6351 {
6352 HOST_WIDE_INT size = int_size_in_bytes (type);
6353 if ((size == 8 || size == 16 || size == 32 || size == 64)
6354 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6355 && TYPE_VECTOR_SUBPARTS (type) > 1)
6356 {
6357 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6358
6359 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6360 mode = MIN_MODE_VECTOR_FLOAT;
6361 else
6362 mode = MIN_MODE_VECTOR_INT;
6363
6364 /* Get the mode which has this inner mode and number of units. */
6365 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6366 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6367 && GET_MODE_INNER (mode) == innermode)
6368 {
6369 if (size == 64 && !TARGET_AVX512F)
6370 {
6371 static bool warnedavx512f;
6372 static bool warnedavx512f_ret;
6373
6374 if (cum && cum->warn_avx512f && !warnedavx512f)
6375 {
6376 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6377 "without AVX512F enabled changes the ABI"))
6378 warnedavx512f = true;
6379 }
6380 else if (in_return && !warnedavx512f_ret)
6381 {
6382 if (warning (OPT_Wpsabi, "AVX512F vector return "
6383 "without AVX512F enabled changes the ABI"))
6384 warnedavx512f_ret = true;
6385 }
6386
6387 return TYPE_MODE (type);
6388 }
6389 else if (size == 32 && !TARGET_AVX)
6390 {
6391 static bool warnedavx;
6392 static bool warnedavx_ret;
6393
6394 if (cum && cum->warn_avx && !warnedavx)
6395 {
6396 if (warning (OPT_Wpsabi, "AVX vector argument "
6397 "without AVX enabled changes the ABI"))
6398 warnedavx = true;
6399 }
6400 else if (in_return && !warnedavx_ret)
6401 {
6402 if (warning (OPT_Wpsabi, "AVX vector return "
6403 "without AVX enabled changes the ABI"))
6404 warnedavx_ret = true;
6405 }
6406
6407 return TYPE_MODE (type);
6408 }
6409 else if (((size == 8 && TARGET_64BIT) || size == 16)
6410 && !TARGET_SSE)
6411 {
6412 static bool warnedsse;
6413 static bool warnedsse_ret;
6414
6415 if (cum && cum->warn_sse && !warnedsse)
6416 {
6417 if (warning (OPT_Wpsabi, "SSE vector argument "
6418 "without SSE enabled changes the ABI"))
6419 warnedsse = true;
6420 }
6421 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6422 {
6423 if (warning (OPT_Wpsabi, "SSE vector return "
6424 "without SSE enabled changes the ABI"))
6425 warnedsse_ret = true;
6426 }
6427 }
6428 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6429 {
6430 static bool warnedmmx;
6431 static bool warnedmmx_ret;
6432
6433 if (cum && cum->warn_mmx && !warnedmmx)
6434 {
6435 if (warning (OPT_Wpsabi, "MMX vector argument "
6436 "without MMX enabled changes the ABI"))
6437 warnedmmx = true;
6438 }
6439 else if (in_return && !warnedmmx_ret)
6440 {
6441 if (warning (OPT_Wpsabi, "MMX vector return "
6442 "without MMX enabled changes the ABI"))
6443 warnedmmx_ret = true;
6444 }
6445 }
6446 return mode;
6447 }
6448
6449 gcc_unreachable ();
6450 }
6451 }
6452
6453 return mode;
6454 }
6455
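/* For illustration only: given

	typedef float v8sf __attribute__ ((vector_size (32)));

   type_natural_mode returns V8SFmode when AVX is enabled, so a v8sf
   argument is assigned a single YMM register.  Without -mavx the function
   emits the -Wpsabi warning seen above and falls back to TYPE_MODE, i.e.
   the pre-AVX ABI.  */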
6456 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6457 this may not agree with the mode that the type system has chosen for the
6458 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6459 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6460
6461 static rtx
6462 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6463 unsigned int regno)
6464 {
6465 rtx tmp;
6466
6467 if (orig_mode != BLKmode)
6468 tmp = gen_rtx_REG (orig_mode, regno);
6469 else
6470 {
6471 tmp = gen_rtx_REG (mode, regno);
6472 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6473 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6474 }
6475
6476 return tmp;
6477 }
6478
6479 /* x86-64 register passing implementation. See the x86-64 ABI for details.
6480 The goal of this code is to classify each eightbyte of an incoming argument
6481 by register class and assign registers accordingly. */
6482
6483 /* Return the union class of CLASS1 and CLASS2.
6484 See the x86-64 PS ABI for details. */
6485
6486 static enum x86_64_reg_class
6487 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6488 {
6489 /* Rule #1: If both classes are equal, this is the resulting class. */
6490 if (class1 == class2)
6491 return class1;
6492
6493 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6494 the other class. */
6495 if (class1 == X86_64_NO_CLASS)
6496 return class2;
6497 if (class2 == X86_64_NO_CLASS)
6498 return class1;
6499
6500 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6501 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6502 return X86_64_MEMORY_CLASS;
6503
6504 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6505 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6506 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6507 return X86_64_INTEGERSI_CLASS;
6508 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6509 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6510 return X86_64_INTEGER_CLASS;
6511
6512 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6513 MEMORY is used. */
6514 if (class1 == X86_64_X87_CLASS
6515 || class1 == X86_64_X87UP_CLASS
6516 || class1 == X86_64_COMPLEX_X87_CLASS
6517 || class2 == X86_64_X87_CLASS
6518 || class2 == X86_64_X87UP_CLASS
6519 || class2 == X86_64_COMPLEX_X87_CLASS)
6520 return X86_64_MEMORY_CLASS;
6521
6522 /* Rule #6: Otherwise class SSE is used. */
6523 return X86_64_SSE_CLASS;
6524 }
6525
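/* For illustration only: for

	union u { long l; double d; };

   both members land in the same eightbyte, classified INTEGER and SSE
   respectively; rule #4 merges them to INTEGER, so the union is passed in
   a general-purpose register.  */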
6526 /* Classify the argument of type TYPE and mode MODE.
6527 CLASSES will be filled by the register class used to pass each word
6528 of the operand. The number of words is returned. In case the parameter
6529 should be passed in memory, 0 is returned. As a special case for zero
6530 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6531
6532 BIT_OFFSET is used internally for handling records and specifies the
6533 offset in bits, modulo 512, to avoid overflow cases.
6534
6535 See the x86-64 PS ABI for details.
6536 */
6537
6538 static int
6539 classify_argument (enum machine_mode mode, const_tree type,
6540 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6541 {
6542 HOST_WIDE_INT bytes =
6543 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6544 int words
6545 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6546
6547 /* Variable sized entities are always passed/returned in memory. */
6548 if (bytes < 0)
6549 return 0;
6550
6551 if (mode != VOIDmode
6552 && targetm.calls.must_pass_in_stack (mode, type))
6553 return 0;
6554
6555 if (type && AGGREGATE_TYPE_P (type))
6556 {
6557 int i;
6558 tree field;
6559 enum x86_64_reg_class subclasses[MAX_CLASSES];
6560
6561 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6562 if (bytes > 64)
6563 return 0;
6564
6565 for (i = 0; i < words; i++)
6566 classes[i] = X86_64_NO_CLASS;
6567
6568 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6569 signal the memory class, so handle this as a special case. */
6570 if (!words)
6571 {
6572 classes[0] = X86_64_NO_CLASS;
6573 return 1;
6574 }
6575
6576 /* Classify each field of record and merge classes. */
6577 switch (TREE_CODE (type))
6578 {
6579 case RECORD_TYPE:
6580 /* And now merge the fields of the structure. */
6581 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6582 {
6583 if (TREE_CODE (field) == FIELD_DECL)
6584 {
6585 int num;
6586
6587 if (TREE_TYPE (field) == error_mark_node)
6588 continue;
6589
6590 /* Bitfields are always classified as integer. Handle them
6591 early, since later code would consider them to be
6592 misaligned integers. */
6593 if (DECL_BIT_FIELD (field))
6594 {
6595 for (i = (int_bit_position (field)
6596 + (bit_offset % 64)) / 8 / 8;
6597 i < ((int_bit_position (field) + (bit_offset % 64))
6598 + tree_to_shwi (DECL_SIZE (field))
6599 + 63) / 8 / 8; i++)
6600 classes[i] =
6601 merge_classes (X86_64_INTEGER_CLASS,
6602 classes[i]);
6603 }
6604 else
6605 {
6606 int pos;
6607
6608 type = TREE_TYPE (field);
6609
6610 /* Flexible array member is ignored. */
6611 if (TYPE_MODE (type) == BLKmode
6612 && TREE_CODE (type) == ARRAY_TYPE
6613 && TYPE_SIZE (type) == NULL_TREE
6614 && TYPE_DOMAIN (type) != NULL_TREE
6615 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6616 == NULL_TREE))
6617 {
6618 static bool warned;
6619
6620 if (!warned && warn_psabi)
6621 {
6622 warned = true;
6623 inform (input_location,
6624 "the ABI of passing struct with"
6625 " a flexible array member has"
6626 " changed in GCC 4.4");
6627 }
6628 continue;
6629 }
6630 num = classify_argument (TYPE_MODE (type), type,
6631 subclasses,
6632 (int_bit_position (field)
6633 + bit_offset) % 512);
6634 if (!num)
6635 return 0;
6636 pos = (int_bit_position (field)
6637 + (bit_offset % 64)) / 8 / 8;
6638 for (i = 0; i < num && (i + pos) < words; i++)
6639 classes[i + pos] =
6640 merge_classes (subclasses[i], classes[i + pos]);
6641 }
6642 }
6643 }
6644 break;
6645
6646 case ARRAY_TYPE:
6647 /* Arrays are handled as small records. */
6648 {
6649 int num;
6650 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6651 TREE_TYPE (type), subclasses, bit_offset);
6652 if (!num)
6653 return 0;
6654
6655 /* The partial classes are now full classes. */
6656 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6657 subclasses[0] = X86_64_SSE_CLASS;
6658 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6659 && !((bit_offset % 64) == 0 && bytes == 4))
6660 subclasses[0] = X86_64_INTEGER_CLASS;
6661
6662 for (i = 0; i < words; i++)
6663 classes[i] = subclasses[i % num];
6664
6665 break;
6666 }
6667 case UNION_TYPE:
6668 case QUAL_UNION_TYPE:
6669 /* Unions are similar to RECORD_TYPE, but the offset is
6670 always 0. */
6671 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6672 {
6673 if (TREE_CODE (field) == FIELD_DECL)
6674 {
6675 int num;
6676
6677 if (TREE_TYPE (field) == error_mark_node)
6678 continue;
6679
6680 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6681 TREE_TYPE (field), subclasses,
6682 bit_offset);
6683 if (!num)
6684 return 0;
6685 for (i = 0; i < num && i < words; i++)
6686 classes[i] = merge_classes (subclasses[i], classes[i]);
6687 }
6688 }
6689 break;
6690
6691 default:
6692 gcc_unreachable ();
6693 }
6694
6695 if (words > 2)
6696 {
6697 /* When the size is > 16 bytes, if the first class isn't
6698 X86_64_SSE_CLASS or any of the remaining classes isn't
6699 X86_64_SSEUP_CLASS, everything should be passed in
6700 memory. */
6701 if (classes[0] != X86_64_SSE_CLASS)
6702 return 0;
6703
6704 for (i = 1; i < words; i++)
6705 if (classes[i] != X86_64_SSEUP_CLASS)
6706 return 0;
6707 }
6708
6709 /* Final merger cleanup. */
6710 for (i = 0; i < words; i++)
6711 {
6712 /* If one class is MEMORY, everything should be passed in
6713 memory. */
6714 if (classes[i] == X86_64_MEMORY_CLASS)
6715 return 0;
6716
6717 /* X86_64_SSEUP_CLASS should always be preceded by
6718 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6719 if (classes[i] == X86_64_SSEUP_CLASS
6720 && classes[i - 1] != X86_64_SSE_CLASS
6721 && classes[i - 1] != X86_64_SSEUP_CLASS)
6722 {
6723 /* The first one should never be X86_64_SSEUP_CLASS. */
6724 gcc_assert (i != 0);
6725 classes[i] = X86_64_SSE_CLASS;
6726 }
6727
6728 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6729 everything should be passed in memory. */
6730 if (classes[i] == X86_64_X87UP_CLASS
6731 && (classes[i - 1] != X86_64_X87_CLASS))
6732 {
6733 static bool warned;
6734
6735 /* The first one should never be X86_64_X87UP_CLASS. */
6736 gcc_assert (i != 0);
6737 if (!warned && warn_psabi)
6738 {
6739 warned = true;
6740 inform (input_location,
6741 "the ABI of passing union with long double"
6742 " has changed in GCC 4.4");
6743 }
6744 return 0;
6745 }
6746 }
6747 return words;
6748 }
6749
6750 /* Compute the alignment needed. We align all types to their natural
6751 boundaries, with the exception of XFmode, which is treated here as 128-bit aligned. */
6752 if (mode != VOIDmode && mode != BLKmode)
6753 {
6754 int mode_alignment = GET_MODE_BITSIZE (mode);
6755
6756 if (mode == XFmode)
6757 mode_alignment = 128;
6758 else if (mode == XCmode)
6759 mode_alignment = 256;
6760 if (COMPLEX_MODE_P (mode))
6761 mode_alignment /= 2;
6762 /* Misaligned fields are always returned in memory. */
6763 if (bit_offset % mode_alignment)
6764 return 0;
6765 }
6766
6767 /* For V1xx modes, just use the base mode. */
6768 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6769 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6770 mode = GET_MODE_INNER (mode);
6771
6772 /* Classification of atomic types. */
6773 switch (mode)
6774 {
6775 case SDmode:
6776 case DDmode:
6777 classes[0] = X86_64_SSE_CLASS;
6778 return 1;
6779 case TDmode:
6780 classes[0] = X86_64_SSE_CLASS;
6781 classes[1] = X86_64_SSEUP_CLASS;
6782 return 2;
6783 case DImode:
6784 case SImode:
6785 case HImode:
6786 case QImode:
6787 case CSImode:
6788 case CHImode:
6789 case CQImode:
6790 {
6791 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6792
6793 /* Analyze last 128 bits only. */
6794 size = (size - 1) & 0x7f;
6795
6796 if (size < 32)
6797 {
6798 classes[0] = X86_64_INTEGERSI_CLASS;
6799 return 1;
6800 }
6801 else if (size < 64)
6802 {
6803 classes[0] = X86_64_INTEGER_CLASS;
6804 return 1;
6805 }
6806 else if (size < 64+32)
6807 {
6808 classes[0] = X86_64_INTEGER_CLASS;
6809 classes[1] = X86_64_INTEGERSI_CLASS;
6810 return 2;
6811 }
6812 else if (size < 64+64)
6813 {
6814 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6815 return 2;
6816 }
6817 else
6818 gcc_unreachable ();
6819 }
6820 case CDImode:
6821 case TImode:
6822 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6823 return 2;
6824 case COImode:
6825 case OImode:
6826 /* OImode shouldn't be used directly. */
6827 gcc_unreachable ();
6828 case CTImode:
6829 return 0;
6830 case SFmode:
6831 if (!(bit_offset % 64))
6832 classes[0] = X86_64_SSESF_CLASS;
6833 else
6834 classes[0] = X86_64_SSE_CLASS;
6835 return 1;
6836 case DFmode:
6837 classes[0] = X86_64_SSEDF_CLASS;
6838 return 1;
6839 case XFmode:
6840 classes[0] = X86_64_X87_CLASS;
6841 classes[1] = X86_64_X87UP_CLASS;
6842 return 2;
6843 case TFmode:
6844 classes[0] = X86_64_SSE_CLASS;
6845 classes[1] = X86_64_SSEUP_CLASS;
6846 return 2;
6847 case SCmode:
6848 classes[0] = X86_64_SSE_CLASS;
6849 if (!(bit_offset % 64))
6850 return 1;
6851 else
6852 {
6853 static bool warned;
6854
6855 if (!warned && warn_psabi)
6856 {
6857 warned = true;
6858 inform (input_location,
6859 "the ABI of passing structure with complex float"
6860 " member has changed in GCC 4.4");
6861 }
6862 classes[1] = X86_64_SSESF_CLASS;
6863 return 2;
6864 }
6865 case DCmode:
6866 classes[0] = X86_64_SSEDF_CLASS;
6867 classes[1] = X86_64_SSEDF_CLASS;
6868 return 2;
6869 case XCmode:
6870 classes[0] = X86_64_COMPLEX_X87_CLASS;
6871 return 1;
6872 case TCmode:
6873 /* This mode is larger than 16 bytes. */
6874 return 0;
6875 case V8SFmode:
6876 case V8SImode:
6877 case V32QImode:
6878 case V16HImode:
6879 case V4DFmode:
6880 case V4DImode:
6881 classes[0] = X86_64_SSE_CLASS;
6882 classes[1] = X86_64_SSEUP_CLASS;
6883 classes[2] = X86_64_SSEUP_CLASS;
6884 classes[3] = X86_64_SSEUP_CLASS;
6885 return 4;
6886 case V8DFmode:
6887 case V16SFmode:
6888 case V8DImode:
6889 case V16SImode:
6890 case V32HImode:
6891 case V64QImode:
6892 classes[0] = X86_64_SSE_CLASS;
6893 classes[1] = X86_64_SSEUP_CLASS;
6894 classes[2] = X86_64_SSEUP_CLASS;
6895 classes[3] = X86_64_SSEUP_CLASS;
6896 classes[4] = X86_64_SSEUP_CLASS;
6897 classes[5] = X86_64_SSEUP_CLASS;
6898 classes[6] = X86_64_SSEUP_CLASS;
6899 classes[7] = X86_64_SSEUP_CLASS;
6900 return 8;
6901 case V4SFmode:
6902 case V4SImode:
6903 case V16QImode:
6904 case V8HImode:
6905 case V2DFmode:
6906 case V2DImode:
6907 classes[0] = X86_64_SSE_CLASS;
6908 classes[1] = X86_64_SSEUP_CLASS;
6909 return 2;
6910 case V1TImode:
6911 case V1DImode:
6912 case V2SFmode:
6913 case V2SImode:
6914 case V4HImode:
6915 case V8QImode:
6916 classes[0] = X86_64_SSE_CLASS;
6917 return 1;
6918 case BLKmode:
6919 case VOIDmode:
6920 return 0;
6921 default:
6922 gcc_assert (VECTOR_MODE_P (mode));
6923
6924 if (bytes > 16)
6925 return 0;
6926
6927 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6928
6929 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6930 classes[0] = X86_64_INTEGERSI_CLASS;
6931 else
6932 classes[0] = X86_64_INTEGER_CLASS;
6933 classes[1] = X86_64_INTEGER_CLASS;
6934 return 1 + (bytes > 8);
6935 }
6936 }
6937
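/* For illustration only, some classification results:
     - struct { double d; long l; }   -> classes { SSEDF, INTEGER }, i.e.
       one SSE register plus one general-purpose register;
     - struct { long a, b, c; }       -> 24 bytes, not an all-SSE layout,
       so classify_argument returns 0 and it is passed in memory;
     - a 32-byte vector (e.g. __m256) -> { SSE, SSEUP, SSEUP, SSEUP },
       a single YMM register when AVX is enabled.  */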
6938 /* Examine the argument and set the number of registers required in each
6939 class. Return true iff the parameter should be passed in memory. */
6940
6941 static bool
6942 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6943 int *int_nregs, int *sse_nregs)
6944 {
6945 enum x86_64_reg_class regclass[MAX_CLASSES];
6946 int n = classify_argument (mode, type, regclass, 0);
6947
6948 *int_nregs = 0;
6949 *sse_nregs = 0;
6950
6951 if (!n)
6952 return true;
6953 for (n--; n >= 0; n--)
6954 switch (regclass[n])
6955 {
6956 case X86_64_INTEGER_CLASS:
6957 case X86_64_INTEGERSI_CLASS:
6958 (*int_nregs)++;
6959 break;
6960 case X86_64_SSE_CLASS:
6961 case X86_64_SSESF_CLASS:
6962 case X86_64_SSEDF_CLASS:
6963 (*sse_nregs)++;
6964 break;
6965 case X86_64_NO_CLASS:
6966 case X86_64_SSEUP_CLASS:
6967 break;
6968 case X86_64_X87_CLASS:
6969 case X86_64_X87UP_CLASS:
6970 case X86_64_COMPLEX_X87_CLASS:
6971 if (!in_return)
6972 return true;
6973 break;
6974 case X86_64_MEMORY_CLASS:
6975 gcc_unreachable ();
6976 }
6977
6978 return false;
6979 }
6980
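/* For illustration only: for struct { double d; long l; } examine_argument
   sets *sse_nregs = 1 and *int_nregs = 1 and returns false (register
   passing); for XFmode (X87 + X87UP) it returns true for arguments, but
   not when IN_RETURN, since long double values are returned in %st(0).  */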
6981 /* Construct container for the argument used by GCC interface. See
6982 FUNCTION_ARG for the detailed description. */
6983
6984 static rtx
6985 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6986 const_tree type, int in_return, int nintregs, int nsseregs,
6987 const int *intreg, int sse_regno)
6988 {
6989 /* The following variables hold the static issued_error state. */
6990 static bool issued_sse_arg_error;
6991 static bool issued_sse_ret_error;
6992 static bool issued_x87_ret_error;
6993
6994 enum machine_mode tmpmode;
6995 int bytes =
6996 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6997 enum x86_64_reg_class regclass[MAX_CLASSES];
6998 int n;
6999 int i;
7000 int nexps = 0;
7001 int needed_sseregs, needed_intregs;
7002 rtx exp[MAX_CLASSES];
7003 rtx ret;
7004
7005 n = classify_argument (mode, type, regclass, 0);
7006 if (!n)
7007 return NULL;
7008 if (examine_argument (mode, type, in_return, &needed_intregs,
7009 &needed_sseregs))
7010 return NULL;
7011 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7012 return NULL;
7013
7014 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7015 some less clueful developer tries to use floating-point anyway. */
7016 if (needed_sseregs && !TARGET_SSE)
7017 {
7018 if (in_return)
7019 {
7020 if (!issued_sse_ret_error)
7021 {
7022 error ("SSE register return with SSE disabled");
7023 issued_sse_ret_error = true;
7024 }
7025 }
7026 else if (!issued_sse_arg_error)
7027 {
7028 error ("SSE register argument with SSE disabled");
7029 issued_sse_arg_error = true;
7030 }
7031 return NULL;
7032 }
7033
7034 /* Likewise, error if the ABI requires us to return values in the
7035 x87 registers and the user specified -mno-80387. */
7036 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7037 for (i = 0; i < n; i++)
7038 if (regclass[i] == X86_64_X87_CLASS
7039 || regclass[i] == X86_64_X87UP_CLASS
7040 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7041 {
7042 if (!issued_x87_ret_error)
7043 {
7044 error ("x87 register return with x87 disabled");
7045 issued_x87_ret_error = true;
7046 }
7047 return NULL;
7048 }
7049
7050 /* First construct simple cases. Avoid SCmode, since we want to use
7051 a single register to pass this type. */
7052 if (n == 1 && mode != SCmode)
7053 switch (regclass[0])
7054 {
7055 case X86_64_INTEGER_CLASS:
7056 case X86_64_INTEGERSI_CLASS:
7057 return gen_rtx_REG (mode, intreg[0]);
7058 case X86_64_SSE_CLASS:
7059 case X86_64_SSESF_CLASS:
7060 case X86_64_SSEDF_CLASS:
7061 if (mode != BLKmode)
7062 return gen_reg_or_parallel (mode, orig_mode,
7063 SSE_REGNO (sse_regno));
7064 break;
7065 case X86_64_X87_CLASS:
7066 case X86_64_COMPLEX_X87_CLASS:
7067 return gen_rtx_REG (mode, FIRST_STACK_REG);
7068 case X86_64_NO_CLASS:
7069 /* Zero sized array, struct or class. */
7070 return NULL;
7071 default:
7072 gcc_unreachable ();
7073 }
7074 if (n == 2
7075 && regclass[0] == X86_64_SSE_CLASS
7076 && regclass[1] == X86_64_SSEUP_CLASS
7077 && mode != BLKmode)
7078 return gen_reg_or_parallel (mode, orig_mode,
7079 SSE_REGNO (sse_regno));
7080 if (n == 4
7081 && regclass[0] == X86_64_SSE_CLASS
7082 && regclass[1] == X86_64_SSEUP_CLASS
7083 && regclass[2] == X86_64_SSEUP_CLASS
7084 && regclass[3] == X86_64_SSEUP_CLASS
7085 && mode != BLKmode)
7086 return gen_reg_or_parallel (mode, orig_mode,
7087 SSE_REGNO (sse_regno));
7088 if (n == 8
7089 && regclass[0] == X86_64_SSE_CLASS
7090 && regclass[1] == X86_64_SSEUP_CLASS
7091 && regclass[2] == X86_64_SSEUP_CLASS
7092 && regclass[3] == X86_64_SSEUP_CLASS
7093 && regclass[4] == X86_64_SSEUP_CLASS
7094 && regclass[5] == X86_64_SSEUP_CLASS
7095 && regclass[6] == X86_64_SSEUP_CLASS
7096 && regclass[7] == X86_64_SSEUP_CLASS
7097 && mode != BLKmode)
7098 return gen_reg_or_parallel (mode, orig_mode,
7099 SSE_REGNO (sse_regno));
7100 if (n == 2
7101 && regclass[0] == X86_64_X87_CLASS
7102 && regclass[1] == X86_64_X87UP_CLASS)
7103 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7104
7105 if (n == 2
7106 && regclass[0] == X86_64_INTEGER_CLASS
7107 && regclass[1] == X86_64_INTEGER_CLASS
7108 && (mode == CDImode || mode == TImode)
7109 && intreg[0] + 1 == intreg[1])
7110 return gen_rtx_REG (mode, intreg[0]);
7111
7112 /* Otherwise figure out the entries of the PARALLEL. */
7113 for (i = 0; i < n; i++)
7114 {
7115 int pos;
7116
7117 switch (regclass[i])
7118 {
7119 case X86_64_NO_CLASS:
7120 break;
7121 case X86_64_INTEGER_CLASS:
7122 case X86_64_INTEGERSI_CLASS:
7123 /* Merge TImodes on aligned occasions here too. */
7124 if (i * 8 + 8 > bytes)
7125 tmpmode
7126 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7127 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7128 tmpmode = SImode;
7129 else
7130 tmpmode = DImode;
7131 /* We've requested 24 bytes for which we
7132 don't have a mode. Use DImode. */
7133 if (tmpmode == BLKmode)
7134 tmpmode = DImode;
7135 exp [nexps++]
7136 = gen_rtx_EXPR_LIST (VOIDmode,
7137 gen_rtx_REG (tmpmode, *intreg),
7138 GEN_INT (i*8));
7139 intreg++;
7140 break;
7141 case X86_64_SSESF_CLASS:
7142 exp [nexps++]
7143 = gen_rtx_EXPR_LIST (VOIDmode,
7144 gen_rtx_REG (SFmode,
7145 SSE_REGNO (sse_regno)),
7146 GEN_INT (i*8));
7147 sse_regno++;
7148 break;
7149 case X86_64_SSEDF_CLASS:
7150 exp [nexps++]
7151 = gen_rtx_EXPR_LIST (VOIDmode,
7152 gen_rtx_REG (DFmode,
7153 SSE_REGNO (sse_regno)),
7154 GEN_INT (i*8));
7155 sse_regno++;
7156 break;
7157 case X86_64_SSE_CLASS:
7158 pos = i;
7159 switch (n)
7160 {
7161 case 1:
7162 tmpmode = DImode;
7163 break;
7164 case 2:
7165 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7166 {
7167 tmpmode = TImode;
7168 i++;
7169 }
7170 else
7171 tmpmode = DImode;
7172 break;
7173 case 4:
7174 gcc_assert (i == 0
7175 && regclass[1] == X86_64_SSEUP_CLASS
7176 && regclass[2] == X86_64_SSEUP_CLASS
7177 && regclass[3] == X86_64_SSEUP_CLASS);
7178 tmpmode = OImode;
7179 i += 3;
7180 break;
7181 case 8:
7182 gcc_assert (i == 0
7183 && regclass[1] == X86_64_SSEUP_CLASS
7184 && regclass[2] == X86_64_SSEUP_CLASS
7185 && regclass[3] == X86_64_SSEUP_CLASS
7186 && regclass[4] == X86_64_SSEUP_CLASS
7187 && regclass[5] == X86_64_SSEUP_CLASS
7188 && regclass[6] == X86_64_SSEUP_CLASS
7189 && regclass[7] == X86_64_SSEUP_CLASS);
7190 tmpmode = XImode;
7191 i += 7;
7192 break;
7193 default:
7194 gcc_unreachable ();
7195 }
7196 exp [nexps++]
7197 = gen_rtx_EXPR_LIST (VOIDmode,
7198 gen_rtx_REG (tmpmode,
7199 SSE_REGNO (sse_regno)),
7200 GEN_INT (pos*8));
7201 sse_regno++;
7202 break;
7203 default:
7204 gcc_unreachable ();
7205 }
7206 }
7207
7208 /* Empty aligned struct, union or class. */
7209 if (nexps == 0)
7210 return NULL;
7211
7212 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7213 for (i = 0; i < nexps; i++)
7214 XVECEXP (ret, 0, i) = exp [i];
7215 return ret;
7216 }
7217
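/* For illustration only: for the first argument of type
   struct { double d; long l; } the container built above is roughly

	(parallel [(expr_list (reg:DF xmm0) (const_int 0))
		   (expr_list (reg:DI di)   (const_int 8))])

   i.e. the first eightbyte goes in %xmm0 and the second in %rdi.  */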
7218 /* Update the data in CUM to advance over an argument of mode MODE
7219 and data type TYPE. (TYPE is null for libcalls where that information
7220 may not be available.) */
7221
7222 static void
7223 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7224 const_tree type, HOST_WIDE_INT bytes,
7225 HOST_WIDE_INT words)
7226 {
7227 switch (mode)
7228 {
7229 default:
7230 break;
7231
7232 case BLKmode:
7233 if (bytes < 0)
7234 break;
7235 /* FALLTHRU */
7236
7237 case DImode:
7238 case SImode:
7239 case HImode:
7240 case QImode:
7241 cum->words += words;
7242 cum->nregs -= words;
7243 cum->regno += words;
7244
7245 if (cum->nregs <= 0)
7246 {
7247 cum->nregs = 0;
7248 cum->regno = 0;
7249 }
7250 break;
7251
7252 case OImode:
7253 /* OImode shouldn't be used directly. */
7254 gcc_unreachable ();
7255
7256 case DFmode:
7257 if (cum->float_in_sse < 2)
7258 break;
7259 case SFmode:
7260 if (cum->float_in_sse < 1)
7261 break;
7262 /* FALLTHRU */
7263
7264 case V8SFmode:
7265 case V8SImode:
7266 case V64QImode:
7267 case V32HImode:
7268 case V16SImode:
7269 case V8DImode:
7270 case V16SFmode:
7271 case V8DFmode:
7272 case V32QImode:
7273 case V16HImode:
7274 case V4DFmode:
7275 case V4DImode:
7276 case TImode:
7277 case V16QImode:
7278 case V8HImode:
7279 case V4SImode:
7280 case V2DImode:
7281 case V4SFmode:
7282 case V2DFmode:
7283 if (!type || !AGGREGATE_TYPE_P (type))
7284 {
7285 cum->sse_words += words;
7286 cum->sse_nregs -= 1;
7287 cum->sse_regno += 1;
7288 if (cum->sse_nregs <= 0)
7289 {
7290 cum->sse_nregs = 0;
7291 cum->sse_regno = 0;
7292 }
7293 }
7294 break;
7295
7296 case V8QImode:
7297 case V4HImode:
7298 case V2SImode:
7299 case V2SFmode:
7300 case V1TImode:
7301 case V1DImode:
7302 if (!type || !AGGREGATE_TYPE_P (type))
7303 {
7304 cum->mmx_words += words;
7305 cum->mmx_nregs -= 1;
7306 cum->mmx_regno += 1;
7307 if (cum->mmx_nregs <= 0)
7308 {
7309 cum->mmx_nregs = 0;
7310 cum->mmx_regno = 0;
7311 }
7312 }
7313 break;
7314 }
7315 }
7316
7317 static void
7318 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7319 const_tree type, HOST_WIDE_INT words, bool named)
7320 {
7321 int int_nregs, sse_nregs;
7322
7323 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
7324 if (!named && (VALID_AVX512F_REG_MODE (mode)
7325 || VALID_AVX256_REG_MODE (mode)))
7326 return;
7327
7328 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7329 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7330 {
7331 cum->nregs -= int_nregs;
7332 cum->sse_nregs -= sse_nregs;
7333 cum->regno += int_nregs;
7334 cum->sse_regno += sse_nregs;
7335 }
7336 else
7337 {
7338 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7339 cum->words = (cum->words + align - 1) & ~(align - 1);
7340 cum->words += words;
7341 }
7342 }
7343
7344 static void
7345 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7346 HOST_WIDE_INT words)
7347 {
7348 /* Otherwise, this should be passed indirect. */
7349 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7350
7351 cum->words += words;
7352 if (cum->nregs > 0)
7353 {
7354 cum->nregs -= 1;
7355 cum->regno += 1;
7356 }
7357 }
7358
7359 /* Update the data in CUM to advance over an argument of mode MODE and
7360 data type TYPE. (TYPE is null for libcalls where that information
7361 may not be available.) */
7362
7363 static void
7364 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7365 const_tree type, bool named)
7366 {
7367 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7368 HOST_WIDE_INT bytes, words;
7369
7370 if (mode == BLKmode)
7371 bytes = int_size_in_bytes (type);
7372 else
7373 bytes = GET_MODE_SIZE (mode);
7374 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7375
7376 if (type)
7377 mode = type_natural_mode (type, NULL, false);
7378
7379 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7380 function_arg_advance_ms_64 (cum, bytes, words);
7381 else if (TARGET_64BIT)
7382 function_arg_advance_64 (cum, mode, type, words, named);
7383 else
7384 function_arg_advance_32 (cum, mode, type, bytes, words);
7385 }
7386
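/* For illustration only: on 64-bit SYSV_ABI, after advancing over the
   arguments of f (int, double, int) we have cum->regno == 2 and
   cum->sse_regno == 1, so a following integer argument would be assigned
   %rdx and a following float/double argument %xmm1.  */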
7387 /* Define where to put the arguments to a function.
7388 Value is zero to push the argument on the stack,
7389 or a hard register in which to store the argument.
7390
7391 MODE is the argument's machine mode.
7392 TYPE is the data type of the argument (as a tree).
7393 This is null for libcalls where that information may
7394 not be available.
7395 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7396 the preceding args and about the function being called.
7397 NAMED is nonzero if this argument is a named parameter
7398 (otherwise it is an extra parameter matching an ellipsis). */
7399
7400 static rtx
7401 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7402 enum machine_mode orig_mode, const_tree type,
7403 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7404 {
7405 /* Avoid the AL settings for the Unix64 ABI. */
7406 if (mode == VOIDmode)
7407 return constm1_rtx;
7408
7409 switch (mode)
7410 {
7411 default:
7412 break;
7413
7414 case BLKmode:
7415 if (bytes < 0)
7416 break;
7417 /* FALLTHRU */
7418 case DImode:
7419 case SImode:
7420 case HImode:
7421 case QImode:
7422 if (words <= cum->nregs)
7423 {
7424 int regno = cum->regno;
7425
7426 /* Fastcall allocates the first two DWORD (SImode) or
7427 smaller arguments to ECX and EDX if the argument isn't
7428 an aggregate type. */
7429 if (cum->fastcall)
7430 {
7431 if (mode == BLKmode
7432 || mode == DImode
7433 || (type && AGGREGATE_TYPE_P (type)))
7434 break;
7435
7436 /* ECX not EAX is the first allocated register. */
7437 if (regno == AX_REG)
7438 regno = CX_REG;
7439 }
7440 return gen_rtx_REG (mode, regno);
7441 }
7442 break;
7443
7444 case DFmode:
7445 if (cum->float_in_sse < 2)
7446 break;
7447 case SFmode:
7448 if (cum->float_in_sse < 1)
7449 break;
7450 /* FALLTHRU */
7451 case TImode:
7452 /* In 32bit, we pass TImode in xmm registers. */
7453 case V16QImode:
7454 case V8HImode:
7455 case V4SImode:
7456 case V2DImode:
7457 case V4SFmode:
7458 case V2DFmode:
7459 if (!type || !AGGREGATE_TYPE_P (type))
7460 {
7461 if (cum->sse_nregs)
7462 return gen_reg_or_parallel (mode, orig_mode,
7463 cum->sse_regno + FIRST_SSE_REG);
7464 }
7465 break;
7466
7467 case OImode:
7468 case XImode:
7469 /* OImode and XImode shouldn't be used directly. */
7470 gcc_unreachable ();
7471
7472 case V64QImode:
7473 case V32HImode:
7474 case V16SImode:
7475 case V8DImode:
7476 case V16SFmode:
7477 case V8DFmode:
7478 case V8SFmode:
7479 case V8SImode:
7480 case V32QImode:
7481 case V16HImode:
7482 case V4DFmode:
7483 case V4DImode:
7484 if (!type || !AGGREGATE_TYPE_P (type))
7485 {
7486 if (cum->sse_nregs)
7487 return gen_reg_or_parallel (mode, orig_mode,
7488 cum->sse_regno + FIRST_SSE_REG);
7489 }
7490 break;
7491
7492 case V8QImode:
7493 case V4HImode:
7494 case V2SImode:
7495 case V2SFmode:
7496 case V1TImode:
7497 case V1DImode:
7498 if (!type || !AGGREGATE_TYPE_P (type))
7499 {
7500 if (cum->mmx_nregs)
7501 return gen_reg_or_parallel (mode, orig_mode,
7502 cum->mmx_regno + FIRST_MMX_REG);
7503 }
7504 break;
7505 }
7506
7507 return NULL_RTX;
7508 }
7509
7510 static rtx
7511 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7512 enum machine_mode orig_mode, const_tree type, bool named)
7513 {
7514 /* Handle a hidden AL argument containing number of registers
7515 for varargs x86-64 functions. */
7516 if (mode == VOIDmode)
7517 return GEN_INT (cum->maybe_vaarg
7518 ? (cum->sse_nregs < 0
7519 ? X86_64_SSE_REGPARM_MAX
7520 : cum->sse_regno)
7521 : -1);
7522
7523 switch (mode)
7524 {
7525 default:
7526 break;
7527
7528 case V8SFmode:
7529 case V8SImode:
7530 case V32QImode:
7531 case V16HImode:
7532 case V4DFmode:
7533 case V4DImode:
7534 case V16SFmode:
7535 case V16SImode:
7536 case V64QImode:
7537 case V32HImode:
7538 case V8DFmode:
7539 case V8DImode:
7540 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
7541 if (!named)
7542 return NULL;
7543 break;
7544 }
7545
7546 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7547 cum->sse_nregs,
7548 &x86_64_int_parameter_registers [cum->regno],
7549 cum->sse_regno);
7550 }
7551
7552 static rtx
7553 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7554 enum machine_mode orig_mode, bool named,
7555 HOST_WIDE_INT bytes)
7556 {
7557 unsigned int regno;
7558
7559 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7560 We use the value -2 to specify that the current function call uses MS_ABI. */
7561 if (mode == VOIDmode)
7562 return GEN_INT (-2);
7563
7564 /* If we've run out of registers, it goes on the stack. */
7565 if (cum->nregs == 0)
7566 return NULL_RTX;
7567
7568 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7569
7570 /* Only floating point modes are passed in anything but integer regs. */
7571 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7572 {
7573 if (named)
7574 regno = cum->regno + FIRST_SSE_REG;
7575 else
7576 {
7577 rtx t1, t2;
7578
7579 /* Unnamed floating parameters are passed in both the
7580 SSE and integer registers. */
7581 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7582 t2 = gen_rtx_REG (mode, regno);
7583 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7584 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7585 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7586 }
7587 }
7588 /* Handle aggregate types passed in registers. */
7589 if (orig_mode == BLKmode)
7590 {
7591 if (bytes > 0 && bytes <= 8)
7592 mode = (bytes > 4 ? DImode : SImode);
7593 if (mode == BLKmode)
7594 mode = DImode;
7595 }
7596
7597 return gen_reg_or_parallel (mode, orig_mode, regno);
7598 }
7599
7600 /* Return where to put the arguments to a function.
7601 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7602
7603 MODE is the argument's machine mode. TYPE is the data type of the
7604 argument. It is null for libcalls where that information may not be
7605 available. CUM gives information about the preceding args and about
7606 the function being called. NAMED is nonzero if this argument is a
7607 named parameter (otherwise it is an extra parameter matching an
7608 ellipsis). */
7609
7610 static rtx
7611 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7612 const_tree type, bool named)
7613 {
7614 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7615 enum machine_mode mode = omode;
7616 HOST_WIDE_INT bytes, words;
7617 rtx arg;
7618
7619 if (mode == BLKmode)
7620 bytes = int_size_in_bytes (type);
7621 else
7622 bytes = GET_MODE_SIZE (mode);
7623 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7624
7625 /* To simplify the code below, represent vector types with a vector mode
7626 even if MMX/SSE are not active. */
7627 if (type && TREE_CODE (type) == VECTOR_TYPE)
7628 mode = type_natural_mode (type, cum, false);
7629
7630 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7631 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7632 else if (TARGET_64BIT)
7633 arg = function_arg_64 (cum, mode, omode, type, named);
7634 else
7635 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7636
7637 return arg;
7638 }
7639
7640 /* A C expression that indicates when an argument must be passed by
7641 reference. If nonzero for an argument, a copy of that argument is
7642 made in memory and a pointer to the argument is passed instead of
7643 the argument itself. The pointer is passed in whatever way is
7644 appropriate for passing a pointer to that type. */
7645
7646 static bool
7647 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7648 const_tree type, bool)
7649 {
7650 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7651
7652 /* See Windows x64 Software Convention. */
7653 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7654 {
7655 int msize = (int) GET_MODE_SIZE (mode);
7656 if (type)
7657 {
7658 /* Arrays are passed by reference. */
7659 if (TREE_CODE (type) == ARRAY_TYPE)
7660 return true;
7661
7662 if (AGGREGATE_TYPE_P (type))
7663 {
7664 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7665 are passed by reference. */
7666 msize = int_size_in_bytes (type);
7667 }
7668 }
7669
7670 /* __m128 is passed by reference. */
7671 switch (msize) {
7672 case 1: case 2: case 4: case 8:
7673 break;
7674 default:
7675 return true;
7676 }
7677 }
7678 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7679 return 1;
7680
7681 return 0;
7682 }
7683
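/* For illustration only: under the MS x64 convention a 24-byte struct or a
   16-byte __m128 argument is passed by reference (the caller passes a
   pointer in the argument slot), while 1/2/4/8-byte aggregates are passed
   by value.  On 64-bit SYSV only variably-sized types
   (int_size_in_bytes == -1) take the by-reference path here.  */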
7684 /* Return true when TYPE should be 128bit aligned for 32bit argument
7685 passing ABI. XXX: This function is obsolete and is only used for
7686 checking psABI compatibility with previous versions of GCC. */
7687
7688 static bool
7689 ix86_compat_aligned_value_p (const_tree type)
7690 {
7691 enum machine_mode mode = TYPE_MODE (type);
7692 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7693 || mode == TDmode
7694 || mode == TFmode
7695 || mode == TCmode)
7696 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7697 return true;
7698 if (TYPE_ALIGN (type) < 128)
7699 return false;
7700
7701 if (AGGREGATE_TYPE_P (type))
7702 {
7703 /* Walk the aggregates recursively. */
7704 switch (TREE_CODE (type))
7705 {
7706 case RECORD_TYPE:
7707 case UNION_TYPE:
7708 case QUAL_UNION_TYPE:
7709 {
7710 tree field;
7711
7712 /* Walk all the structure fields. */
7713 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7714 {
7715 if (TREE_CODE (field) == FIELD_DECL
7716 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7717 return true;
7718 }
7719 break;
7720 }
7721
7722 case ARRAY_TYPE:
7723 /* Just for use if some language passes arrays by value. */
7724 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7725 return true;
7726 break;
7727
7728 default:
7729 gcc_unreachable ();
7730 }
7731 }
7732 return false;
7733 }
7734
7735 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7736 XXX: This function is obsolete and is only used for checking psABI
7737 compatibility with previous versions of GCC. */
7738
7739 static unsigned int
7740 ix86_compat_function_arg_boundary (enum machine_mode mode,
7741 const_tree type, unsigned int align)
7742 {
7743 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7744 natural boundaries. */
7745 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7746 {
7747 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7748 make an exception for SSE modes since these require 128bit
7749 alignment.
7750
7751 The handling here differs from field_alignment. ICC aligns MMX
7752 arguments to 4 byte boundaries, while structure fields are aligned
7753 to 8 byte boundaries. */
7754 if (!type)
7755 {
7756 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7757 align = PARM_BOUNDARY;
7758 }
7759 else
7760 {
7761 if (!ix86_compat_aligned_value_p (type))
7762 align = PARM_BOUNDARY;
7763 }
7764 }
7765 if (align > BIGGEST_ALIGNMENT)
7766 align = BIGGEST_ALIGNMENT;
7767 return align;
7768 }
7769
7770 /* Return true when TYPE should be 128bit aligned for 32bit argument
7771 passing ABI. */
7772
7773 static bool
7774 ix86_contains_aligned_value_p (const_tree type)
7775 {
7776 enum machine_mode mode = TYPE_MODE (type);
7777
7778 if (mode == XFmode || mode == XCmode)
7779 return false;
7780
7781 if (TYPE_ALIGN (type) < 128)
7782 return false;
7783
7784 if (AGGREGATE_TYPE_P (type))
7785 {
7786 /* Walk the aggregates recursively. */
7787 switch (TREE_CODE (type))
7788 {
7789 case RECORD_TYPE:
7790 case UNION_TYPE:
7791 case QUAL_UNION_TYPE:
7792 {
7793 tree field;
7794
7795 /* Walk all the structure fields. */
7796 for (field = TYPE_FIELDS (type);
7797 field;
7798 field = DECL_CHAIN (field))
7799 {
7800 if (TREE_CODE (field) == FIELD_DECL
7801 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7802 return true;
7803 }
7804 break;
7805 }
7806
7807 case ARRAY_TYPE:
7808 /* Just for use if some language passes arrays by value. */
7809 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7810 return true;
7811 break;
7812
7813 default:
7814 gcc_unreachable ();
7815 }
7816 }
7817 else
7818 return TYPE_ALIGN (type) >= 128;
7819
7820 return false;
7821 }
7822
7823 /* Gives the alignment boundary, in bits, of an argument with the
7824 specified mode and type. */
7825
7826 static unsigned int
7827 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7828 {
7829 unsigned int align;
7830 if (type)
7831 {
7832 /* Since the main variant type is used for the call, convert the
7833 type to its main variant. */
7834 type = TYPE_MAIN_VARIANT (type);
7835 align = TYPE_ALIGN (type);
7836 }
7837 else
7838 align = GET_MODE_ALIGNMENT (mode);
7839 if (align < PARM_BOUNDARY)
7840 align = PARM_BOUNDARY;
7841 else
7842 {
7843 static bool warned;
7844 unsigned int saved_align = align;
7845
7846 if (!TARGET_64BIT)
7847 {
7848 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7849 if (!type)
7850 {
7851 if (mode == XFmode || mode == XCmode)
7852 align = PARM_BOUNDARY;
7853 }
7854 else if (!ix86_contains_aligned_value_p (type))
7855 align = PARM_BOUNDARY;
7856
7857 if (align < 128)
7858 align = PARM_BOUNDARY;
7859 }
7860
7861 if (warn_psabi
7862 && !warned
7863 && align != ix86_compat_function_arg_boundary (mode, type,
7864 saved_align))
7865 {
7866 warned = true;
7867 inform (input_location,
7868 "The ABI for passing parameters with %d-byte"
7869 " alignment has changed in GCC 4.6",
7870 align / BITS_PER_UNIT);
7871 }
7872 }
7873
7874 return align;
7875 }
7876
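/* For illustration only: on ia32 an __m128 argument keeps its 128-bit
   boundary, while a plain double falls back to PARM_BOUNDARY; with
   -Wpsabi a note is emitted when this result differs from what the
   pre-GCC 4.6 rules (ix86_compat_function_arg_boundary) would have
   produced.  */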
7877 /* Return true if N is a possible register number of function value. */
7878
7879 static bool
7880 ix86_function_value_regno_p (const unsigned int regno)
7881 {
7882 switch (regno)
7883 {
7884 case AX_REG:
7885 return true;
7886 case DX_REG:
7887 return (!TARGET_64BIT || ix86_abi != MS_ABI);
7888 case DI_REG:
7889 case SI_REG:
7890 return TARGET_64BIT && ix86_abi != MS_ABI;
7891
7892 /* Complex values are returned in %st(0)/%st(1) pair. */
7893 case ST0_REG:
7894 case ST1_REG:
7895 /* TODO: The function should depend on current function ABI but
7896 builtins.c would need updating then. Therefore we use the
7897 default ABI. */
7898 if (TARGET_64BIT && ix86_abi == MS_ABI)
7899 return false;
7900 return TARGET_FLOAT_RETURNS_IN_80387;
7901
7902 /* Complex values are returned in %xmm0/%xmm1 pair. */
7903 case XMM0_REG:
7904 case XMM1_REG:
7905 return TARGET_SSE;
7906
7907 case MM0_REG:
7908 if (TARGET_MACHO || TARGET_64BIT)
7909 return false;
7910 return TARGET_MMX;
7911 }
7912
7913 return false;
7914 }
7915
7916 /* Define how to find the value returned by a function.
7917 VALTYPE is the data type of the value (as a tree).
7918 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7919 otherwise, FUNC is 0. */
7920
7921 static rtx
7922 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7923 const_tree fntype, const_tree fn)
7924 {
7925 unsigned int regno;
7926
7927 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7928 we normally prevent this case when mmx is not available. However
7929 some ABIs may require the result to be returned like DImode. */
7930 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7931 regno = FIRST_MMX_REG;
7932
7933 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7934 we prevent this case when sse is not available. However some ABIs
7935 may require the result to be returned like integer TImode. */
7936 else if (mode == TImode
7937 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7938 regno = FIRST_SSE_REG;
7939
7940 /* 32-byte vector modes in %ymm0. */
7941 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7942 regno = FIRST_SSE_REG;
7943
7944 /* 64-byte vector modes in %zmm0. */
7945 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7946 regno = FIRST_SSE_REG;
7947
7948 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7949 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7950 regno = FIRST_FLOAT_REG;
7951 else
7952 /* Most things go in %eax. */
7953 regno = AX_REG;
7954
7955 /* Override FP return register with %xmm0 for local functions when
7956 SSE math is enabled or for functions with sseregparm attribute. */
7957 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7958 {
7959 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7960 if ((sse_level >= 1 && mode == SFmode)
7961 || (sse_level == 2 && mode == DFmode))
7962 regno = FIRST_SSE_REG;
7963 }
7964
7965 /* OImode shouldn't be used directly. */
7966 gcc_assert (mode != OImode);
7967
7968 return gen_rtx_REG (orig_mode, regno);
7969 }
7970
7971 static rtx
7972 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7973 const_tree valtype)
7974 {
7975 rtx ret;
7976
7977 /* Handle libcalls, which don't provide a type node. */
7978 if (valtype == NULL)
7979 {
7980 unsigned int regno;
7981
7982 switch (mode)
7983 {
7984 case SFmode:
7985 case SCmode:
7986 case DFmode:
7987 case DCmode:
7988 case TFmode:
7989 case SDmode:
7990 case DDmode:
7991 case TDmode:
7992 regno = FIRST_SSE_REG;
7993 break;
7994 case XFmode:
7995 case XCmode:
7996 regno = FIRST_FLOAT_REG;
7997 break;
7998 case TCmode:
7999 return NULL;
8000 default:
8001 regno = AX_REG;
8002 }
8003
8004 return gen_rtx_REG (mode, regno);
8005 }
8006 else if (POINTER_TYPE_P (valtype))
8007 {
8008 /* Pointers are always returned in word_mode. */
8009 mode = word_mode;
8010 }
8011
8012 ret = construct_container (mode, orig_mode, valtype, 1,
8013 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8014 x86_64_int_return_registers, 0);
8015
8016 /* For zero sized structures, construct_container returns NULL, but we
8017 need to keep the rest of the compiler happy by returning a meaningful value. */
8018 if (!ret)
8019 ret = gen_rtx_REG (orig_mode, AX_REG);
8020
8021 return ret;
8022 }
8023
8024 static rtx
8025 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
8026 const_tree valtype)
8027 {
8028 unsigned int regno = AX_REG;
8029
8030 if (TARGET_SSE)
8031 {
8032 switch (GET_MODE_SIZE (mode))
8033 {
8034 case 16:
8035 if (valtype != NULL_TREE
8036 && !VECTOR_INTEGER_TYPE_P (valtype)
8037 && !VECTOR_INTEGER_TYPE_P (valtype)
8038 && !INTEGRAL_TYPE_P (valtype)
8039 && !VECTOR_FLOAT_TYPE_P (valtype))
8040 break;
8041 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8042 && !COMPLEX_MODE_P (mode))
8043 regno = FIRST_SSE_REG;
8044 break;
8045 case 8:
8046 case 4:
8047 if (mode == SFmode || mode == DFmode)
8048 regno = FIRST_SSE_REG;
8049 break;
8050 default:
8051 break;
8052 }
8053 }
8054 return gen_rtx_REG (orig_mode, regno);
8055 }
8056
8057 static rtx
8058 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8059 enum machine_mode orig_mode, enum machine_mode mode)
8060 {
8061 const_tree fn, fntype;
8062
8063 fn = NULL_TREE;
8064 if (fntype_or_decl && DECL_P (fntype_or_decl))
8065 fn = fntype_or_decl;
8066 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8067
8068 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8069 return function_value_ms_64 (orig_mode, mode, valtype);
8070 else if (TARGET_64BIT)
8071 return function_value_64 (orig_mode, mode, valtype);
8072 else
8073 return function_value_32 (orig_mode, mode, fntype, fn);
8074 }
8075
8076 static rtx
8077 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8078 {
8079 enum machine_mode mode, orig_mode;
8080
8081 orig_mode = TYPE_MODE (valtype);
8082 mode = type_natural_mode (valtype, NULL, true);
8083 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8084 }
8085
8086 /* Pointer function arguments and return values are promoted to
8087 word_mode. */
8088
8089 static enum machine_mode
8090 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
8091 int *punsignedp, const_tree fntype,
8092 int for_return)
8093 {
8094 if (type != NULL_TREE && POINTER_TYPE_P (type))
8095 {
8096 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8097 return word_mode;
8098 }
8099 return default_promote_function_mode (type, mode, punsignedp, fntype,
8100 for_return);
8101 }
8102
8103 /* Return true if a structure, union or array with MODE containing FIELD
8104 should be accessed using BLKmode. */
8105
8106 static bool
8107 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
8108 {
8109 /* Union with XFmode must be in BLKmode. */
8110 return (mode == XFmode
8111 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8112 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8113 }
8114
8115 rtx
8116 ix86_libcall_value (enum machine_mode mode)
8117 {
8118 return ix86_function_value_1 (NULL, NULL, mode, mode);
8119 }
8120
8121 /* Return true iff type is returned in memory. */
8122
8123 static bool
8124 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8125 {
8126 #ifdef SUBTARGET_RETURN_IN_MEMORY
8127 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8128 #else
8129 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8130 HOST_WIDE_INT size;
8131
8132 if (TARGET_64BIT)
8133 {
8134 if (ix86_function_type_abi (fntype) == MS_ABI)
8135 {
8136 size = int_size_in_bytes (type);
8137
8138 /* __m128 is returned in xmm0. */
8139 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8140 || INTEGRAL_TYPE_P (type)
8141 || VECTOR_FLOAT_TYPE_P (type))
8142 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8143 && !COMPLEX_MODE_P (mode)
8144 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8145 return false;
8146
8147 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8148 return size != 1 && size != 2 && size != 4 && size != 8;
8149 }
8150 else
8151 {
8152 int needed_intregs, needed_sseregs;
8153
8154 return examine_argument (mode, type, 1,
8155 &needed_intregs, &needed_sseregs);
8156 }
8157 }
8158 else
8159 {
8160 if (mode == BLKmode)
8161 return true;
8162
8163 size = int_size_in_bytes (type);
8164
8165 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8166 return false;
8167
8168 if (VECTOR_MODE_P (mode) || mode == TImode)
8169 {
8170 /* User-created vectors small enough to fit in EAX. */
8171 if (size < 8)
8172 return false;
8173
8174 /* Unless the ABI prescribes otherwise,
8175 MMX/3dNow values are returned in MM0 if available. */
8176
8177 if (size == 8)
8178 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8179
8180 /* SSE values are returned in XMM0 if available. */
8181 if (size == 16)
8182 return !TARGET_SSE;
8183
8184 /* AVX values are returned in YMM0 if available. */
8185 if (size == 32)
8186 return !TARGET_AVX;
8187
8188 /* AVX512F values are returned in ZMM0 if available. */
8189 if (size == 64)
8190 return !TARGET_AVX512F;
8191 }
8192
8193 if (mode == XFmode)
8194 return false;
8195
8196 if (size > 12)
8197 return true;
8198
8199 /* OImode shouldn't be used directly. */
8200 gcc_assert (mode != OImode);
8201
8202 return false;
8203 }
8204 #endif
8205 }
8206
8207 \f
8208 /* Create the va_list data type. */
8209
8210 /* Return the calling convention specific va_list data type.
8211 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8212
8213 static tree
8214 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8215 {
8216 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8217
8218 /* For i386 we use plain pointer to argument area. */
8219 if (!TARGET_64BIT || abi == MS_ABI)
8220 return build_pointer_type (char_type_node);
8221
8222 record = lang_hooks.types.make_type (RECORD_TYPE);
8223 type_decl = build_decl (BUILTINS_LOCATION,
8224 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8225
8226 f_gpr = build_decl (BUILTINS_LOCATION,
8227 FIELD_DECL, get_identifier ("gp_offset"),
8228 unsigned_type_node);
8229 f_fpr = build_decl (BUILTINS_LOCATION,
8230 FIELD_DECL, get_identifier ("fp_offset"),
8231 unsigned_type_node);
8232 f_ovf = build_decl (BUILTINS_LOCATION,
8233 FIELD_DECL, get_identifier ("overflow_arg_area"),
8234 ptr_type_node);
8235 f_sav = build_decl (BUILTINS_LOCATION,
8236 FIELD_DECL, get_identifier ("reg_save_area"),
8237 ptr_type_node);
8238
8239 va_list_gpr_counter_field = f_gpr;
8240 va_list_fpr_counter_field = f_fpr;
8241
8242 DECL_FIELD_CONTEXT (f_gpr) = record;
8243 DECL_FIELD_CONTEXT (f_fpr) = record;
8244 DECL_FIELD_CONTEXT (f_ovf) = record;
8245 DECL_FIELD_CONTEXT (f_sav) = record;
8246
8247 TYPE_STUB_DECL (record) = type_decl;
8248 TYPE_NAME (record) = type_decl;
8249 TYPE_FIELDS (record) = f_gpr;
8250 DECL_CHAIN (f_gpr) = f_fpr;
8251 DECL_CHAIN (f_fpr) = f_ovf;
8252 DECL_CHAIN (f_ovf) = f_sav;
8253
8254 layout_type (record);
8255
8256 /* The correct type is an array type of one element. */
8257 return build_array_type (record, build_index_type (size_zero_node));
8258 }
8259
8260 /* Set up the builtin va_list data type and for 64-bit the additional
8261 calling convention specific va_list data types. */
8262
8263 static tree
8264 ix86_build_builtin_va_list (void)
8265 {
8266 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8267
8268 /* Initialize abi specific va_list builtin types. */
8269 if (TARGET_64BIT)
8270 {
8271 tree t;
8272 if (ix86_abi == MS_ABI)
8273 {
8274 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8275 if (TREE_CODE (t) != RECORD_TYPE)
8276 t = build_variant_type_copy (t);
8277 sysv_va_list_type_node = t;
8278 }
8279 else
8280 {
8281 t = ret;
8282 if (TREE_CODE (t) != RECORD_TYPE)
8283 t = build_variant_type_copy (t);
8284 sysv_va_list_type_node = t;
8285 }
8286 if (ix86_abi != MS_ABI)
8287 {
8288 t = ix86_build_builtin_va_list_abi (MS_ABI);
8289 if (TREE_CODE (t) != RECORD_TYPE)
8290 t = build_variant_type_copy (t);
8291 ms_va_list_type_node = t;
8292 }
8293 else
8294 {
8295 t = ret;
8296 if (TREE_CODE (t) != RECORD_TYPE)
8297 t = build_variant_type_copy (t);
8298 ms_va_list_type_node = t;
8299 }
8300 }
8301
8302 return ret;
8303 }
8304
8305 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
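/* A sketch of the register save area written below (derived from the
   offsets used in this function and read back by ix86_va_start):

     save_area + 0                      .. GPR slots, UNITS_PER_WORD bytes
                                           each, up to X86_64_REGPARM_MAX
     save_area + ix86_varargs_gpr_size  .. SSE slots, 16 bytes each, up to
                                           X86_64_SSE_REGPARM_MAX  */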
8306
8307 static void
8308 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8309 {
8310 rtx save_area, mem;
8311 alias_set_type set;
8312 int i, max;
8313
8314 /* GPR size of varargs save area. */
8315 if (cfun->va_list_gpr_size)
8316 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8317 else
8318 ix86_varargs_gpr_size = 0;
8319
8320 /* FPR size of varargs save area. We don't need it if we don't pass
8321 anything in SSE registers. */
8322 if (TARGET_SSE && cfun->va_list_fpr_size)
8323 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8324 else
8325 ix86_varargs_fpr_size = 0;
8326
8327 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8328 return;
8329
8330 save_area = frame_pointer_rtx;
8331 set = get_varargs_alias_set ();
8332
8333 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8334 if (max > X86_64_REGPARM_MAX)
8335 max = X86_64_REGPARM_MAX;
8336
8337 for (i = cum->regno; i < max; i++)
8338 {
8339 mem = gen_rtx_MEM (word_mode,
8340 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8341 MEM_NOTRAP_P (mem) = 1;
8342 set_mem_alias_set (mem, set);
8343 emit_move_insn (mem,
8344 gen_rtx_REG (word_mode,
8345 x86_64_int_parameter_registers[i]));
8346 }
8347
8348 if (ix86_varargs_fpr_size)
8349 {
8350 enum machine_mode smode;
8351 rtx_code_label *label;
8352 rtx test;
8353
8354       /* Now emit code to save SSE registers.  The AX parameter contains the number
8355 of SSE parameter registers used to call this function, though all we
8356 actually check here is the zero/non-zero status. */
8357
8358 label = gen_label_rtx ();
8359 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8360 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8361 label));
8362
8363 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8364 we used movdqa (i.e. TImode) instead? Perhaps even better would
8365 be if we could determine the real mode of the data, via a hook
8366 into pass_stdarg. Ignore all that for now. */
8367 smode = V4SFmode;
8368 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8369 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8370
8371 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8372 if (max > X86_64_SSE_REGPARM_MAX)
8373 max = X86_64_SSE_REGPARM_MAX;
8374
8375 for (i = cum->sse_regno; i < max; ++i)
8376 {
8377 mem = plus_constant (Pmode, save_area,
8378 i * 16 + ix86_varargs_gpr_size);
8379 mem = gen_rtx_MEM (smode, mem);
8380 MEM_NOTRAP_P (mem) = 1;
8381 set_mem_alias_set (mem, set);
8382 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8383
8384 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8385 }
8386
8387 emit_label (label);
8388 }
8389 }
8390
8391 static void
8392 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8393 {
8394 alias_set_type set = get_varargs_alias_set ();
8395 int i;
8396
8397   /* Reset to zero, as a SysV va_arg may have been used
8398      before.  */
8399 ix86_varargs_gpr_size = 0;
8400 ix86_varargs_fpr_size = 0;
8401
8402 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8403 {
8404 rtx reg, mem;
8405
8406 mem = gen_rtx_MEM (Pmode,
8407 plus_constant (Pmode, virtual_incoming_args_rtx,
8408 i * UNITS_PER_WORD));
8409 MEM_NOTRAP_P (mem) = 1;
8410 set_mem_alias_set (mem, set);
8411
8412 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8413 emit_move_insn (mem, reg);
8414 }
8415 }
8416
8417 static void
8418 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8419 tree type, int *, int no_rtl)
8420 {
8421 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8422 CUMULATIVE_ARGS next_cum;
8423 tree fntype;
8424
8425   /* This argument doesn't appear to be used anymore, which is good,
8426      because the old code here didn't suppress rtl generation.  */
8427 gcc_assert (!no_rtl);
8428
8429 if (!TARGET_64BIT)
8430 return;
8431
8432 fntype = TREE_TYPE (current_function_decl);
8433
8434 /* For varargs, we do not want to skip the dummy va_dcl argument.
8435 For stdargs, we do want to skip the last named argument. */
8436 next_cum = *cum;
8437 if (stdarg_p (fntype))
8438 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8439 true);
8440
8441 if (cum->call_abi == MS_ABI)
8442 setup_incoming_varargs_ms_64 (&next_cum);
8443 else
8444 setup_incoming_varargs_64 (&next_cum);
8445 }
8446
8447 /* Return true if TYPE is a va_list represented as a plain char pointer.  */
8448
8449 static bool
8450 is_va_list_char_pointer (tree type)
8451 {
8452 tree canonic;
8453
8454 /* For 32-bit it is always true. */
8455 if (!TARGET_64BIT)
8456 return true;
8457 canonic = ix86_canonical_va_list_type (type);
8458 return (canonic == ms_va_list_type_node
8459 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8460 }
8461
8462 /* Implement va_start. */
8463
8464 static void
8465 ix86_va_start (tree valist, rtx nextarg)
8466 {
8467 HOST_WIDE_INT words, n_gpr, n_fpr;
8468 tree f_gpr, f_fpr, f_ovf, f_sav;
8469 tree gpr, fpr, ovf, sav, t;
8470 tree type;
8471 rtx ovf_rtx;
8472
8473 if (flag_split_stack
8474 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8475 {
8476 unsigned int scratch_regno;
8477
8478 /* When we are splitting the stack, we can't refer to the stack
8479 arguments using internal_arg_pointer, because they may be on
8480 the old stack. The split stack prologue will arrange to
8481 leave a pointer to the old stack arguments in a scratch
8482 register, which we here copy to a pseudo-register. The split
8483 stack prologue can't set the pseudo-register directly because
8484 it (the prologue) runs before any registers have been saved. */
8485
8486 scratch_regno = split_stack_prologue_scratch_regno ();
8487 if (scratch_regno != INVALID_REGNUM)
8488 {
8489 rtx reg;
8490 rtx_insn *seq;
8491
8492 reg = gen_reg_rtx (Pmode);
8493 cfun->machine->split_stack_varargs_pointer = reg;
8494
8495 start_sequence ();
8496 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8497 seq = get_insns ();
8498 end_sequence ();
8499
8500 push_topmost_sequence ();
8501 emit_insn_after (seq, entry_of_function ());
8502 pop_topmost_sequence ();
8503 }
8504 }
8505
8506 /* Only 64bit target needs something special. */
8507 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8508 {
8509 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8510 std_expand_builtin_va_start (valist, nextarg);
8511 else
8512 {
8513 rtx va_r, next;
8514
8515 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8516 next = expand_binop (ptr_mode, add_optab,
8517 cfun->machine->split_stack_varargs_pointer,
8518 crtl->args.arg_offset_rtx,
8519 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8520 convert_move (va_r, next, 0);
8521 }
8522 return;
8523 }
8524
8525 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8526 f_fpr = DECL_CHAIN (f_gpr);
8527 f_ovf = DECL_CHAIN (f_fpr);
8528 f_sav = DECL_CHAIN (f_ovf);
8529
8530 valist = build_simple_mem_ref (valist);
8531 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8532 /* The following should be folded into the MEM_REF offset. */
8533 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8534 f_gpr, NULL_TREE);
8535 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8536 f_fpr, NULL_TREE);
8537 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8538 f_ovf, NULL_TREE);
8539 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8540 f_sav, NULL_TREE);
8541
8542 /* Count number of gp and fp argument registers used. */
8543 words = crtl->args.info.words;
8544 n_gpr = crtl->args.info.regno;
8545 n_fpr = crtl->args.info.sse_regno;
8546
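  /* Note the units used below: gp_offset counts bytes into the GPR part
     of the save area (8 per register), while fp_offset is biased past the
     whole GPR area (8 * X86_64_REGPARM_MAX bytes) and counts 16 bytes per
     SSE register, matching the layout stored by setup_incoming_varargs_64.  */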
8547 if (cfun->va_list_gpr_size)
8548 {
8549 type = TREE_TYPE (gpr);
8550 t = build2 (MODIFY_EXPR, type,
8551 gpr, build_int_cst (type, n_gpr * 8));
8552 TREE_SIDE_EFFECTS (t) = 1;
8553 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8554 }
8555
8556 if (TARGET_SSE && cfun->va_list_fpr_size)
8557 {
8558 type = TREE_TYPE (fpr);
8559 t = build2 (MODIFY_EXPR, type, fpr,
8560 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8561 TREE_SIDE_EFFECTS (t) = 1;
8562 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8563 }
8564
8565 /* Find the overflow area. */
8566 type = TREE_TYPE (ovf);
8567 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8568 ovf_rtx = crtl->args.internal_arg_pointer;
8569 else
8570 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8571 t = make_tree (type, ovf_rtx);
8572 if (words != 0)
8573 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8574 t = build2 (MODIFY_EXPR, type, ovf, t);
8575 TREE_SIDE_EFFECTS (t) = 1;
8576 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8577
8578 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8579 {
8580 /* Find the register save area.
8581 	 The function prologue saves it right above the stack frame.  */
8582 type = TREE_TYPE (sav);
8583 t = make_tree (type, frame_pointer_rtx);
8584 if (!ix86_varargs_gpr_size)
8585 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8586 t = build2 (MODIFY_EXPR, type, sav, t);
8587 TREE_SIDE_EFFECTS (t) = 1;
8588 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8589 }
8590 }
8591
8592 /* Implement va_arg. */
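/* For the 64-bit SysV case handled below, the emitted GIMPLE follows,
   roughly, the psABI va_arg algorithm:

     if (enough gp_offset / fp_offset room is left in the save area)
       addr = reg_save_area + offset;  advance gp_offset / fp_offset;
     else
       addr = align (overflow_arg_area);  overflow_arg_area += rsize;

   with an extra temporary copy when the value is split between GPR and
   SSE registers or needs stricter alignment than the save area gives.  */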
8593
8594 static tree
8595 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8596 gimple_seq *post_p)
8597 {
8598 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8599 tree f_gpr, f_fpr, f_ovf, f_sav;
8600 tree gpr, fpr, ovf, sav, t;
8601 int size, rsize;
8602 tree lab_false, lab_over = NULL_TREE;
8603 tree addr, t2;
8604 rtx container;
8605 int indirect_p = 0;
8606 tree ptrtype;
8607 enum machine_mode nat_mode;
8608 unsigned int arg_boundary;
8609
8610 /* Only 64bit target needs something special. */
8611 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8612 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8613
8614 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8615 f_fpr = DECL_CHAIN (f_gpr);
8616 f_ovf = DECL_CHAIN (f_fpr);
8617 f_sav = DECL_CHAIN (f_ovf);
8618
8619 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8620 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8621 valist = build_va_arg_indirect_ref (valist);
8622 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8623 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8624 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8625
8626 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8627 if (indirect_p)
8628 type = build_pointer_type (type);
8629 size = int_size_in_bytes (type);
8630 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8631
8632 nat_mode = type_natural_mode (type, NULL, false);
8633 switch (nat_mode)
8634 {
8635 case V8SFmode:
8636 case V8SImode:
8637 case V32QImode:
8638 case V16HImode:
8639 case V4DFmode:
8640 case V4DImode:
8641 case V16SFmode:
8642 case V16SImode:
8643 case V64QImode:
8644 case V32HImode:
8645 case V8DFmode:
8646 case V8DImode:
8647 	/* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack.  */
8648 if (!TARGET_64BIT_MS_ABI)
8649 {
8650 container = NULL;
8651 break;
8652 }
8653
8654 default:
8655 container = construct_container (nat_mode, TYPE_MODE (type),
8656 type, 0, X86_64_REGPARM_MAX,
8657 X86_64_SSE_REGPARM_MAX, intreg,
8658 0);
8659 break;
8660 }
8661
8662 /* Pull the value out of the saved registers. */
8663
8664 addr = create_tmp_var (ptr_type_node, "addr");
8665
8666 if (container)
8667 {
8668 int needed_intregs, needed_sseregs;
8669 bool need_temp;
8670 tree int_addr, sse_addr;
8671
8672 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8673 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8674
8675 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8676
8677 need_temp = (!REG_P (container)
8678 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8679 || TYPE_ALIGN (type) > 128));
8680
8681       /* If we are passing a structure, verify that it occupies a consecutive
8682 	 block in the register save area.  If not, we need to do moves.  */
8683 if (!need_temp && !REG_P (container))
8684 {
8685 /* Verify that all registers are strictly consecutive */
8686 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8687 {
8688 int i;
8689
8690 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8691 {
8692 rtx slot = XVECEXP (container, 0, i);
8693 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8694 || INTVAL (XEXP (slot, 1)) != i * 16)
8695 need_temp = 1;
8696 }
8697 }
8698 else
8699 {
8700 int i;
8701
8702 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8703 {
8704 rtx slot = XVECEXP (container, 0, i);
8705 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8706 || INTVAL (XEXP (slot, 1)) != i * 8)
8707 need_temp = 1;
8708 }
8709 }
8710 }
8711 if (!need_temp)
8712 {
8713 int_addr = addr;
8714 sse_addr = addr;
8715 }
8716 else
8717 {
8718 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8719 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8720 }
8721
8722 /* First ensure that we fit completely in registers. */
8723 if (needed_intregs)
8724 {
8725 t = build_int_cst (TREE_TYPE (gpr),
8726 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8727 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8728 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8729 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8730 gimplify_and_add (t, pre_p);
8731 }
8732 if (needed_sseregs)
8733 {
8734 t = build_int_cst (TREE_TYPE (fpr),
8735 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8736 + X86_64_REGPARM_MAX * 8);
8737 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8738 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8739 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8740 gimplify_and_add (t, pre_p);
8741 }
8742
8743 /* Compute index to start of area used for integer regs. */
8744 if (needed_intregs)
8745 {
8746 /* int_addr = gpr + sav; */
8747 t = fold_build_pointer_plus (sav, gpr);
8748 gimplify_assign (int_addr, t, pre_p);
8749 }
8750 if (needed_sseregs)
8751 {
8752 /* sse_addr = fpr + sav; */
8753 t = fold_build_pointer_plus (sav, fpr);
8754 gimplify_assign (sse_addr, t, pre_p);
8755 }
8756 if (need_temp)
8757 {
8758 int i, prev_size = 0;
8759 tree temp = create_tmp_var (type, "va_arg_tmp");
8760
8761 /* addr = &temp; */
8762 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8763 gimplify_assign (addr, t, pre_p);
8764
8765 for (i = 0; i < XVECLEN (container, 0); i++)
8766 {
8767 rtx slot = XVECEXP (container, 0, i);
8768 rtx reg = XEXP (slot, 0);
8769 enum machine_mode mode = GET_MODE (reg);
8770 tree piece_type;
8771 tree addr_type;
8772 tree daddr_type;
8773 tree src_addr, src;
8774 int src_offset;
8775 tree dest_addr, dest;
8776 int cur_size = GET_MODE_SIZE (mode);
8777
8778 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8779 prev_size = INTVAL (XEXP (slot, 1));
8780 if (prev_size + cur_size > size)
8781 {
8782 cur_size = size - prev_size;
8783 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8784 if (mode == BLKmode)
8785 mode = QImode;
8786 }
8787 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8788 if (mode == GET_MODE (reg))
8789 addr_type = build_pointer_type (piece_type);
8790 else
8791 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8792 true);
8793 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8794 true);
8795
8796 if (SSE_REGNO_P (REGNO (reg)))
8797 {
8798 src_addr = sse_addr;
8799 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8800 }
8801 else
8802 {
8803 src_addr = int_addr;
8804 src_offset = REGNO (reg) * 8;
8805 }
8806 src_addr = fold_convert (addr_type, src_addr);
8807 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8808
8809 dest_addr = fold_convert (daddr_type, addr);
8810 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8811 if (cur_size == GET_MODE_SIZE (mode))
8812 {
8813 src = build_va_arg_indirect_ref (src_addr);
8814 dest = build_va_arg_indirect_ref (dest_addr);
8815
8816 gimplify_assign (dest, src, pre_p);
8817 }
8818 else
8819 {
8820 tree copy
8821 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8822 3, dest_addr, src_addr,
8823 size_int (cur_size));
8824 gimplify_and_add (copy, pre_p);
8825 }
8826 prev_size += cur_size;
8827 }
8828 }
8829
8830 if (needed_intregs)
8831 {
8832 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8833 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8834 gimplify_assign (gpr, t, pre_p);
8835 }
8836
8837 if (needed_sseregs)
8838 {
8839 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8840 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8841 gimplify_assign (fpr, t, pre_p);
8842 }
8843
8844 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8845
8846 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8847 }
8848
8849 /* ... otherwise out of the overflow area. */
8850
8851   /* When the caller aligns a parameter on the stack, any alignment
8852      beyond MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
8853      MAX_SUPPORTED_STACK_ALIGNMENT.  Match the callee here with the
8854      caller.  */
8855 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8856 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8857 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8858
8859 /* Care for on-stack alignment if needed. */
8860 if (arg_boundary <= 64 || size == 0)
8861 t = ovf;
8862 else
8863 {
8864 HOST_WIDE_INT align = arg_boundary / 8;
8865 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8866 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8867 build_int_cst (TREE_TYPE (t), -align));
8868 }
8869
8870 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8871 gimplify_assign (addr, t, pre_p);
8872
8873 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8874 gimplify_assign (unshare_expr (ovf), t, pre_p);
8875
8876 if (container)
8877 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8878
8879 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8880 addr = fold_convert (ptrtype, addr);
8881
8882 if (indirect_p)
8883 addr = build_va_arg_indirect_ref (addr);
8884 return build_va_arg_indirect_ref (addr);
8885 }
8886 \f
8887 /* Return true if OPNUM's MEM should be matched
8888 in movabs* patterns. */
8889
8890 bool
8891 ix86_check_movabs (rtx insn, int opnum)
8892 {
8893 rtx set, mem;
8894
8895 set = PATTERN (insn);
8896 if (GET_CODE (set) == PARALLEL)
8897 set = XVECEXP (set, 0, 0);
8898 gcc_assert (GET_CODE (set) == SET);
8899 mem = XEXP (set, opnum);
8900 while (GET_CODE (mem) == SUBREG)
8901 mem = SUBREG_REG (mem);
8902 gcc_assert (MEM_P (mem));
8903 return volatile_ok || !MEM_VOLATILE_P (mem);
8904 }
8905 \f
8906 /* Initialize the table of extra 80387 mathematical constants. */
8907
8908 static void
8909 init_ext_80387_constants (void)
8910 {
8911 static const char * cst[5] =
8912 {
8913 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8914 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8915 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8916 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8917 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8918 };
8919 int i;
8920
8921 for (i = 0; i < 5; i++)
8922 {
8923 real_from_string (&ext_80387_constants_table[i], cst[i]);
8924 /* Ensure each constant is rounded to XFmode precision. */
8925 real_convert (&ext_80387_constants_table[i],
8926 XFmode, &ext_80387_constants_table[i]);
8927 }
8928
8929 ext_80387_constants_init = 1;
8930 }
8931
8932 /* Return non-zero if the constant is something that
8933 can be loaded with a special instruction. */
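/* The return value encodes which instruction to use (see
   standard_80387_constant_opcode): 1 -> fldz, 2 -> fld1, 3 -> fldlg2,
   4 -> fldln2, 5 -> fldl2e, 6 -> fldl2t, 7 -> fldpi, and 8/9 mean the
   load is split into fldz/fld1 followed by fchs.  */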
8934
8935 int
8936 standard_80387_constant_p (rtx x)
8937 {
8938 enum machine_mode mode = GET_MODE (x);
8939
8940 REAL_VALUE_TYPE r;
8941
8942 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8943 return -1;
8944
8945 if (x == CONST0_RTX (mode))
8946 return 1;
8947 if (x == CONST1_RTX (mode))
8948 return 2;
8949
8950 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8951
8952 /* For XFmode constants, try to find a special 80387 instruction when
8953 optimizing for size or on those CPUs that benefit from them. */
8954 if (mode == XFmode
8955 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8956 {
8957 int i;
8958
8959 if (! ext_80387_constants_init)
8960 init_ext_80387_constants ();
8961
8962 for (i = 0; i < 5; i++)
8963 if (real_identical (&r, &ext_80387_constants_table[i]))
8964 return i + 3;
8965 }
8966
8967 /* Load of the constant -0.0 or -1.0 will be split as
8968 fldz;fchs or fld1;fchs sequence. */
8969 if (real_isnegzero (&r))
8970 return 8;
8971 if (real_identical (&r, &dconstm1))
8972 return 9;
8973
8974 return 0;
8975 }
8976
8977 /* Return the opcode of the special instruction to be used to load
8978 the constant X. */
8979
8980 const char *
8981 standard_80387_constant_opcode (rtx x)
8982 {
8983 switch (standard_80387_constant_p (x))
8984 {
8985 case 1:
8986 return "fldz";
8987 case 2:
8988 return "fld1";
8989 case 3:
8990 return "fldlg2";
8991 case 4:
8992 return "fldln2";
8993 case 5:
8994 return "fldl2e";
8995 case 6:
8996 return "fldl2t";
8997 case 7:
8998 return "fldpi";
8999 case 8:
9000 case 9:
9001 return "#";
9002 default:
9003 gcc_unreachable ();
9004 }
9005 }
9006
9007 /* Return the CONST_DOUBLE representing the 80387 constant that is
9008 loaded by the specified special instruction. The argument IDX
9009 matches the return value from standard_80387_constant_p. */
9010
9011 rtx
9012 standard_80387_constant_rtx (int idx)
9013 {
9014 int i;
9015
9016 if (! ext_80387_constants_init)
9017 init_ext_80387_constants ();
9018
9019 switch (idx)
9020 {
9021 case 3:
9022 case 4:
9023 case 5:
9024 case 6:
9025 case 7:
9026 i = idx - 3;
9027 break;
9028
9029 default:
9030 gcc_unreachable ();
9031 }
9032
9033 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9034 XFmode);
9035 }
9036
9037 /* Return 1 if X is all 0s, 2 if X is all 1s in a supported SSE/AVX
9038    vector mode, and 0 otherwise.  */
9039
9040 int
9041 standard_sse_constant_p (rtx x)
9042 {
9043 enum machine_mode mode = GET_MODE (x);
9044
9045 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
9046 return 1;
9047 if (vector_all_ones_operand (x, mode))
9048 switch (mode)
9049 {
9050 case V16QImode:
9051 case V8HImode:
9052 case V4SImode:
9053 case V2DImode:
9054 if (TARGET_SSE2)
9055 return 2;
9056 case V32QImode:
9057 case V16HImode:
9058 case V8SImode:
9059 case V4DImode:
9060 if (TARGET_AVX2)
9061 return 2;
9062 case V64QImode:
9063 case V32HImode:
9064 case V16SImode:
9065 case V8DImode:
9066 if (TARGET_AVX512F)
9067 return 2;
9068 default:
9069 break;
9070 }
9071
9072 return 0;
9073 }
9074
9075 /* Return the opcode of the special instruction to be used to load
9076 the constant X. */
9077
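/* In short: an all-zero constant (standard_sse_constant_p == 1) becomes
   some flavor of xor of the destination with itself, and an all-ones
   constant (== 2) becomes pcmpeqd/vpcmpeqd, or vpternlogd with an
   all-ones immediate for 512-bit (and, with AVX512VL, narrower) modes.  */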
9078 const char *
9079 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9080 {
9081 switch (standard_sse_constant_p (x))
9082 {
9083 case 1:
9084 switch (get_attr_mode (insn))
9085 {
9086 case MODE_XI:
9087 return "vpxord\t%g0, %g0, %g0";
9088 case MODE_V16SF:
9089 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9090 : "vpxord\t%g0, %g0, %g0";
9091 case MODE_V8DF:
9092 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9093 : "vpxorq\t%g0, %g0, %g0";
9094 case MODE_TI:
9095 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9096 : "%vpxor\t%0, %d0";
9097 case MODE_V2DF:
9098 return "%vxorpd\t%0, %d0";
9099 case MODE_V4SF:
9100 return "%vxorps\t%0, %d0";
9101
9102 case MODE_OI:
9103 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9104 : "vpxor\t%x0, %x0, %x0";
9105 case MODE_V4DF:
9106 return "vxorpd\t%x0, %x0, %x0";
9107 case MODE_V8SF:
9108 return "vxorps\t%x0, %x0, %x0";
9109
9110 default:
9111 break;
9112 }
9113
9114 case 2:
9115 if (TARGET_AVX512VL
9116 || get_attr_mode (insn) == MODE_XI
9117 || get_attr_mode (insn) == MODE_V8DF
9118 || get_attr_mode (insn) == MODE_V16SF)
9119 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9120 if (TARGET_AVX)
9121 return "vpcmpeqd\t%0, %0, %0";
9122 else
9123 return "pcmpeqd\t%0, %0";
9124
9125 default:
9126 break;
9127 }
9128 gcc_unreachable ();
9129 }
9130
9131 /* Return true if OP contains a symbol reference.  */
9132
9133 bool
9134 symbolic_reference_mentioned_p (rtx op)
9135 {
9136 const char *fmt;
9137 int i;
9138
9139 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9140 return true;
9141
9142 fmt = GET_RTX_FORMAT (GET_CODE (op));
9143 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9144 {
9145 if (fmt[i] == 'E')
9146 {
9147 int j;
9148
9149 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9150 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9151 return true;
9152 }
9153
9154 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9155 return true;
9156 }
9157
9158 return false;
9159 }
9160
9161 /* Return true if it is appropriate to emit `ret' instructions in the
9162 body of a function. Do this only if the epilogue is simple, needing a
9163 couple of insns. Prior to reloading, we can't tell how many registers
9164 must be saved, so return false then. Return false if there is no frame
9165 marker to de-allocate. */
9166
9167 bool
9168 ix86_can_use_return_insn_p (void)
9169 {
9170 struct ix86_frame frame;
9171
9172 if (! reload_completed || frame_pointer_needed)
9173 return 0;
9174
9175 /* Don't allow more than 32k pop, since that's all we can do
9176 with one instruction. */
9177 if (crtl->args.pops_args && crtl->args.size >= 32768)
9178 return 0;
9179
9180 ix86_compute_frame_layout (&frame);
9181 return (frame.stack_pointer_offset == UNITS_PER_WORD
9182 && (frame.nregs + frame.nsseregs) == 0);
9183 }
9184 \f
9185 /* Value should be nonzero if functions must have frame pointers.
9186 Zero means the frame pointer need not be set up (and parms may
9187 be accessed via the stack pointer) in functions that seem suitable. */
9188
9189 static bool
9190 ix86_frame_pointer_required (void)
9191 {
9192 /* If we accessed previous frames, then the generated code expects
9193 to be able to access the saved ebp value in our frame. */
9194 if (cfun->machine->accesses_prev_frame)
9195 return true;
9196
9197   /* Several x86 OSes need a frame pointer for other reasons,
9198      usually pertaining to setjmp.  */
9199 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9200 return true;
9201
9202   /* For older 32-bit runtimes, setjmp requires a valid frame pointer.  */
9203 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9204 return true;
9205
9206   /* With Win64 SEH, very large frames need a frame pointer, as the maximum
9207      stack allocation is 4GB.  */
9208 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9209 return true;
9210
9211 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9212 turns off the frame pointer by default. Turn it back on now if
9213 we've not got a leaf function. */
9214 if (TARGET_OMIT_LEAF_FRAME_POINTER
9215 && (!crtl->is_leaf
9216 || ix86_current_function_calls_tls_descriptor))
9217 return true;
9218
9219 if (crtl->profile && !flag_fentry)
9220 return true;
9221
9222 return false;
9223 }
9224
9225 /* Record that the current function accesses previous call frames. */
9226
9227 void
9228 ix86_setup_frame_addresses (void)
9229 {
9230 cfun->machine->accesses_prev_frame = 1;
9231 }
9232 \f
9233 #ifndef USE_HIDDEN_LINKONCE
9234 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9235 # define USE_HIDDEN_LINKONCE 1
9236 # else
9237 # define USE_HIDDEN_LINKONCE 0
9238 # endif
9239 #endif
9240
9241 static int pic_labels_used;
9242
9243 /* Fills in the label name that should be used for a pc thunk for
9244 the given register. */
9245
9246 static void
9247 get_pc_thunk_name (char name[32], unsigned int regno)
9248 {
9249 gcc_assert (!TARGET_64BIT);
9250
9251 if (USE_HIDDEN_LINKONCE)
9252 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9253 else
9254 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9255 }
9256
9257
9258 /* This function generates the get_pc_thunk functions used by -fpic; each
9259    thunk loads its register with the return address of the caller and returns.  */
9260
9261 static void
9262 ix86_code_end (void)
9263 {
9264 rtx xops[2];
9265 int regno;
9266
9267 for (regno = AX_REG; regno <= SP_REG; regno++)
9268 {
9269 char name[32];
9270 tree decl;
9271
9272 if (!(pic_labels_used & (1 << regno)))
9273 continue;
9274
9275 get_pc_thunk_name (name, regno);
9276
9277 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9278 get_identifier (name),
9279 build_function_type_list (void_type_node, NULL_TREE));
9280 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9281 NULL_TREE, void_type_node);
9282 TREE_PUBLIC (decl) = 1;
9283 TREE_STATIC (decl) = 1;
9284 DECL_IGNORED_P (decl) = 1;
9285
9286 #if TARGET_MACHO
9287 if (TARGET_MACHO)
9288 {
9289 switch_to_section (darwin_sections[text_coal_section]);
9290 fputs ("\t.weak_definition\t", asm_out_file);
9291 assemble_name (asm_out_file, name);
9292 fputs ("\n\t.private_extern\t", asm_out_file);
9293 assemble_name (asm_out_file, name);
9294 putc ('\n', asm_out_file);
9295 ASM_OUTPUT_LABEL (asm_out_file, name);
9296 DECL_WEAK (decl) = 1;
9297 }
9298 else
9299 #endif
9300 if (USE_HIDDEN_LINKONCE)
9301 {
9302 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9303
9304 targetm.asm_out.unique_section (decl, 0);
9305 switch_to_section (get_named_section (decl, NULL, 0));
9306
9307 targetm.asm_out.globalize_label (asm_out_file, name);
9308 fputs ("\t.hidden\t", asm_out_file);
9309 assemble_name (asm_out_file, name);
9310 putc ('\n', asm_out_file);
9311 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9312 }
9313 else
9314 {
9315 switch_to_section (text_section);
9316 ASM_OUTPUT_LABEL (asm_out_file, name);
9317 }
9318
9319 DECL_INITIAL (decl) = make_node (BLOCK);
9320 current_function_decl = decl;
9321 init_function_start (decl);
9322 first_function_block_is_cold = false;
9323 /* Make sure unwind info is emitted for the thunk if needed. */
9324 final_start_function (emit_barrier (), asm_out_file, 1);
9325
9326 /* Pad stack IP move with 4 instructions (two NOPs count
9327 as one instruction). */
9328 if (TARGET_PAD_SHORT_FUNCTION)
9329 {
9330 int i = 8;
9331
9332 while (i--)
9333 fputs ("\tnop\n", asm_out_file);
9334 }
9335
9336 xops[0] = gen_rtx_REG (Pmode, regno);
9337 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9338 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9339 fputs ("\tret\n", asm_out_file);
9340 final_end_function ();
9341 init_insn_lengths ();
9342 free_after_compilation (cfun);
9343 set_cfun (NULL);
9344 current_function_decl = NULL;
9345 }
9346
9347 if (flag_split_stack)
9348 file_end_indicate_split_stack ();
9349 }
9350
9351 /* Emit code for the SET_GOT patterns. */
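/* For the common -fpic case on ELF targets this boils down to,
   schematically,

       call  __x86.get_pc_thunk.<reg>
       add   $_GLOBAL_OFFSET_TABLE_, %<reg>

   using the thunks emitted by ix86_code_end above; the VxWorks RTP and
   Mach-O paths below differ.  */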
9352
9353 const char *
9354 output_set_got (rtx dest, rtx label)
9355 {
9356 rtx xops[3];
9357
9358 xops[0] = dest;
9359
9360 if (TARGET_VXWORKS_RTP && flag_pic)
9361 {
9362 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9363 xops[2] = gen_rtx_MEM (Pmode,
9364 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9365 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9366
9367 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9368 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9369 an unadorned address. */
9370 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9371 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9372 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9373 return "";
9374 }
9375
9376 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9377
9378 if (!flag_pic)
9379 {
9380 if (TARGET_MACHO)
9381 /* We don't need a pic base, we're not producing pic. */
9382 gcc_unreachable ();
9383
9384 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9385 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9386 targetm.asm_out.internal_label (asm_out_file, "L",
9387 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9388 }
9389 else
9390 {
9391 char name[32];
9392 get_pc_thunk_name (name, REGNO (dest));
9393 pic_labels_used |= 1 << REGNO (dest);
9394
9395 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9396 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9397 output_asm_insn ("call\t%X2", xops);
9398
9399 #if TARGET_MACHO
9400 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9401 This is what will be referenced by the Mach-O PIC subsystem. */
9402 if (machopic_should_output_picbase_label () || !label)
9403 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9404
9405 /* When we are restoring the pic base at the site of a nonlocal label,
9406 and we decided to emit the pic base above, we will still output a
9407 local label used for calculating the correction offset (even though
9408 the offset will be 0 in that case). */
9409 if (label)
9410 targetm.asm_out.internal_label (asm_out_file, "L",
9411 CODE_LABEL_NUMBER (label));
9412 #endif
9413 }
9414
9415 if (!TARGET_MACHO)
9416 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9417
9418 return "";
9419 }
9420
9421 /* Generate a "push" pattern for input ARG.  */
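/* Schematically the insn built here is
     (set (mem:word_mode (pre_dec (reg sp))) ARG)
   while the cfa_offset / sp_offset updates keep the frame-state
   bookkeeping in cfun->machine->fs in sync with the push.  */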
9422
9423 static rtx
9424 gen_push (rtx arg)
9425 {
9426 struct machine_function *m = cfun->machine;
9427
9428 if (m->fs.cfa_reg == stack_pointer_rtx)
9429 m->fs.cfa_offset += UNITS_PER_WORD;
9430 m->fs.sp_offset += UNITS_PER_WORD;
9431
9432 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9433 arg = gen_rtx_REG (word_mode, REGNO (arg));
9434
9435 return gen_rtx_SET (VOIDmode,
9436 gen_rtx_MEM (word_mode,
9437 gen_rtx_PRE_DEC (Pmode,
9438 stack_pointer_rtx)),
9439 arg);
9440 }
9441
9442 /* Generate a "pop" pattern for input ARG.  */
9443
9444 static rtx
9445 gen_pop (rtx arg)
9446 {
9447 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9448 arg = gen_rtx_REG (word_mode, REGNO (arg));
9449
9450 return gen_rtx_SET (VOIDmode,
9451 arg,
9452 gen_rtx_MEM (word_mode,
9453 gen_rtx_POST_INC (Pmode,
9454 stack_pointer_rtx)));
9455 }
9456
9457 /* Return the register number of an unused call-clobbered register available
9458    for the entire function, or INVALID_REGNUM if there is none.  */
9459
9460 static unsigned int
9461 ix86_select_alt_pic_regnum (void)
9462 {
9463 if (ix86_use_pseudo_pic_reg ())
9464 return INVALID_REGNUM;
9465
9466 if (crtl->is_leaf
9467 && !crtl->profile
9468 && !ix86_current_function_calls_tls_descriptor)
9469 {
9470 int i, drap;
9471 /* Can't use the same register for both PIC and DRAP. */
9472 if (crtl->drap_reg)
9473 drap = REGNO (crtl->drap_reg);
9474 else
9475 drap = -1;
9476 for (i = 2; i >= 0; --i)
9477 if (i != drap && !df_regs_ever_live_p (i))
9478 return i;
9479 }
9480
9481 return INVALID_REGNUM;
9482 }
9483
9484 /* Return TRUE if we need to save REGNO. */
9485
9486 static bool
9487 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9488 {
9489 if (pic_offset_table_rtx
9490 && !ix86_use_pseudo_pic_reg ()
9491 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9492 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9493 || crtl->profile
9494 || crtl->calls_eh_return
9495 || crtl->uses_const_pool
9496 || cfun->has_nonlocal_label))
9497 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9498
9499 if (crtl->calls_eh_return && maybe_eh_return)
9500 {
9501 unsigned i;
9502 for (i = 0; ; i++)
9503 {
9504 unsigned test = EH_RETURN_DATA_REGNO (i);
9505 if (test == INVALID_REGNUM)
9506 break;
9507 if (test == regno)
9508 return true;
9509 }
9510 }
9511
9512 if (crtl->drap_reg
9513 && regno == REGNO (crtl->drap_reg)
9514 && !cfun->machine->no_drap_save_restore)
9515 return true;
9516
9517 return (df_regs_ever_live_p (regno)
9518 && !call_used_regs[regno]
9519 && !fixed_regs[regno]
9520 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9521 }
9522
9523 /* Return the number of saved general purpose registers.  */
9524
9525 static int
9526 ix86_nsaved_regs (void)
9527 {
9528 int nregs = 0;
9529 int regno;
9530
9531 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9532 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9533 nregs ++;
9534 return nregs;
9535 }
9536
9537 /* Return the number of saved SSE registers.  */
9538
9539 static int
9540 ix86_nsaved_sseregs (void)
9541 {
9542 int nregs = 0;
9543 int regno;
9544
9545 if (!TARGET_64BIT_MS_ABI)
9546 return 0;
9547 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9548 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9549 nregs ++;
9550 return nregs;
9551 }
9552
9553 /* Given FROM and TO register numbers, say whether this elimination is
9554 allowed. If stack alignment is needed, we can only replace argument
9555 pointer with hard frame pointer, or replace frame pointer with stack
9556 pointer. Otherwise, frame pointer elimination is automatically
9557 handled and all other eliminations are valid. */
9558
9559 static bool
9560 ix86_can_eliminate (const int from, const int to)
9561 {
9562 if (stack_realign_fp)
9563 return ((from == ARG_POINTER_REGNUM
9564 && to == HARD_FRAME_POINTER_REGNUM)
9565 || (from == FRAME_POINTER_REGNUM
9566 && to == STACK_POINTER_REGNUM));
9567 else
9568 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9569 }
9570
9571 /* Return the offset between two registers, one to be eliminated, and the other
9572 its replacement, at the start of a routine. */
9573
9574 HOST_WIDE_INT
9575 ix86_initial_elimination_offset (int from, int to)
9576 {
9577 struct ix86_frame frame;
9578 ix86_compute_frame_layout (&frame);
9579
9580 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9581 return frame.hard_frame_pointer_offset;
9582 else if (from == FRAME_POINTER_REGNUM
9583 && to == HARD_FRAME_POINTER_REGNUM)
9584 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9585 else
9586 {
9587 gcc_assert (to == STACK_POINTER_REGNUM);
9588
9589 if (from == ARG_POINTER_REGNUM)
9590 return frame.stack_pointer_offset;
9591
9592 gcc_assert (from == FRAME_POINTER_REGNUM);
9593 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9594 }
9595 }
9596
9597 /* In a dynamically-aligned function, we can't know the offset from
9598 stack pointer to frame pointer, so we must ensure that setjmp
9599 eliminates fp against the hard fp (%ebp) rather than trying to
9600 index from %esp up to the top of the frame across a gap that is
9601 of unknown (at compile-time) size. */
9602 static rtx
9603 ix86_builtin_setjmp_frame_value (void)
9604 {
9605 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9606 }
9607
9608 /* When using -fsplit-stack, the allocation routines set a field in
9609 the TCB to the bottom of the stack plus this much space, measured
9610 in bytes. */
9611
9612 #define SPLIT_STACK_AVAILABLE 256
9613
9614 /* Fill structure ix86_frame describing the frame of the currently compiled function.  */
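/* A sketch of the offsets computed below, growing away from the CFA
   (this just summarizes the code that follows, not any ABI document):

     return address                <- offset = UNITS_PER_WORD
     [pushed static chain]
     [saved frame pointer]         <- hard_frame_pointer_offset (non-SEH)
     GPR save area                 <- reg_save_offset
     SSE save area (16-aligned)    <- sse_reg_save_offset
     va_arg register save area
     local variables               <- frame_pointer_offset
     outgoing argument area
                                   <- stack_pointer_offset  */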
9615
9616 static void
9617 ix86_compute_frame_layout (struct ix86_frame *frame)
9618 {
9619 unsigned HOST_WIDE_INT stack_alignment_needed;
9620 HOST_WIDE_INT offset;
9621 unsigned HOST_WIDE_INT preferred_alignment;
9622 HOST_WIDE_INT size = get_frame_size ();
9623 HOST_WIDE_INT to_allocate;
9624
9625 frame->nregs = ix86_nsaved_regs ();
9626 frame->nsseregs = ix86_nsaved_sseregs ();
9627
9628   /* The 64-bit MS ABI seems to require stack alignment to always be 16, except
9629      in function prologues and leaf functions.  */
9630 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9631 && (!crtl->is_leaf || cfun->calls_alloca != 0
9632 || ix86_current_function_calls_tls_descriptor))
9633 {
9634 crtl->preferred_stack_boundary = 128;
9635 crtl->stack_alignment_needed = 128;
9636 }
9637   /* preferred_stack_boundary is never updated for calls expanded from a TLS
9638      descriptor.  Update it here.  We don't update it in the expand stage
9639      because, according to the comments before
9640      ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
9641      away.  */
9642 else if (ix86_current_function_calls_tls_descriptor
9643 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9644 {
9645 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9646 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9647 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9648 }
9649
9650 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9651 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9652
9653 gcc_assert (!size || stack_alignment_needed);
9654 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9655 gcc_assert (preferred_alignment <= stack_alignment_needed);
9656
9657 /* For SEH we have to limit the amount of code movement into the prologue.
9658 At present we do this via a BLOCKAGE, at which point there's very little
9659 scheduling that can be done, which means that there's very little point
9660 in doing anything except PUSHs. */
9661 if (TARGET_SEH)
9662 cfun->machine->use_fast_prologue_epilogue = false;
9663
9664   /* During reload iteration the number of registers saved can change.
9665      Recompute the value as needed.  Do not recompute when the number of
9666      registers didn't change, as reload makes multiple calls to this function
9667      and does not expect the decision to change within a single iteration.  */
9668 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9669 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9670 {
9671 int count = frame->nregs;
9672 struct cgraph_node *node = cgraph_node::get (current_function_decl);
9673
9674 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9675
9676 /* The fast prologue uses move instead of push to save registers. This
9677 is significantly longer, but also executes faster as modern hardware
9678 can execute the moves in parallel, but can't do that for push/pop.
9679
9680 	 Be careful about choosing which prologue to emit: when the function
9681 	 takes many instructions to execute, we may use the slow version, as
9682 	 well as when the function is known to be outside a hot spot (this is
9683 	 known with feedback only).  Weight the size of the function by the
9684 	 number of registers to save, as it is cheap to use one or two push
9685 	 instructions but very slow to use many of them.  */
9686 if (count)
9687 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9688 if (node->frequency < NODE_FREQUENCY_NORMAL
9689 || (flag_branch_probabilities
9690 && node->frequency < NODE_FREQUENCY_HOT))
9691 cfun->machine->use_fast_prologue_epilogue = false;
9692 else
9693 cfun->machine->use_fast_prologue_epilogue
9694 = !expensive_function_p (count);
9695 }
9696
9697 frame->save_regs_using_mov
9698 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9699 /* If static stack checking is enabled and done with probes,
9700 the registers need to be saved before allocating the frame. */
9701 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9702
9703 /* Skip return address. */
9704 offset = UNITS_PER_WORD;
9705
9706 /* Skip pushed static chain. */
9707 if (ix86_static_chain_on_stack)
9708 offset += UNITS_PER_WORD;
9709
9710 /* Skip saved base pointer. */
9711 if (frame_pointer_needed)
9712 offset += UNITS_PER_WORD;
9713 frame->hfp_save_offset = offset;
9714
9715 /* The traditional frame pointer location is at the top of the frame. */
9716 frame->hard_frame_pointer_offset = offset;
9717
9718 /* Register save area */
9719 offset += frame->nregs * UNITS_PER_WORD;
9720 frame->reg_save_offset = offset;
9721
9722 /* On SEH target, registers are pushed just before the frame pointer
9723 location. */
9724 if (TARGET_SEH)
9725 frame->hard_frame_pointer_offset = offset;
9726
9727 /* Align and set SSE register save area. */
9728 if (frame->nsseregs)
9729 {
9730 /* The only ABI that has saved SSE registers (Win64) also has a
9731 16-byte aligned default stack, and thus we don't need to be
9732 within the re-aligned local stack frame to save them. */
9733 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9734 offset = (offset + 16 - 1) & -16;
9735 offset += frame->nsseregs * 16;
9736 }
9737 frame->sse_reg_save_offset = offset;
9738
9739 /* The re-aligned stack starts here. Values before this point are not
9740 directly comparable with values below this point. In order to make
9741 sure that no value happens to be the same before and after, force
9742 the alignment computation below to add a non-zero value. */
9743 if (stack_realign_fp)
9744 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9745
9746 /* Va-arg area */
9747 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9748 offset += frame->va_arg_size;
9749
9750 /* Align start of frame for local function. */
9751 if (stack_realign_fp
9752 || offset != frame->sse_reg_save_offset
9753 || size != 0
9754 || !crtl->is_leaf
9755 || cfun->calls_alloca
9756 || ix86_current_function_calls_tls_descriptor)
9757 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9758
9759 /* Frame pointer points here. */
9760 frame->frame_pointer_offset = offset;
9761
9762 offset += size;
9763
9764 /* Add outgoing arguments area. Can be skipped if we eliminated
9765 all the function calls as dead code.
9766      Skipping is, however, impossible when the function calls alloca.  The
9767      alloca expander assumes that the last crtl->outgoing_args_size bytes of
9768      the stack frame are unused.  */
9769 if (ACCUMULATE_OUTGOING_ARGS
9770 && (!crtl->is_leaf || cfun->calls_alloca
9771 || ix86_current_function_calls_tls_descriptor))
9772 {
9773 offset += crtl->outgoing_args_size;
9774 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9775 }
9776 else
9777 frame->outgoing_arguments_size = 0;
9778
9779 /* Align stack boundary. Only needed if we're calling another function
9780 or using alloca. */
9781 if (!crtl->is_leaf || cfun->calls_alloca
9782 || ix86_current_function_calls_tls_descriptor)
9783 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9784
9785 /* We've reached end of stack frame. */
9786 frame->stack_pointer_offset = offset;
9787
9788 /* Size prologue needs to allocate. */
9789 to_allocate = offset - frame->sse_reg_save_offset;
9790
9791 if ((!to_allocate && frame->nregs <= 1)
9792 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9793 frame->save_regs_using_mov = false;
9794
9795 if (ix86_using_red_zone ()
9796 && crtl->sp_is_unchanging
9797 && crtl->is_leaf
9798 && !ix86_current_function_calls_tls_descriptor)
9799 {
9800 frame->red_zone_size = to_allocate;
9801 if (frame->save_regs_using_mov)
9802 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9803 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9804 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9805 }
9806 else
9807 frame->red_zone_size = 0;
9808 frame->stack_pointer_offset -= frame->red_zone_size;
9809
9810 /* The SEH frame pointer location is near the bottom of the frame.
9811 This is enforced by the fact that the difference between the
9812 stack pointer and the frame pointer is limited to 240 bytes in
9813 the unwind data structure. */
9814 if (TARGET_SEH)
9815 {
9816 HOST_WIDE_INT diff;
9817
9818 /* If we can leave the frame pointer where it is, do so. Also, returns
9819 the establisher frame for __builtin_frame_address (0). */
9820 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9821 if (diff <= SEH_MAX_FRAME_SIZE
9822 && (diff > 240 || (diff & 15) != 0)
9823 && !crtl->accesses_prior_frames)
9824 {
9825 /* Ideally we'd determine what portion of the local stack frame
9826 (within the constraint of the lowest 240) is most heavily used.
9827 But without that complication, simply bias the frame pointer
9828 by 128 bytes so as to maximize the amount of the local stack
9829 frame that is addressable with 8-bit offsets. */
9830 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9831 }
9832 }
9833 }
9834
9835 /* This is semi-inlined memory_address_length, but simplified
9836 since we know that we're always dealing with reg+offset, and
9837 to avoid having to create and discard all that rtl. */
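/* For example: a zero offset costs 0 bytes unless the base is EBP/R13
   (which always need at least a disp8), an offset in [-128, 127] costs
   1 byte, anything larger costs 4, and using ESP/R12 as the base adds
   one SIB byte on top.  */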
9838
9839 static inline int
9840 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9841 {
9842 int len = 4;
9843
9844 if (offset == 0)
9845 {
9846 /* EBP and R13 cannot be encoded without an offset. */
9847 len = (regno == BP_REG || regno == R13_REG);
9848 }
9849 else if (IN_RANGE (offset, -128, 127))
9850 len = 1;
9851
9852 /* ESP and R12 must be encoded with a SIB byte. */
9853 if (regno == SP_REG || regno == R12_REG)
9854 len++;
9855
9856 return len;
9857 }
9858
9859 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9860 The valid base registers are taken from CFUN->MACHINE->FS. */
9861
9862 static rtx
9863 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9864 {
9865 const struct machine_function *m = cfun->machine;
9866 rtx base_reg = NULL;
9867 HOST_WIDE_INT base_offset = 0;
9868
9869 if (m->use_fast_prologue_epilogue)
9870 {
9871 /* Choose the base register most likely to allow the most scheduling
9872 opportunities. Generally FP is valid throughout the function,
9873 while DRAP must be reloaded within the epilogue. But choose either
9874 over the SP due to increased encoding size. */
9875
9876 if (m->fs.fp_valid)
9877 {
9878 base_reg = hard_frame_pointer_rtx;
9879 base_offset = m->fs.fp_offset - cfa_offset;
9880 }
9881 else if (m->fs.drap_valid)
9882 {
9883 base_reg = crtl->drap_reg;
9884 base_offset = 0 - cfa_offset;
9885 }
9886 else if (m->fs.sp_valid)
9887 {
9888 base_reg = stack_pointer_rtx;
9889 base_offset = m->fs.sp_offset - cfa_offset;
9890 }
9891 }
9892 else
9893 {
9894 HOST_WIDE_INT toffset;
9895 int len = 16, tlen;
9896
9897 /* Choose the base register with the smallest address encoding.
9898 With a tie, choose FP > DRAP > SP. */
9899 if (m->fs.sp_valid)
9900 {
9901 base_reg = stack_pointer_rtx;
9902 base_offset = m->fs.sp_offset - cfa_offset;
9903 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9904 }
9905 if (m->fs.drap_valid)
9906 {
9907 toffset = 0 - cfa_offset;
9908 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9909 if (tlen <= len)
9910 {
9911 base_reg = crtl->drap_reg;
9912 base_offset = toffset;
9913 len = tlen;
9914 }
9915 }
9916 if (m->fs.fp_valid)
9917 {
9918 toffset = m->fs.fp_offset - cfa_offset;
9919 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9920 if (tlen <= len)
9921 {
9922 base_reg = hard_frame_pointer_rtx;
9923 base_offset = toffset;
9924 len = tlen;
9925 }
9926 }
9927 }
9928 gcc_assert (base_reg != NULL);
9929
9930 return plus_constant (Pmode, base_reg, base_offset);
9931 }
9932
9933 /* Emit code to save registers in the prologue. */
9934
9935 static void
9936 ix86_emit_save_regs (void)
9937 {
9938 unsigned int regno;
9939 rtx insn;
9940
9941 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9942 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9943 {
9944 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9945 RTX_FRAME_RELATED_P (insn) = 1;
9946 }
9947 }
9948
9949 /* Emit a single register save at CFA - CFA_OFFSET. */
9950
9951 static void
9952 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9953 HOST_WIDE_INT cfa_offset)
9954 {
9955 struct machine_function *m = cfun->machine;
9956 rtx reg = gen_rtx_REG (mode, regno);
9957 rtx mem, addr, base, insn;
9958
9959 addr = choose_baseaddr (cfa_offset);
9960 mem = gen_frame_mem (mode, addr);
9961
9962 /* For SSE saves, we need to indicate the 128-bit alignment. */
9963 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9964
9965 insn = emit_move_insn (mem, reg);
9966 RTX_FRAME_RELATED_P (insn) = 1;
9967
9968 base = addr;
9969 if (GET_CODE (base) == PLUS)
9970 base = XEXP (base, 0);
9971 gcc_checking_assert (REG_P (base));
9972
9973 /* When saving registers into a re-aligned local stack frame, avoid
9974 any tricky guessing by dwarf2out. */
9975 if (m->fs.realigned)
9976 {
9977 gcc_checking_assert (stack_realign_drap);
9978
9979 if (regno == REGNO (crtl->drap_reg))
9980 {
9981 /* A bit of a hack. We force the DRAP register to be saved in
9982 the re-aligned stack frame, which provides us with a copy
9983 of the CFA that will last past the prologue. Install it. */
9984 gcc_checking_assert (cfun->machine->fs.fp_valid);
9985 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9986 cfun->machine->fs.fp_offset - cfa_offset);
9987 mem = gen_rtx_MEM (mode, addr);
9988 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9989 }
9990 else
9991 {
9992 /* The frame pointer is a stable reference within the
9993 aligned frame. Use it. */
9994 gcc_checking_assert (cfun->machine->fs.fp_valid);
9995 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9996 cfun->machine->fs.fp_offset - cfa_offset);
9997 mem = gen_rtx_MEM (mode, addr);
9998 add_reg_note (insn, REG_CFA_EXPRESSION,
9999 gen_rtx_SET (VOIDmode, mem, reg));
10000 }
10001 }
10002
10003 /* The memory may not be relative to the current CFA register,
10004 which means that we may need to generate a new pattern for
10005 use by the unwind info. */
10006 else if (base != m->fs.cfa_reg)
10007 {
10008 addr = plus_constant (Pmode, m->fs.cfa_reg,
10009 m->fs.cfa_offset - cfa_offset);
10010 mem = gen_rtx_MEM (mode, addr);
10011 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
10012 }
10013 }
10014
10015 /* Emit code to save registers using MOV insns.
10016 First register is stored at CFA - CFA_OFFSET. */
10017 static void
10018 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10019 {
10020 unsigned int regno;
10021
10022 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10023 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10024 {
10025 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10026 cfa_offset -= UNITS_PER_WORD;
10027 }
10028 }
10029
10030 /* Emit code to save SSE registers using MOV insns.
10031 First register is stored at CFA - CFA_OFFSET. */
10032 static void
10033 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10034 {
10035 unsigned int regno;
10036
10037 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10038 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10039 {
10040 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10041 cfa_offset -= 16;
10042 }
10043 }
10044
10045 static GTY(()) rtx queued_cfa_restores;
10046
10047 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
10048 manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
10049 Don't add the note if the previously saved value will be left untouched
10050 within the stack red-zone till return, as unwinders can find the same value
10051 in the register and on the stack.  */
10052
10053 static void
10054 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
10055 {
10056 if (!crtl->shrink_wrapped
10057 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10058 return;
10059
10060 if (insn)
10061 {
10062 add_reg_note (insn, REG_CFA_RESTORE, reg);
10063 RTX_FRAME_RELATED_P (insn) = 1;
10064 }
10065 else
10066 queued_cfa_restores
10067 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10068 }
10069
10070 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
10071
10072 static void
10073 ix86_add_queued_cfa_restore_notes (rtx insn)
10074 {
10075 rtx last;
10076 if (!queued_cfa_restores)
10077 return;
10078 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10079 ;
10080 XEXP (last, 1) = REG_NOTES (insn);
10081 REG_NOTES (insn) = queued_cfa_restores;
10082 queued_cfa_restores = NULL_RTX;
10083 RTX_FRAME_RELATED_P (insn) = 1;
10084 }
10085
10086 /* Expand prologue or epilogue stack adjustment.
10087 The pattern exists to put a dependency on all ebp-based memory accesses.
10088 STYLE should be negative if instructions should be marked as frame related,
10089 zero if the %r11 register is live and cannot be freely used, and positive
10090 otherwise.  */
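/* As an illustration (a sketch; see ix86_expand_prologue below), allocating
   the local frame is done with a call such as

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				GEN_INT (-allocate), -1,
				m->fs.cfa_reg == stack_pointer_rtx);

   i.e. a STYLE of -1 marks the adjustment as frame related, and SET_CFA is
   true only while the stack pointer is still the CFA register.  */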
10091
10092 static void
10093 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10094 int style, bool set_cfa)
10095 {
10096 struct machine_function *m = cfun->machine;
10097 rtx insn;
10098 bool add_frame_related_expr = false;
10099
10100 if (Pmode == SImode)
10101 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10102 else if (x86_64_immediate_operand (offset, DImode))
10103 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10104 else
10105 {
10106 rtx tmp;
10107 /* r11 is used by indirect sibcall return as well, set before the
10108 epilogue and used after the epilogue. */
10109 if (style)
10110 tmp = gen_rtx_REG (DImode, R11_REG);
10111 else
10112 {
10113 gcc_assert (src != hard_frame_pointer_rtx
10114 && dest != hard_frame_pointer_rtx);
10115 tmp = hard_frame_pointer_rtx;
10116 }
10117 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10118 if (style < 0)
10119 add_frame_related_expr = true;
10120
10121 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10122 }
10123
10124 insn = emit_insn (insn);
10125 if (style >= 0)
10126 ix86_add_queued_cfa_restore_notes (insn);
10127
10128 if (set_cfa)
10129 {
10130 rtx r;
10131
10132 gcc_assert (m->fs.cfa_reg == src);
10133 m->fs.cfa_offset += INTVAL (offset);
10134 m->fs.cfa_reg = dest;
10135
10136 r = gen_rtx_PLUS (Pmode, src, offset);
10137 r = gen_rtx_SET (VOIDmode, dest, r);
10138 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10139 RTX_FRAME_RELATED_P (insn) = 1;
10140 }
10141 else if (style < 0)
10142 {
10143 RTX_FRAME_RELATED_P (insn) = 1;
10144 if (add_frame_related_expr)
10145 {
10146 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10147 r = gen_rtx_SET (VOIDmode, dest, r);
10148 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10149 }
10150 }
10151
10152 if (dest == stack_pointer_rtx)
10153 {
10154 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10155 bool valid = m->fs.sp_valid;
10156
10157 if (src == hard_frame_pointer_rtx)
10158 {
10159 valid = m->fs.fp_valid;
10160 ooffset = m->fs.fp_offset;
10161 }
10162 else if (src == crtl->drap_reg)
10163 {
10164 valid = m->fs.drap_valid;
10165 ooffset = 0;
10166 }
10167 else
10168 {
10169 /* Else there are two possibilities: SP itself, which we set
10170 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10171 taken care of by hand along the eh_return path.  */
10172 gcc_checking_assert (src == stack_pointer_rtx
10173 || offset == const0_rtx);
10174 }
10175
10176 m->fs.sp_offset = ooffset - INTVAL (offset);
10177 m->fs.sp_valid = valid;
10178 }
10179 }
10180
10181 /* Find an available register to be used as the dynamic realign argument
10182 pointer register.  Such a register will be written in the prologue and
10183 used at the beginning of the body, so it must not be
10184 1. a parameter passing register.
10185 2. the GOT pointer.
10186 We reuse the static-chain register if it is available.  Otherwise, we
10187 use DI for i386 and R13 for x86-64.  We chose R13 since it has a
10188 shorter encoding.
10189 
10190 Return: the regno of the chosen register.  */
10191
10192 static unsigned int
10193 find_drap_reg (void)
10194 {
10195 tree decl = cfun->decl;
10196
10197 if (TARGET_64BIT)
10198 {
10199 /* Use R13 for a nested function or a function that needs a static
10200 chain.  Since a function with a tail call may use any caller-saved
10201 register in the epilogue, DRAP must not use a caller-saved
10202 register in such a case.  */
10203 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10204 return R13_REG;
10205
10206 return R10_REG;
10207 }
10208 else
10209 {
10210 /* Use DI for a nested function or a function that needs a static
10211 chain.  Since a function with a tail call may use any caller-saved
10212 register in the epilogue, DRAP must not use a caller-saved
10213 register in such a case.  */
10214 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10215 return DI_REG;
10216
10217 /* Reuse static chain register if it isn't used for parameter
10218 passing. */
10219 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10220 {
10221 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10222 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10223 return CX_REG;
10224 }
10225 return DI_REG;
10226 }
10227 }
10228
10229 /* Return minimum incoming stack alignment. */
10230
10231 static unsigned int
10232 ix86_minimum_incoming_stack_boundary (bool sibcall)
10233 {
10234 unsigned int incoming_stack_boundary;
10235
10236 /* Prefer the one specified at command line. */
10237 if (ix86_user_incoming_stack_boundary)
10238 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10239 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10240 when -mstackrealign is used, this isn't a sibcall check, and the
10241 estimated stack alignment is 128 bits.  */
10242 else if (!sibcall
10243 && !TARGET_64BIT
10244 && ix86_force_align_arg_pointer
10245 && crtl->stack_alignment_estimated == 128)
10246 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10247 else
10248 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10249
10250 /* Incoming stack alignment can be changed on individual functions
10251 via force_align_arg_pointer attribute. We use the smallest
10252 incoming stack boundary. */
10253 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10254 && lookup_attribute (ix86_force_align_arg_pointer_string,
10255 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10256 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10257
10258 /* The incoming stack frame has to be aligned at least at
10259 parm_stack_boundary. */
10260 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10261 incoming_stack_boundary = crtl->parm_stack_boundary;
10262
10263 /* The stack at the entry of main is aligned by the runtime.  We use
10264 the smallest incoming stack boundary.  */
10265 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10266 && DECL_NAME (current_function_decl)
10267 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10268 && DECL_FILE_SCOPE_P (current_function_decl))
10269 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10270
10271 return incoming_stack_boundary;
10272 }
10273
10274 /* Update incoming stack boundary and estimated stack alignment. */
10275
10276 static void
10277 ix86_update_stack_boundary (void)
10278 {
10279 ix86_incoming_stack_boundary
10280 = ix86_minimum_incoming_stack_boundary (false);
10281
10282 /* An x86_64 varargs function needs 16-byte stack alignment for the
10283 register save area.  */
10284 if (TARGET_64BIT
10285 && cfun->stdarg
10286 && crtl->stack_alignment_estimated < 128)
10287 crtl->stack_alignment_estimated = 128;
10288 }
10289
10290 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10291 needed or an rtx for DRAP otherwise. */
10292
10293 static rtx
10294 ix86_get_drap_rtx (void)
10295 {
10296 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10297 crtl->need_drap = true;
10298
10299 if (stack_realign_drap)
10300 {
10301 /* Assign DRAP to vDRAP and return vDRAP.  */
10302 unsigned int regno = find_drap_reg ();
10303 rtx drap_vreg;
10304 rtx arg_ptr;
10305 rtx_insn *seq, *insn;
10306
10307 arg_ptr = gen_rtx_REG (Pmode, regno);
10308 crtl->drap_reg = arg_ptr;
10309
10310 start_sequence ();
10311 drap_vreg = copy_to_reg (arg_ptr);
10312 seq = get_insns ();
10313 end_sequence ();
10314
10315 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10316 if (!optimize)
10317 {
10318 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10319 RTX_FRAME_RELATED_P (insn) = 1;
10320 }
10321 return drap_vreg;
10322 }
10323 else
10324 return NULL;
10325 }
10326
10327 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10328
10329 static rtx
10330 ix86_internal_arg_pointer (void)
10331 {
10332 return virtual_incoming_args_rtx;
10333 }
10334
10335 struct scratch_reg {
10336 rtx reg;
10337 bool saved;
10338 };
10339
10340 /* Return a short-lived scratch register for use on function entry.
10341 In 32-bit mode, it is valid only after the registers are saved
10342 in the prologue. This register must be released by means of
10343 release_scratch_register_on_entry once it is dead. */
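/* Typical usage, as in ix86_adjust_stack_and_probe below (a sketch):

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ... use sr.reg as a scratch ...
     release_scratch_register_on_entry (&sr);

   If SR->SAVED ends up set, the register is pushed here and popped again
   by the release, so no live value may be kept in it across the release.  */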
10344
10345 static void
10346 get_scratch_register_on_entry (struct scratch_reg *sr)
10347 {
10348 int regno;
10349
10350 sr->saved = false;
10351
10352 if (TARGET_64BIT)
10353 {
10354 /* We always use R11 in 64-bit mode. */
10355 regno = R11_REG;
10356 }
10357 else
10358 {
10359 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10360 bool fastcall_p
10361 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10362 bool thiscall_p
10363 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10364 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10365 int regparm = ix86_function_regparm (fntype, decl);
10366 int drap_regno
10367 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10368
10369 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10370 for the static chain register. */
10371 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10372 && drap_regno != AX_REG)
10373 regno = AX_REG;
10374 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10375 for the static chain register. */
10376 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10377 regno = AX_REG;
10378 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10379 regno = DX_REG;
10380 /* ecx is the static chain register. */
10381 else if (regparm < 3 && !fastcall_p && !thiscall_p
10382 && !static_chain_p
10383 && drap_regno != CX_REG)
10384 regno = CX_REG;
10385 else if (ix86_save_reg (BX_REG, true))
10386 regno = BX_REG;
10387 /* esi is the static chain register. */
10388 else if (!(regparm == 3 && static_chain_p)
10389 && ix86_save_reg (SI_REG, true))
10390 regno = SI_REG;
10391 else if (ix86_save_reg (DI_REG, true))
10392 regno = DI_REG;
10393 else
10394 {
10395 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10396 sr->saved = true;
10397 }
10398 }
10399
10400 sr->reg = gen_rtx_REG (Pmode, regno);
10401 if (sr->saved)
10402 {
10403 rtx insn = emit_insn (gen_push (sr->reg));
10404 RTX_FRAME_RELATED_P (insn) = 1;
10405 }
10406 }
10407
10408 /* Release a scratch register obtained from the preceding function. */
10409
10410 static void
10411 release_scratch_register_on_entry (struct scratch_reg *sr)
10412 {
10413 if (sr->saved)
10414 {
10415 struct machine_function *m = cfun->machine;
10416 rtx x, insn = emit_insn (gen_pop (sr->reg));
10417
10418 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10419 RTX_FRAME_RELATED_P (insn) = 1;
10420 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10421 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10422 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10423 m->fs.sp_offset -= UNITS_PER_WORD;
10424 }
10425 }
10426
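/* Interval between stack probes.  STACK_CHECK_PROBE_INTERVAL_EXP defaults
   to 12, so this is typically 4096 bytes (one page).  */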
10427 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10428
10429 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10430
10431 static void
10432 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10433 {
10434 /* We skip the probe for the first interval + a small dope of 4 words and
10435 probe that many bytes past the specified size to maintain a protection
10436 area at the bottom of the stack.  */
10437 const int dope = 4 * UNITS_PER_WORD;
10438 rtx size_rtx = GEN_INT (size), last;
10439
10440 /* See if we have a constant small number of probes to generate.  If so,
10441 that's the easy case.  The run-time loop is made up of 11 insns in the
10442 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10443 for n intervals.  */
10444 if (size <= 5 * PROBE_INTERVAL)
10445 {
10446 HOST_WIDE_INT i, adjust;
10447 bool first_probe = true;
10448
10449 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10450 values of N from 1 until it exceeds SIZE. If only one probe is
10451 needed, this will not generate any code. Then adjust and probe
10452 to PROBE_INTERVAL + SIZE. */
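	  /* E.g. for SIZE == 3 * PROBE_INTERVAL this emits three adjust/probe
	     pairs plus the readjustment below, leaving SP exactly SIZE bytes
	     lower than on entry.  */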
10453 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10454 {
10455 if (first_probe)
10456 {
10457 adjust = 2 * PROBE_INTERVAL + dope;
10458 first_probe = false;
10459 }
10460 else
10461 adjust = PROBE_INTERVAL;
10462
10463 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10464 plus_constant (Pmode, stack_pointer_rtx,
10465 -adjust)));
10466 emit_stack_probe (stack_pointer_rtx);
10467 }
10468
10469 if (first_probe)
10470 adjust = size + PROBE_INTERVAL + dope;
10471 else
10472 adjust = size + PROBE_INTERVAL - i;
10473
10474 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10475 plus_constant (Pmode, stack_pointer_rtx,
10476 -adjust)));
10477 emit_stack_probe (stack_pointer_rtx);
10478
10479 /* Adjust back to account for the additional first interval. */
10480 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10481 plus_constant (Pmode, stack_pointer_rtx,
10482 PROBE_INTERVAL + dope)));
10483 }
10484
10485 /* Otherwise, do the same as above, but in a loop. Note that we must be
10486 extra careful with variables wrapping around because we might be at
10487 the very top (or the very bottom) of the address space and we have
10488 to be able to handle this case properly; in particular, we use an
10489 equality test for the loop condition. */
10490 else
10491 {
10492 HOST_WIDE_INT rounded_size;
10493 struct scratch_reg sr;
10494
10495 get_scratch_register_on_entry (&sr);
10496
10497
10498 /* Step 1: round SIZE to the previous multiple of the interval. */
10499
10500 rounded_size = size & -PROBE_INTERVAL;
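      /* E.g. with a 4 KiB probe interval, a SIZE of 10000 rounds down to a
	 ROUNDED_SIZE of 8192; the remaining 1808 bytes are handled by the
	 extra probe in step 4 below.  */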
10501
10502
10503 /* Step 2: compute initial and final value of the loop counter. */
10504
10505 /* SP = SP_0 + PROBE_INTERVAL. */
10506 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10507 plus_constant (Pmode, stack_pointer_rtx,
10508 - (PROBE_INTERVAL + dope))));
10509
10510 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10511 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10512 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10513 gen_rtx_PLUS (Pmode, sr.reg,
10514 stack_pointer_rtx)));
10515
10516
10517 /* Step 3: the loop
10518
10519 while (SP != LAST_ADDR)
10520 {
10521 SP = SP + PROBE_INTERVAL
10522 probe at SP
10523 }
10524
10525 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10526 values of N from 1 until it is equal to ROUNDED_SIZE. */
10527
10528 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10529
10530
10531 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10532 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10533
10534 if (size != rounded_size)
10535 {
10536 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10537 plus_constant (Pmode, stack_pointer_rtx,
10538 rounded_size - size)));
10539 emit_stack_probe (stack_pointer_rtx);
10540 }
10541
10542 /* Adjust back to account for the additional first interval. */
10543 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10544 plus_constant (Pmode, stack_pointer_rtx,
10545 PROBE_INTERVAL + dope)));
10546
10547 release_scratch_register_on_entry (&sr);
10548 }
10549
10550 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10551
10552 /* Even if the stack pointer isn't the CFA register, we need to correctly
10553 describe the adjustments made to it, in particular differentiate the
10554 frame-related ones from the frame-unrelated ones. */
10555 if (size > 0)
10556 {
10557 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10558 XVECEXP (expr, 0, 0)
10559 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10560 plus_constant (Pmode, stack_pointer_rtx, -size));
10561 XVECEXP (expr, 0, 1)
10562 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10563 plus_constant (Pmode, stack_pointer_rtx,
10564 PROBE_INTERVAL + dope + size));
10565 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10566 RTX_FRAME_RELATED_P (last) = 1;
10567
10568 cfun->machine->fs.sp_offset += size;
10569 }
10570
10571 /* Make sure nothing is scheduled before we are done. */
10572 emit_insn (gen_blockage ());
10573 }
10574
10575 /* Adjust the stack pointer up to REG while probing it. */
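/* A rough sketch of the emitted loop, shown for the 32-bit case (AT&T
   syntax; the actual label names and operand-size suffixes come from the
   templates below):

	.LPSRL0:
		cmp	%reg, %esp
		je	.LPSRE0
		sub	$PROBE_INTERVAL, %esp
		or	$0, (%esp)
		jmp	.LPSRL0
	.LPSRE0:
*/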
10576
10577 const char *
10578 output_adjust_stack_and_probe (rtx reg)
10579 {
10580 static int labelno = 0;
10581 char loop_lab[32], end_lab[32];
10582 rtx xops[2];
10583
10584 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10585 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10586
10587 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10588
10589 /* Jump to END_LAB if SP == LAST_ADDR. */
10590 xops[0] = stack_pointer_rtx;
10591 xops[1] = reg;
10592 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10593 fputs ("\tje\t", asm_out_file);
10594 assemble_name_raw (asm_out_file, end_lab);
10595 fputc ('\n', asm_out_file);
10596
10597 /* SP = SP + PROBE_INTERVAL. */
10598 xops[1] = GEN_INT (PROBE_INTERVAL);
10599 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10600
10601 /* Probe at SP. */
10602 xops[1] = const0_rtx;
10603 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10604
10605 fprintf (asm_out_file, "\tjmp\t");
10606 assemble_name_raw (asm_out_file, loop_lab);
10607 fputc ('\n', asm_out_file);
10608
10609 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10610
10611 return "";
10612 }
10613
10614 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10615 inclusive. These are offsets from the current stack pointer. */
10616
10617 static void
10618 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10619 {
10620 /* See if we have a constant small number of probes to generate.  If so,
10621 that's the easy case.  The run-time loop is made up of 7 insns in the
10622 generic case while the compile-time loop is made up of n insns for
10623 n intervals.  */
10624 if (size <= 7 * PROBE_INTERVAL)
10625 {
10626 HOST_WIDE_INT i;
10627
10628 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10629 it exceeds SIZE. If only one probe is needed, this will not
10630 generate any code. Then probe at FIRST + SIZE. */
10631 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10632 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10633 -(first + i)));
10634
10635 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10636 -(first + size)));
10637 }
10638
10639 /* Otherwise, do the same as above, but in a loop. Note that we must be
10640 extra careful with variables wrapping around because we might be at
10641 the very top (or the very bottom) of the address space and we have
10642 to be able to handle this case properly; in particular, we use an
10643 equality test for the loop condition. */
10644 else
10645 {
10646 HOST_WIDE_INT rounded_size, last;
10647 struct scratch_reg sr;
10648
10649 get_scratch_register_on_entry (&sr);
10650
10651
10652 /* Step 1: round SIZE to the previous multiple of the interval. */
10653
10654 rounded_size = size & -PROBE_INTERVAL;
10655
10656
10657 /* Step 2: compute initial and final value of the loop counter. */
10658
10659 /* TEST_OFFSET = FIRST. */
10660 emit_move_insn (sr.reg, GEN_INT (-first));
10661
10662 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10663 last = first + rounded_size;
10664
10665
10666 /* Step 3: the loop
10667
10668 while (TEST_ADDR != LAST_ADDR)
10669 {
10670 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10671 probe at TEST_ADDR
10672 }
10673
10674 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10675 until it is equal to ROUNDED_SIZE. */
10676
10677 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10678
10679
10680 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10681 that SIZE is equal to ROUNDED_SIZE. */
10682
10683 if (size != rounded_size)
10684 emit_stack_probe (plus_constant (Pmode,
10685 gen_rtx_PLUS (Pmode,
10686 stack_pointer_rtx,
10687 sr.reg),
10688 rounded_size - size));
10689
10690 release_scratch_register_on_entry (&sr);
10691 }
10692
10693 /* Make sure nothing is scheduled before we are done. */
10694 emit_insn (gen_blockage ());
10695 }
10696
10697 /* Probe a range of stack addresses from REG to END, inclusive. These are
10698 offsets from the current stack pointer. */
10699
10700 const char *
10701 output_probe_stack_range (rtx reg, rtx end)
10702 {
10703 static int labelno = 0;
10704 char loop_lab[32], end_lab[32];
10705 rtx xops[3];
10706
10707 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10708 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10709
10710 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10711
10712 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10713 xops[0] = reg;
10714 xops[1] = end;
10715 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10716 fputs ("\tje\t", asm_out_file);
10717 assemble_name_raw (asm_out_file, end_lab);
10718 fputc ('\n', asm_out_file);
10719
10720 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10721 xops[1] = GEN_INT (PROBE_INTERVAL);
10722 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10723
10724 /* Probe at TEST_ADDR. */
10725 xops[0] = stack_pointer_rtx;
10726 xops[1] = reg;
10727 xops[2] = const0_rtx;
10728 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10729
10730 fprintf (asm_out_file, "\tjmp\t");
10731 assemble_name_raw (asm_out_file, loop_lab);
10732 fputc ('\n', asm_out_file);
10733
10734 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10735
10736 return "";
10737 }
10738
10739 /* Finalize the stack_realign_needed flag, which will guide the
10740 prologue/epilogue to be generated in the correct form.  */
10741 static void
10742 ix86_finalize_stack_realign_flags (void)
10743 {
10744 /* Check if stack realignment is really needed after reload, and
10745 store the result in cfun.  */
10746 unsigned int incoming_stack_boundary
10747 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10748 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10749 unsigned int stack_realign = (incoming_stack_boundary
10750 < (crtl->is_leaf
10751 ? crtl->max_used_stack_slot_alignment
10752 : crtl->stack_alignment_needed));
10753
10754 if (crtl->stack_realign_finalized)
10755 {
10756 /* After stack_realign_needed is finalized, we can no longer
10757 change it.  */
10758 gcc_assert (crtl->stack_realign_needed == stack_realign);
10759 return;
10760 }
10761
10762 /* If the only reason for frame_pointer_needed is that we conservatively
10763 assumed stack realignment might be needed, but in the end nothing that
10764 needed the stack alignment had been spilled, clear frame_pointer_needed
10765 and say we don't need stack realignment. */
10766 if (stack_realign
10767 && frame_pointer_needed
10768 && crtl->is_leaf
10769 && flag_omit_frame_pointer
10770 && crtl->sp_is_unchanging
10771 && !ix86_current_function_calls_tls_descriptor
10772 && !crtl->accesses_prior_frames
10773 && !cfun->calls_alloca
10774 && !crtl->calls_eh_return
10775 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10776 && !ix86_frame_pointer_required ()
10777 && get_frame_size () == 0
10778 && ix86_nsaved_sseregs () == 0
10779 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10780 {
10781 HARD_REG_SET set_up_by_prologue, prologue_used;
10782 basic_block bb;
10783
10784 CLEAR_HARD_REG_SET (prologue_used);
10785 CLEAR_HARD_REG_SET (set_up_by_prologue);
10786 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10787 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10788 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10789 HARD_FRAME_POINTER_REGNUM);
10790 FOR_EACH_BB_FN (bb, cfun)
10791 {
10792 rtx_insn *insn;
10793 FOR_BB_INSNS (bb, insn)
10794 if (NONDEBUG_INSN_P (insn)
10795 && requires_stack_frame_p (insn, prologue_used,
10796 set_up_by_prologue))
10797 {
10798 crtl->stack_realign_needed = stack_realign;
10799 crtl->stack_realign_finalized = true;
10800 return;
10801 }
10802 }
10803
10804 /* If drap has been set, but it actually isn't live at the start
10805 of the function, there is no reason to set it up. */
10806 if (crtl->drap_reg)
10807 {
10808 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10809 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10810 {
10811 crtl->drap_reg = NULL_RTX;
10812 crtl->need_drap = false;
10813 }
10814 }
10815 else
10816 cfun->machine->no_drap_save_restore = true;
10817
10818 frame_pointer_needed = false;
10819 stack_realign = false;
10820 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10821 crtl->stack_alignment_needed = incoming_stack_boundary;
10822 crtl->stack_alignment_estimated = incoming_stack_boundary;
10823 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10824 crtl->preferred_stack_boundary = incoming_stack_boundary;
10825 df_finish_pass (true);
10826 df_scan_alloc (NULL);
10827 df_scan_blocks ();
10828 df_compute_regs_ever_live (true);
10829 df_analyze ();
10830 }
10831
10832 crtl->stack_realign_needed = stack_realign;
10833 crtl->stack_realign_finalized = true;
10834 }
10835
10836 /* Expand the prologue into a bunch of separate insns. */
10837
10838 void
10839 ix86_expand_prologue (void)
10840 {
10841 struct machine_function *m = cfun->machine;
10842 rtx insn, t;
10843 struct ix86_frame frame;
10844 HOST_WIDE_INT allocate;
10845 bool int_registers_saved;
10846 bool sse_registers_saved;
10847
10848 ix86_finalize_stack_realign_flags ();
10849
10850 /* DRAP should not coexist with stack_realign_fp */
10851 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10852
10853 memset (&m->fs, 0, sizeof (m->fs));
10854
10855 /* Initialize CFA state for before the prologue. */
10856 m->fs.cfa_reg = stack_pointer_rtx;
10857 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10858
10859 /* Track SP offset to the CFA.  We continue tracking this after we've
10860 swapped the CFA register away from SP.  In the case of re-alignment
10861 this is fudged; we're interested in offsets within the local frame.  */
10862 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10863 m->fs.sp_valid = true;
10864
10865 ix86_compute_frame_layout (&frame);
10866
10867 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10868 {
10869 /* We should have already generated an error for any use of
10870 ms_hook on a nested function. */
10871 gcc_checking_assert (!ix86_static_chain_on_stack);
10872
10873 /* Check if profiling is active and we shall use the profiling before
10874 prologue variant.  If so, sorry.  */
10875 if (crtl->profile && flag_fentry != 0)
10876 sorry ("ms_hook_prologue attribute isn%'t compatible "
10877 "with -mfentry for 32-bit");
10878
10879 /* In ix86_asm_output_function_label we emitted:
10880 8b ff movl.s %edi,%edi
10881 55 push %ebp
10882 8b ec movl.s %esp,%ebp
10883
10884 This matches the hookable function prologue in Win32 API
10885 functions in Microsoft Windows XP Service Pack 2 and newer.
10886 Wine uses this to enable Windows apps to hook the Win32 API
10887 functions provided by Wine.
10888
10889 What that means is that we've already set up the frame pointer. */
10890
10891 if (frame_pointer_needed
10892 && !(crtl->drap_reg && crtl->stack_realign_needed))
10893 {
10894 rtx push, mov;
10895
10896 /* We've decided to use the frame pointer already set up.
10897 Describe this to the unwinder by pretending that both
10898 push and mov insns happen right here.
10899
10900 Putting the unwind info here at the end of the ms_hook
10901 is done so that we can make absolutely certain we get
10902 the required byte sequence at the start of the function,
10903 rather than relying on an assembler that can produce
10904 the exact encoding required.
10905
10906 However it does mean (in the unpatched case) that we have
10907 a 1 insn window where the asynchronous unwind info is
10908 incorrect. However, if we placed the unwind info at
10909 its correct location we would have incorrect unwind info
10910 in the patched case. Which is probably all moot since
10911 I don't expect Wine generates dwarf2 unwind info for the
10912 system libraries that use this feature. */
10913
10914 insn = emit_insn (gen_blockage ());
10915
10916 push = gen_push (hard_frame_pointer_rtx);
10917 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10918 stack_pointer_rtx);
10919 RTX_FRAME_RELATED_P (push) = 1;
10920 RTX_FRAME_RELATED_P (mov) = 1;
10921
10922 RTX_FRAME_RELATED_P (insn) = 1;
10923 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10924 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10925
10926 /* Note that gen_push incremented m->fs.cfa_offset, even
10927 though we didn't emit the push insn here. */
10928 m->fs.cfa_reg = hard_frame_pointer_rtx;
10929 m->fs.fp_offset = m->fs.cfa_offset;
10930 m->fs.fp_valid = true;
10931 }
10932 else
10933 {
10934 /* The frame pointer is not needed so pop %ebp again.
10935 This leaves us with a pristine state. */
10936 emit_insn (gen_pop (hard_frame_pointer_rtx));
10937 }
10938 }
10939
10940 /* The first insn of a function that accepts its static chain on the
10941 stack is to push the register that would be filled in by a direct
10942 call. This insn will be skipped by the trampoline. */
10943 else if (ix86_static_chain_on_stack)
10944 {
10945 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10946 emit_insn (gen_blockage ());
10947
10948 /* We don't want to interpret this push insn as a register save,
10949 only as a stack adjustment. The real copy of the register as
10950 a save will be done later, if needed. */
10951 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10952 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10953 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10954 RTX_FRAME_RELATED_P (insn) = 1;
10955 }
10956
10957 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10958 DRAP is needed and stack realignment is really needed after reload.  */
10959 if (stack_realign_drap)
10960 {
10961 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10962
10963 /* Only need to push parameter pointer reg if it is caller saved. */
10964 if (!call_used_regs[REGNO (crtl->drap_reg)])
10965 {
10966 /* Push arg pointer reg */
10967 insn = emit_insn (gen_push (crtl->drap_reg));
10968 RTX_FRAME_RELATED_P (insn) = 1;
10969 }
10970
10971 /* Grab the argument pointer. */
10972 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10973 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10974 RTX_FRAME_RELATED_P (insn) = 1;
10975 m->fs.cfa_reg = crtl->drap_reg;
10976 m->fs.cfa_offset = 0;
10977
10978 /* Align the stack. */
10979 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10980 stack_pointer_rtx,
10981 GEN_INT (-align_bytes)));
10982 RTX_FRAME_RELATED_P (insn) = 1;
10983
10984 /* Replicate the return address on the stack so that the return
10985 address can be reached via the (argp - 1) slot.  This is needed
10986 to implement the macro RETURN_ADDR_RTX and the intrinsic function
10987 expand_builtin_return_addr etc.  */
10988 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10989 t = gen_frame_mem (word_mode, t);
10990 insn = emit_insn (gen_push (t));
10991 RTX_FRAME_RELATED_P (insn) = 1;
10992
10993 /* For the purposes of frame and register save area addressing,
10994 we've started over with a new frame. */
10995 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10996 m->fs.realigned = true;
10997 }
10998
10999 int_registers_saved = (frame.nregs == 0);
11000 sse_registers_saved = (frame.nsseregs == 0);
11001
11002 if (frame_pointer_needed && !m->fs.fp_valid)
11003 {
11004 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11005 slower on all targets. Also sdb doesn't like it. */
11006 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11007 RTX_FRAME_RELATED_P (insn) = 1;
11008
11009 /* Push registers now, before setting the frame pointer
11010 on SEH target. */
11011 if (!int_registers_saved
11012 && TARGET_SEH
11013 && !frame.save_regs_using_mov)
11014 {
11015 ix86_emit_save_regs ();
11016 int_registers_saved = true;
11017 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11018 }
11019
11020 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11021 {
11022 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11023 RTX_FRAME_RELATED_P (insn) = 1;
11024
11025 if (m->fs.cfa_reg == stack_pointer_rtx)
11026 m->fs.cfa_reg = hard_frame_pointer_rtx;
11027 m->fs.fp_offset = m->fs.sp_offset;
11028 m->fs.fp_valid = true;
11029 }
11030 }
11031
11032 if (!int_registers_saved)
11033 {
11034 /* If saving registers via PUSH, do so now. */
11035 if (!frame.save_regs_using_mov)
11036 {
11037 ix86_emit_save_regs ();
11038 int_registers_saved = true;
11039 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11040 }
11041
11042 /* When using the red zone we may start register saving before allocating
11043 the stack frame, saving one cycle of the prologue.  However, avoid
11044 doing this if we have to probe the stack; at least on x86_64 the
11045 stack probe can turn into a call that clobbers a red zone location.  */
11046 else if (ix86_using_red_zone ()
11047 && (! TARGET_STACK_PROBE
11048 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11049 {
11050 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11051 int_registers_saved = true;
11052 }
11053 }
11054
11055 if (stack_realign_fp)
11056 {
11057 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11058 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11059
11060 /* The computation of the size of the re-aligned stack frame means
11061 that we must allocate the size of the register save area before
11062 performing the actual alignment. Otherwise we cannot guarantee
11063 that there's enough storage above the realignment point. */
11064 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11065 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11066 GEN_INT (m->fs.sp_offset
11067 - frame.sse_reg_save_offset),
11068 -1, false);
11069
11070 /* Align the stack. */
11071 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11072 stack_pointer_rtx,
11073 GEN_INT (-align_bytes)));
11074
11075 /* For the purposes of register save area addressing, the stack
11076 pointer is no longer valid. As for the value of sp_offset,
11077 see ix86_compute_frame_layout, which we need to match in order
11078 to pass verification of stack_pointer_offset at the end. */
11079 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11080 m->fs.sp_valid = false;
11081 }
11082
11083 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11084
11085 if (flag_stack_usage_info)
11086 {
11087 /* We start to count from ARG_POINTER. */
11088 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11089
11090 /* If it was realigned, take into account the fake frame. */
11091 if (stack_realign_drap)
11092 {
11093 if (ix86_static_chain_on_stack)
11094 stack_size += UNITS_PER_WORD;
11095
11096 if (!call_used_regs[REGNO (crtl->drap_reg)])
11097 stack_size += UNITS_PER_WORD;
11098
11099 /* This over-estimates by 1 minimal-stack-alignment-unit but
11100 mitigates that by counting in the new return address slot. */
11101 current_function_dynamic_stack_size
11102 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11103 }
11104
11105 current_function_static_stack_size = stack_size;
11106 }
11107
11108 /* On SEH target with very large frame size, allocate an area to save
11109 SSE registers (as the very large allocation won't be described). */
11110 if (TARGET_SEH
11111 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11112 && !sse_registers_saved)
11113 {
11114 HOST_WIDE_INT sse_size =
11115 frame.sse_reg_save_offset - frame.reg_save_offset;
11116
11117 gcc_assert (int_registers_saved);
11118
11119 /* No need to do stack checking as the area will be immediately
11120 written. */
11121 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11122 GEN_INT (-sse_size), -1,
11123 m->fs.cfa_reg == stack_pointer_rtx);
11124 allocate -= sse_size;
11125 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11126 sse_registers_saved = true;
11127 }
11128
11129 /* The stack has already been decremented by the instruction calling us
11130 so probe if the size is non-negative to preserve the protection area. */
11131 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11132 {
11133 /* We expect the registers to be saved when probes are used. */
11134 gcc_assert (int_registers_saved);
11135
11136 if (STACK_CHECK_MOVING_SP)
11137 {
11138 if (!(crtl->is_leaf && !cfun->calls_alloca
11139 && allocate <= PROBE_INTERVAL))
11140 {
11141 ix86_adjust_stack_and_probe (allocate);
11142 allocate = 0;
11143 }
11144 }
11145 else
11146 {
11147 HOST_WIDE_INT size = allocate;
11148
11149 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11150 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11151
11152 if (TARGET_STACK_PROBE)
11153 {
11154 if (crtl->is_leaf && !cfun->calls_alloca)
11155 {
11156 if (size > PROBE_INTERVAL)
11157 ix86_emit_probe_stack_range (0, size);
11158 }
11159 else
11160 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11161 }
11162 else
11163 {
11164 if (crtl->is_leaf && !cfun->calls_alloca)
11165 {
11166 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11167 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11168 size - STACK_CHECK_PROTECT);
11169 }
11170 else
11171 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11172 }
11173 }
11174 }
11175
11176 if (allocate == 0)
11177 ;
11178 else if (!ix86_target_stack_probe ()
11179 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11180 {
11181 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11182 GEN_INT (-allocate), -1,
11183 m->fs.cfa_reg == stack_pointer_rtx);
11184 }
11185 else
11186 {
11187 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11188 rtx r10 = NULL;
11189 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11190 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11191 bool eax_live = ix86_eax_live_at_start_p ();
11192 bool r10_live = false;
11193
11194 if (TARGET_64BIT)
11195 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11196
11197 if (eax_live)
11198 {
11199 insn = emit_insn (gen_push (eax));
11200 allocate -= UNITS_PER_WORD;
11201 /* Note that SEH directives need to continue tracking the stack
11202 pointer even after the frame pointer has been set up. */
11203 if (sp_is_cfa_reg || TARGET_SEH)
11204 {
11205 if (sp_is_cfa_reg)
11206 m->fs.cfa_offset += UNITS_PER_WORD;
11207 RTX_FRAME_RELATED_P (insn) = 1;
11208 }
11209 }
11210
11211 if (r10_live)
11212 {
11213 r10 = gen_rtx_REG (Pmode, R10_REG);
11214 insn = emit_insn (gen_push (r10));
11215 allocate -= UNITS_PER_WORD;
11216 if (sp_is_cfa_reg || TARGET_SEH)
11217 {
11218 if (sp_is_cfa_reg)
11219 m->fs.cfa_offset += UNITS_PER_WORD;
11220 RTX_FRAME_RELATED_P (insn) = 1;
11221 }
11222 }
11223
11224 emit_move_insn (eax, GEN_INT (allocate));
11225 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11226
11227 /* Use the fact that AX still contains ALLOCATE. */
11228 adjust_stack_insn = (Pmode == DImode
11229 ? gen_pro_epilogue_adjust_stack_di_sub
11230 : gen_pro_epilogue_adjust_stack_si_sub);
11231
11232 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11233 stack_pointer_rtx, eax));
11234
11235 if (sp_is_cfa_reg || TARGET_SEH)
11236 {
11237 if (sp_is_cfa_reg)
11238 m->fs.cfa_offset += allocate;
11239 RTX_FRAME_RELATED_P (insn) = 1;
11240 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11241 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11242 plus_constant (Pmode, stack_pointer_rtx,
11243 -allocate)));
11244 }
11245 m->fs.sp_offset += allocate;
11246
11247 /* Use stack_pointer_rtx for relative addressing so that code
11248 works for realigned stack, too. */
11249 if (r10_live && eax_live)
11250 {
11251 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11252 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11253 gen_frame_mem (word_mode, t));
11254 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11255 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11256 gen_frame_mem (word_mode, t));
11257 }
11258 else if (eax_live || r10_live)
11259 {
11260 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11261 emit_move_insn (gen_rtx_REG (word_mode,
11262 (eax_live ? AX_REG : R10_REG)),
11263 gen_frame_mem (word_mode, t));
11264 }
11265 }
11266 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11267
11268 /* If we haven't already set up the frame pointer, do so now.  */
11269 if (frame_pointer_needed && !m->fs.fp_valid)
11270 {
11271 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11272 GEN_INT (frame.stack_pointer_offset
11273 - frame.hard_frame_pointer_offset));
11274 insn = emit_insn (insn);
11275 RTX_FRAME_RELATED_P (insn) = 1;
11276 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11277
11278 if (m->fs.cfa_reg == stack_pointer_rtx)
11279 m->fs.cfa_reg = hard_frame_pointer_rtx;
11280 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11281 m->fs.fp_valid = true;
11282 }
11283
11284 if (!int_registers_saved)
11285 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11286 if (!sse_registers_saved)
11287 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11288
11289 if (crtl->drap_reg && !crtl->stack_realign_needed)
11290 {
11291 /* vDRAP is set up, but after reload it turns out stack realignment
11292 isn't necessary; here we emit prologue code to set up DRAP
11293 without the stack realignment adjustment.  */
11294 t = choose_baseaddr (0);
11295 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11296 }
11297
11298 /* Prevent instructions from being scheduled into the register save push
11299 sequence when access to the red-zone area is done through the frame pointer.
11300 The offset between the frame pointer and the stack pointer is calculated
11301 relative to the value of the stack pointer at the end of the function
11302 prologue, and moving instructions that access the red-zone area via the
11303 frame pointer inside the push sequence violates this assumption.  */
11304 if (frame_pointer_needed && frame.red_zone_size)
11305 emit_insn (gen_memory_blockage ());
11306
11307 /* Emit cld instruction if stringops are used in the function. */
11308 if (TARGET_CLD && ix86_current_function_needs_cld)
11309 emit_insn (gen_cld ());
11310
11311 /* SEH requires that the prologue end within 256 bytes of the start of
11312 the function. Prevent instruction schedules that would extend that.
11313 Further, prevent alloca modifications to the stack pointer from being
11314 combined with prologue modifications. */
11315 if (TARGET_SEH)
11316 emit_insn (gen_prologue_use (stack_pointer_rtx));
11317 }
11318
11319 /* Emit code to restore REG using a POP insn. */
11320
11321 static void
11322 ix86_emit_restore_reg_using_pop (rtx reg)
11323 {
11324 struct machine_function *m = cfun->machine;
11325 rtx insn = emit_insn (gen_pop (reg));
11326
11327 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11328 m->fs.sp_offset -= UNITS_PER_WORD;
11329
11330 if (m->fs.cfa_reg == crtl->drap_reg
11331 && REGNO (reg) == REGNO (crtl->drap_reg))
11332 {
11333 /* Previously we'd represented the CFA as an expression
11334 like *(%ebp - 8). We've just popped that value from
11335 the stack, which means we need to reset the CFA to
11336 the drap register. This will remain until we restore
11337 the stack pointer. */
11338 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11339 RTX_FRAME_RELATED_P (insn) = 1;
11340
11341 /* This means that the DRAP register is valid for addressing too. */
11342 m->fs.drap_valid = true;
11343 return;
11344 }
11345
11346 if (m->fs.cfa_reg == stack_pointer_rtx)
11347 {
11348 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11349 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11350 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11351 RTX_FRAME_RELATED_P (insn) = 1;
11352
11353 m->fs.cfa_offset -= UNITS_PER_WORD;
11354 }
11355
11356 /* When the frame pointer is the CFA, and we pop it, we are
11357 swapping back to the stack pointer as the CFA. This happens
11358 for stack frames that don't allocate other data, so we assume
11359 the stack pointer is now pointing at the return address, i.e.
11360 the function entry state, which makes the offset one word.  */
11361 if (reg == hard_frame_pointer_rtx)
11362 {
11363 m->fs.fp_valid = false;
11364 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11365 {
11366 m->fs.cfa_reg = stack_pointer_rtx;
11367 m->fs.cfa_offset -= UNITS_PER_WORD;
11368
11369 add_reg_note (insn, REG_CFA_DEF_CFA,
11370 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11371 GEN_INT (m->fs.cfa_offset)));
11372 RTX_FRAME_RELATED_P (insn) = 1;
11373 }
11374 }
11375 }
11376
11377 /* Emit code to restore saved registers using POP insns. */
11378
11379 static void
11380 ix86_emit_restore_regs_using_pop (void)
11381 {
11382 unsigned int regno;
11383
11384 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11385 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11386 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11387 }
11388
11389 /* Emit code and notes for the LEAVE instruction. */
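/* leave is equivalent to "mov %ebp, %esp; pop %ebp" (or the 64-bit
   analogue), which is why the frame state below becomes sp_valid with
   sp_offset = fp_offset - UNITS_PER_WORD while fp_valid is cleared.  */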
11390
11391 static void
11392 ix86_emit_leave (void)
11393 {
11394 struct machine_function *m = cfun->machine;
11395 rtx insn = emit_insn (ix86_gen_leave ());
11396
11397 ix86_add_queued_cfa_restore_notes (insn);
11398
11399 gcc_assert (m->fs.fp_valid);
11400 m->fs.sp_valid = true;
11401 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11402 m->fs.fp_valid = false;
11403
11404 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11405 {
11406 m->fs.cfa_reg = stack_pointer_rtx;
11407 m->fs.cfa_offset = m->fs.sp_offset;
11408
11409 add_reg_note (insn, REG_CFA_DEF_CFA,
11410 plus_constant (Pmode, stack_pointer_rtx,
11411 m->fs.sp_offset));
11412 RTX_FRAME_RELATED_P (insn) = 1;
11413 }
11414 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11415 m->fs.fp_offset);
11416 }
11417
11418 /* Emit code to restore saved registers using MOV insns.
11419 First register is restored from CFA - CFA_OFFSET. */
11420 static void
11421 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11422 bool maybe_eh_return)
11423 {
11424 struct machine_function *m = cfun->machine;
11425 unsigned int regno;
11426
11427 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11428 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11429 {
11430 rtx reg = gen_rtx_REG (word_mode, regno);
11431 rtx insn, mem;
11432
11433 mem = choose_baseaddr (cfa_offset);
11434 mem = gen_frame_mem (word_mode, mem);
11435 insn = emit_move_insn (reg, mem);
11436
11437 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11438 {
11439 /* Previously we'd represented the CFA as an expression
11440 like *(%ebp - 8). We've just popped that value from
11441 the stack, which means we need to reset the CFA to
11442 the drap register. This will remain until we restore
11443 the stack pointer. */
11444 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11445 RTX_FRAME_RELATED_P (insn) = 1;
11446
11447 /* This means that the DRAP register is valid for addressing. */
11448 m->fs.drap_valid = true;
11449 }
11450 else
11451 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11452
11453 cfa_offset -= UNITS_PER_WORD;
11454 }
11455 }
11456
11457 /* Emit code to restore saved SSE registers using MOV insns.
11458 First register is restored from CFA - CFA_OFFSET.  */
11459 static void
11460 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11461 bool maybe_eh_return)
11462 {
11463 unsigned int regno;
11464
11465 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11466 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11467 {
11468 rtx reg = gen_rtx_REG (V4SFmode, regno);
11469 rtx mem;
11470
11471 mem = choose_baseaddr (cfa_offset);
11472 mem = gen_rtx_MEM (V4SFmode, mem);
11473 set_mem_align (mem, 128);
11474 emit_move_insn (reg, mem);
11475
11476 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11477
11478 cfa_offset -= 16;
11479 }
11480 }
11481
11482 /* Restore function stack, frame, and registers. */
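/* Judging from the uses of STYLE below: 0 requests a sibcall epilogue
   (no return instruction is wanted), 2 an eh_return epilogue (%ecx must
   be added to the stack pointer); other values take the normal return
   path.  */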
11483
11484 void
11485 ix86_expand_epilogue (int style)
11486 {
11487 struct machine_function *m = cfun->machine;
11488 struct machine_frame_state frame_state_save = m->fs;
11489 struct ix86_frame frame;
11490 bool restore_regs_via_mov;
11491 bool using_drap;
11492
11493 ix86_finalize_stack_realign_flags ();
11494 ix86_compute_frame_layout (&frame);
11495
11496 m->fs.sp_valid = (!frame_pointer_needed
11497 || (crtl->sp_is_unchanging
11498 && !stack_realign_fp));
11499 gcc_assert (!m->fs.sp_valid
11500 || m->fs.sp_offset == frame.stack_pointer_offset);
11501
11502 /* The FP must be valid if the frame pointer is present. */
11503 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11504 gcc_assert (!m->fs.fp_valid
11505 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11506
11507 /* We must have *some* valid pointer to the stack frame. */
11508 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11509
11510 /* The DRAP is never valid at this point. */
11511 gcc_assert (!m->fs.drap_valid);
11512
11513 /* See the comment about red zone and frame
11514 pointer usage in ix86_expand_prologue. */
11515 if (frame_pointer_needed && frame.red_zone_size)
11516 emit_insn (gen_memory_blockage ());
11517
11518 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11519 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11520
11521 /* Determine the CFA offset of the end of the red-zone. */
11522 m->fs.red_zone_offset = 0;
11523 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11524 {
11525 /* The red-zone begins below the return address. */
11526 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11527
11528 /* When the register save area is in the aligned portion of
11529 the stack, determine the maximum runtime displacement that
11530 matches up with the aligned frame. */
11531 if (stack_realign_drap)
11532 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11533 + UNITS_PER_WORD);
11534 }
11535
11536 /* Special care must be taken for the normal return case of a function
11537 using eh_return: the eax and edx registers are marked as saved, but
11538 not restored along this path. Adjust the save location to match. */
11539 if (crtl->calls_eh_return && style != 2)
11540 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11541
11542 /* EH_RETURN requires the use of moves to function properly. */
11543 if (crtl->calls_eh_return)
11544 restore_regs_via_mov = true;
11545 /* SEH requires the use of pops to identify the epilogue. */
11546 else if (TARGET_SEH)
11547 restore_regs_via_mov = false;
11548 /* If we're only restoring one register and sp is not valid, then
11549 use a move instruction to restore the register, since it's
11550 less work than reloading sp and popping the register.  */
11551 else if (!m->fs.sp_valid && frame.nregs <= 1)
11552 restore_regs_via_mov = true;
11553 else if (TARGET_EPILOGUE_USING_MOVE
11554 && cfun->machine->use_fast_prologue_epilogue
11555 && (frame.nregs > 1
11556 || m->fs.sp_offset != frame.reg_save_offset))
11557 restore_regs_via_mov = true;
11558 else if (frame_pointer_needed
11559 && !frame.nregs
11560 && m->fs.sp_offset != frame.reg_save_offset)
11561 restore_regs_via_mov = true;
11562 else if (frame_pointer_needed
11563 && TARGET_USE_LEAVE
11564 && cfun->machine->use_fast_prologue_epilogue
11565 && frame.nregs == 1)
11566 restore_regs_via_mov = true;
11567 else
11568 restore_regs_via_mov = false;
11569
11570 if (restore_regs_via_mov || frame.nsseregs)
11571 {
11572 /* Ensure that the entire register save area is addressable via
11573 the stack pointer, if we will restore via sp. */
11574 if (TARGET_64BIT
11575 && m->fs.sp_offset > 0x7fffffff
11576 && !(m->fs.fp_valid || m->fs.drap_valid)
11577 && (frame.nsseregs + frame.nregs) != 0)
11578 {
11579 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11580 GEN_INT (m->fs.sp_offset
11581 - frame.sse_reg_save_offset),
11582 style,
11583 m->fs.cfa_reg == stack_pointer_rtx);
11584 }
11585 }
11586
11587 /* If there are any SSE registers to restore, then we have to do it
11588 via moves, since there's obviously no pop for SSE regs. */
11589 if (frame.nsseregs)
11590 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11591 style == 2);
11592
11593 if (restore_regs_via_mov)
11594 {
11595 rtx t;
11596
11597 if (frame.nregs)
11598 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11599
11600 /* eh_return epilogues need %ecx added to the stack pointer. */
11601 if (style == 2)
11602 {
11603 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11604
11605 /* Stack align doesn't work with eh_return. */
11606 gcc_assert (!stack_realign_drap);
11607 /* Neither do regparm nested functions.  */
11608 gcc_assert (!ix86_static_chain_on_stack);
11609
11610 if (frame_pointer_needed)
11611 {
11612 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11613 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11614 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11615
11616 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11617 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11618
11619 /* Note that we use SA as a temporary CFA, as the return
11620 address is at the proper place relative to it. We
11621 pretend this happens at the FP restore insn because
11622 prior to this insn the FP would be stored at the wrong
11623 offset relative to SA, and after this insn we have no
11624 other reasonable register to use for the CFA. We don't
11625 bother resetting the CFA to the SP for the duration of
11626 the return insn. */
11627 add_reg_note (insn, REG_CFA_DEF_CFA,
11628 plus_constant (Pmode, sa, UNITS_PER_WORD));
11629 ix86_add_queued_cfa_restore_notes (insn);
11630 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11631 RTX_FRAME_RELATED_P (insn) = 1;
11632
11633 m->fs.cfa_reg = sa;
11634 m->fs.cfa_offset = UNITS_PER_WORD;
11635 m->fs.fp_valid = false;
11636
11637 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11638 const0_rtx, style, false);
11639 }
11640 else
11641 {
11642 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11643 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11644 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11645 ix86_add_queued_cfa_restore_notes (insn);
11646
11647 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11648 if (m->fs.cfa_offset != UNITS_PER_WORD)
11649 {
11650 m->fs.cfa_offset = UNITS_PER_WORD;
11651 add_reg_note (insn, REG_CFA_DEF_CFA,
11652 plus_constant (Pmode, stack_pointer_rtx,
11653 UNITS_PER_WORD));
11654 RTX_FRAME_RELATED_P (insn) = 1;
11655 }
11656 }
11657 m->fs.sp_offset = UNITS_PER_WORD;
11658 m->fs.sp_valid = true;
11659 }
11660 }
11661 else
11662 {
11663 /* SEH requires that the function end with (1) a stack adjustment
11664 if necessary, (2) a sequence of pops, and (3) a return or
11665 jump instruction. Prevent insns from the function body from
11666 being scheduled into this sequence. */
11667 if (TARGET_SEH)
11668 {
11669 /* Prevent a catch region from being adjacent to the standard
11670 epilogue sequence.  Unfortunately neither crtl->uses_eh_lsda nor
11671 several other flags that would be interesting to test are
11672 yet set up.  */
11673 if (flag_non_call_exceptions)
11674 emit_insn (gen_nops (const1_rtx));
11675 else
11676 emit_insn (gen_blockage ());
11677 }
11678
11679 /* The first step is to deallocate the stack frame so that we can
11680 pop the registers.  Also do it on the SEH target for a very large
11681 frame, as the emitted instructions aren't allowed by the ABI in
11682 epilogues.  */
11683 if (!m->fs.sp_valid
11684 || (TARGET_SEH
11685 && (m->fs.sp_offset - frame.reg_save_offset
11686 >= SEH_MAX_FRAME_SIZE)))
11687 {
11688 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11689 GEN_INT (m->fs.fp_offset
11690 - frame.reg_save_offset),
11691 style, false);
11692 }
11693 else if (m->fs.sp_offset != frame.reg_save_offset)
11694 {
11695 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11696 GEN_INT (m->fs.sp_offset
11697 - frame.reg_save_offset),
11698 style,
11699 m->fs.cfa_reg == stack_pointer_rtx);
11700 }
11701
11702 ix86_emit_restore_regs_using_pop ();
11703 }
11704
11705   /* If we used a frame pointer and haven't already got rid of it,
11706      then do so now.  */
11707 if (m->fs.fp_valid)
11708 {
11709 /* If the stack pointer is valid and pointing at the frame
11710 pointer store address, then we only need a pop. */
11711 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11712 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11713       /* The "leave" instruction results in shorter dependency chains on
11714 	 CPUs that are able to execute it fast.  */
11715 else if (TARGET_USE_LEAVE
11716 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11717 || !cfun->machine->use_fast_prologue_epilogue)
11718 ix86_emit_leave ();
11719 else
11720 {
11721 pro_epilogue_adjust_stack (stack_pointer_rtx,
11722 hard_frame_pointer_rtx,
11723 const0_rtx, style, !using_drap);
11724 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11725 }
11726 }
11727
11728 if (using_drap)
11729 {
11730 int param_ptr_offset = UNITS_PER_WORD;
11731 rtx insn;
11732
11733 gcc_assert (stack_realign_drap);
11734
11735 if (ix86_static_chain_on_stack)
11736 param_ptr_offset += UNITS_PER_WORD;
11737 if (!call_used_regs[REGNO (crtl->drap_reg)])
11738 param_ptr_offset += UNITS_PER_WORD;
11739
11740 insn = emit_insn (gen_rtx_SET
11741 (VOIDmode, stack_pointer_rtx,
11742 gen_rtx_PLUS (Pmode,
11743 crtl->drap_reg,
11744 GEN_INT (-param_ptr_offset))));
11745 m->fs.cfa_reg = stack_pointer_rtx;
11746 m->fs.cfa_offset = param_ptr_offset;
11747 m->fs.sp_offset = param_ptr_offset;
11748 m->fs.realigned = false;
11749
11750 add_reg_note (insn, REG_CFA_DEF_CFA,
11751 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11752 GEN_INT (param_ptr_offset)));
11753 RTX_FRAME_RELATED_P (insn) = 1;
11754
11755 if (!call_used_regs[REGNO (crtl->drap_reg)])
11756 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11757 }
11758
11759 /* At this point the stack pointer must be valid, and we must have
11760 restored all of the registers. We may not have deallocated the
11761 entire stack frame. We've delayed this until now because it may
11762 be possible to merge the local stack deallocation with the
11763 deallocation forced by ix86_static_chain_on_stack. */
11764 gcc_assert (m->fs.sp_valid);
11765 gcc_assert (!m->fs.fp_valid);
11766 gcc_assert (!m->fs.realigned);
11767 if (m->fs.sp_offset != UNITS_PER_WORD)
11768 {
11769 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11770 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11771 style, true);
11772 }
11773 else
11774 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11775
11776 /* Sibcall epilogues don't want a return instruction. */
11777 if (style == 0)
11778 {
11779 m->fs = frame_state_save;
11780 return;
11781 }
11782
11783 if (crtl->args.pops_args && crtl->args.size)
11784 {
11785 rtx popc = GEN_INT (crtl->args.pops_args);
11786
11787       /* i386 can only pop 64K bytes.  If asked to pop more, pop the return
11788 	 address, do an explicit add, and jump indirectly to the caller.  */
11789
11790 if (crtl->args.pops_args >= 65536)
11791 {
11792 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11793 rtx insn;
11794
11795 /* There is no "pascal" calling convention in any 64bit ABI. */
11796 gcc_assert (!TARGET_64BIT);
11797
11798 insn = emit_insn (gen_pop (ecx));
11799 m->fs.cfa_offset -= UNITS_PER_WORD;
11800 m->fs.sp_offset -= UNITS_PER_WORD;
11801
11802 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11803 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11804 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11805 add_reg_note (insn, REG_CFA_REGISTER,
11806 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11807 RTX_FRAME_RELATED_P (insn) = 1;
11808
11809 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11810 popc, -1, true);
11811 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11812 }
11813 else
11814 emit_jump_insn (gen_simple_return_pop_internal (popc));
11815 }
11816 else
11817 emit_jump_insn (gen_simple_return_internal ());
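  /* For illustration: in the >= 64K pops_args case above the emitted
     sequence is effectively (a sketch, not literal compiler output)

	popl	%ecx		# pop the return address
	addl	$N, %esp	# N == crtl->args.pops_args
	jmp	*%ecx		# return to the caller

     whereas the common case below 64K uses a single "ret $N".  */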
11818
11819 /* Restore the state back to the state from the prologue,
11820 so that it's correct for the next epilogue. */
11821 m->fs = frame_state_save;
11822 }
11823
11824 /* Undo any modifications to global state made while generating the function.  */
11825
11826 static void
11827 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
11828 {
11829 if (pic_offset_table_rtx
11830 && !ix86_use_pseudo_pic_reg ())
11831 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11832 #if TARGET_MACHO
11833 /* Mach-O doesn't support labels at the end of objects, so if
11834 it looks like we might want one, insert a NOP. */
11835 {
11836 rtx_insn *insn = get_last_insn ();
11837 rtx_insn *deleted_debug_label = NULL;
11838 while (insn
11839 && NOTE_P (insn)
11840 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11841 {
11842 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11843 notes only, instead set their CODE_LABEL_NUMBER to -1,
11844 otherwise there would be code generation differences
11845 in between -g and -g0. */
11846 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11847 deleted_debug_label = insn;
11848 insn = PREV_INSN (insn);
11849 }
11850 if (insn
11851 && (LABEL_P (insn)
11852 || (NOTE_P (insn)
11853 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11854 fputs ("\tnop\n", file);
11855 else if (deleted_debug_label)
11856 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11857 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11858 CODE_LABEL_NUMBER (insn) = -1;
11859 }
11860 #endif
11861
11862 }
11863
11864 /* Return a scratch register to use in the split stack prologue. The
11865    split stack prologue is used for -fsplit-stack.  It consists of the first
11866    instructions in the function, emitted even before the regular prologue.
11867 The scratch register can be any caller-saved register which is not
11868 used for parameters or for the static chain. */
11869
11870 static unsigned int
11871 split_stack_prologue_scratch_regno (void)
11872 {
11873 if (TARGET_64BIT)
11874 return R11_REG;
11875 else
11876 {
11877 bool is_fastcall, is_thiscall;
11878 int regparm;
11879
11880 is_fastcall = (lookup_attribute ("fastcall",
11881 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11882 != NULL);
11883 is_thiscall = (lookup_attribute ("thiscall",
11884 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11885 != NULL);
11886 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11887
11888 if (is_fastcall)
11889 {
11890 if (DECL_STATIC_CHAIN (cfun->decl))
11891 {
11892 sorry ("-fsplit-stack does not support fastcall with "
11893 "nested function");
11894 return INVALID_REGNUM;
11895 }
11896 return AX_REG;
11897 }
11898 else if (is_thiscall)
11899 {
11900 if (!DECL_STATIC_CHAIN (cfun->decl))
11901 return DX_REG;
11902 return AX_REG;
11903 }
11904 else if (regparm < 3)
11905 {
11906 if (!DECL_STATIC_CHAIN (cfun->decl))
11907 return CX_REG;
11908 else
11909 {
11910 if (regparm >= 2)
11911 {
11912 sorry ("-fsplit-stack does not support 2 register "
11913 "parameters for a nested function");
11914 return INVALID_REGNUM;
11915 }
11916 return DX_REG;
11917 }
11918 }
11919 else
11920 {
11921 /* FIXME: We could make this work by pushing a register
11922 around the addition and comparison. */
11923 sorry ("-fsplit-stack does not support 3 register parameters");
11924 return INVALID_REGNUM;
11925 }
11926 }
11927 }
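/* For illustration, a summary of the scratch register chosen above
   (derived from the cases in split_stack_prologue_scratch_regno; the
   "sorry" cases report an error and return INVALID_REGNUM):

     64-bit:				%r11
     32-bit fastcall:			%eax (nested functions unsupported)
     32-bit thiscall:			%edx, or %eax for nested functions
     32-bit regparm < 3, not nested:	%ecx
     32-bit regparm < 2, nested:	%edx
     otherwise:				unsupported  */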
11928
11929 /* A SYMBOL_REF for the function which allocates new stack space for
11930 -fsplit-stack. */
11931
11932 static GTY(()) rtx split_stack_fn;
11933
11934 /* A SYMBOL_REF for the more stack function when using the large
11935 model. */
11936
11937 static GTY(()) rtx split_stack_fn_large;
11938
11939 /* Handle -fsplit-stack. These are the first instructions in the
11940 function, even before the regular prologue. */
11941
11942 void
11943 ix86_expand_split_stack_prologue (void)
11944 {
11945 struct ix86_frame frame;
11946 HOST_WIDE_INT allocate;
11947 unsigned HOST_WIDE_INT args_size;
11948 rtx_code_label *label;
11949 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11950 rtx scratch_reg = NULL_RTX;
11951 rtx_code_label *varargs_label = NULL;
11952 rtx fn;
11953
11954 gcc_assert (flag_split_stack && reload_completed);
11955
11956 ix86_finalize_stack_realign_flags ();
11957 ix86_compute_frame_layout (&frame);
11958 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11959
11960 /* This is the label we will branch to if we have enough stack
11961 space. We expect the basic block reordering pass to reverse this
11962 branch if optimizing, so that we branch in the unlikely case. */
11963 label = gen_label_rtx ();
11964
11965 /* We need to compare the stack pointer minus the frame size with
11966 the stack boundary in the TCB. The stack boundary always gives
11967 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11968 can compare directly. Otherwise we need to do an addition. */
11969
11970 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11971 UNSPEC_STACK_CHECK);
11972 limit = gen_rtx_CONST (Pmode, limit);
11973 limit = gen_rtx_MEM (Pmode, limit);
11974 if (allocate < SPLIT_STACK_AVAILABLE)
11975 current = stack_pointer_rtx;
11976 else
11977 {
11978 unsigned int scratch_regno;
11979 rtx offset;
11980
11981 /* We need a scratch register to hold the stack pointer minus
11982 the required frame size. Since this is the very start of the
11983 function, the scratch register can be any caller-saved
11984 register which is not used for parameters. */
11985 offset = GEN_INT (- allocate);
11986 scratch_regno = split_stack_prologue_scratch_regno ();
11987 if (scratch_regno == INVALID_REGNUM)
11988 return;
11989 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11990 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11991 {
11992 /* We don't use ix86_gen_add3 in this case because it will
11993 want to split to lea, but when not optimizing the insn
11994 will not be split after this point. */
11995 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11996 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11997 offset)));
11998 }
11999 else
12000 {
12001 emit_move_insn (scratch_reg, offset);
12002 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12003 stack_pointer_rtx));
12004 }
12005 current = scratch_reg;
12006 }
12007
12008 ix86_expand_branch (GEU, current, limit, label);
12009 jump_insn = get_last_insn ();
12010 JUMP_LABEL (jump_insn) = label;
12011
12012 /* Mark the jump as very likely to be taken. */
12013 add_int_reg_note (jump_insn, REG_BR_PROB,
12014 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12015
12016 if (split_stack_fn == NULL_RTX)
12017 {
12018 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12019 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12020 }
12021 fn = split_stack_fn;
12022
12023 /* Get more stack space. We pass in the desired stack space and the
12024 size of the arguments to copy to the new stack. In 32-bit mode
12025 we push the parameters; __morestack will return on a new stack
12026 anyhow. In 64-bit mode we pass the parameters in r10 and
12027 r11. */
12028 allocate_rtx = GEN_INT (allocate);
12029 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12030 call_fusage = NULL_RTX;
12031 if (TARGET_64BIT)
12032 {
12033 rtx reg10, reg11;
12034
12035 reg10 = gen_rtx_REG (Pmode, R10_REG);
12036 reg11 = gen_rtx_REG (Pmode, R11_REG);
12037
12038 /* If this function uses a static chain, it will be in %r10.
12039 Preserve it across the call to __morestack. */
12040 if (DECL_STATIC_CHAIN (cfun->decl))
12041 {
12042 rtx rax;
12043
12044 rax = gen_rtx_REG (word_mode, AX_REG);
12045 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12046 use_reg (&call_fusage, rax);
12047 }
12048
12049 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12050 && !TARGET_PECOFF)
12051 {
12052 HOST_WIDE_INT argval;
12053
12054 gcc_assert (Pmode == DImode);
12055 /* When using the large model we need to load the address
12056 into a register, and we've run out of registers. So we
12057 switch to a different calling convention, and we call a
12058 	     different function: __morestack_large_model.  We pass the
12059 argument size in the upper 32 bits of r10 and pass the
12060 frame size in the lower 32 bits. */
12061 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12062 gcc_assert ((args_size & 0xffffffff) == args_size);
12063
12064 if (split_stack_fn_large == NULL_RTX)
12065 {
12066 split_stack_fn_large =
12067 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12068 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12069 }
12070 if (ix86_cmodel == CM_LARGE_PIC)
12071 {
12072 rtx_code_label *label;
12073 rtx x;
12074
12075 label = gen_label_rtx ();
12076 emit_label (label);
12077 LABEL_PRESERVE_P (label) = 1;
12078 emit_insn (gen_set_rip_rex64 (reg10, label));
12079 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12080 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12081 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12082 UNSPEC_GOT);
12083 x = gen_rtx_CONST (Pmode, x);
12084 emit_move_insn (reg11, x);
12085 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12086 x = gen_const_mem (Pmode, x);
12087 emit_move_insn (reg11, x);
12088 }
12089 else
12090 emit_move_insn (reg11, split_stack_fn_large);
12091
12092 fn = reg11;
12093
12094 argval = ((args_size << 16) << 16) + allocate;
12095 emit_move_insn (reg10, GEN_INT (argval));
12096 }
12097 else
12098 {
12099 emit_move_insn (reg10, allocate_rtx);
12100 emit_move_insn (reg11, GEN_INT (args_size));
12101 use_reg (&call_fusage, reg11);
12102 }
12103
12104 use_reg (&call_fusage, reg10);
12105 }
12106 else
12107 {
12108 emit_insn (gen_push (GEN_INT (args_size)));
12109 emit_insn (gen_push (allocate_rtx));
12110 }
12111 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12112 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12113 NULL_RTX, false);
12114 add_function_usage_to (call_insn, call_fusage);
12115
12116 /* In order to make call/return prediction work right, we now need
12117 to execute a return instruction. See
12118 libgcc/config/i386/morestack.S for the details on how this works.
12119
12120 For flow purposes gcc must not see this as a return
12121 instruction--we need control flow to continue at the subsequent
12122 label. Therefore, we use an unspec. */
12123 gcc_assert (crtl->args.pops_args < 65536);
12124 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12125
12126 /* If we are in 64-bit mode and this function uses a static chain,
12127      we saved %r10 in %rax before calling __morestack.  */
12128 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12129 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12130 gen_rtx_REG (word_mode, AX_REG));
12131
12132 /* If this function calls va_start, we need to store a pointer to
12133 the arguments on the old stack, because they may not have been
12134 all copied to the new stack. At this point the old stack can be
12135 found at the frame pointer value used by __morestack, because
12136 __morestack has set that up before calling back to us. Here we
12137 store that pointer in a scratch register, and in
12138 ix86_expand_prologue we store the scratch register in a stack
12139 slot. */
12140 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12141 {
12142 unsigned int scratch_regno;
12143 rtx frame_reg;
12144 int words;
12145
12146 scratch_regno = split_stack_prologue_scratch_regno ();
12147 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12148 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12149
12150 /* 64-bit:
12151 fp -> old fp value
12152 return address within this function
12153 return address of caller of this function
12154 stack arguments
12155 So we add three words to get to the stack arguments.
12156
12157 32-bit:
12158 fp -> old fp value
12159 return address within this function
12160 first argument to __morestack
12161 second argument to __morestack
12162 return address of caller of this function
12163 stack arguments
12164 So we add five words to get to the stack arguments.
12165 */
12166 words = TARGET_64BIT ? 3 : 5;
12167 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12168 gen_rtx_PLUS (Pmode, frame_reg,
12169 GEN_INT (words * UNITS_PER_WORD))));
12170
12171 varargs_label = gen_label_rtx ();
12172 emit_jump_insn (gen_jump (varargs_label));
12173 JUMP_LABEL (get_last_insn ()) = varargs_label;
12174
12175 emit_barrier ();
12176 }
12177
12178 emit_label (label);
12179 LABEL_NUSES (label) = 1;
12180
12181 /* If this function calls va_start, we now have to set the scratch
12182 register for the case where we do not call __morestack. In this
12183 case we need to set it based on the stack pointer. */
12184 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12185 {
12186 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12187 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12188 GEN_INT (UNITS_PER_WORD))));
12189
12190 emit_label (varargs_label);
12191 LABEL_NUSES (varargs_label) = 1;
12192 }
12193 }
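/* For illustration only, the split-stack prologue emitted above looks
   roughly like this on x86_64 with a small frame (a sketch, not literal
   compiler output; the TCB slot holding the stack boundary is
   target-specific):

	cmpq	%fs:<boundary>, %rsp
	jae	.Lenough
	movq	$<frame size>, %r10
	movq	$<arg size>, %r11
	callq	__morestack
	retq
   .Lenough:
	# regular prologue follows  */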
12194
12195 /* We may have to tell the dataflow pass that the split stack prologue
12196 is initializing a scratch register. */
12197
12198 static void
12199 ix86_live_on_entry (bitmap regs)
12200 {
12201 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12202 {
12203 gcc_assert (flag_split_stack);
12204 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12205 }
12206 }
12207 \f
12208 /* Extract the parts of an RTL expression that is a valid memory address
12209 for an instruction. Return 0 if the structure of the address is
12210 grossly off. Return -1 if the address contains ASHIFT, so it is not
12211    strictly valid, but is still used for computing the length of the lea instruction.  */
12212
12213 int
12214 ix86_decompose_address (rtx addr, struct ix86_address *out)
12215 {
12216 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12217 rtx base_reg, index_reg;
12218 HOST_WIDE_INT scale = 1;
12219 rtx scale_rtx = NULL_RTX;
12220 rtx tmp;
12221 int retval = 1;
12222 enum ix86_address_seg seg = SEG_DEFAULT;
12223
12224   /* Allow zero-extended SImode addresses;
12225      they will be emitted with the addr32 prefix.  */
12226 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12227 {
12228 if (GET_CODE (addr) == ZERO_EXTEND
12229 && GET_MODE (XEXP (addr, 0)) == SImode)
12230 {
12231 addr = XEXP (addr, 0);
12232 if (CONST_INT_P (addr))
12233 return 0;
12234 }
12235 else if (GET_CODE (addr) == AND
12236 && const_32bit_mask (XEXP (addr, 1), DImode))
12237 {
12238 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12239 if (addr == NULL_RTX)
12240 return 0;
12241
12242 if (CONST_INT_P (addr))
12243 return 0;
12244 }
12245 }
12246
12247   /* Allow SImode subregs of DImode addresses;
12248      they will be emitted with the addr32 prefix.  */
12249 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12250 {
12251 if (GET_CODE (addr) == SUBREG
12252 && GET_MODE (SUBREG_REG (addr)) == DImode)
12253 {
12254 addr = SUBREG_REG (addr);
12255 if (CONST_INT_P (addr))
12256 return 0;
12257 }
12258 }
12259
12260 if (REG_P (addr))
12261 base = addr;
12262 else if (GET_CODE (addr) == SUBREG)
12263 {
12264 if (REG_P (SUBREG_REG (addr)))
12265 base = addr;
12266 else
12267 return 0;
12268 }
12269 else if (GET_CODE (addr) == PLUS)
12270 {
12271 rtx addends[4], op;
12272 int n = 0, i;
12273
12274 op = addr;
12275 do
12276 {
12277 if (n >= 4)
12278 return 0;
12279 addends[n++] = XEXP (op, 1);
12280 op = XEXP (op, 0);
12281 }
12282 while (GET_CODE (op) == PLUS);
12283 if (n >= 4)
12284 return 0;
12285 addends[n] = op;
12286
12287 for (i = n; i >= 0; --i)
12288 {
12289 op = addends[i];
12290 switch (GET_CODE (op))
12291 {
12292 case MULT:
12293 if (index)
12294 return 0;
12295 index = XEXP (op, 0);
12296 scale_rtx = XEXP (op, 1);
12297 break;
12298
12299 case ASHIFT:
12300 if (index)
12301 return 0;
12302 index = XEXP (op, 0);
12303 tmp = XEXP (op, 1);
12304 if (!CONST_INT_P (tmp))
12305 return 0;
12306 scale = INTVAL (tmp);
12307 if ((unsigned HOST_WIDE_INT) scale > 3)
12308 return 0;
12309 scale = 1 << scale;
12310 break;
12311
12312 case ZERO_EXTEND:
12313 op = XEXP (op, 0);
12314 if (GET_CODE (op) != UNSPEC)
12315 return 0;
12316 /* FALLTHRU */
12317
12318 case UNSPEC:
12319 if (XINT (op, 1) == UNSPEC_TP
12320 && TARGET_TLS_DIRECT_SEG_REFS
12321 && seg == SEG_DEFAULT)
12322 seg = DEFAULT_TLS_SEG_REG;
12323 else
12324 return 0;
12325 break;
12326
12327 case SUBREG:
12328 if (!REG_P (SUBREG_REG (op)))
12329 return 0;
12330 /* FALLTHRU */
12331
12332 case REG:
12333 if (!base)
12334 base = op;
12335 else if (!index)
12336 index = op;
12337 else
12338 return 0;
12339 break;
12340
12341 case CONST:
12342 case CONST_INT:
12343 case SYMBOL_REF:
12344 case LABEL_REF:
12345 if (disp)
12346 return 0;
12347 disp = op;
12348 break;
12349
12350 default:
12351 return 0;
12352 }
12353 }
12354 }
12355 else if (GET_CODE (addr) == MULT)
12356 {
12357 index = XEXP (addr, 0); /* index*scale */
12358 scale_rtx = XEXP (addr, 1);
12359 }
12360 else if (GET_CODE (addr) == ASHIFT)
12361 {
12362 /* We're called for lea too, which implements ashift on occasion. */
12363 index = XEXP (addr, 0);
12364 tmp = XEXP (addr, 1);
12365 if (!CONST_INT_P (tmp))
12366 return 0;
12367 scale = INTVAL (tmp);
12368 if ((unsigned HOST_WIDE_INT) scale > 3)
12369 return 0;
12370 scale = 1 << scale;
12371 retval = -1;
12372 }
12373 else
12374 disp = addr; /* displacement */
12375
12376 if (index)
12377 {
12378 if (REG_P (index))
12379 ;
12380 else if (GET_CODE (index) == SUBREG
12381 && REG_P (SUBREG_REG (index)))
12382 ;
12383 else
12384 return 0;
12385 }
12386
12387 /* Extract the integral value of scale. */
12388 if (scale_rtx)
12389 {
12390 if (!CONST_INT_P (scale_rtx))
12391 return 0;
12392 scale = INTVAL (scale_rtx);
12393 }
12394
12395 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12396 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12397
12398 /* Avoid useless 0 displacement. */
12399 if (disp == const0_rtx && (base || index))
12400 disp = NULL_RTX;
12401
12402   /* Allow the arg pointer, frame pointer and stack pointer as index if there is no scaling.  */
12403 if (base_reg && index_reg && scale == 1
12404 && (index_reg == arg_pointer_rtx
12405 || index_reg == frame_pointer_rtx
12406 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12407 {
12408 rtx tmp;
12409 tmp = base, base = index, index = tmp;
12410 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12411 }
12412
12413 /* Special case: %ebp cannot be encoded as a base without a displacement.
12414 Similarly %r13. */
12415 if (!disp
12416 && base_reg
12417 && (base_reg == hard_frame_pointer_rtx
12418 || base_reg == frame_pointer_rtx
12419 || base_reg == arg_pointer_rtx
12420 || (REG_P (base_reg)
12421 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12422 || REGNO (base_reg) == R13_REG))))
12423 disp = const0_rtx;
12424
12425   /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
12426 Avoid this by transforming to [%esi+0].
12427 Reload calls address legitimization without cfun defined, so we need
12428 to test cfun for being non-NULL. */
12429 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12430 && base_reg && !index_reg && !disp
12431 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12432 disp = const0_rtx;
12433
12434 /* Special case: encode reg+reg instead of reg*2. */
12435 if (!base && index && scale == 2)
12436 base = index, base_reg = index_reg, scale = 1;
12437
12438 /* Special case: scaling cannot be encoded without base or displacement. */
12439 if (!base && !disp && index && scale != 1)
12440 disp = const0_rtx;
12441
12442 out->base = base;
12443 out->index = index;
12444 out->disp = disp;
12445 out->scale = scale;
12446 out->seg = seg;
12447
12448 return retval;
12449 }
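/* A worked example of the decomposition above (illustrative only):
   the address

     (plus:SI (plus:SI (mult:SI (reg:SI %ebx) (const_int 4))
		       (reg:SI %esi))
	      (const_int 12))

   i.e. the operand of 12(%esi,%ebx,4), decomposes into
   base = %esi, index = %ebx, scale = 4, disp = 12, seg = SEG_DEFAULT.  */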
12450 \f
12451 /* Return cost of the memory address x.
12452 For i386, it is better to use a complex address than let gcc copy
12453 the address into a reg and make a new pseudo. But not if the address
12454    requires two regs - that would mean more pseudos with longer
12455 lifetimes. */
12456 static int
12457 ix86_address_cost (rtx x, enum machine_mode, addr_space_t, bool)
12458 {
12459 struct ix86_address parts;
12460 int cost = 1;
12461 int ok = ix86_decompose_address (x, &parts);
12462
12463 gcc_assert (ok);
12464
12465 if (parts.base && GET_CODE (parts.base) == SUBREG)
12466 parts.base = SUBREG_REG (parts.base);
12467 if (parts.index && GET_CODE (parts.index) == SUBREG)
12468 parts.index = SUBREG_REG (parts.index);
12469
12470 /* Attempt to minimize number of registers in the address. */
12471 if ((parts.base
12472 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12473 || (parts.index
12474 && (!REG_P (parts.index)
12475 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12476 cost++;
12477
12478   /* When the address base or index is "pic_offset_table_rtx" we don't increase
12479      the address cost.  When a memory operand with "pic_offset_table_rtx" is not
12480      invariant itself it most likely means that the base or index is not invariant.
12481      Therefore only "pic_offset_table_rtx" could be hoisted out, which is not
12482      profitable for x86.  */
12483 if (parts.base
12484 && (!pic_offset_table_rtx
12485 || REGNO (pic_offset_table_rtx) != REGNO(parts.base))
12486 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12487 && parts.index
12488 && (!pic_offset_table_rtx
12489 || REGNO (pic_offset_table_rtx) != REGNO(parts.index))
12490 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12491 && parts.base != parts.index)
12492 cost++;
12493
12494   /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
12495      since its predecode logic can't detect the length of such instructions
12496      and they degenerate to vector decoding.  Increase the cost of such
12497      addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
12498      to split such addresses or even refuse them entirely.
12499
12500      The following addressing modes are affected:
12501       [base+scale*index]
12502       [scale*index+disp]
12503       [base+index]
12504
12505      The first and last case may be avoidable by explicitly coding a zero
12506      displacement in the memory address, but I don't have an AMD-K6 machine
12507      handy to check this theory.  */
12508
12509 if (TARGET_K6
12510 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12511 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12512 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12513 cost += 10;
12514
12515 return cost;
12516 }
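/* Illustrative examples of the cost computed above (assuming no K6
   penalty applies): an address using a single hard register costs 1,
   an address whose base is a pseudo register costs 2, and an address
   combining two distinct pseudo registers such as
   (plus:SI (reg:SI 90) (reg:SI 91)) costs 3.  */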
12517 \f
12518 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12519    this is used to form addresses to local data when -fPIC is in
12520 use. */
12521
12522 static bool
12523 darwin_local_data_pic (rtx disp)
12524 {
12525 return (GET_CODE (disp) == UNSPEC
12526 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12527 }
12528
12529 /* Determine if a given RTX is a valid constant. We already know this
12530 satisfies CONSTANT_P. */
12531
12532 static bool
12533 ix86_legitimate_constant_p (enum machine_mode, rtx x)
12534 {
12535 switch (GET_CODE (x))
12536 {
12537 case CONST:
12538 x = XEXP (x, 0);
12539
12540 if (GET_CODE (x) == PLUS)
12541 {
12542 if (!CONST_INT_P (XEXP (x, 1)))
12543 return false;
12544 x = XEXP (x, 0);
12545 }
12546
12547 if (TARGET_MACHO && darwin_local_data_pic (x))
12548 return true;
12549
12550 /* Only some unspecs are valid as "constants". */
12551 if (GET_CODE (x) == UNSPEC)
12552 switch (XINT (x, 1))
12553 {
12554 case UNSPEC_GOT:
12555 case UNSPEC_GOTOFF:
12556 case UNSPEC_PLTOFF:
12557 return TARGET_64BIT;
12558 case UNSPEC_TPOFF:
12559 case UNSPEC_NTPOFF:
12560 x = XVECEXP (x, 0, 0);
12561 return (GET_CODE (x) == SYMBOL_REF
12562 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12563 case UNSPEC_DTPOFF:
12564 x = XVECEXP (x, 0, 0);
12565 return (GET_CODE (x) == SYMBOL_REF
12566 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12567 default:
12568 return false;
12569 }
12570
12571 /* We must have drilled down to a symbol. */
12572 if (GET_CODE (x) == LABEL_REF)
12573 return true;
12574 if (GET_CODE (x) != SYMBOL_REF)
12575 return false;
12576 /* FALLTHRU */
12577
12578 case SYMBOL_REF:
12579 /* TLS symbols are never valid. */
12580 if (SYMBOL_REF_TLS_MODEL (x))
12581 return false;
12582
12583 /* DLLIMPORT symbols are never valid. */
12584 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12585 && SYMBOL_REF_DLLIMPORT_P (x))
12586 return false;
12587
12588 #if TARGET_MACHO
12589 /* mdynamic-no-pic */
12590 if (MACHO_DYNAMIC_NO_PIC_P)
12591 return machopic_symbol_defined_p (x);
12592 #endif
12593 break;
12594
12595 case CONST_DOUBLE:
12596 if (GET_MODE (x) == TImode
12597 && x != CONST0_RTX (TImode)
12598 && !TARGET_64BIT)
12599 return false;
12600 break;
12601
12602 case CONST_VECTOR:
12603 if (!standard_sse_constant_p (x))
12604 return false;
12605
12606 default:
12607 break;
12608 }
12609
12610 /* Otherwise we handle everything else in the move patterns. */
12611 return true;
12612 }
12613
12614 /* Determine if it's legal to put X into the constant pool. This
12615 is not possible for the address of thread-local symbols, which
12616 is checked above. */
12617
12618 static bool
12619 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12620 {
12621 /* We can always put integral constants and vectors in memory. */
12622 switch (GET_CODE (x))
12623 {
12624 case CONST_INT:
12625 case CONST_DOUBLE:
12626 case CONST_VECTOR:
12627 return false;
12628
12629 default:
12630 break;
12631 }
12632 return !ix86_legitimate_constant_p (mode, x);
12633 }
12634
12635 /* Return true if the symbol is marked as dllimport, or as a stub
12636    variable; otherwise return false.  */
12637
12638 static bool
12639 is_imported_p (rtx x)
12640 {
12641 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12642 || GET_CODE (x) != SYMBOL_REF)
12643 return false;
12644
12645 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12646 }
12647
12648
12649 /* Nonzero if the constant value X is a legitimate general operand
12650 when generating PIC code. It is given that flag_pic is on and
12651 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12652
12653 bool
12654 legitimate_pic_operand_p (rtx x)
12655 {
12656 rtx inner;
12657
12658 switch (GET_CODE (x))
12659 {
12660 case CONST:
12661 inner = XEXP (x, 0);
12662 if (GET_CODE (inner) == PLUS
12663 && CONST_INT_P (XEXP (inner, 1)))
12664 inner = XEXP (inner, 0);
12665
12666 /* Only some unspecs are valid as "constants". */
12667 if (GET_CODE (inner) == UNSPEC)
12668 switch (XINT (inner, 1))
12669 {
12670 case UNSPEC_GOT:
12671 case UNSPEC_GOTOFF:
12672 case UNSPEC_PLTOFF:
12673 return TARGET_64BIT;
12674 case UNSPEC_TPOFF:
12675 x = XVECEXP (inner, 0, 0);
12676 return (GET_CODE (x) == SYMBOL_REF
12677 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12678 case UNSPEC_MACHOPIC_OFFSET:
12679 return legitimate_pic_address_disp_p (x);
12680 default:
12681 return false;
12682 }
12683 /* FALLTHRU */
12684
12685 case SYMBOL_REF:
12686 case LABEL_REF:
12687 return legitimate_pic_address_disp_p (x);
12688
12689 default:
12690 return true;
12691 }
12692 }
12693
12694 /* Determine if a given CONST RTX is a valid memory displacement
12695 in PIC mode. */
12696
12697 bool
12698 legitimate_pic_address_disp_p (rtx disp)
12699 {
12700 bool saw_plus;
12701
12702 /* In 64bit mode we can allow direct addresses of symbols and labels
12703 when they are not dynamic symbols. */
12704 if (TARGET_64BIT)
12705 {
12706 rtx op0 = disp, op1;
12707
12708 switch (GET_CODE (disp))
12709 {
12710 case LABEL_REF:
12711 return true;
12712
12713 case CONST:
12714 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12715 break;
12716 op0 = XEXP (XEXP (disp, 0), 0);
12717 op1 = XEXP (XEXP (disp, 0), 1);
12718 if (!CONST_INT_P (op1)
12719 || INTVAL (op1) >= 16*1024*1024
12720 || INTVAL (op1) < -16*1024*1024)
12721 break;
12722 if (GET_CODE (op0) == LABEL_REF)
12723 return true;
12724 if (GET_CODE (op0) == CONST
12725 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12726 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12727 return true;
12728 if (GET_CODE (op0) == UNSPEC
12729 && XINT (op0, 1) == UNSPEC_PCREL)
12730 return true;
12731 if (GET_CODE (op0) != SYMBOL_REF)
12732 break;
12733 /* FALLTHRU */
12734
12735 case SYMBOL_REF:
12736 /* TLS references should always be enclosed in UNSPEC.
12737 	     A dllimported symbol always needs to be resolved.  */
12738 if (SYMBOL_REF_TLS_MODEL (op0)
12739 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12740 return false;
12741
12742 if (TARGET_PECOFF)
12743 {
12744 if (is_imported_p (op0))
12745 return true;
12746
12747 if (SYMBOL_REF_FAR_ADDR_P (op0)
12748 || !SYMBOL_REF_LOCAL_P (op0))
12749 break;
12750
12751 	      /* Function symbols need to be resolved only for
12752 	         the large model.
12753 	         For the small model we don't need to resolve anything
12754 	         here.  */
12755 if ((ix86_cmodel != CM_LARGE_PIC
12756 && SYMBOL_REF_FUNCTION_P (op0))
12757 || ix86_cmodel == CM_SMALL_PIC)
12758 return true;
12759 /* Non-external symbols don't need to be resolved for
12760 	         the large and medium models.  */
12761 if ((ix86_cmodel == CM_LARGE_PIC
12762 || ix86_cmodel == CM_MEDIUM_PIC)
12763 && !SYMBOL_REF_EXTERNAL_P (op0))
12764 return true;
12765 }
12766 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12767 && SYMBOL_REF_LOCAL_P (op0)
12768 && ix86_cmodel != CM_LARGE_PIC)
12769 return true;
12770 break;
12771
12772 default:
12773 break;
12774 }
12775 }
12776 if (GET_CODE (disp) != CONST)
12777 return false;
12778 disp = XEXP (disp, 0);
12779
12780 if (TARGET_64BIT)
12781 {
12782       /* It is unsafe to allow PLUS expressions here; this limits the allowed
12783 	 distance into the GOT tables.  We should not need these anyway.  */
12784 if (GET_CODE (disp) != UNSPEC
12785 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12786 && XINT (disp, 1) != UNSPEC_GOTOFF
12787 && XINT (disp, 1) != UNSPEC_PCREL
12788 && XINT (disp, 1) != UNSPEC_PLTOFF))
12789 return false;
12790
12791 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12792 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12793 return false;
12794 return true;
12795 }
12796
12797 saw_plus = false;
12798 if (GET_CODE (disp) == PLUS)
12799 {
12800 if (!CONST_INT_P (XEXP (disp, 1)))
12801 return false;
12802 disp = XEXP (disp, 0);
12803 saw_plus = true;
12804 }
12805
12806 if (TARGET_MACHO && darwin_local_data_pic (disp))
12807 return true;
12808
12809 if (GET_CODE (disp) != UNSPEC)
12810 return false;
12811
12812 switch (XINT (disp, 1))
12813 {
12814 case UNSPEC_GOT:
12815 if (saw_plus)
12816 return false;
12817 /* We need to check for both symbols and labels because VxWorks loads
12818 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12819 details. */
12820 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12821 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12822 case UNSPEC_GOTOFF:
12823 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12824 	 While the ABI also specifies a 32bit relocation, we don't produce it in
12825 	 the small PIC model at all.  */
12826 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12827 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12828 && !TARGET_64BIT)
12829 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12830 return false;
12831 case UNSPEC_GOTTPOFF:
12832 case UNSPEC_GOTNTPOFF:
12833 case UNSPEC_INDNTPOFF:
12834 if (saw_plus)
12835 return false;
12836 disp = XVECEXP (disp, 0, 0);
12837 return (GET_CODE (disp) == SYMBOL_REF
12838 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12839 case UNSPEC_NTPOFF:
12840 disp = XVECEXP (disp, 0, 0);
12841 return (GET_CODE (disp) == SYMBOL_REF
12842 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12843 case UNSPEC_DTPOFF:
12844 disp = XVECEXP (disp, 0, 0);
12845 return (GET_CODE (disp) == SYMBOL_REF
12846 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12847 }
12848
12849 return false;
12850 }
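/* For illustration, typical displacements accepted above in 32-bit PIC
   code include foo@GOTOFF, foo@GOTOFF+8, foo@GOT, foo@NTPOFF and
   foo@DTPOFF, while in 64-bit mode only bare UNSPEC-wrapped forms such
   as foo@GOTPCREL are accepted, with no PLUS around them.  */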
12851
12852 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS.  Return true if part
12853    of the address X was reloaded, in which case the calling macro should
12854    goto WIN; return false if no replacement is called for and X is left
12855    unchanged.  */
12856
12857 bool
12858 ix86_legitimize_reload_address (rtx x, enum machine_mode, int opnum, int type,
12859 int)
12860 {
12861 /* Reload can generate:
12862
12863 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12864 (reg:DI 97))
12865 (reg:DI 2 cx))
12866
12867 This RTX is rejected from ix86_legitimate_address_p due to
12868 non-strictness of base register 97. Following this rejection,
12869 reload pushes all three components into separate registers,
12870 creating invalid memory address RTX.
12871
12872 Following code reloads only the invalid part of the
12873 memory address RTX. */
12874
12875 if (GET_CODE (x) == PLUS
12876 && REG_P (XEXP (x, 1))
12877 && GET_CODE (XEXP (x, 0)) == PLUS
12878 && REG_P (XEXP (XEXP (x, 0), 1)))
12879 {
12880 rtx base, index;
12881 bool something_reloaded = false;
12882
12883 base = XEXP (XEXP (x, 0), 1);
12884 if (!REG_OK_FOR_BASE_STRICT_P (base))
12885 {
12886 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12887 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12888 opnum, (enum reload_type) type);
12889 something_reloaded = true;
12890 }
12891
12892 index = XEXP (x, 1);
12893 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12894 {
12895 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12896 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12897 opnum, (enum reload_type) type);
12898 something_reloaded = true;
12899 }
12900
12901 gcc_assert (something_reloaded);
12902 return true;
12903 }
12904
12905 return false;
12906 }
12907
12908 /* Determine if OP is a suitable RTX for an address register.
12909    Return the naked register if a register or a register subreg is
12910    found, otherwise return NULL_RTX.  */
12911
12912 static rtx
12913 ix86_validate_address_register (rtx op)
12914 {
12915 enum machine_mode mode = GET_MODE (op);
12916
12917 /* Only SImode or DImode registers can form the address. */
12918 if (mode != SImode && mode != DImode)
12919 return NULL_RTX;
12920
12921 if (REG_P (op))
12922 return op;
12923 else if (GET_CODE (op) == SUBREG)
12924 {
12925 rtx reg = SUBREG_REG (op);
12926
12927 if (!REG_P (reg))
12928 return NULL_RTX;
12929
12930 mode = GET_MODE (reg);
12931
12932 /* Don't allow SUBREGs that span more than a word. It can
12933 lead to spill failures when the register is one word out
12934 of a two word structure. */
12935 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12936 return NULL_RTX;
12937
12938 /* Allow only SUBREGs of non-eliminable hard registers. */
12939 if (register_no_elim_operand (reg, mode))
12940 return reg;
12941 }
12942
12943 /* Op is not a register. */
12944 return NULL_RTX;
12945 }
12946
12947 /* Recognizes RTL expressions that are valid memory addresses for an
12948 instruction. The MODE argument is the machine mode for the MEM
12949 expression that wants to use this address.
12950
12951    It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
12952 convert common non-canonical forms to canonical form so that they will
12953 be recognized. */
12954
12955 static bool
12956 ix86_legitimate_address_p (enum machine_mode, rtx addr, bool strict)
12957 {
12958 struct ix86_address parts;
12959 rtx base, index, disp;
12960 HOST_WIDE_INT scale;
12961 enum ix86_address_seg seg;
12962
12963 if (ix86_decompose_address (addr, &parts) <= 0)
12964 /* Decomposition failed. */
12965 return false;
12966
12967 base = parts.base;
12968 index = parts.index;
12969 disp = parts.disp;
12970 scale = parts.scale;
12971 seg = parts.seg;
12972
12973 /* Validate base register. */
12974 if (base)
12975 {
12976 rtx reg = ix86_validate_address_register (base);
12977
12978 if (reg == NULL_RTX)
12979 return false;
12980
12981 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12982 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12983 /* Base is not valid. */
12984 return false;
12985 }
12986
12987 /* Validate index register. */
12988 if (index)
12989 {
12990 rtx reg = ix86_validate_address_register (index);
12991
12992 if (reg == NULL_RTX)
12993 return false;
12994
12995 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12996 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12997 /* Index is not valid. */
12998 return false;
12999 }
13000
13001 /* Index and base should have the same mode. */
13002 if (base && index
13003 && GET_MODE (base) != GET_MODE (index))
13004 return false;
13005
13006 /* Address override works only on the (%reg) part of %fs:(%reg). */
13007 if (seg != SEG_DEFAULT
13008 && ((base && GET_MODE (base) != word_mode)
13009 || (index && GET_MODE (index) != word_mode)))
13010 return false;
13011
13012 /* Validate scale factor. */
13013 if (scale != 1)
13014 {
13015 if (!index)
13016 /* Scale without index. */
13017 return false;
13018
13019 if (scale != 2 && scale != 4 && scale != 8)
13020 /* Scale is not a valid multiplier. */
13021 return false;
13022 }
13023
13024 /* Validate displacement. */
13025 if (disp)
13026 {
13027 if (GET_CODE (disp) == CONST
13028 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13029 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13030 switch (XINT (XEXP (disp, 0), 1))
13031 {
13032 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13033 	     used.  While the ABI also specifies 32bit relocations, we don't produce
13034 	     them at all and use IP-relative references instead.  */
13035 case UNSPEC_GOT:
13036 case UNSPEC_GOTOFF:
13037 gcc_assert (flag_pic);
13038 if (!TARGET_64BIT)
13039 goto is_legitimate_pic;
13040
13041 /* 64bit address unspec. */
13042 return false;
13043
13044 case UNSPEC_GOTPCREL:
13045 case UNSPEC_PCREL:
13046 gcc_assert (flag_pic);
13047 goto is_legitimate_pic;
13048
13049 case UNSPEC_GOTTPOFF:
13050 case UNSPEC_GOTNTPOFF:
13051 case UNSPEC_INDNTPOFF:
13052 case UNSPEC_NTPOFF:
13053 case UNSPEC_DTPOFF:
13054 break;
13055
13056 case UNSPEC_STACK_CHECK:
13057 gcc_assert (flag_split_stack);
13058 break;
13059
13060 default:
13061 /* Invalid address unspec. */
13062 return false;
13063 }
13064
13065 else if (SYMBOLIC_CONST (disp)
13066 && (flag_pic
13067 || (TARGET_MACHO
13068 #if TARGET_MACHO
13069 && MACHOPIC_INDIRECT
13070 && !machopic_operand_p (disp)
13071 #endif
13072 )))
13073 {
13074
13075 is_legitimate_pic:
13076 if (TARGET_64BIT && (index || base))
13077 {
13078 /* foo@dtpoff(%rX) is ok. */
13079 if (GET_CODE (disp) != CONST
13080 || GET_CODE (XEXP (disp, 0)) != PLUS
13081 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13082 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13083 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13084 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13085 /* Non-constant pic memory reference. */
13086 return false;
13087 }
13088 else if ((!TARGET_MACHO || flag_pic)
13089 && ! legitimate_pic_address_disp_p (disp))
13090 /* Displacement is an invalid pic construct. */
13091 return false;
13092 #if TARGET_MACHO
13093 else if (MACHO_DYNAMIC_NO_PIC_P
13094 && !ix86_legitimate_constant_p (Pmode, disp))
13095 	    /* The displacement must be referenced via a non-lazy pointer.  */
13096 return false;
13097 #endif
13098
13099 /* This code used to verify that a symbolic pic displacement
13100 includes the pic_offset_table_rtx register.
13101
13102 	 While this is a good idea, unfortunately these constructs may
13103 	 be created by the "adds using lea" optimization for incorrect
13104 	 code like:
13105
13106 int a;
13107 int foo(int i)
13108 {
13109 return *(&a+i);
13110 }
13111
13112 	 This code is nonsensical, but results in addressing the
13113 	 GOT table with pic_offset_table_rtx as the base.  We can't
13114 	 easily refuse it, since it gets matched by the
13115 	 "addsi3" pattern, which later gets split to lea in case
13116 	 the output register differs from the input.  While this
13117 	 could be handled by a separate addsi pattern for this case
13118 	 that never results in lea, disabling this test seems to be
13119 	 the easier and correct fix for the crash.  */
13120 }
13121 else if (GET_CODE (disp) != LABEL_REF
13122 && !CONST_INT_P (disp)
13123 && (GET_CODE (disp) != CONST
13124 || !ix86_legitimate_constant_p (Pmode, disp))
13125 && (GET_CODE (disp) != SYMBOL_REF
13126 || !ix86_legitimate_constant_p (Pmode, disp)))
13127 /* Displacement is not constant. */
13128 return false;
13129 else if (TARGET_64BIT
13130 && !x86_64_immediate_operand (disp, VOIDmode))
13131 /* Displacement is out of range. */
13132 return false;
13133 /* In x32 mode, constant addresses are sign extended to 64bit, so
13134 	 we have to reject addresses from 0x80000000 to 0xffffffff.  */
13135 else if (TARGET_X32 && !(index || base)
13136 && CONST_INT_P (disp)
13137 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13138 return false;
13139 }
13140
13141 /* Everything looks valid. */
13142 return true;
13143 }
13144
13145 /* Determine if a given RTX is a valid constant address. */
13146
13147 bool
13148 constant_address_p (rtx x)
13149 {
13150 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13151 }
13152 \f
13153 /* Return a unique alias set for the GOT. */
13154
13155 static alias_set_type
13156 ix86_GOT_alias_set (void)
13157 {
13158 static alias_set_type set = -1;
13159 if (set == -1)
13160 set = new_alias_set ();
13161 return set;
13162 }
13163
13164 /* Set regs_ever_live for PIC base address register
13165 to true if required. */
13166 static void
13167 set_pic_reg_ever_live ()
13168 {
13169 if (reload_in_progress)
13170 df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true);
13171 }
13172
13173 /* Return a legitimate reference for ORIG (an address) using the
13174 register REG. If REG is 0, a new pseudo is generated.
13175
13176 There are two types of references that must be handled:
13177
13178 1. Global data references must load the address from the GOT, via
13179 the PIC reg. An insn is emitted to do this load, and the reg is
13180 returned.
13181
13182 2. Static data references, constant pool addresses, and code labels
13183 compute the address as an offset from the GOT, whose base is in
13184 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13185 differentiate them from global data objects. The returned
13186 address is the PIC reg + an unspec constant.
13187
13188 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13189 reg also appears in the address. */
13190
13191 static rtx
13192 legitimize_pic_address (rtx orig, rtx reg)
13193 {
13194 rtx addr = orig;
13195 rtx new_rtx = orig;
13196
13197 #if TARGET_MACHO
13198 if (TARGET_MACHO && !TARGET_64BIT)
13199 {
13200 if (reg == 0)
13201 reg = gen_reg_rtx (Pmode);
13202 /* Use the generic Mach-O PIC machinery. */
13203 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13204 }
13205 #endif
13206
13207 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13208 {
13209 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13210 if (tmp)
13211 return tmp;
13212 }
13213
13214 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13215 new_rtx = addr;
13216 else if (TARGET_64BIT && !TARGET_PECOFF
13217 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13218 {
13219 rtx tmpreg;
13220 /* This symbol may be referenced via a displacement from the PIC
13221 base address (@GOTOFF). */
13222
13223 set_pic_reg_ever_live ();
13224 if (GET_CODE (addr) == CONST)
13225 addr = XEXP (addr, 0);
13226 if (GET_CODE (addr) == PLUS)
13227 {
13228 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13229 UNSPEC_GOTOFF);
13230 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13231 }
13232 else
13233 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13234 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13235 if (!reg)
13236 tmpreg = gen_reg_rtx (Pmode);
13237 else
13238 tmpreg = reg;
13239 emit_move_insn (tmpreg, new_rtx);
13240
13241 if (reg != 0)
13242 {
13243 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13244 tmpreg, 1, OPTAB_DIRECT);
13245 new_rtx = reg;
13246 }
13247 else
13248 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13249 }
13250 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13251 {
13252 /* This symbol may be referenced via a displacement from the PIC
13253 base address (@GOTOFF). */
13254
13255 set_pic_reg_ever_live ();
13256 if (GET_CODE (addr) == CONST)
13257 addr = XEXP (addr, 0);
13258 if (GET_CODE (addr) == PLUS)
13259 {
13260 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13261 UNSPEC_GOTOFF);
13262 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13263 }
13264 else
13265 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13266 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13267 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13268
13269 if (reg != 0)
13270 {
13271 emit_move_insn (reg, new_rtx);
13272 new_rtx = reg;
13273 }
13274 }
13275 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13276 /* We can't use @GOTOFF for text labels on VxWorks;
13277 see gotoff_operand. */
13278 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13279 {
13280 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13281 if (tmp)
13282 return tmp;
13283
13284       /* For x64 PE-COFF there is no GOT table.  So we use the address
13285 	 directly.  */
13286 if (TARGET_64BIT && TARGET_PECOFF)
13287 {
13288 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13289 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13290
13291 if (reg == 0)
13292 reg = gen_reg_rtx (Pmode);
13293 emit_move_insn (reg, new_rtx);
13294 new_rtx = reg;
13295 }
13296 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13297 {
13298 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13299 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13300 new_rtx = gen_const_mem (Pmode, new_rtx);
13301 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13302
13303 if (reg == 0)
13304 reg = gen_reg_rtx (Pmode);
13305 	  /* Use gen_movsi directly, otherwise the address is loaded
13306 	     into a register for CSE.  We don't want to CSE this address;
13307 	     instead we CSE addresses from the GOT table, so skip this.  */
13308 emit_insn (gen_movsi (reg, new_rtx));
13309 new_rtx = reg;
13310 }
13311 else
13312 {
13313 /* This symbol must be referenced via a load from the
13314 Global Offset Table (@GOT). */
13315
13316 set_pic_reg_ever_live ();
13317 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13318 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13319 if (TARGET_64BIT)
13320 new_rtx = force_reg (Pmode, new_rtx);
13321 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13322 new_rtx = gen_const_mem (Pmode, new_rtx);
13323 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13324
13325 if (reg == 0)
13326 reg = gen_reg_rtx (Pmode);
13327 emit_move_insn (reg, new_rtx);
13328 new_rtx = reg;
13329 }
13330 }
13331 else
13332 {
13333 if (CONST_INT_P (addr)
13334 && !x86_64_immediate_operand (addr, VOIDmode))
13335 {
13336 if (reg)
13337 {
13338 emit_move_insn (reg, addr);
13339 new_rtx = reg;
13340 }
13341 else
13342 new_rtx = force_reg (Pmode, addr);
13343 }
13344 else if (GET_CODE (addr) == CONST)
13345 {
13346 addr = XEXP (addr, 0);
13347
13348 /* We must match stuff we generate before. Assume the only
13349 unspecs that can get here are ours. Not that we could do
13350 anything with them anyway.... */
13351 if (GET_CODE (addr) == UNSPEC
13352 || (GET_CODE (addr) == PLUS
13353 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13354 return orig;
13355 gcc_assert (GET_CODE (addr) == PLUS);
13356 }
13357 if (GET_CODE (addr) == PLUS)
13358 {
13359 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13360
13361 /* Check first to see if this is a constant offset from a @GOTOFF
13362 symbol reference. */
13363 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13364 && CONST_INT_P (op1))
13365 {
13366 if (!TARGET_64BIT)
13367 {
13368 set_pic_reg_ever_live ();
13369 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13370 UNSPEC_GOTOFF);
13371 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13372 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13373 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13374
13375 if (reg != 0)
13376 {
13377 emit_move_insn (reg, new_rtx);
13378 new_rtx = reg;
13379 }
13380 }
13381 else
13382 {
13383 if (INTVAL (op1) < -16*1024*1024
13384 || INTVAL (op1) >= 16*1024*1024)
13385 {
13386 if (!x86_64_immediate_operand (op1, Pmode))
13387 op1 = force_reg (Pmode, op1);
13388 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13389 }
13390 }
13391 }
13392 else
13393 {
13394 rtx base = legitimize_pic_address (op0, reg);
13395 enum machine_mode mode = GET_MODE (base);
13396 new_rtx
13397 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13398
13399 if (CONST_INT_P (new_rtx))
13400 {
13401 if (INTVAL (new_rtx) < -16*1024*1024
13402 || INTVAL (new_rtx) >= 16*1024*1024)
13403 {
13404 if (!x86_64_immediate_operand (new_rtx, mode))
13405 new_rtx = force_reg (mode, new_rtx);
13406 new_rtx
13407 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13408 }
13409 else
13410 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13411 }
13412 else
13413 {
13414 if (GET_CODE (new_rtx) == PLUS
13415 && CONSTANT_P (XEXP (new_rtx, 1)))
13416 {
13417 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13418 new_rtx = XEXP (new_rtx, 1);
13419 }
13420 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13421 }
13422 }
13423 }
13424 }
13425 return new_rtx;
13426 }
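/* To illustrate the two reference types handled above, typical 32-bit
   output looks roughly like this (a sketch, not literal compiler output):

     global data, via the GOT:       movl  foo@GOT(%ebx), %eax
     local/static data, via GOTOFF:  leal  foo@GOTOFF(%ebx), %eax

   In 64-bit small-model PIC the GOT load instead becomes an IP-relative
   reference such as  movq  foo@GOTPCREL(%rip), %rax.  */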
13427 \f
13428 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13429
13430 static rtx
13431 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13432 {
13433 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13434
13435 if (GET_MODE (tp) != tp_mode)
13436 {
13437 gcc_assert (GET_MODE (tp) == SImode);
13438 gcc_assert (tp_mode == DImode);
13439
13440 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13441 }
13442
13443 if (to_reg)
13444 tp = copy_to_mode_reg (tp_mode, tp);
13445
13446 return tp;
13447 }
13448
13449 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13450
13451 static GTY(()) rtx ix86_tls_symbol;
13452
13453 static rtx
13454 ix86_tls_get_addr (void)
13455 {
13456 if (!ix86_tls_symbol)
13457 {
13458 const char *sym
13459 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13460 ? "___tls_get_addr" : "__tls_get_addr");
13461
13462 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13463 }
13464
13465 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13466 {
13467 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13468 UNSPEC_PLTOFF);
13469 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13470 gen_rtx_CONST (Pmode, unspec));
13471 }
13472
13473 return ix86_tls_symbol;
13474 }
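/* For illustration, the 32-bit GNU-TLS global-dynamic sequence that ends
   up calling the symbol constructed above looks roughly like this
   (a sketch, not literal compiler output):

	leal	x@tlsgd(,%ebx,1), %eax
	call	___tls_get_addr@PLT  */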
13475
13476 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13477
13478 static GTY(()) rtx ix86_tls_module_base_symbol;
13479
13480 rtx
13481 ix86_tls_module_base (void)
13482 {
13483 if (!ix86_tls_module_base_symbol)
13484 {
13485 ix86_tls_module_base_symbol
13486 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13487
13488 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13489 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13490 }
13491
13492 return ix86_tls_module_base_symbol;
13493 }
13494
13495 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13496 false if we expect this to be used for a memory address and true if
13497 we expect to load the address into a register. */
13498
13499 static rtx
13500 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13501 {
13502 rtx dest, base, off;
13503 rtx pic = NULL_RTX, tp = NULL_RTX;
13504 enum machine_mode tp_mode = Pmode;
13505 int type;
13506
13507 /* Fall back to global dynamic model if tool chain cannot support local
13508 dynamic. */
13509 if (TARGET_SUN_TLS && !TARGET_64BIT
13510 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13511 && model == TLS_MODEL_LOCAL_DYNAMIC)
13512 model = TLS_MODEL_GLOBAL_DYNAMIC;
13513
13514 switch (model)
13515 {
13516 case TLS_MODEL_GLOBAL_DYNAMIC:
13517 dest = gen_reg_rtx (Pmode);
13518
13519 if (!TARGET_64BIT)
13520 {
13521 if (flag_pic && !TARGET_PECOFF)
13522 pic = pic_offset_table_rtx;
13523 else
13524 {
13525 pic = gen_reg_rtx (Pmode);
13526 emit_insn (gen_set_got (pic));
13527 }
13528 }
13529
13530 if (TARGET_GNU2_TLS)
13531 {
13532 if (TARGET_64BIT)
13533 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13534 else
13535 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13536
13537 tp = get_thread_pointer (Pmode, true);
13538 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13539
13540 if (GET_MODE (x) != Pmode)
13541 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13542
13543 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13544 }
13545 else
13546 {
13547 rtx caddr = ix86_tls_get_addr ();
13548
13549 if (TARGET_64BIT)
13550 {
13551 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13552 rtx_insn *insns;
13553
13554 start_sequence ();
13555 emit_call_insn
13556 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13557 insns = get_insns ();
13558 end_sequence ();
13559
13560 if (GET_MODE (x) != Pmode)
13561 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13562
13563 RTL_CONST_CALL_P (insns) = 1;
13564 emit_libcall_block (insns, dest, rax, x);
13565 }
13566 else
13567 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13568 }
13569 break;
13570
13571 case TLS_MODEL_LOCAL_DYNAMIC:
13572 base = gen_reg_rtx (Pmode);
13573
13574 if (!TARGET_64BIT)
13575 {
13576 if (flag_pic)
13577 pic = pic_offset_table_rtx;
13578 else
13579 {
13580 pic = gen_reg_rtx (Pmode);
13581 emit_insn (gen_set_got (pic));
13582 }
13583 }
13584
13585 if (TARGET_GNU2_TLS)
13586 {
13587 rtx tmp = ix86_tls_module_base ();
13588
13589 if (TARGET_64BIT)
13590 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13591 else
13592 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13593
13594 tp = get_thread_pointer (Pmode, true);
13595 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13596 gen_rtx_MINUS (Pmode, tmp, tp));
13597 }
13598 else
13599 {
13600 rtx caddr = ix86_tls_get_addr ();
13601
13602 if (TARGET_64BIT)
13603 {
13604 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13605 rtx_insn *insns;
13606 rtx eqv;
13607
13608 start_sequence ();
13609 emit_call_insn
13610 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13611 insns = get_insns ();
13612 end_sequence ();
13613
13614 /* Attach a unique REG_EQUAL to allow the RTL optimizers to
13615 share the LD_BASE result with other LD model accesses. */
13616 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13617 UNSPEC_TLS_LD_BASE);
13618
13619 RTL_CONST_CALL_P (insns) = 1;
13620 emit_libcall_block (insns, base, rax, eqv);
13621 }
13622 else
13623 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13624 }
13625
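/* BASE now holds the address of the module's TLS block; the variable
   itself sits at a link-time constant DTPOFF offset from it.  */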
13626 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13627 off = gen_rtx_CONST (Pmode, off);
13628
13629 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13630
13631 if (TARGET_GNU2_TLS)
13632 {
13633 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13634
13635 if (GET_MODE (x) != Pmode)
13636 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13637
13638 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13639 }
13640 break;
13641
13642 case TLS_MODEL_INITIAL_EXEC:
13643 if (TARGET_64BIT)
13644 {
13645 if (TARGET_SUN_TLS && !TARGET_X32)
13646 {
13647 /* The Sun linker took the AMD64 TLS spec literally
13648 and can only handle %rax as the destination of the
13649 initial-exec code sequence. */
13650
13651 dest = gen_reg_rtx (DImode);
13652 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13653 return dest;
13654 }
13655
13656 /* Generate DImode references to avoid %fs:(%reg32)
13657 problems and the linker's IE->LE relaxation bug. */
13658 tp_mode = DImode;
13659 pic = NULL;
13660 type = UNSPEC_GOTNTPOFF;
13661 }
13662 else if (flag_pic)
13663 {
13664 set_pic_reg_ever_live ();
13665 pic = pic_offset_table_rtx;
13666 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13667 }
13668 else if (!TARGET_ANY_GNU_TLS)
13669 {
13670 pic = gen_reg_rtx (Pmode);
13671 emit_insn (gen_set_got (pic));
13672 type = UNSPEC_GOTTPOFF;
13673 }
13674 else
13675 {
13676 pic = NULL;
13677 type = UNSPEC_INDNTPOFF;
13678 }
13679
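/* Load X's offset from the thread pointer out of the GOT slot selected
   above, then add it to (or, for the non-GNU model, subtract it from)
   the thread pointer.  */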
13680 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13681 off = gen_rtx_CONST (tp_mode, off);
13682 if (pic)
13683 off = gen_rtx_PLUS (tp_mode, pic, off);
13684 off = gen_const_mem (tp_mode, off);
13685 set_mem_alias_set (off, ix86_GOT_alias_set ());
13686
13687 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13688 {
13689 base = get_thread_pointer (tp_mode,
13690 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13691 off = force_reg (tp_mode, off);
13692 return gen_rtx_PLUS (tp_mode, base, off);
13693 }
13694 else
13695 {
13696 base = get_thread_pointer (Pmode, true);
13697 dest = gen_reg_rtx (Pmode);
13698 emit_insn (ix86_gen_sub3 (dest, base, off));
13699 }
13700 break;
13701
13702 case TLS_MODEL_LOCAL_EXEC:
13703 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13704 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13705 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13706 off = gen_rtx_CONST (Pmode, off);
13707
13708 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13709 {
13710 base = get_thread_pointer (Pmode,
13711 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13712 return gen_rtx_PLUS (Pmode, base, off);
13713 }
13714 else
13715 {
13716 base = get_thread_pointer (Pmode, true);
13717 dest = gen_reg_rtx (Pmode);
13718 emit_insn (ix86_gen_sub3 (dest, base, off));
13719 }
13720 break;
13721
13722 default:
13723 gcc_unreachable ();
13724 }
13725
13726 return dest;
13727 }
13728
13729 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13730 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13731 unique refptr-DECL symbol corresponding to symbol DECL. */
13732
13733 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13734 htab_t dllimport_map;
13735
13736 static tree
13737 get_dllimport_decl (tree decl, bool beimport)
13738 {
13739 struct tree_map *h, in;
13740 void **loc;
13741 const char *name;
13742 const char *prefix;
13743 size_t namelen, prefixlen;
13744 char *imp_name;
13745 tree to;
13746 rtx rtl;
13747
13748 if (!dllimport_map)
13749 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13750
13751 in.hash = htab_hash_pointer (decl);
13752 in.base.from = decl;
13753 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13754 h = (struct tree_map *) *loc;
13755 if (h)
13756 return h->to;
13757
13758 *loc = h = ggc_alloc<tree_map> ();
13759 h->hash = in.hash;
13760 h->base.from = decl;
13761 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13762 VAR_DECL, NULL, ptr_type_node);
13763 DECL_ARTIFICIAL (to) = 1;
13764 DECL_IGNORED_P (to) = 1;
13765 DECL_EXTERNAL (to) = 1;
13766 TREE_READONLY (to) = 1;
13767
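/* Build the assembler name of the indirection cell: "__imp_" (or
   "__imp__") for dllimport references, "refptr." otherwise.  The
   leading '*' tells the assembler-name machinery to emit the string
   verbatim, without adding the user label prefix again.  */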
13768 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13769 name = targetm.strip_name_encoding (name);
13770 if (beimport)
13771 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13772 ? "*__imp_" : "*__imp__";
13773 else
13774 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13775 namelen = strlen (name);
13776 prefixlen = strlen (prefix);
13777 imp_name = (char *) alloca (namelen + prefixlen + 1);
13778 memcpy (imp_name, prefix, prefixlen);
13779 memcpy (imp_name + prefixlen, name, namelen + 1);
13780
13781 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13782 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13783 SET_SYMBOL_REF_DECL (rtl, to);
13784 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13785 if (!beimport)
13786 {
13787 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13788 #ifdef SUB_TARGET_RECORD_STUB
13789 SUB_TARGET_RECORD_STUB (name);
13790 #endif
13791 }
13792
13793 rtl = gen_const_mem (Pmode, rtl);
13794 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13795
13796 SET_DECL_RTL (to, rtl);
13797 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13798
13799 return to;
13800 }
13801
13802 /* Expand SYMBOL into its corresponding far-address symbol.
13803 WANT_REG is true if we require the result to be a register. */
13804
13805 static rtx
13806 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13807 {
13808 tree imp_decl;
13809 rtx x;
13810
13811 gcc_assert (SYMBOL_REF_DECL (symbol));
13812 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13813
13814 x = DECL_RTL (imp_decl);
13815 if (want_reg)
13816 x = force_reg (Pmode, x);
13817 return x;
13818 }
13819
13820 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13821 true if we require the result to be a register. */
13822
13823 static rtx
13824 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13825 {
13826 tree imp_decl;
13827 rtx x;
13828
13829 gcc_assert (SYMBOL_REF_DECL (symbol));
13830 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13831
13832 x = DECL_RTL (imp_decl);
13833 if (want_reg)
13834 x = force_reg (Pmode, x);
13835 return x;
13836 }
13837
13838 /* Expand ADDR into its corresponding dllimport or refptr symbol. INREG
13839 is true if we require the result to be a register. */
13840
13841 static rtx
13842 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13843 {
13844 if (!TARGET_PECOFF)
13845 return NULL_RTX;
13846
13847 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13848 {
13849 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13850 return legitimize_dllimport_symbol (addr, inreg);
13851 if (GET_CODE (addr) == CONST
13852 && GET_CODE (XEXP (addr, 0)) == PLUS
13853 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13854 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13855 {
13856 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13857 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13858 }
13859 }
13860
13861 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13862 return NULL_RTX;
13863 if (GET_CODE (addr) == SYMBOL_REF
13864 && !is_imported_p (addr)
13865 && SYMBOL_REF_EXTERNAL_P (addr)
13866 && SYMBOL_REF_DECL (addr))
13867 return legitimize_pe_coff_extern_decl (addr, inreg);
13868
13869 if (GET_CODE (addr) == CONST
13870 && GET_CODE (XEXP (addr, 0)) == PLUS
13871 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13872 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13873 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13874 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13875 {
13876 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13877 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13878 }
13879 return NULL_RTX;
13880 }
13881
13882 /* Try machine-dependent ways of modifying an illegitimate address
13883 to be legitimate. If we find one, return the new, valid address.
13884 This function is used in only one place: `memory_address' in explow.c.
13885
13886 OLDX is the address as it was before break_out_memory_refs was called.
13887 In some cases it is useful to look at this to decide what needs to be done.
13888
13889 It is always safe for this function to do nothing. It exists to recognize
13890 opportunities to optimize the output.
13891
13892 For the 80386, we handle X+REG by loading X into a register R and
13893 using R+REG. R will go in a general reg and indexing will be used.
13894 However, if REG is a broken-out memory address or multiplication,
13895 nothing needs to be done because REG can certainly go in a general reg.
13896
13897 When -fpic is used, special handling is needed for symbolic references.
13898 See comments by legitimize_pic_address in i386.c for details. */
13899
13900 static rtx
13901 ix86_legitimize_address (rtx x, rtx, enum machine_mode mode)
13902 {
13903 int changed = 0;
13904 unsigned log;
13905
13906 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13907 if (log)
13908 return legitimize_tls_address (x, (enum tls_model) log, false);
13909 if (GET_CODE (x) == CONST
13910 && GET_CODE (XEXP (x, 0)) == PLUS
13911 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13912 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13913 {
13914 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13915 (enum tls_model) log, false);
13916 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13917 }
13918
13919 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13920 {
13921 rtx tmp = legitimize_pe_coff_symbol (x, true);
13922 if (tmp)
13923 return tmp;
13924 }
13925
13926 if (flag_pic && SYMBOLIC_CONST (x))
13927 return legitimize_pic_address (x, 0);
13928
13929 #if TARGET_MACHO
13930 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13931 return machopic_indirect_data_reference (x, 0);
13932 #endif
13933
13934 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13935 if (GET_CODE (x) == ASHIFT
13936 && CONST_INT_P (XEXP (x, 1))
13937 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13938 {
13939 changed = 1;
13940 log = INTVAL (XEXP (x, 1));
13941 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13942 GEN_INT (1 << log));
13943 }
13944
13945 if (GET_CODE (x) == PLUS)
13946 {
13947 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13948
13949 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13950 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13951 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13952 {
13953 changed = 1;
13954 log = INTVAL (XEXP (XEXP (x, 0), 1));
13955 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13956 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13957 GEN_INT (1 << log));
13958 }
13959
13960 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13961 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13962 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13963 {
13964 changed = 1;
13965 log = INTVAL (XEXP (XEXP (x, 1), 1));
13966 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13967 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13968 GEN_INT (1 << log));
13969 }
13970
13971 /* Put multiply first if it isn't already. */
13972 if (GET_CODE (XEXP (x, 1)) == MULT)
13973 {
13974 rtx tmp = XEXP (x, 0);
13975 XEXP (x, 0) = XEXP (x, 1);
13976 XEXP (x, 1) = tmp;
13977 changed = 1;
13978 }
13979
13980 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13981 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13982 created by virtual register instantiation, register elimination, and
13983 similar optimizations. */
13984 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13985 {
13986 changed = 1;
13987 x = gen_rtx_PLUS (Pmode,
13988 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13989 XEXP (XEXP (x, 1), 0)),
13990 XEXP (XEXP (x, 1), 1));
13991 }
13992
13993 /* Canonicalize
13994 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13995 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13996 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13997 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13998 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13999 && CONSTANT_P (XEXP (x, 1)))
14000 {
14001 rtx constant;
14002 rtx other = NULL_RTX;
14003
14004 if (CONST_INT_P (XEXP (x, 1)))
14005 {
14006 constant = XEXP (x, 1);
14007 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14008 }
14009 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14010 {
14011 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14012 other = XEXP (x, 1);
14013 }
14014 else
14015 constant = 0;
14016
14017 if (constant)
14018 {
14019 changed = 1;
14020 x = gen_rtx_PLUS (Pmode,
14021 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14022 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14023 plus_constant (Pmode, other,
14024 INTVAL (constant)));
14025 }
14026 }
14027
14028 if (changed && ix86_legitimate_address_p (mode, x, false))
14029 return x;
14030
14031 if (GET_CODE (XEXP (x, 0)) == MULT)
14032 {
14033 changed = 1;
14034 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14035 }
14036
14037 if (GET_CODE (XEXP (x, 1)) == MULT)
14038 {
14039 changed = 1;
14040 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14041 }
14042
14043 if (changed
14044 && REG_P (XEXP (x, 1))
14045 && REG_P (XEXP (x, 0)))
14046 return x;
14047
14048 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14049 {
14050 changed = 1;
14051 x = legitimize_pic_address (x, 0);
14052 }
14053
14054 if (changed && ix86_legitimate_address_p (mode, x, false))
14055 return x;
14056
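/* As a last resort, if one addend is already a register, force the
   other one into a fresh register so the address becomes a simple
   reg + reg form.  */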
14057 if (REG_P (XEXP (x, 0)))
14058 {
14059 rtx temp = gen_reg_rtx (Pmode);
14060 rtx val = force_operand (XEXP (x, 1), temp);
14061 if (val != temp)
14062 {
14063 val = convert_to_mode (Pmode, val, 1);
14064 emit_move_insn (temp, val);
14065 }
14066
14067 XEXP (x, 1) = temp;
14068 return x;
14069 }
14070
14071 else if (REG_P (XEXP (x, 1)))
14072 {
14073 rtx temp = gen_reg_rtx (Pmode);
14074 rtx val = force_operand (XEXP (x, 0), temp);
14075 if (val != temp)
14076 {
14077 val = convert_to_mode (Pmode, val, 1);
14078 emit_move_insn (temp, val);
14079 }
14080
14081 XEXP (x, 0) = temp;
14082 return x;
14083 }
14084 }
14085
14086 return x;
14087 }
14088 \f
14089 /* Print an integer constant expression in assembler syntax. Addition
14090 and subtraction are the only arithmetic that may appear in these
14091 expressions. FILE is the stdio stream to write to, X is the rtx, and
14092 CODE is the operand print code from the output string. */
14093
14094 static void
14095 output_pic_addr_const (FILE *file, rtx x, int code)
14096 {
14097 char buf[256];
14098
14099 switch (GET_CODE (x))
14100 {
14101 case PC:
14102 gcc_assert (flag_pic);
14103 putc ('.', file);
14104 break;
14105
14106 case SYMBOL_REF:
14107 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14108 output_addr_const (file, x);
14109 else
14110 {
14111 const char *name = XSTR (x, 0);
14112
14113 /* Mark the decl as referenced so that cgraph will
14114 output the function. */
14115 if (SYMBOL_REF_DECL (x))
14116 mark_decl_referenced (SYMBOL_REF_DECL (x));
14117
14118 #if TARGET_MACHO
14119 if (MACHOPIC_INDIRECT
14120 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14121 name = machopic_indirection_name (x, /*stub_p=*/true);
14122 #endif
14123 assemble_name (file, name);
14124 }
14125 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14126 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14127 fputs ("@PLT", file);
14128 break;
14129
14130 case LABEL_REF:
14131 x = XEXP (x, 0);
14132 /* FALLTHRU */
14133 case CODE_LABEL:
14134 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14135 assemble_name (asm_out_file, buf);
14136 break;
14137
14138 case CONST_INT:
14139 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14140 break;
14141
14142 case CONST:
14143 /* This used to output parentheses around the expression,
14144 but that does not work on the 386 (either ATT or BSD assembler). */
14145 output_pic_addr_const (file, XEXP (x, 0), code);
14146 break;
14147
14148 case CONST_DOUBLE:
14149 if (GET_MODE (x) == VOIDmode)
14150 {
14151 /* We can use %d if the number is <32 bits and positive. */
14152 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14153 fprintf (file, "0x%lx%08lx",
14154 (unsigned long) CONST_DOUBLE_HIGH (x),
14155 (unsigned long) CONST_DOUBLE_LOW (x));
14156 else
14157 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14158 }
14159 else
14160 /* We can't handle floating point constants;
14161 TARGET_PRINT_OPERAND must handle them. */
14162 output_operand_lossage ("floating constant misused");
14163 break;
14164
14165 case PLUS:
14166 /* Some assemblers need integer constants to appear first. */
14167 if (CONST_INT_P (XEXP (x, 0)))
14168 {
14169 output_pic_addr_const (file, XEXP (x, 0), code);
14170 putc ('+', file);
14171 output_pic_addr_const (file, XEXP (x, 1), code);
14172 }
14173 else
14174 {
14175 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14176 output_pic_addr_const (file, XEXP (x, 1), code);
14177 putc ('+', file);
14178 output_pic_addr_const (file, XEXP (x, 0), code);
14179 }
14180 break;
14181
14182 case MINUS:
14183 if (!TARGET_MACHO)
14184 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14185 output_pic_addr_const (file, XEXP (x, 0), code);
14186 putc ('-', file);
14187 output_pic_addr_const (file, XEXP (x, 1), code);
14188 if (!TARGET_MACHO)
14189 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14190 break;
14191
14192 case UNSPEC:
14193 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14194 {
14195 bool f = i386_asm_output_addr_const_extra (file, x);
14196 gcc_assert (f);
14197 break;
14198 }
14199
14200 gcc_assert (XVECLEN (x, 0) == 1);
14201 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14202 switch (XINT (x, 1))
14203 {
14204 case UNSPEC_GOT:
14205 fputs ("@GOT", file);
14206 break;
14207 case UNSPEC_GOTOFF:
14208 fputs ("@GOTOFF", file);
14209 break;
14210 case UNSPEC_PLTOFF:
14211 fputs ("@PLTOFF", file);
14212 break;
14213 case UNSPEC_PCREL:
14214 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14215 "(%rip)" : "[rip]", file);
14216 break;
14217 case UNSPEC_GOTPCREL:
14218 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14219 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14220 break;
14221 case UNSPEC_GOTTPOFF:
14222 /* FIXME: This might be @TPOFF in Sun ld too. */
14223 fputs ("@gottpoff", file);
14224 break;
14225 case UNSPEC_TPOFF:
14226 fputs ("@tpoff", file);
14227 break;
14228 case UNSPEC_NTPOFF:
14229 if (TARGET_64BIT)
14230 fputs ("@tpoff", file);
14231 else
14232 fputs ("@ntpoff", file);
14233 break;
14234 case UNSPEC_DTPOFF:
14235 fputs ("@dtpoff", file);
14236 break;
14237 case UNSPEC_GOTNTPOFF:
14238 if (TARGET_64BIT)
14239 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14240 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14241 else
14242 fputs ("@gotntpoff", file);
14243 break;
14244 case UNSPEC_INDNTPOFF:
14245 fputs ("@indntpoff", file);
14246 break;
14247 #if TARGET_MACHO
14248 case UNSPEC_MACHOPIC_OFFSET:
14249 putc ('-', file);
14250 machopic_output_function_base_name (file);
14251 break;
14252 #endif
14253 default:
14254 output_operand_lossage ("invalid UNSPEC as operand");
14255 break;
14256 }
14257 break;
14258
14259 default:
14260 output_operand_lossage ("invalid expression as operand");
14261 }
14262 }
14263
14264 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14265 We need to emit DTP-relative relocations. */
14266
14267 static void ATTRIBUTE_UNUSED
14268 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14269 {
14270 fputs (ASM_LONG, file);
14271 output_addr_const (file, x);
14272 fputs ("@dtpoff", file);
14273 switch (size)
14274 {
14275 case 4:
14276 break;
14277 case 8:
14278 fputs (", 0", file);
14279 break;
14280 default:
14281 gcc_unreachable ();
14282 }
14283 }
14284
14285 /* Return true if X is a representation of the PIC register. This copes
14286 with calls from ix86_find_base_term, where the register might have
14287 been replaced by a cselib value. */
14288
14289 static bool
14290 ix86_pic_register_p (rtx x)
14291 {
14292 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14293 return (pic_offset_table_rtx
14294 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14295 else if (!REG_P (x))
14296 return false;
14297 else if (pic_offset_table_rtx)
14298 {
14299 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14300 return true;
14301 if (HARD_REGISTER_P (x)
14302 && !HARD_REGISTER_P (pic_offset_table_rtx)
14303 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14304 return true;
14305 return false;
14306 }
14307 else
14308 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14309 }
14310
14311 /* Helper function for ix86_delegitimize_address.
14312 Attempt to delegitimize TLS local-exec accesses. */
14313
14314 static rtx
14315 ix86_delegitimize_tls_address (rtx orig_x)
14316 {
14317 rtx x = orig_x, unspec;
14318 struct ix86_address addr;
14319
14320 if (!TARGET_TLS_DIRECT_SEG_REFS)
14321 return orig_x;
14322 if (MEM_P (x))
14323 x = XEXP (x, 0);
14324 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14325 return orig_x;
14326 if (ix86_decompose_address (x, &addr) == 0
14327 || addr.seg != DEFAULT_TLS_SEG_REG
14328 || addr.disp == NULL_RTX
14329 || GET_CODE (addr.disp) != CONST)
14330 return orig_x;
14331 unspec = XEXP (addr.disp, 0);
14332 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14333 unspec = XEXP (unspec, 0);
14334 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14335 return orig_x;
14336 x = XVECEXP (unspec, 0, 0);
14337 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14338 if (unspec != XEXP (addr.disp, 0))
14339 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14340 if (addr.index)
14341 {
14342 rtx idx = addr.index;
14343 if (addr.scale != 1)
14344 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14345 x = gen_rtx_PLUS (Pmode, idx, x);
14346 }
14347 if (addr.base)
14348 x = gen_rtx_PLUS (Pmode, addr.base, x);
14349 if (MEM_P (orig_x))
14350 x = replace_equiv_address_nv (orig_x, x);
14351 return x;
14352 }
14353
14354 /* In the name of slightly smaller debug output, and to cater to
14355 general assembler lossage, recognize PIC+GOTOFF and turn it back
14356 into a direct symbol reference.
14357
14358 On Darwin, this is necessary to avoid a crash, because Darwin
14359 has a different PIC label for each routine but the DWARF debugging
14360 information is not associated with any particular routine, so it's
14361 necessary to remove references to the PIC label from RTL stored by
14362 the DWARF output code. */
14363
14364 static rtx
14365 ix86_delegitimize_address (rtx x)
14366 {
14367 rtx orig_x = delegitimize_mem_from_attrs (x);
14368 /* addend is NULL or some rtx if x is something+GOTOFF where
14369 something doesn't include the PIC register. */
14370 rtx addend = NULL_RTX;
14371 /* reg_addend is NULL or a multiple of some register. */
14372 rtx reg_addend = NULL_RTX;
14373 /* const_addend is NULL or a const_int. */
14374 rtx const_addend = NULL_RTX;
14375 /* This is the result, or NULL. */
14376 rtx result = NULL_RTX;
14377
14378 x = orig_x;
14379
14380 if (MEM_P (x))
14381 x = XEXP (x, 0);
14382
14383 if (TARGET_64BIT)
14384 {
14385 if (GET_CODE (x) == CONST
14386 && GET_CODE (XEXP (x, 0)) == PLUS
14387 && GET_MODE (XEXP (x, 0)) == Pmode
14388 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14389 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14390 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14391 {
14392 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14393 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14394 if (MEM_P (orig_x))
14395 x = replace_equiv_address_nv (orig_x, x);
14396 return x;
14397 }
14398
14399 if (GET_CODE (x) == CONST
14400 && GET_CODE (XEXP (x, 0)) == UNSPEC
14401 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14402 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14403 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14404 {
14405 x = XVECEXP (XEXP (x, 0), 0, 0);
14406 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14407 {
14408 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14409 GET_MODE (x), 0);
14410 if (x == NULL_RTX)
14411 return orig_x;
14412 }
14413 return x;
14414 }
14415
14416 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14417 return ix86_delegitimize_tls_address (orig_x);
14418
14419 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14420 and -mcmodel=medium -fpic. */
14421 }
14422
14423 if (GET_CODE (x) != PLUS
14424 || GET_CODE (XEXP (x, 1)) != CONST)
14425 return ix86_delegitimize_tls_address (orig_x);
14426
14427 if (ix86_pic_register_p (XEXP (x, 0)))
14428 /* %ebx + GOT/GOTOFF */
14429 ;
14430 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14431 {
14432 /* %ebx + %reg * scale + GOT/GOTOFF */
14433 reg_addend = XEXP (x, 0);
14434 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14435 reg_addend = XEXP (reg_addend, 1);
14436 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14437 reg_addend = XEXP (reg_addend, 0);
14438 else
14439 {
14440 reg_addend = NULL_RTX;
14441 addend = XEXP (x, 0);
14442 }
14443 }
14444 else
14445 addend = XEXP (x, 0);
14446
14447 x = XEXP (XEXP (x, 1), 0);
14448 if (GET_CODE (x) == PLUS
14449 && CONST_INT_P (XEXP (x, 1)))
14450 {
14451 const_addend = XEXP (x, 1);
14452 x = XEXP (x, 0);
14453 }
14454
14455 if (GET_CODE (x) == UNSPEC
14456 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14457 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14458 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14459 && !MEM_P (orig_x) && !addend)))
14460 result = XVECEXP (x, 0, 0);
14461
14462 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14463 && !MEM_P (orig_x))
14464 result = XVECEXP (x, 0, 0);
14465
14466 if (! result)
14467 return ix86_delegitimize_tls_address (orig_x);
14468
14469 if (const_addend)
14470 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14471 if (reg_addend)
14472 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14473 if (addend)
14474 {
14475 /* If the rest of original X doesn't involve the PIC register, add
14476 addend and subtract pic_offset_table_rtx. This can happen e.g.
14477 for code like:
14478 leal (%ebx, %ecx, 4), %ecx
14479 ...
14480 movl foo@GOTOFF(%ecx), %edx
14481 in which case we return (%ecx - %ebx) + foo
14482 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
14483 and reload has completed. */
14484 if (pic_offset_table_rtx
14485 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
14486 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14487 pic_offset_table_rtx),
14488 result);
14489 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
14490 {
14491 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
14492 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
14493 result = gen_rtx_PLUS (Pmode, tmp, result);
14494 }
14495 else
14496 return orig_x;
14497 }
14498 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14499 {
14500 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14501 if (result == NULL_RTX)
14502 return orig_x;
14503 }
14504 return result;
14505 }
14506
14507 /* If X is a machine specific address (i.e. a symbol or label being
14508 referenced as a displacement from the GOT implemented using an
14509 UNSPEC), then return the base term. Otherwise return X. */
14510
14511 rtx
14512 ix86_find_base_term (rtx x)
14513 {
14514 rtx term;
14515
14516 if (TARGET_64BIT)
14517 {
14518 if (GET_CODE (x) != CONST)
14519 return x;
14520 term = XEXP (x, 0);
14521 if (GET_CODE (term) == PLUS
14522 && (CONST_INT_P (XEXP (term, 1))
14523 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14524 term = XEXP (term, 0);
14525 if (GET_CODE (term) != UNSPEC
14526 || (XINT (term, 1) != UNSPEC_GOTPCREL
14527 && XINT (term, 1) != UNSPEC_PCREL))
14528 return x;
14529
14530 return XVECEXP (term, 0, 0);
14531 }
14532
14533 return ix86_delegitimize_address (x);
14534 }
14535 \f
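/* Output to FILE the condition-code suffix ("e", "ne", "g", "b", ...)
   corresponding to comparison CODE in condition mode MODE.  REVERSE
   prints the reversed condition; FP selects the suffixes appropriate
   for floating-point compares.  */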
14536 static void
14537 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14538 bool fp, FILE *file)
14539 {
14540 const char *suffix;
14541
14542 if (mode == CCFPmode || mode == CCFPUmode)
14543 {
14544 code = ix86_fp_compare_code_to_integer (code);
14545 mode = CCmode;
14546 }
14547 if (reverse)
14548 code = reverse_condition (code);
14549
14550 switch (code)
14551 {
14552 case EQ:
14553 switch (mode)
14554 {
14555 case CCAmode:
14556 suffix = "a";
14557 break;
14558
14559 case CCCmode:
14560 suffix = "c";
14561 break;
14562
14563 case CCOmode:
14564 suffix = "o";
14565 break;
14566
14567 case CCSmode:
14568 suffix = "s";
14569 break;
14570
14571 default:
14572 suffix = "e";
14573 }
14574 break;
14575 case NE:
14576 switch (mode)
14577 {
14578 case CCAmode:
14579 suffix = "na";
14580 break;
14581
14582 case CCCmode:
14583 suffix = "nc";
14584 break;
14585
14586 case CCOmode:
14587 suffix = "no";
14588 break;
14589
14590 case CCSmode:
14591 suffix = "ns";
14592 break;
14593
14594 default:
14595 suffix = "ne";
14596 }
14597 break;
14598 case GT:
14599 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14600 suffix = "g";
14601 break;
14602 case GTU:
14603 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14604 Those same assemblers have the same but opposite lossage on cmov. */
14605 if (mode == CCmode)
14606 suffix = fp ? "nbe" : "a";
14607 else
14608 gcc_unreachable ();
14609 break;
14610 case LT:
14611 switch (mode)
14612 {
14613 case CCNOmode:
14614 case CCGOCmode:
14615 suffix = "s";
14616 break;
14617
14618 case CCmode:
14619 case CCGCmode:
14620 suffix = "l";
14621 break;
14622
14623 default:
14624 gcc_unreachable ();
14625 }
14626 break;
14627 case LTU:
14628 if (mode == CCmode)
14629 suffix = "b";
14630 else if (mode == CCCmode)
14631 suffix = "c";
14632 else
14633 gcc_unreachable ();
14634 break;
14635 case GE:
14636 switch (mode)
14637 {
14638 case CCNOmode:
14639 case CCGOCmode:
14640 suffix = "ns";
14641 break;
14642
14643 case CCmode:
14644 case CCGCmode:
14645 suffix = "ge";
14646 break;
14647
14648 default:
14649 gcc_unreachable ();
14650 }
14651 break;
14652 case GEU:
14653 if (mode == CCmode)
14654 suffix = fp ? "nb" : "ae";
14655 else if (mode == CCCmode)
14656 suffix = "nc";
14657 else
14658 gcc_unreachable ();
14659 break;
14660 case LE:
14661 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14662 suffix = "le";
14663 break;
14664 case LEU:
14665 if (mode == CCmode)
14666 suffix = "be";
14667 else
14668 gcc_unreachable ();
14669 break;
14670 case UNORDERED:
14671 suffix = fp ? "u" : "p";
14672 break;
14673 case ORDERED:
14674 suffix = fp ? "nu" : "np";
14675 break;
14676 default:
14677 gcc_unreachable ();
14678 }
14679 fputs (suffix, file);
14680 }
14681
14682 /* Print the name of register X to FILE based on its machine mode and number.
14683 If CODE is 'w', pretend the mode is HImode.
14684 If CODE is 'b', pretend the mode is QImode.
14685 If CODE is 'k', pretend the mode is SImode.
14686 If CODE is 'q', pretend the mode is DImode.
14687 If CODE is 'x', pretend the mode is V4SFmode.
14688 If CODE is 't', pretend the mode is V8SFmode.
14689 If CODE is 'g', pretend the mode is V16SFmode.
14690 If CODE is 'h', pretend the reg is the 'high' byte register.
14691 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
14692 If CODE is 'd', duplicate the operand for an AVX instruction.
14693 */
14694
14695 void
14696 print_reg (rtx x, int code, FILE *file)
14697 {
14698 const char *reg;
14699 unsigned int regno;
14700 bool duplicated = code == 'd' && TARGET_AVX;
14701
14702 if (ASSEMBLER_DIALECT == ASM_ATT)
14703 putc ('%', file);
14704
14705 if (x == pc_rtx)
14706 {
14707 gcc_assert (TARGET_64BIT);
14708 fputs ("rip", file);
14709 return;
14710 }
14711
14712 regno = true_regnum (x);
14713 gcc_assert (regno != ARG_POINTER_REGNUM
14714 && regno != FRAME_POINTER_REGNUM
14715 && regno != FLAGS_REG
14716 && regno != FPSR_REG
14717 && regno != FPCR_REG);
14718
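/* From here on, CODE holds the operand size in bytes: 0 selects the
   high QImode register name, 3 the "st(0)" x87 naming, and 16/32/64
   the xmm/ymm/zmm forms.  */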
14719 if (code == 'w' || MMX_REG_P (x))
14720 code = 2;
14721 else if (code == 'b')
14722 code = 1;
14723 else if (code == 'k')
14724 code = 4;
14725 else if (code == 'q')
14726 code = 8;
14727 else if (code == 'y')
14728 code = 3;
14729 else if (code == 'h')
14730 code = 0;
14731 else if (code == 'x')
14732 code = 16;
14733 else if (code == 't')
14734 code = 32;
14735 else if (code == 'g')
14736 code = 64;
14737 else
14738 code = GET_MODE_SIZE (GET_MODE (x));
14739
14740 /* Irritatingly, AMD extended registers use a different naming convention
14741 from the normal registers: "r%d[bwd]". */
14742 if (REX_INT_REGNO_P (regno))
14743 {
14744 gcc_assert (TARGET_64BIT);
14745 putc ('r', file);
14746 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14747 switch (code)
14748 {
14749 case 0:
14750 error ("extended registers have no high halves");
14751 break;
14752 case 1:
14753 putc ('b', file);
14754 break;
14755 case 2:
14756 putc ('w', file);
14757 break;
14758 case 4:
14759 putc ('d', file);
14760 break;
14761 case 8:
14762 /* no suffix */
14763 break;
14764 default:
14765 error ("unsupported operand size for extended register");
14766 break;
14767 }
14768 return;
14769 }
14770
14771 reg = NULL;
14772 switch (code)
14773 {
14774 case 3:
14775 if (STACK_TOP_P (x))
14776 {
14777 reg = "st(0)";
14778 break;
14779 }
14780 /* FALLTHRU */
14781 case 8:
14782 case 4:
14783 case 12:
14784 if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x))
14785 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14786 /* FALLTHRU */
14787 case 16:
14788 case 2:
14789 normal:
14790 reg = hi_reg_name[regno];
14791 break;
14792 case 1:
14793 if (regno >= ARRAY_SIZE (qi_reg_name))
14794 goto normal;
14795 reg = qi_reg_name[regno];
14796 break;
14797 case 0:
14798 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14799 goto normal;
14800 reg = qi_high_reg_name[regno];
14801 break;
14802 case 32:
14803 if (SSE_REG_P (x))
14804 {
14805 gcc_assert (!duplicated);
14806 putc ('y', file);
14807 fputs (hi_reg_name[regno] + 1, file);
14808 return;
14809 }
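/* FALLTHRU */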
14810 case 64:
14811 if (SSE_REG_P (x))
14812 {
14813 gcc_assert (!duplicated);
14814 putc ('z', file);
14815 fputs (hi_reg_name[REGNO (x)] + 1, file);
14816 return;
14817 }
14818 break;
14819 default:
14820 gcc_unreachable ();
14821 }
14822
14823 fputs (reg, file);
14824 if (duplicated)
14825 {
14826 if (ASSEMBLER_DIALECT == ASM_ATT)
14827 fprintf (file, ", %%%s", reg);
14828 else
14829 fprintf (file, ", %s", reg);
14830 }
14831 }
14832
14833 /* Meaning of CODE:
14834 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14835 C -- print opcode suffix for set/cmov insn.
14836 c -- like C, but print reversed condition
14837 F,f -- likewise, but for floating-point.
14838 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14839 otherwise nothing
14840 R -- print embedded rounding and sae.
14841 r -- print only sae.
14842 z -- print the opcode suffix for the size of the current operand.
14843 Z -- likewise, with special suffixes for x87 instructions.
14844 * -- print a star (in certain assembler syntax)
14845 A -- print an absolute memory reference.
14846 E -- print address with DImode register names if TARGET_64BIT.
14847 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14848 s -- print a shift double count, followed by the assembler's argument
14849 delimiter.
14850 b -- print the QImode name of the register for the indicated operand.
14851 %b0 would print %al if operands[0] is reg 0.
14852 w -- likewise, print the HImode name of the register.
14853 k -- likewise, print the SImode name of the register.
14854 q -- likewise, print the DImode name of the register.
14855 x -- likewise, print the V4SFmode name of the register.
14856 t -- likewise, print the V8SFmode name of the register.
14857 g -- likewise, print the V16SFmode name of the register.
14858 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14859 y -- print "st(0)" instead of "st" as a register.
14860 d -- print duplicated register operand for AVX instruction.
14861 D -- print condition for SSE cmp instruction.
14862 P -- if PIC, print an @PLT suffix.
14863 p -- print raw symbol name.
14864 X -- don't print any sort of PIC '@' suffix for a symbol.
14865 & -- print some in-use local-dynamic symbol name.
14866 H -- print a memory address offset by 8; used for sse high-parts
14867 Y -- print condition for XOP pcom* instruction.
14868 + -- print a branch hint as a 'cs' or 'ds' prefix
14869 ; -- print a semicolon (after prefixes, due to a bug in older gas).
14870 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14871 @ -- print the segment register of a thread base pointer load
14872 ^ -- print the addr32 prefix if TARGET_64BIT and Pmode != word_mode
14873 */
14874
14875 void
14876 ix86_print_operand (FILE *file, rtx x, int code)
14877 {
14878 if (code)
14879 {
14880 switch (code)
14881 {
14882 case 'A':
14883 switch (ASSEMBLER_DIALECT)
14884 {
14885 case ASM_ATT:
14886 putc ('*', file);
14887 break;
14888
14889 case ASM_INTEL:
14890 /* Intel syntax. For absolute addresses, registers should not
14891 be surrounded by brackets. */
14892 if (!REG_P (x))
14893 {
14894 putc ('[', file);
14895 ix86_print_operand (file, x, 0);
14896 putc (']', file);
14897 return;
14898 }
14899 break;
14900
14901 default:
14902 gcc_unreachable ();
14903 }
14904
14905 ix86_print_operand (file, x, 0);
14906 return;
14907
14908 case 'E':
14909 /* Wrap address in an UNSPEC to declare special handling. */
14910 if (TARGET_64BIT)
14911 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14912
14913 output_address (x);
14914 return;
14915
14916 case 'L':
14917 if (ASSEMBLER_DIALECT == ASM_ATT)
14918 putc ('l', file);
14919 return;
14920
14921 case 'W':
14922 if (ASSEMBLER_DIALECT == ASM_ATT)
14923 putc ('w', file);
14924 return;
14925
14926 case 'B':
14927 if (ASSEMBLER_DIALECT == ASM_ATT)
14928 putc ('b', file);
14929 return;
14930
14931 case 'Q':
14932 if (ASSEMBLER_DIALECT == ASM_ATT)
14933 putc ('l', file);
14934 return;
14935
14936 case 'S':
14937 if (ASSEMBLER_DIALECT == ASM_ATT)
14938 putc ('s', file);
14939 return;
14940
14941 case 'T':
14942 if (ASSEMBLER_DIALECT == ASM_ATT)
14943 putc ('t', file);
14944 return;
14945
14946 case 'O':
14947 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14948 if (ASSEMBLER_DIALECT != ASM_ATT)
14949 return;
14950
14951 switch (GET_MODE_SIZE (GET_MODE (x)))
14952 {
14953 case 2:
14954 putc ('w', file);
14955 break;
14956
14957 case 4:
14958 putc ('l', file);
14959 break;
14960
14961 case 8:
14962 putc ('q', file);
14963 break;
14964
14965 default:
14966 output_operand_lossage
14967 ("invalid operand size for operand code 'O'");
14968 return;
14969 }
14970
14971 putc ('.', file);
14972 #endif
14973 return;
14974
14975 case 'z':
14976 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14977 {
14978 /* Opcodes don't get size suffixes when using Intel syntax. */
14979 if (ASSEMBLER_DIALECT == ASM_INTEL)
14980 return;
14981
14982 switch (GET_MODE_SIZE (GET_MODE (x)))
14983 {
14984 case 1:
14985 putc ('b', file);
14986 return;
14987
14988 case 2:
14989 putc ('w', file);
14990 return;
14991
14992 case 4:
14993 putc ('l', file);
14994 return;
14995
14996 case 8:
14997 putc ('q', file);
14998 return;
14999
15000 default:
15001 output_operand_lossage
15002 ("invalid operand size for operand code 'z'");
15003 return;
15004 }
15005 }
15006
15007 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15008 warning
15009 (0, "non-integer operand used with operand code 'z'");
15010 /* FALLTHRU */
15011
15012 case 'Z':
15013 /* 387 opcodes don't get size suffixes when using Intel syntax. */
15014 if (ASSEMBLER_DIALECT == ASM_INTEL)
15015 return;
15016
15017 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15018 {
15019 switch (GET_MODE_SIZE (GET_MODE (x)))
15020 {
15021 case 2:
15022 #ifdef HAVE_AS_IX86_FILDS
15023 putc ('s', file);
15024 #endif
15025 return;
15026
15027 case 4:
15028 putc ('l', file);
15029 return;
15030
15031 case 8:
15032 #ifdef HAVE_AS_IX86_FILDQ
15033 putc ('q', file);
15034 #else
15035 fputs ("ll", file);
15036 #endif
15037 return;
15038
15039 default:
15040 break;
15041 }
15042 }
15043 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15044 {
15045 /* 387 opcodes don't get size suffixes
15046 if the operands are registers. */
15047 if (STACK_REG_P (x))
15048 return;
15049
15050 switch (GET_MODE_SIZE (GET_MODE (x)))
15051 {
15052 case 4:
15053 putc ('s', file);
15054 return;
15055
15056 case 8:
15057 putc ('l', file);
15058 return;
15059
15060 case 12:
15061 case 16:
15062 putc ('t', file);
15063 return;
15064
15065 default:
15066 break;
15067 }
15068 }
15069 else
15070 {
15071 output_operand_lossage
15072 ("invalid operand type used with operand code 'Z'");
15073 return;
15074 }
15075
15076 output_operand_lossage
15077 ("invalid operand size for operand code 'Z'");
15078 return;
15079
15080 case 'd':
15081 case 'b':
15082 case 'w':
15083 case 'k':
15084 case 'q':
15085 case 'h':
15086 case 't':
15087 case 'g':
15088 case 'y':
15089 case 'x':
15090 case 'X':
15091 case 'P':
15092 case 'p':
15093 break;
15094
15095 case 's':
15096 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15097 {
15098 ix86_print_operand (file, x, 0);
15099 fputs (", ", file);
15100 }
15101 return;
15102
15103 case 'Y':
15104 switch (GET_CODE (x))
15105 {
15106 case NE:
15107 fputs ("neq", file);
15108 break;
15109 case EQ:
15110 fputs ("eq", file);
15111 break;
15112 case GE:
15113 case GEU:
15114 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15115 break;
15116 case GT:
15117 case GTU:
15118 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15119 break;
15120 case LE:
15121 case LEU:
15122 fputs ("le", file);
15123 break;
15124 case LT:
15125 case LTU:
15126 fputs ("lt", file);
15127 break;
15128 case UNORDERED:
15129 fputs ("unord", file);
15130 break;
15131 case ORDERED:
15132 fputs ("ord", file);
15133 break;
15134 case UNEQ:
15135 fputs ("ueq", file);
15136 break;
15137 case UNGE:
15138 fputs ("nlt", file);
15139 break;
15140 case UNGT:
15141 fputs ("nle", file);
15142 break;
15143 case UNLE:
15144 fputs ("ule", file);
15145 break;
15146 case UNLT:
15147 fputs ("ult", file);
15148 break;
15149 case LTGT:
15150 fputs ("une", file);
15151 break;
15152 default:
15153 output_operand_lossage ("operand is not a condition code, "
15154 "invalid operand code 'Y'");
15155 return;
15156 }
15157 return;
15158
15159 case 'D':
15160 /* Little bit of braindamage here. The SSE compare instructions
15161 use completely different names for the comparisons than the
15162 fp conditional moves do. */
15163 switch (GET_CODE (x))
15164 {
15165 case UNEQ:
15166 if (TARGET_AVX)
15167 {
15168 fputs ("eq_us", file);
15169 break;
15170 }
15171 case EQ:
15172 fputs ("eq", file);
15173 break;
15174 case UNLT:
15175 if (TARGET_AVX)
15176 {
15177 fputs ("nge", file);
15178 break;
15179 }
15180 case LT:
15181 fputs ("lt", file);
15182 break;
15183 case UNLE:
15184 if (TARGET_AVX)
15185 {
15186 fputs ("ngt", file);
15187 break;
15188 }
15189 case LE:
15190 fputs ("le", file);
15191 break;
15192 case UNORDERED:
15193 fputs ("unord", file);
15194 break;
15195 case LTGT:
15196 if (TARGET_AVX)
15197 {
15198 fputs ("neq_oq", file);
15199 break;
15200 }
15201 case NE:
15202 fputs ("neq", file);
15203 break;
15204 case GE:
15205 if (TARGET_AVX)
15206 {
15207 fputs ("ge", file);
15208 break;
15209 }
15210 case UNGE:
15211 fputs ("nlt", file);
15212 break;
15213 case GT:
15214 if (TARGET_AVX)
15215 {
15216 fputs ("gt", file);
15217 break;
15218 }
15219 case UNGT:
15220 fputs ("nle", file);
15221 break;
15222 case ORDERED:
15223 fputs ("ord", file);
15224 break;
15225 default:
15226 output_operand_lossage ("operand is not a condition code, "
15227 "invalid operand code 'D'");
15228 return;
15229 }
15230 return;
15231
15232 case 'F':
15233 case 'f':
15234 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15235 if (ASSEMBLER_DIALECT == ASM_ATT)
15236 putc ('.', file);
15237 #endif
15238
15239 case 'C':
15240 case 'c':
15241 if (!COMPARISON_P (x))
15242 {
15243 output_operand_lossage ("operand is not a condition code, "
15244 "invalid operand code '%c'", code);
15245 return;
15246 }
15247 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15248 code == 'c' || code == 'f',
15249 code == 'F' || code == 'f',
15250 file);
15251 return;
15252
15253 case 'H':
15254 if (!offsettable_memref_p (x))
15255 {
15256 output_operand_lossage ("operand is not an offsettable memory "
15257 "reference, invalid operand code 'H'");
15258 return;
15259 }
15260 /* It doesn't actually matter what mode we use here, as we're
15261 only going to use this for printing. */
15262 x = adjust_address_nv (x, DImode, 8);
15263 /* Output 'qword ptr' for intel assembler dialect. */
15264 if (ASSEMBLER_DIALECT == ASM_INTEL)
15265 code = 'q';
15266 break;
15267
15268 case 'K':
15269 gcc_assert (CONST_INT_P (x));
15270
15271 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15272 #ifdef HAVE_AS_IX86_HLE
15273 fputs ("xacquire ", file);
15274 #else
15275 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15276 #endif
15277 else if (INTVAL (x) & IX86_HLE_RELEASE)
15278 #ifdef HAVE_AS_IX86_HLE
15279 fputs ("xrelease ", file);
15280 #else
15281 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15282 #endif
15283 /* We do not want to print the value of the operand. */
15284 return;
15285
15286 case 'N':
15287 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15288 fputs ("{z}", file);
15289 return;
15290
15291 case 'r':
15292 gcc_assert (CONST_INT_P (x));
15293 gcc_assert (INTVAL (x) == ROUND_SAE);
15294
15295 if (ASSEMBLER_DIALECT == ASM_INTEL)
15296 fputs (", ", file);
15297
15298 fputs ("{sae}", file);
15299
15300 if (ASSEMBLER_DIALECT == ASM_ATT)
15301 fputs (", ", file);
15302
15303 return;
15304
15305 case 'R':
15306 gcc_assert (CONST_INT_P (x));
15307
15308 if (ASSEMBLER_DIALECT == ASM_INTEL)
15309 fputs (", ", file);
15310
15311 switch (INTVAL (x))
15312 {
15313 case ROUND_NEAREST_INT | ROUND_SAE:
15314 fputs ("{rn-sae}", file);
15315 break;
15316 case ROUND_NEG_INF | ROUND_SAE:
15317 fputs ("{rd-sae}", file);
15318 break;
15319 case ROUND_POS_INF | ROUND_SAE:
15320 fputs ("{ru-sae}", file);
15321 break;
15322 case ROUND_ZERO | ROUND_SAE:
15323 fputs ("{rz-sae}", file);
15324 break;
15325 default:
15326 gcc_unreachable ();
15327 }
15328
15329 if (ASSEMBLER_DIALECT == ASM_ATT)
15330 fputs (", ", file);
15331
15332 return;
15333
15334 case '*':
15335 if (ASSEMBLER_DIALECT == ASM_ATT)
15336 putc ('*', file);
15337 return;
15338
15339 case '&':
15340 {
15341 const char *name = get_some_local_dynamic_name ();
15342 if (name == NULL)
15343 output_operand_lossage ("'%%&' used without any "
15344 "local dynamic TLS references");
15345 else
15346 assemble_name (file, name);
15347 return;
15348 }
15349
15350 case '+':
15351 {
15352 rtx x;
15353
15354 if (!optimize
15355 || optimize_function_for_size_p (cfun)
15356 || !TARGET_BRANCH_PREDICTION_HINTS)
15357 return;
15358
15359 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15360 if (x)
15361 {
15362 int pred_val = XINT (x, 0);
15363
15364 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15365 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15366 {
15367 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15368 bool cputaken
15369 = final_forward_branch_p (current_output_insn) == 0;
15370
15371 /* Emit hints only when the default branch prediction
15372 heuristics would fail. */
15373 if (taken != cputaken)
15374 {
15375 /* We use 3e (DS) prefix for taken branches and
15376 2e (CS) prefix for not taken branches. */
15377 if (taken)
15378 fputs ("ds ; ", file);
15379 else
15380 fputs ("cs ; ", file);
15381 }
15382 }
15383 }
15384 return;
15385 }
15386
15387 case ';':
15388 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15389 putc (';', file);
15390 #endif
15391 return;
15392
15393 case '@':
15394 if (ASSEMBLER_DIALECT == ASM_ATT)
15395 putc ('%', file);
15396
15397 /* The kernel uses a different segment register for performance
15398 reasons: this way a system call does not have to trash the
15399 userspace segment register, which would be expensive. */
15400 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15401 fputs ("fs", file);
15402 else
15403 fputs ("gs", file);
15404 return;
15405
15406 case '~':
15407 putc (TARGET_AVX2 ? 'i' : 'f', file);
15408 return;
15409
15410 case '^':
15411 if (TARGET_64BIT && Pmode != word_mode)
15412 fputs ("addr32 ", file);
15413 return;
15414
15415 default:
15416 output_operand_lossage ("invalid operand code '%c'", code);
15417 }
15418 }
15419
15420 if (REG_P (x))
15421 print_reg (x, code, file);
15422
15423 else if (MEM_P (x))
15424 {
15425 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15426 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15427 && GET_MODE (x) != BLKmode)
15428 {
15429 const char * size;
15430 switch (GET_MODE_SIZE (GET_MODE (x)))
15431 {
15432 case 1: size = "BYTE"; break;
15433 case 2: size = "WORD"; break;
15434 case 4: size = "DWORD"; break;
15435 case 8: size = "QWORD"; break;
15436 case 12: size = "TBYTE"; break;
15437 case 16:
15438 if (GET_MODE (x) == XFmode)
15439 size = "TBYTE";
15440 else
15441 size = "XMMWORD";
15442 break;
15443 case 32: size = "YMMWORD"; break;
15444 case 64: size = "ZMMWORD"; break;
15445 default:
15446 gcc_unreachable ();
15447 }
15448
15449 /* Check for explicit size override (codes 'b', 'w', 'k',
15450 'q' and 'x') */
15451 if (code == 'b')
15452 size = "BYTE";
15453 else if (code == 'w')
15454 size = "WORD";
15455 else if (code == 'k')
15456 size = "DWORD";
15457 else if (code == 'q')
15458 size = "QWORD";
15459 else if (code == 'x')
15460 size = "XMMWORD";
15461
15462 fputs (size, file);
15463 fputs (" PTR ", file);
15464 }
15465
15466 x = XEXP (x, 0);
15467 /* Avoid (%rip) for call operands. */
15468 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15469 && !CONST_INT_P (x))
15470 output_addr_const (file, x);
15471 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15472 output_operand_lossage ("invalid constraints for operand");
15473 else
15474 output_address (x);
15475 }
15476
15477 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15478 {
15479 REAL_VALUE_TYPE r;
15480 long l;
15481
15482 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15483 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15484
15485 if (ASSEMBLER_DIALECT == ASM_ATT)
15486 putc ('$', file);
15487 /* Sign-extend the 32-bit SFmode immediate to 8 bytes. */
15488 if (code == 'q')
15489 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15490 (unsigned long long) (int) l);
15491 else
15492 fprintf (file, "0x%08x", (unsigned int) l);
15493 }
15494
15495 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15496 {
15497 REAL_VALUE_TYPE r;
15498 long l[2];
15499
15500 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15501 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15502
15503 if (ASSEMBLER_DIALECT == ASM_ATT)
15504 putc ('$', file);
15505 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15506 }
15507
15508 /* These float cases don't actually occur as immediate operands. */
15509 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15510 {
15511 char dstr[30];
15512
15513 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15514 fputs (dstr, file);
15515 }
15516
15517 else
15518 {
15519 /* We have patterns that allow zero sets of memory, for instance.
15520 In 64-bit mode, we should probably support all 8-byte vectors,
15521 since we can in fact encode that into an immediate. */
15522 if (GET_CODE (x) == CONST_VECTOR)
15523 {
15524 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15525 x = const0_rtx;
15526 }
15527
15528 if (code != 'P' && code != 'p')
15529 {
15530 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15531 {
15532 if (ASSEMBLER_DIALECT == ASM_ATT)
15533 putc ('$', file);
15534 }
15535 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15536 || GET_CODE (x) == LABEL_REF)
15537 {
15538 if (ASSEMBLER_DIALECT == ASM_ATT)
15539 putc ('$', file);
15540 else
15541 fputs ("OFFSET FLAT:", file);
15542 }
15543 }
15544 if (CONST_INT_P (x))
15545 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15546 else if (flag_pic || MACHOPIC_INDIRECT)
15547 output_pic_addr_const (file, x, code);
15548 else
15549 output_addr_const (file, x);
15550 }
15551 }
15552
15553 static bool
15554 ix86_print_operand_punct_valid_p (unsigned char code)
15555 {
15556 return (code == '@' || code == '*' || code == '+' || code == '&'
15557 || code == ';' || code == '~' || code == '^');
15558 }
15559 \f
15560 /* Print a memory operand whose address is ADDR. */
15561
15562 static void
15563 ix86_print_operand_address (FILE *file, rtx addr)
15564 {
15565 struct ix86_address parts;
15566 rtx base, index, disp;
15567 int scale;
15568 int ok;
15569 bool vsib = false;
15570 int code = 0;
15571
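/* A VSIB address is wrapped in an UNSPEC carrying the base address,
   the vector index register and the scale; unwrap it before
   decomposing the base part.  */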
15572 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15573 {
15574 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15575 gcc_assert (parts.index == NULL_RTX);
15576 parts.index = XVECEXP (addr, 0, 1);
15577 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15578 addr = XVECEXP (addr, 0, 0);
15579 vsib = true;
15580 }
15581 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15582 {
15583 gcc_assert (TARGET_64BIT);
15584 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15585 code = 'q';
15586 }
15587 else
15588 ok = ix86_decompose_address (addr, &parts);
15589
15590 gcc_assert (ok);
15591
15592 base = parts.base;
15593 index = parts.index;
15594 disp = parts.disp;
15595 scale = parts.scale;
15596
15597 switch (parts.seg)
15598 {
15599 case SEG_DEFAULT:
15600 break;
15601 case SEG_FS:
15602 case SEG_GS:
15603 if (ASSEMBLER_DIALECT == ASM_ATT)
15604 putc ('%', file);
15605 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15606 break;
15607 default:
15608 gcc_unreachable ();
15609 }
15610
15611 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
15612 if (TARGET_64BIT && !base && !index)
15613 {
15614 rtx symbol = disp;
15615
15616 if (GET_CODE (disp) == CONST
15617 && GET_CODE (XEXP (disp, 0)) == PLUS
15618 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15619 symbol = XEXP (XEXP (disp, 0), 0);
15620
15621 if (GET_CODE (symbol) == LABEL_REF
15622 || (GET_CODE (symbol) == SYMBOL_REF
15623 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15624 base = pc_rtx;
15625 }
15626 if (!base && !index)
15627 {
15628 /* A displacement-only address requires special attention. */
15629
15630 if (CONST_INT_P (disp))
15631 {
15632 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15633 fputs ("ds:", file);
15634 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15635 }
15636 else if (flag_pic)
15637 output_pic_addr_const (file, disp, 0);
15638 else
15639 output_addr_const (file, disp);
15640 }
15641 else
15642 {
15643 /* Print SImode register names to force addr32 prefix. */
15644 if (SImode_address_operand (addr, VOIDmode))
15645 {
15646 #ifdef ENABLE_CHECKING
15647 gcc_assert (TARGET_64BIT);
15648 switch (GET_CODE (addr))
15649 {
15650 case SUBREG:
15651 gcc_assert (GET_MODE (addr) == SImode);
15652 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15653 break;
15654 case ZERO_EXTEND:
15655 case AND:
15656 gcc_assert (GET_MODE (addr) == DImode);
15657 break;
15658 default:
15659 gcc_unreachable ();
15660 }
15661 #endif
15662 gcc_assert (!code);
15663 code = 'k';
15664 }
15665 else if (code == 0
15666 && TARGET_X32
15667 && disp
15668 && CONST_INT_P (disp)
15669 && INTVAL (disp) < -16*1024*1024)
15670 {
15671 /* X32 runs in 64-bit mode, where the displacement DISP in an
15672 address DISP(%r64) is encoded as a 32-bit immediate and
15673 sign-extended to 64 bits. For -0x40000300(%r64), the
15674 address is %r64 + 0xffffffffbffffd00. When %r64 <
15675 0x40000300, e.g. 0x37ffe064, the address is 0xfffffffff7ffdd64,
15676 which is invalid for x32. The correct address is %r64
15677 - 0x40000300 == 0xf7ffdd64. To properly encode
15678 -0x40000300(%r64) for x32, we zero-extend the negative
15679 displacement by forcing the addr32 prefix, which truncates
15680 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15681 zero-extend all negative displacements, including -1(%rsp).
15682 However, for small negative displacements, sign-extension
15683 won't cause overflow. We only zero-extend negative
15684 displacements if they are < -16*1024*1024, which is also the
15685 bound used to check legitimate address displacements for PIC. */
15686 code = 'k';
15687 }
15688
15689 if (ASSEMBLER_DIALECT == ASM_ATT)
15690 {
15691 if (disp)
15692 {
15693 if (flag_pic)
15694 output_pic_addr_const (file, disp, 0);
15695 else if (GET_CODE (disp) == LABEL_REF)
15696 output_asm_label (disp);
15697 else
15698 output_addr_const (file, disp);
15699 }
15700
15701 putc ('(', file);
15702 if (base)
15703 print_reg (base, code, file);
15704 if (index)
15705 {
15706 putc (',', file);
15707 print_reg (index, vsib ? 0 : code, file);
15708 if (scale != 1 || vsib)
15709 fprintf (file, ",%d", scale);
15710 }
15711 putc (')', file);
15712 }
15713 else
15714 {
15715 rtx offset = NULL_RTX;
15716
15717 if (disp)
15718 {
15719 /* Pull out the offset of a symbol; print any symbol itself. */
15720 if (GET_CODE (disp) == CONST
15721 && GET_CODE (XEXP (disp, 0)) == PLUS
15722 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15723 {
15724 offset = XEXP (XEXP (disp, 0), 1);
15725 disp = gen_rtx_CONST (VOIDmode,
15726 XEXP (XEXP (disp, 0), 0));
15727 }
15728
15729 if (flag_pic)
15730 output_pic_addr_const (file, disp, 0);
15731 else if (GET_CODE (disp) == LABEL_REF)
15732 output_asm_label (disp);
15733 else if (CONST_INT_P (disp))
15734 offset = disp;
15735 else
15736 output_addr_const (file, disp);
15737 }
15738
15739 putc ('[', file);
15740 if (base)
15741 {
15742 print_reg (base, code, file);
15743 if (offset)
15744 {
15745 if (INTVAL (offset) >= 0)
15746 putc ('+', file);
15747 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15748 }
15749 }
15750 else if (offset)
15751 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15752 else
15753 putc ('0', file);
15754
15755 if (index)
15756 {
15757 putc ('+', file);
15758 print_reg (index, vsib ? 0 : code, file);
15759 if (scale != 1 || vsib)
15760 fprintf (file, "*%d", scale);
15761 }
15762 putc (']', file);
15763 }
15764 }
15765 }
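/* Illustrative example (not from the original source): for the address
   %rax + %rcx*4 + 8, the code above prints "8(%rax,%rcx,4)" under ASM_ATT
   and "[rax+8+rcx*4]" under ASM_INTEL; for a VSIB address the index is a
   vector register and the scale is always printed.  */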
15766
15767 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15768
15769 static bool
15770 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15771 {
15772 rtx op;
15773
15774 if (GET_CODE (x) != UNSPEC)
15775 return false;
15776
15777 op = XVECEXP (x, 0, 0);
15778 switch (XINT (x, 1))
15779 {
15780 case UNSPEC_GOTTPOFF:
15781 output_addr_const (file, op);
15782 /* FIXME: This might be @TPOFF in Sun ld. */
15783 fputs ("@gottpoff", file);
15784 break;
15785 case UNSPEC_TPOFF:
15786 output_addr_const (file, op);
15787 fputs ("@tpoff", file);
15788 break;
15789 case UNSPEC_NTPOFF:
15790 output_addr_const (file, op);
15791 if (TARGET_64BIT)
15792 fputs ("@tpoff", file);
15793 else
15794 fputs ("@ntpoff", file);
15795 break;
15796 case UNSPEC_DTPOFF:
15797 output_addr_const (file, op);
15798 fputs ("@dtpoff", file);
15799 break;
15800 case UNSPEC_GOTNTPOFF:
15801 output_addr_const (file, op);
15802 if (TARGET_64BIT)
15803 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15804 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15805 else
15806 fputs ("@gotntpoff", file);
15807 break;
15808 case UNSPEC_INDNTPOFF:
15809 output_addr_const (file, op);
15810 fputs ("@indntpoff", file);
15811 break;
15812 #if TARGET_MACHO
15813 case UNSPEC_MACHOPIC_OFFSET:
15814 output_addr_const (file, op);
15815 putc ('-', file);
15816 machopic_output_function_base_name (file);
15817 break;
15818 #endif
15819
15820 case UNSPEC_STACK_CHECK:
15821 {
15822 int offset;
15823
15824 gcc_assert (flag_split_stack);
15825
15826 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15827 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15828 #else
15829 gcc_unreachable ();
15830 #endif
15831
15832 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15833 }
15834 break;
15835
15836 default:
15837 return false;
15838 }
15839
15840 return true;
15841 }
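/* Illustrative example (not from the original source): a typical
   initial-exec TLS access to a thread-local variable `x' on x86-64 is
   emitted as

     movq  x@gottpoff(%rip), %rax
     movq  %fs:0(%rax), %rax

   where the "@gottpoff" suffix and the "(%rip)"/"[rip]" part come from the
   UNSPEC_GOTNTPOFF case above.  */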
15842 \f
15843 /* Split one or more double-mode RTL references into pairs of half-mode
15844 references. The RTL can be REG, offsettable MEM, integer constant, or
15845 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15846 split and "num" is its length. lo_half and hi_half are output arrays
15847 that parallel "operands". */
15848
15849 void
15850 split_double_mode (enum machine_mode mode, rtx operands[],
15851 int num, rtx lo_half[], rtx hi_half[])
15852 {
15853 enum machine_mode half_mode;
15854 unsigned int byte;
15855
15856 switch (mode)
15857 {
15858 case TImode:
15859 half_mode = DImode;
15860 break;
15861 case DImode:
15862 half_mode = SImode;
15863 break;
15864 default:
15865 gcc_unreachable ();
15866 }
15867
15868 byte = GET_MODE_SIZE (half_mode);
15869
15870 while (num--)
15871 {
15872 rtx op = operands[num];
15873
15874 /* simplify_subreg refuses to split volatile memory references,
15875 but we still have to handle them. */
15876 if (MEM_P (op))
15877 {
15878 lo_half[num] = adjust_address (op, half_mode, 0);
15879 hi_half[num] = adjust_address (op, half_mode, byte);
15880 }
15881 else
15882 {
15883 lo_half[num] = simplify_gen_subreg (half_mode, op,
15884 GET_MODE (op) == VOIDmode
15885 ? mode : GET_MODE (op), 0);
15886 hi_half[num] = simplify_gen_subreg (half_mode, op,
15887 GET_MODE (op) == VOIDmode
15888 ? mode : GET_MODE (op), byte);
15889 }
15890 }
15891 }
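/* Illustrative use (a hypothetical caller, not from the original source):
   a DImode move splitter on a 32-bit target could do

     rtx lo[2], hi[2];
     split_double_mode (DImode, operands, 2, lo, hi);
     emit_move_insn (lo[0], lo[1]);
     emit_move_insn (hi[0], hi[1]);

   turning one 64-bit move into two SImode moves.  */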
15892 \f
15893 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15894 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15895 is the expression of the binary operation. The output may either be
15896 emitted here, or returned to the caller, like all output_* functions.
15897
15898 There is no guarantee that the operands are the same mode, as they
15899 might be within FLOAT or FLOAT_EXTEND expressions. */
15900
15901 #ifndef SYSV386_COMPAT
15902 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15903 wants to fix the assemblers because that causes incompatibility
15904 with gcc. No-one wants to fix gcc because that causes
15905 incompatibility with assemblers... You can use -DSYSV386_COMPAT=0
15906 if you recompile both gcc and gas with that option. */
15907 #define SYSV386_COMPAT 1
15908 #endif
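/* A note on the output templates below (informational): text of the form
   "{att|intel}" in an output template is selected by ASSEMBLER_DIALECT, so
   e.g. "fadd\t{%2, %0|%0, %2}" prints the operands in AT&T order
   ("fadd %2, %0") for ASM_ATT and in Intel order ("fadd %0, %2") for
   ASM_INTEL.  */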
15909
15910 const char *
15911 output_387_binary_op (rtx insn, rtx *operands)
15912 {
15913 static char buf[40];
15914 const char *p;
15915 const char *ssep;
15916 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15917
15918 #ifdef ENABLE_CHECKING
15919 /* Even if we do not want to check the inputs, this documents the input
15920 constraints, which helps in understanding the following code. */
15921 if (STACK_REG_P (operands[0])
15922 && ((REG_P (operands[1])
15923 && REGNO (operands[0]) == REGNO (operands[1])
15924 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15925 || (REG_P (operands[2])
15926 && REGNO (operands[0]) == REGNO (operands[2])
15927 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15928 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15929 ; /* ok */
15930 else
15931 gcc_assert (is_sse);
15932 #endif
15933
15934 switch (GET_CODE (operands[3]))
15935 {
15936 case PLUS:
15937 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15938 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15939 p = "fiadd";
15940 else
15941 p = "fadd";
15942 ssep = "vadd";
15943 break;
15944
15945 case MINUS:
15946 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15947 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15948 p = "fisub";
15949 else
15950 p = "fsub";
15951 ssep = "vsub";
15952 break;
15953
15954 case MULT:
15955 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15956 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15957 p = "fimul";
15958 else
15959 p = "fmul";
15960 ssep = "vmul";
15961 break;
15962
15963 case DIV:
15964 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15965 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15966 p = "fidiv";
15967 else
15968 p = "fdiv";
15969 ssep = "vdiv";
15970 break;
15971
15972 default:
15973 gcc_unreachable ();
15974 }
15975
15976 if (is_sse)
15977 {
15978 if (TARGET_AVX)
15979 {
15980 strcpy (buf, ssep);
15981 if (GET_MODE (operands[0]) == SFmode)
15982 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15983 else
15984 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15985 }
15986 else
15987 {
15988 strcpy (buf, ssep + 1);
15989 if (GET_MODE (operands[0]) == SFmode)
15990 strcat (buf, "ss\t{%2, %0|%0, %2}");
15991 else
15992 strcat (buf, "sd\t{%2, %0|%0, %2}");
15993 }
15994 return buf;
15995 }
15996 strcpy (buf, p);
15997
15998 switch (GET_CODE (operands[3]))
15999 {
16000 case MULT:
16001 case PLUS:
16002 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16003 {
16004 rtx temp = operands[2];
16005 operands[2] = operands[1];
16006 operands[1] = temp;
16007 }
16008
16009 /* We know operands[0] == operands[1]. */
16010
16011 if (MEM_P (operands[2]))
16012 {
16013 p = "%Z2\t%2";
16014 break;
16015 }
16016
16017 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16018 {
16019 if (STACK_TOP_P (operands[0]))
16020 /* How is it that we are storing to a dead operand[2]?
16021 Well, presumably operands[1] is dead too. We can't
16022 store the result to st(0) as st(0) gets popped on this
16023 instruction. Instead store to operands[2] (which I
16024 think has to be st(1)). st(1) will be popped later.
16025 gcc <= 2.8.1 didn't have this check and generated
16026 assembly code that the Unixware assembler rejected. */
16027 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16028 else
16029 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16030 break;
16031 }
16032
16033 if (STACK_TOP_P (operands[0]))
16034 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16035 else
16036 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16037 break;
16038
16039 case MINUS:
16040 case DIV:
16041 if (MEM_P (operands[1]))
16042 {
16043 p = "r%Z1\t%1";
16044 break;
16045 }
16046
16047 if (MEM_P (operands[2]))
16048 {
16049 p = "%Z2\t%2";
16050 break;
16051 }
16052
16053 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16054 {
16055 #if SYSV386_COMPAT
16056 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16057 derived assemblers, confusingly reverse the direction of
16058 the operation for fsub{r} and fdiv{r} when the
16059 destination register is not st(0). The Intel assembler
16060 doesn't have this brain damage. Read !SYSV386_COMPAT to
16061 figure out what the hardware really does. */
16062 if (STACK_TOP_P (operands[0]))
16063 p = "{p\t%0, %2|rp\t%2, %0}";
16064 else
16065 p = "{rp\t%2, %0|p\t%0, %2}";
16066 #else
16067 if (STACK_TOP_P (operands[0]))
16068 /* As above for fmul/fadd, we can't store to st(0). */
16069 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16070 else
16071 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16072 #endif
16073 break;
16074 }
16075
16076 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16077 {
16078 #if SYSV386_COMPAT
16079 if (STACK_TOP_P (operands[0]))
16080 p = "{rp\t%0, %1|p\t%1, %0}";
16081 else
16082 p = "{p\t%1, %0|rp\t%0, %1}";
16083 #else
16084 if (STACK_TOP_P (operands[0]))
16085 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16086 else
16087 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16088 #endif
16089 break;
16090 }
16091
16092 if (STACK_TOP_P (operands[0]))
16093 {
16094 if (STACK_TOP_P (operands[1]))
16095 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16096 else
16097 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16098 break;
16099 }
16100 else if (STACK_TOP_P (operands[1]))
16101 {
16102 #if SYSV386_COMPAT
16103 p = "{\t%1, %0|r\t%0, %1}";
16104 #else
16105 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16106 #endif
16107 }
16108 else
16109 {
16110 #if SYSV386_COMPAT
16111 p = "{r\t%2, %0|\t%0, %2}";
16112 #else
16113 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16114 #endif
16115 }
16116 break;
16117
16118 default:
16119 gcc_unreachable ();
16120 }
16121
16122 strcat (buf, p);
16123 return buf;
16124 }
16125
16126 /* Check if a 256bit AVX register is referenced inside of EXP. */
16127
16128 static bool
16129 ix86_check_avx256_register (const_rtx exp)
16130 {
16131 if (GET_CODE (exp) == SUBREG)
16132 exp = SUBREG_REG (exp);
16133
16134 return (REG_P (exp)
16135 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16136 }
16137
16138 /* Return needed mode for entity in optimize_mode_switching pass. */
16139
16140 static int
16141 ix86_avx_u128_mode_needed (rtx_insn *insn)
16142 {
16143 if (CALL_P (insn))
16144 {
16145 rtx link;
16146
16147 /* Needed mode is set to AVX_U128_CLEAN if there are
16148 no 256bit modes used in function arguments. */
16149 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16150 link;
16151 link = XEXP (link, 1))
16152 {
16153 if (GET_CODE (XEXP (link, 0)) == USE)
16154 {
16155 rtx arg = XEXP (XEXP (link, 0), 0);
16156
16157 if (ix86_check_avx256_register (arg))
16158 return AVX_U128_DIRTY;
16159 }
16160 }
16161
16162 return AVX_U128_CLEAN;
16163 }
16164
16165 /* Require DIRTY mode if a 256bit AVX register is referenced. The hardware
16166 changes state only when a 256bit register is written to, but we need
16167 to prevent the compiler from moving the optimal insertion point above
16168 a read from a 256bit register. */
16169 subrtx_iterator::array_type array;
16170 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16171 if (ix86_check_avx256_register (*iter))
16172 return AVX_U128_DIRTY;
16173
16174 return AVX_U128_ANY;
16175 }
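/* Illustrative example (hypothetical code, not from the original source):
   in a function that computes with __m256d values, a call such as

     foo (1);                      scalar arguments only

   needs AVX_U128_CLEAN, so the mode-switching pass inserts a vzeroupper
   before it, while a call like bar (v), whose __m256d argument is passed in
   a 256-bit register, needs AVX_U128_DIRTY and gets no vzeroupper.  */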
16176
16177 /* Return mode that i387 must be switched into
16178 prior to the execution of insn. */
16179
16180 static int
16181 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16182 {
16183 enum attr_i387_cw mode;
16184
16185 /* The mode UNINITIALIZED is used to store the control word after a
16186 function call or ASM pattern. The mode ANY specifies that the insn
16187 has no requirements on the control word and makes no changes to the
16188 bits we are interested in. */
16189
16190 if (CALL_P (insn)
16191 || (NONJUMP_INSN_P (insn)
16192 && (asm_noperands (PATTERN (insn)) >= 0
16193 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16194 return I387_CW_UNINITIALIZED;
16195
16196 if (recog_memoized (insn) < 0)
16197 return I387_CW_ANY;
16198
16199 mode = get_attr_i387_cw (insn);
16200
16201 switch (entity)
16202 {
16203 case I387_TRUNC:
16204 if (mode == I387_CW_TRUNC)
16205 return mode;
16206 break;
16207
16208 case I387_FLOOR:
16209 if (mode == I387_CW_FLOOR)
16210 return mode;
16211 break;
16212
16213 case I387_CEIL:
16214 if (mode == I387_CW_CEIL)
16215 return mode;
16216 break;
16217
16218 case I387_MASK_PM:
16219 if (mode == I387_CW_MASK_PM)
16220 return mode;
16221 break;
16222
16223 default:
16224 gcc_unreachable ();
16225 }
16226
16227 return I387_CW_ANY;
16228 }
16229
16230 /* Return mode that entity must be switched into
16231 prior to the execution of insn. */
16232
16233 static int
16234 ix86_mode_needed (int entity, rtx_insn *insn)
16235 {
16236 switch (entity)
16237 {
16238 case AVX_U128:
16239 return ix86_avx_u128_mode_needed (insn);
16240 case I387_TRUNC:
16241 case I387_FLOOR:
16242 case I387_CEIL:
16243 case I387_MASK_PM:
16244 return ix86_i387_mode_needed (entity, insn);
16245 default:
16246 gcc_unreachable ();
16247 }
16248 return 0;
16249 }
16250
16251 /* Check if a 256bit AVX register is referenced in stores. */
16252
16253 static void
16254 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16255 {
16256 if (ix86_check_avx256_register (dest))
16257 {
16258 bool *used = (bool *) data;
16259 *used = true;
16260 }
16261 }
16262
16263 /* Calculate mode of upper 128bit AVX registers after the insn. */
16264
16265 static int
16266 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16267 {
16268 rtx pat = PATTERN (insn);
16269
16270 if (vzeroupper_operation (pat, VOIDmode)
16271 || vzeroall_operation (pat, VOIDmode))
16272 return AVX_U128_CLEAN;
16273
16274 /* We know that the state is clean after a CALL insn if no 256bit
16275 register is used for the function return value. */
16276 if (CALL_P (insn))
16277 {
16278 bool avx_reg256_found = false;
16279 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16280
16281 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16282 }
16283
16284 /* Otherwise, return the current mode. Remember that if the insn
16285 references AVX 256bit registers, the mode was already changed
16286 to DIRTY by MODE_NEEDED. */
16287 return mode;
16288 }
16289
16290 /* Return the mode that an insn results in. */
16291
16292 int
16293 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16294 {
16295 switch (entity)
16296 {
16297 case AVX_U128:
16298 return ix86_avx_u128_mode_after (mode, insn);
16299 case I387_TRUNC:
16300 case I387_FLOOR:
16301 case I387_CEIL:
16302 case I387_MASK_PM:
16303 return mode;
16304 default:
16305 gcc_unreachable ();
16306 }
16307 }
16308
16309 static int
16310 ix86_avx_u128_mode_entry (void)
16311 {
16312 tree arg;
16313
16314 /* Entry mode is set to AVX_U128_DIRTY if there are
16315 256bit modes used in function arguments. */
16316 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16317 arg = TREE_CHAIN (arg))
16318 {
16319 rtx incoming = DECL_INCOMING_RTL (arg);
16320
16321 if (incoming && ix86_check_avx256_register (incoming))
16322 return AVX_U128_DIRTY;
16323 }
16324
16325 return AVX_U128_CLEAN;
16326 }
16327
16328 /* Return a mode that ENTITY is assumed to be
16329 switched to at function entry. */
16330
16331 static int
16332 ix86_mode_entry (int entity)
16333 {
16334 switch (entity)
16335 {
16336 case AVX_U128:
16337 return ix86_avx_u128_mode_entry ();
16338 case I387_TRUNC:
16339 case I387_FLOOR:
16340 case I387_CEIL:
16341 case I387_MASK_PM:
16342 return I387_CW_ANY;
16343 default:
16344 gcc_unreachable ();
16345 }
16346 }
16347
16348 static int
16349 ix86_avx_u128_mode_exit (void)
16350 {
16351 rtx reg = crtl->return_rtx;
16352
16353 /* The exit mode is set to AVX_U128_DIRTY if a 256bit mode is
16354 used in the function return register. */
16355 if (reg && ix86_check_avx256_register (reg))
16356 return AVX_U128_DIRTY;
16357
16358 return AVX_U128_CLEAN;
16359 }
16360
16361 /* Return a mode that ENTITY is assumed to be
16362 switched to at function exit. */
16363
16364 static int
16365 ix86_mode_exit (int entity)
16366 {
16367 switch (entity)
16368 {
16369 case AVX_U128:
16370 return ix86_avx_u128_mode_exit ();
16371 case I387_TRUNC:
16372 case I387_FLOOR:
16373 case I387_CEIL:
16374 case I387_MASK_PM:
16375 return I387_CW_ANY;
16376 default:
16377 gcc_unreachable ();
16378 }
16379 }
16380
16381 static int
16382 ix86_mode_priority (int, int n)
16383 {
16384 return n;
16385 }
16386
16387 /* Output code to initialize the control word copies used by the trunc?f?i
16388 and rounding patterns. MODE selects the control word variant; the current
16389 control word is saved and the adjusted copy is stored in a stack slot. */
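/* For reference (informational): the x87 rounding-control field occupies
   bits 10-11 of the control word, so the masks used below are

     0x0c00  RC = 11  round toward zero (truncate)
     0x0400  RC = 01  round down, toward -inf (floor)
     0x0800  RC = 10  round up, toward +inf (ceil)
     0x0020  PM bit   mask the precision exception (for nearbyint)

   and the insv path writes the same RC values (0xc, 0x4, 0x8) into the high
   byte of the control word.  */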
16390
16391 static void
16392 emit_i387_cw_initialization (int mode)
16393 {
16394 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16395 rtx new_mode;
16396
16397 enum ix86_stack_slot slot;
16398
16399 rtx reg = gen_reg_rtx (HImode);
16400
16401 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16402 emit_move_insn (reg, copy_rtx (stored_mode));
16403
16404 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16405 || optimize_insn_for_size_p ())
16406 {
16407 switch (mode)
16408 {
16409 case I387_CW_TRUNC:
16410 /* round toward zero (truncate) */
16411 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16412 slot = SLOT_CW_TRUNC;
16413 break;
16414
16415 case I387_CW_FLOOR:
16416 /* round down toward -oo */
16417 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16418 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16419 slot = SLOT_CW_FLOOR;
16420 break;
16421
16422 case I387_CW_CEIL:
16423 /* round up toward +oo */
16424 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16425 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16426 slot = SLOT_CW_CEIL;
16427 break;
16428
16429 case I387_CW_MASK_PM:
16430 /* mask precision exception for nearbyint() */
16431 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16432 slot = SLOT_CW_MASK_PM;
16433 break;
16434
16435 default:
16436 gcc_unreachable ();
16437 }
16438 }
16439 else
16440 {
16441 switch (mode)
16442 {
16443 case I387_CW_TRUNC:
16444 /* round toward zero (truncate) */
16445 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16446 slot = SLOT_CW_TRUNC;
16447 break;
16448
16449 case I387_CW_FLOOR:
16450 /* round down toward -oo */
16451 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16452 slot = SLOT_CW_FLOOR;
16453 break;
16454
16455 case I387_CW_CEIL:
16456 /* round up toward +oo */
16457 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16458 slot = SLOT_CW_CEIL;
16459 break;
16460
16461 case I387_CW_MASK_PM:
16462 /* mask precision exception for nearbyint() */
16463 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16464 slot = SLOT_CW_MASK_PM;
16465 break;
16466
16467 default:
16468 gcc_unreachable ();
16469 }
16470 }
16471
16472 gcc_assert (slot < MAX_386_STACK_LOCALS);
16473
16474 new_mode = assign_386_stack_local (HImode, slot);
16475 emit_move_insn (new_mode, reg);
16476 }
16477
16478 /* Emit vzeroupper. */
16479
16480 void
16481 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16482 {
16483 int i;
16484
16485 /* Cancel automatic vzeroupper insertion if there are
16486 live call-saved SSE registers at the insertion point. */
16487
16488 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16489 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16490 return;
16491
16492 if (TARGET_64BIT)
16493 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16494 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16495 return;
16496
16497 emit_insn (gen_avx_vzeroupper ());
16498 }
16499
16502 /* Generate one or more insns to set ENTITY to MODE. REGS_LIVE is the
16503 set of hard registers live at the point where the insn(s) are to be
16504 inserted. */
16505
16506 static void
16507 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16508 HARD_REG_SET regs_live)
16509 {
16510 switch (entity)
16511 {
16512 case AVX_U128:
16513 if (mode == AVX_U128_CLEAN)
16514 ix86_avx_emit_vzeroupper (regs_live);
16515 break;
16516 case I387_TRUNC:
16517 case I387_FLOOR:
16518 case I387_CEIL:
16519 case I387_MASK_PM:
16520 if (mode != I387_CW_ANY
16521 && mode != I387_CW_UNINITIALIZED)
16522 emit_i387_cw_initialization (mode);
16523 break;
16524 default:
16525 gcc_unreachable ();
16526 }
16527 }
16528
16529 /* Output code for INSN to convert a float to a signed int. OPERANDS
16530 are the insn operands. The output may be [HSD]Imode and the input
16531 operand may be [SDX]Fmode. */
16532
16533 const char *
16534 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
16535 {
16536 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16537 int dimode_p = GET_MODE (operands[0]) == DImode;
16538 int round_mode = get_attr_i387_cw (insn);
16539
16540 /* Jump through a hoop or two for DImode, since the hardware has no
16541 non-popping instruction. We used to do this a different way, but
16542 that was somewhat fragile and broke with post-reload splitters. */
16543 if ((dimode_p || fisttp) && !stack_top_dies)
16544 output_asm_insn ("fld\t%y1", operands);
16545
16546 gcc_assert (STACK_TOP_P (operands[1]));
16547 gcc_assert (MEM_P (operands[0]));
16548 gcc_assert (GET_MODE (operands[1]) != TFmode);
16549
16550 if (fisttp)
16551 output_asm_insn ("fisttp%Z0\t%0", operands);
16552 else
16553 {
16554 if (round_mode != I387_CW_ANY)
16555 output_asm_insn ("fldcw\t%3", operands);
16556 if (stack_top_dies || dimode_p)
16557 output_asm_insn ("fistp%Z0\t%0", operands);
16558 else
16559 output_asm_insn ("fist%Z0\t%0", operands);
16560 if (round_mode != I387_CW_ANY)
16561 output_asm_insn ("fldcw\t%2", operands);
16562 }
16563
16564 return "";
16565 }
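/* Illustrative output (a sketch; operand numbers as in the templates above):
   for an SImode truncation without SSE3 fisttp, when the value in %st(0)
   dies, the emitted sequence is roughly

     fldcw   %3        load the truncating control word
     fistpl  %0        store the truncated integer and pop %st(0)
     fldcw   %2        restore the original control word  */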
16566
16567 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16568 have the values zero or one, indicates the ffreep insn's operand
16569 from the OPERANDS array. */
16570
16571 static const char *
16572 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16573 {
16574 if (TARGET_USE_FFREEP)
16575 #ifdef HAVE_AS_IX86_FFREEP
16576 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16577 #else
16578 {
16579 static char retval[32];
16580 int regno = REGNO (operands[opno]);
16581
16582 gcc_assert (STACK_REGNO_P (regno));
16583
16584 regno -= FIRST_STACK_REG;
16585
16586 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16587 return retval;
16588 }
16589 #endif
16590
16591 return opno ? "fstp\t%y1" : "fstp\t%y0";
16592 }
16593
16594
16595 /* Output code for INSN to compare OPERANDS. EFLAGS_P is true when fcomi
16596 should be used; UNORDERED_P is true when fucom should be used. */
16597
16598 const char *
16599 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16600 {
16601 int stack_top_dies;
16602 rtx cmp_op0, cmp_op1;
16603 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16604
16605 if (eflags_p)
16606 {
16607 cmp_op0 = operands[0];
16608 cmp_op1 = operands[1];
16609 }
16610 else
16611 {
16612 cmp_op0 = operands[1];
16613 cmp_op1 = operands[2];
16614 }
16615
16616 if (is_sse)
16617 {
16618 if (GET_MODE (operands[0]) == SFmode)
16619 if (unordered_p)
16620 return "%vucomiss\t{%1, %0|%0, %1}";
16621 else
16622 return "%vcomiss\t{%1, %0|%0, %1}";
16623 else
16624 if (unordered_p)
16625 return "%vucomisd\t{%1, %0|%0, %1}";
16626 else
16627 return "%vcomisd\t{%1, %0|%0, %1}";
16628 }
16629
16630 gcc_assert (STACK_TOP_P (cmp_op0));
16631
16632 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16633
16634 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16635 {
16636 if (stack_top_dies)
16637 {
16638 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16639 return output_387_ffreep (operands, 1);
16640 }
16641 else
16642 return "ftst\n\tfnstsw\t%0";
16643 }
16644
16645 if (STACK_REG_P (cmp_op1)
16646 && stack_top_dies
16647 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16648 && REGNO (cmp_op1) != FIRST_STACK_REG)
16649 {
16650 /* If the top of the 387 stack dies and the other operand is
16651 also a stack register that dies, then this must be a
16652 `fcompp' float compare. */
16653
16654 if (eflags_p)
16655 {
16656 /* There is no double popping fcomi variant. Fortunately,
16657 eflags is immune from the fstp's cc clobbering. */
16658 if (unordered_p)
16659 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16660 else
16661 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16662 return output_387_ffreep (operands, 0);
16663 }
16664 else
16665 {
16666 if (unordered_p)
16667 return "fucompp\n\tfnstsw\t%0";
16668 else
16669 return "fcompp\n\tfnstsw\t%0";
16670 }
16671 }
16672 else
16673 {
16674 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16675
16676 static const char * const alt[16] =
16677 {
16678 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16679 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16680 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16681 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16682
16683 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16684 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16685 NULL,
16686 NULL,
16687
16688 "fcomi\t{%y1, %0|%0, %y1}",
16689 "fcomip\t{%y1, %0|%0, %y1}",
16690 "fucomi\t{%y1, %0|%0, %y1}",
16691 "fucomip\t{%y1, %0|%0, %y1}",
16692
16693 NULL,
16694 NULL,
16695 NULL,
16696 NULL
16697 };
16698
16699 int mask;
16700 const char *ret;
16701
16702 mask = eflags_p << 3;
16703 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16704 mask |= unordered_p << 1;
16705 mask |= stack_top_dies;
16706
16707 gcc_assert (mask < 16);
16708 ret = alt[mask];
16709 gcc_assert (ret);
16710
16711 return ret;
16712 }
16713 }
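/* Worked example of the encoding above: an fcomi-style (EFLAGS_P) unordered
   compare of two FP stack registers where the stack top dies gives
   mask = (1 << 3) | (0 << 2) | (1 << 1) | 1 = 11, selecting
   "fucomip\t{%y1, %0|%0, %y1}" from the table.  */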
16714
16715 void
16716 ix86_output_addr_vec_elt (FILE *file, int value)
16717 {
16718 const char *directive = ASM_LONG;
16719
16720 #ifdef ASM_QUAD
16721 if (TARGET_LP64)
16722 directive = ASM_QUAD;
16723 #else
16724 gcc_assert (!TARGET_64BIT);
16725 #endif
16726
16727 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16728 }
16729
16730 void
16731 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16732 {
16733 const char *directive = ASM_LONG;
16734
16735 #ifdef ASM_QUAD
16736 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16737 directive = ASM_QUAD;
16738 #else
16739 gcc_assert (!TARGET_64BIT);
16740 #endif
16741 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16742 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16743 fprintf (file, "%s%s%d-%s%d\n",
16744 directive, LPREFIX, value, LPREFIX, rel);
16745 else if (HAVE_AS_GOTOFF_IN_DATA)
16746 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16747 #if TARGET_MACHO
16748 else if (TARGET_MACHO)
16749 {
16750 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16751 machopic_output_function_base_name (file);
16752 putc ('\n', file);
16753 }
16754 #endif
16755 else
16756 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16757 GOT_SYMBOL_NAME, LPREFIX, value);
16758 }
16759 \f
16760 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16761 for the target. */
16762
16763 void
16764 ix86_expand_clear (rtx dest)
16765 {
16766 rtx tmp;
16767
16768 /* We play register width games, which are only valid after reload. */
16769 gcc_assert (reload_completed);
16770
16771 /* Avoid HImode and its attendant prefix byte. */
16772 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16773 dest = gen_rtx_REG (SImode, REGNO (dest));
16774 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16775
16776 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
16777 {
16778 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16779 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16780 }
16781
16782 emit_insn (tmp);
16783 }
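/* For example (illustrative): clearing %eax normally becomes
   "xorl %eax, %eax" via the flags-clobbering parallel built above; when
   TARGET_USE_MOV0 is set and we are not optimizing for size, the plain
   "movl $0, %eax" form is kept so the flags register is left untouched.  */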
16784
16785 /* X is an unchanging MEM. If it is a constant pool reference, return
16786 the constant pool rtx, else NULL. */
16787
16788 rtx
16789 maybe_get_pool_constant (rtx x)
16790 {
16791 x = ix86_delegitimize_address (XEXP (x, 0));
16792
16793 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16794 return get_pool_constant (x);
16795
16796 return NULL_RTX;
16797 }
16798
16799 void
16800 ix86_expand_move (enum machine_mode mode, rtx operands[])
16801 {
16802 rtx op0, op1;
16803 enum tls_model model;
16804
16805 op0 = operands[0];
16806 op1 = operands[1];
16807
16808 if (GET_CODE (op1) == SYMBOL_REF)
16809 {
16810 rtx tmp;
16811
16812 model = SYMBOL_REF_TLS_MODEL (op1);
16813 if (model)
16814 {
16815 op1 = legitimize_tls_address (op1, model, true);
16816 op1 = force_operand (op1, op0);
16817 if (op1 == op0)
16818 return;
16819 op1 = convert_to_mode (mode, op1, 1);
16820 }
16821 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16822 op1 = tmp;
16823 }
16824 else if (GET_CODE (op1) == CONST
16825 && GET_CODE (XEXP (op1, 0)) == PLUS
16826 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16827 {
16828 rtx addend = XEXP (XEXP (op1, 0), 1);
16829 rtx symbol = XEXP (XEXP (op1, 0), 0);
16830 rtx tmp;
16831
16832 model = SYMBOL_REF_TLS_MODEL (symbol);
16833 if (model)
16834 tmp = legitimize_tls_address (symbol, model, true);
16835 else
16836 tmp = legitimize_pe_coff_symbol (symbol, true);
16837
16838 if (tmp)
16839 {
16840 tmp = force_operand (tmp, NULL);
16841 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16842 op0, 1, OPTAB_DIRECT);
16843 if (tmp == op0)
16844 return;
16845 op1 = convert_to_mode (mode, tmp, 1);
16846 }
16847 }
16848
16849 if ((flag_pic || MACHOPIC_INDIRECT)
16850 && symbolic_operand (op1, mode))
16851 {
16852 if (TARGET_MACHO && !TARGET_64BIT)
16853 {
16854 #if TARGET_MACHO
16855 /* dynamic-no-pic */
16856 if (MACHOPIC_INDIRECT)
16857 {
16858 rtx temp = ((reload_in_progress
16859 || ((op0 && REG_P (op0))
16860 && mode == Pmode))
16861 ? op0 : gen_reg_rtx (Pmode));
16862 op1 = machopic_indirect_data_reference (op1, temp);
16863 if (MACHOPIC_PURE)
16864 op1 = machopic_legitimize_pic_address (op1, mode,
16865 temp == op1 ? 0 : temp);
16866 }
16867 if (op0 != op1 && GET_CODE (op0) != MEM)
16868 {
16869 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16870 emit_insn (insn);
16871 return;
16872 }
16873 if (GET_CODE (op0) == MEM)
16874 op1 = force_reg (Pmode, op1);
16875 else
16876 {
16877 rtx temp = op0;
16878 if (GET_CODE (temp) != REG)
16879 temp = gen_reg_rtx (Pmode);
16880 temp = legitimize_pic_address (op1, temp);
16881 if (temp == op0)
16882 return;
16883 op1 = temp;
16884 }
16885 /* dynamic-no-pic */
16886 #endif
16887 }
16888 else
16889 {
16890 if (MEM_P (op0))
16891 op1 = force_reg (mode, op1);
16892 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16893 {
16894 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16895 op1 = legitimize_pic_address (op1, reg);
16896 if (op0 == op1)
16897 return;
16898 op1 = convert_to_mode (mode, op1, 1);
16899 }
16900 }
16901 }
16902 else
16903 {
16904 if (MEM_P (op0)
16905 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16906 || !push_operand (op0, mode))
16907 && MEM_P (op1))
16908 op1 = force_reg (mode, op1);
16909
16910 if (push_operand (op0, mode)
16911 && ! general_no_elim_operand (op1, mode))
16912 op1 = copy_to_mode_reg (mode, op1);
16913
16914 /* Force large constants in 64bit compilation into a register
16915 to get them CSEed. */
16916 if (can_create_pseudo_p ()
16917 && (mode == DImode) && TARGET_64BIT
16918 && immediate_operand (op1, mode)
16919 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16920 && !register_operand (op0, mode)
16921 && optimize)
16922 op1 = copy_to_mode_reg (mode, op1);
16923
16924 if (can_create_pseudo_p ()
16925 && FLOAT_MODE_P (mode)
16926 && GET_CODE (op1) == CONST_DOUBLE)
16927 {
16928 /* If we are loading a floating point constant to a register,
16929 force the value to memory now, since we'll get better code
16930 out of the back end. */
16931
16932 op1 = validize_mem (force_const_mem (mode, op1));
16933 if (!register_operand (op0, mode))
16934 {
16935 rtx temp = gen_reg_rtx (mode);
16936 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16937 emit_move_insn (op0, temp);
16938 return;
16939 }
16940 }
16941 }
16942
16943 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16944 }
16945
16946 void
16947 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16948 {
16949 rtx op0 = operands[0], op1 = operands[1];
16950 unsigned int align = GET_MODE_ALIGNMENT (mode);
16951
16952 if (push_operand (op0, VOIDmode))
16953 op0 = emit_move_resolve_push (mode, op0);
16954
16955 /* Force constants other than zero into memory. We do not know how
16956 the instructions used to build constants modify the upper 64 bits
16957 of the register; once we have that information we may be able
16958 to handle some of them more efficiently. */
16959 if (can_create_pseudo_p ()
16960 && register_operand (op0, mode)
16961 && (CONSTANT_P (op1)
16962 || (GET_CODE (op1) == SUBREG
16963 && CONSTANT_P (SUBREG_REG (op1))))
16964 && !standard_sse_constant_p (op1))
16965 op1 = validize_mem (force_const_mem (mode, op1));
16966
16967 /* We need to check memory alignment for SSE modes since an attribute
16968 can make operands unaligned. */
16969 if (can_create_pseudo_p ()
16970 && SSE_REG_MODE_P (mode)
16971 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16972 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16973 {
16974 rtx tmp[2];
16975
16976 /* ix86_expand_vector_move_misalign() does not like constants ... */
16977 if (CONSTANT_P (op1)
16978 || (GET_CODE (op1) == SUBREG
16979 && CONSTANT_P (SUBREG_REG (op1))))
16980 op1 = validize_mem (force_const_mem (mode, op1));
16981
16982 /* ... nor both arguments in memory. */
16983 if (!register_operand (op0, mode)
16984 && !register_operand (op1, mode))
16985 op1 = force_reg (mode, op1);
16986
16987 tmp[0] = op0; tmp[1] = op1;
16988 ix86_expand_vector_move_misalign (mode, tmp);
16989 return;
16990 }
16991
16992 /* Make operand1 a register if it isn't already. */
16993 if (can_create_pseudo_p ()
16994 && !register_operand (op0, mode)
16995 && !register_operand (op1, mode))
16996 {
16997 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16998 return;
16999 }
17000
17001 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17002 }
17003
17004 /* Split 32-byte AVX unaligned load and store if needed. */
17005
17006 static void
17007 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17008 {
17009 rtx m;
17010 rtx (*extract) (rtx, rtx, rtx);
17011 rtx (*load_unaligned) (rtx, rtx);
17012 rtx (*store_unaligned) (rtx, rtx);
17013 enum machine_mode mode;
17014
17015 switch (GET_MODE (op0))
17016 {
17017 default:
17018 gcc_unreachable ();
17019 case V32QImode:
17020 extract = gen_avx_vextractf128v32qi;
17021 load_unaligned = gen_avx_loaddquv32qi;
17022 store_unaligned = gen_avx_storedquv32qi;
17023 mode = V16QImode;
17024 break;
17025 case V8SFmode:
17026 extract = gen_avx_vextractf128v8sf;
17027 load_unaligned = gen_avx_loadups256;
17028 store_unaligned = gen_avx_storeups256;
17029 mode = V4SFmode;
17030 break;
17031 case V4DFmode:
17032 extract = gen_avx_vextractf128v4df;
17033 load_unaligned = gen_avx_loadupd256;
17034 store_unaligned = gen_avx_storeupd256;
17035 mode = V2DFmode;
17036 break;
17037 }
17038
17039 if (MEM_P (op1))
17040 {
17041 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
17042 {
17043 rtx r = gen_reg_rtx (mode);
17044 m = adjust_address (op1, mode, 0);
17045 emit_move_insn (r, m);
17046 m = adjust_address (op1, mode, 16);
17047 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17048 emit_move_insn (op0, r);
17049 }
17050 /* Normal *mov<mode>_internal pattern will handle
17051 unaligned loads just fine if misaligned_operand
17052 is true, and without the UNSPEC it can be combined
17053 with arithmetic instructions. */
17054 else if (misaligned_operand (op1, GET_MODE (op1)))
17055 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17056 else
17057 emit_insn (load_unaligned (op0, op1));
17058 }
17059 else if (MEM_P (op0))
17060 {
17061 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
17062 {
17063 m = adjust_address (op0, mode, 0);
17064 emit_insn (extract (m, op1, const0_rtx));
17065 m = adjust_address (op0, mode, 16);
17066 emit_insn (extract (m, op1, const1_rtx));
17067 }
17068 else
17069 emit_insn (store_unaligned (op0, op1));
17070 }
17071 else
17072 gcc_unreachable ();
17073 }
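/* Illustrative output (a sketch; registers are made up): with
   TARGET_AVX256_SPLIT_UNALIGNED_LOAD, a misaligned 32-byte V8SFmode load is
   split into two 16-byte halves, roughly

     vmovups      (%rax), %xmm0
     vinsertf128  $0x1, 16(%rax), %ymm0, %ymm0

   and with TARGET_AVX256_SPLIT_UNALIGNED_STORE a store is likewise split
   into a 16-byte store of the low half and a vextractf128 of the high
   half.  */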
17074
17075 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17076 straight to ix86_expand_vector_move. */
17077 /* Code generation for scalar reg-reg moves of single and double precision data:
17078 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17079 movaps reg, reg
17080 else
17081 movss reg, reg
17082 if (x86_sse_partial_reg_dependency == true)
17083 movapd reg, reg
17084 else
17085 movsd reg, reg
17086
17087 Code generation for scalar loads of double precision data:
17088 if (x86_sse_split_regs == true)
17089 movlpd mem, reg (gas syntax)
17090 else
17091 movsd mem, reg
17092
17093 Code generation for unaligned packed loads of single precision data
17094 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17095 if (x86_sse_unaligned_move_optimal)
17096 movups mem, reg
17097
17098 if (x86_sse_partial_reg_dependency == true)
17099 {
17100 xorps reg, reg
17101 movlps mem, reg
17102 movhps mem+8, reg
17103 }
17104 else
17105 {
17106 movlps mem, reg
17107 movhps mem+8, reg
17108 }
17109
17110 Code generation for unaligned packed loads of double precision data
17111 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17112 if (x86_sse_unaligned_move_optimal)
17113 movupd mem, reg
17114
17115 if (x86_sse_split_regs == true)
17116 {
17117 movlpd mem, reg
17118 movhpd mem+8, reg
17119 }
17120 else
17121 {
17122 movsd mem, reg
17123 movhpd mem+8, reg
17124 }
17125 */
17126
17127 void
17128 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17129 {
17130 rtx op0, op1, orig_op0 = NULL_RTX, m;
17131 rtx (*load_unaligned) (rtx, rtx);
17132 rtx (*store_unaligned) (rtx, rtx);
17133
17134 op0 = operands[0];
17135 op1 = operands[1];
17136
17137 if (GET_MODE_SIZE (mode) == 64)
17138 {
17139 switch (GET_MODE_CLASS (mode))
17140 {
17141 case MODE_VECTOR_INT:
17142 case MODE_INT:
17143 if (GET_MODE (op0) != V16SImode)
17144 {
17145 if (!MEM_P (op0))
17146 {
17147 orig_op0 = op0;
17148 op0 = gen_reg_rtx (V16SImode);
17149 }
17150 else
17151 op0 = gen_lowpart (V16SImode, op0);
17152 }
17153 op1 = gen_lowpart (V16SImode, op1);
17154 /* FALLTHRU */
17155
17156 case MODE_VECTOR_FLOAT:
17157 switch (GET_MODE (op0))
17158 {
17159 default:
17160 gcc_unreachable ();
17161 case V16SImode:
17162 load_unaligned = gen_avx512f_loaddquv16si;
17163 store_unaligned = gen_avx512f_storedquv16si;
17164 break;
17165 case V16SFmode:
17166 load_unaligned = gen_avx512f_loadups512;
17167 store_unaligned = gen_avx512f_storeups512;
17168 break;
17169 case V8DFmode:
17170 load_unaligned = gen_avx512f_loadupd512;
17171 store_unaligned = gen_avx512f_storeupd512;
17172 break;
17173 }
17174
17175 if (MEM_P (op1))
17176 emit_insn (load_unaligned (op0, op1));
17177 else if (MEM_P (op0))
17178 emit_insn (store_unaligned (op0, op1));
17179 else
17180 gcc_unreachable ();
17181 if (orig_op0)
17182 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17183 break;
17184
17185 default:
17186 gcc_unreachable ();
17187 }
17188
17189 return;
17190 }
17191
17192 if (TARGET_AVX
17193 && GET_MODE_SIZE (mode) == 32)
17194 {
17195 switch (GET_MODE_CLASS (mode))
17196 {
17197 case MODE_VECTOR_INT:
17198 case MODE_INT:
17199 if (GET_MODE (op0) != V32QImode)
17200 {
17201 if (!MEM_P (op0))
17202 {
17203 orig_op0 = op0;
17204 op0 = gen_reg_rtx (V32QImode);
17205 }
17206 else
17207 op0 = gen_lowpart (V32QImode, op0);
17208 }
17209 op1 = gen_lowpart (V32QImode, op1);
17210 /* FALLTHRU */
17211
17212 case MODE_VECTOR_FLOAT:
17213 ix86_avx256_split_vector_move_misalign (op0, op1);
17214 if (orig_op0)
17215 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17216 break;
17217
17218 default:
17219 gcc_unreachable ();
17220 }
17221
17222 return;
17223 }
17224
17225 if (MEM_P (op1))
17226 {
17227 /* Normal *mov<mode>_internal pattern will handle
17228 unaligned loads just fine if misaligned_operand
17229 is true, and without the UNSPEC it can be combined
17230 with arithmetic instructions. */
17231 if (TARGET_AVX
17232 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17233 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17234 && misaligned_operand (op1, GET_MODE (op1)))
17235 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17236 /* ??? If we have typed data, then it would appear that using
17237 movdqu is the only way to get unaligned data loaded with
17238 integer type. */
17239 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17240 {
17241 if (GET_MODE (op0) != V16QImode)
17242 {
17243 orig_op0 = op0;
17244 op0 = gen_reg_rtx (V16QImode);
17245 }
17246 op1 = gen_lowpart (V16QImode, op1);
17247 /* We will eventually emit movups based on insn attributes. */
17248 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17249 if (orig_op0)
17250 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17251 }
17252 else if (TARGET_SSE2 && mode == V2DFmode)
17253 {
17254 rtx zero;
17255
17256 if (TARGET_AVX
17257 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17258 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17259 || optimize_insn_for_size_p ())
17260 {
17261 /* We will eventually emit movups based on insn attributes. */
17262 emit_insn (gen_sse2_loadupd (op0, op1));
17263 return;
17264 }
17265
17266 /* When SSE registers are split into halves, we can avoid
17267 writing to the top half twice. */
17268 if (TARGET_SSE_SPLIT_REGS)
17269 {
17270 emit_clobber (op0);
17271 zero = op0;
17272 }
17273 else
17274 {
17275 /* ??? Not sure about the best option for the Intel chips.
17276 The following would seem to satisfy; the register is
17277 entirely cleared, breaking the dependency chain. We
17278 then store to the upper half, with a dependency depth
17279 of one. A rumor has it that Intel recommends two movsd
17280 followed by an unpacklpd, but this is unconfirmed. And
17281 given that the dependency depth of the unpacklpd would
17282 still be one, I'm not sure why this would be better. */
17283 zero = CONST0_RTX (V2DFmode);
17284 }
17285
17286 m = adjust_address (op1, DFmode, 0);
17287 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17288 m = adjust_address (op1, DFmode, 8);
17289 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17290 }
17291 else
17292 {
17293 rtx t;
17294
17295 if (TARGET_AVX
17296 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17297 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17298 || optimize_insn_for_size_p ())
17299 {
17300 if (GET_MODE (op0) != V4SFmode)
17301 {
17302 orig_op0 = op0;
17303 op0 = gen_reg_rtx (V4SFmode);
17304 }
17305 op1 = gen_lowpart (V4SFmode, op1);
17306 emit_insn (gen_sse_loadups (op0, op1));
17307 if (orig_op0)
17308 emit_move_insn (orig_op0,
17309 gen_lowpart (GET_MODE (orig_op0), op0));
17310 return;
17311 }
17312
17313 if (mode != V4SFmode)
17314 t = gen_reg_rtx (V4SFmode);
17315 else
17316 t = op0;
17317
17318 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17319 emit_move_insn (t, CONST0_RTX (V4SFmode));
17320 else
17321 emit_clobber (t);
17322
17323 m = adjust_address (op1, V2SFmode, 0);
17324 emit_insn (gen_sse_loadlps (t, t, m));
17325 m = adjust_address (op1, V2SFmode, 8);
17326 emit_insn (gen_sse_loadhps (t, t, m));
17327 if (mode != V4SFmode)
17328 emit_move_insn (op0, gen_lowpart (mode, t));
17329 }
17330 }
17331 else if (MEM_P (op0))
17332 {
17333 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17334 {
17335 op0 = gen_lowpart (V16QImode, op0);
17336 op1 = gen_lowpart (V16QImode, op1);
17337 /* We will eventually emit movups based on insn attributes. */
17338 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17339 }
17340 else if (TARGET_SSE2 && mode == V2DFmode)
17341 {
17342 if (TARGET_AVX
17343 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17344 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17345 || optimize_insn_for_size_p ())
17346 /* We will eventually emit movups based on insn attributes. */
17347 emit_insn (gen_sse2_storeupd (op0, op1));
17348 else
17349 {
17350 m = adjust_address (op0, DFmode, 0);
17351 emit_insn (gen_sse2_storelpd (m, op1));
17352 m = adjust_address (op0, DFmode, 8);
17353 emit_insn (gen_sse2_storehpd (m, op1));
17354 }
17355 }
17356 else
17357 {
17358 if (mode != V4SFmode)
17359 op1 = gen_lowpart (V4SFmode, op1);
17360
17361 if (TARGET_AVX
17362 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17363 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17364 || optimize_insn_for_size_p ())
17365 {
17366 op0 = gen_lowpart (V4SFmode, op0);
17367 emit_insn (gen_sse_storeups (op0, op1));
17368 }
17369 else
17370 {
17371 m = adjust_address (op0, V2SFmode, 0);
17372 emit_insn (gen_sse_storelps (m, op1));
17373 m = adjust_address (op0, V2SFmode, 8);
17374 emit_insn (gen_sse_storehps (m, op1));
17375 }
17376 }
17377 }
17378 else
17379 gcc_unreachable ();
17380 }
17381
17382 /* Helper function of ix86_fixup_binary_operands to canonicalize
17383 operand order. Returns true if the operands should be swapped. */
17384
17385 static bool
17386 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17387 rtx operands[])
17388 {
17389 rtx dst = operands[0];
17390 rtx src1 = operands[1];
17391 rtx src2 = operands[2];
17392
17393 /* If the operation is not commutative, we can't do anything. */
17394 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17395 return false;
17396
17397 /* Highest priority is that src1 should match dst. */
17398 if (rtx_equal_p (dst, src1))
17399 return false;
17400 if (rtx_equal_p (dst, src2))
17401 return true;
17402
17403 /* Next highest priority is that immediate constants come second. */
17404 if (immediate_operand (src2, mode))
17405 return false;
17406 if (immediate_operand (src1, mode))
17407 return true;
17408
17409 /* Lowest priority is that memory references should come second. */
17410 if (MEM_P (src2))
17411 return false;
17412 if (MEM_P (src1))
17413 return true;
17414
17415 return false;
17416 }
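/* For example (illustrative): expanding a = b + a (dst equal to src2) swaps
   the sources so the expansion becomes a = a + b, which matches the
   two-address form where the destination must equal the first source.  */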
17417
17418
17419 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17420 destination to use for the operation. If different from the true
17421 destination in operands[0], a copy operation will be required. */
17422
17423 rtx
17424 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17425 rtx operands[])
17426 {
17427 rtx dst = operands[0];
17428 rtx src1 = operands[1];
17429 rtx src2 = operands[2];
17430
17431 /* Canonicalize operand order. */
17432 if (ix86_swap_binary_operands_p (code, mode, operands))
17433 {
17434 rtx temp;
17435
17436 /* It is invalid to swap operands of different modes. */
17437 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17438
17439 temp = src1;
17440 src1 = src2;
17441 src2 = temp;
17442 }
17443
17444 /* Both source operands cannot be in memory. */
17445 if (MEM_P (src1) && MEM_P (src2))
17446 {
17447 /* Optimization: Only read from memory once. */
17448 if (rtx_equal_p (src1, src2))
17449 {
17450 src2 = force_reg (mode, src2);
17451 src1 = src2;
17452 }
17453 else if (rtx_equal_p (dst, src1))
17454 src2 = force_reg (mode, src2);
17455 else
17456 src1 = force_reg (mode, src1);
17457 }
17458
17459 /* If the destination is memory, and we do not have matching source
17460 operands, do things in registers. */
17461 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17462 dst = gen_reg_rtx (mode);
17463
17464 /* Source 1 cannot be a constant. */
17465 if (CONSTANT_P (src1))
17466 src1 = force_reg (mode, src1);
17467
17468 /* Source 1 cannot be a non-matching memory. */
17469 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17470 src1 = force_reg (mode, src1);
17471
17472 /* Improve address combine. */
17473 if (code == PLUS
17474 && GET_MODE_CLASS (mode) == MODE_INT
17475 && MEM_P (src2))
17476 src2 = force_reg (mode, src2);
17477
17478 operands[1] = src1;
17479 operands[2] = src2;
17480 return dst;
17481 }
17482
17483 /* Similarly, but assume that the destination has already been
17484 set up properly. */
17485
17486 void
17487 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17488 enum machine_mode mode, rtx operands[])
17489 {
17490 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17491 gcc_assert (dst == operands[0]);
17492 }
17493
17494 /* Attempt to expand a binary operator. Make the expansion closer to the
17495 actual machine than just general_operand, which would allow 3 separate
17496 memory references (one output, two inputs) in a single insn. */
17497
17498 void
17499 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17500 rtx operands[])
17501 {
17502 rtx src1, src2, dst, op, clob;
17503
17504 dst = ix86_fixup_binary_operands (code, mode, operands);
17505 src1 = operands[1];
17506 src2 = operands[2];
17507
17508 /* Emit the instruction. */
17509
17510 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17511 if (reload_in_progress)
17512 {
17513 /* Reload doesn't know about the flags register, and doesn't know that
17514 it doesn't want to clobber it. We can only do this with PLUS. */
17515 gcc_assert (code == PLUS);
17516 emit_insn (op);
17517 }
17518 else if (reload_completed
17519 && code == PLUS
17520 && !rtx_equal_p (dst, src1))
17521 {
17522 /* This is going to be an LEA; avoid splitting it later. */
17523 emit_insn (op);
17524 }
17525 else
17526 {
17527 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17528 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17529 }
17530
17531 /* Fix up the destination if needed. */
17532 if (dst != operands[0])
17533 emit_move_insn (operands[0], dst);
17534 }
17535
17536 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17537 the given OPERANDS. */
17538
17539 void
17540 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17541 rtx operands[])
17542 {
17543 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17544 if (GET_CODE (operands[1]) == SUBREG)
17545 {
17546 op1 = operands[1];
17547 op2 = operands[2];
17548 }
17549 else if (GET_CODE (operands[2]) == SUBREG)
17550 {
17551 op1 = operands[2];
17552 op2 = operands[1];
17553 }
17554 /* Optimize (__m128i) d | (__m128i) e and similar code
17555 when d and e are float vectors into a float vector logical
17556 insn. In C/C++ without using intrinsics there is no other way
17557 to express a vector logical operation on float vectors than
17558 to cast them temporarily to integer vectors. */
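/* Illustrative source (hypothetical, not from the original):

     __m128 a, b;
     __m128 c = (__m128) ((__m128i) a | (__m128i) b);

   With this transformation the IOR is emitted directly as a float vector
   logical insn (orps) rather than an integer one (por), avoiding a possible
   domain-crossing penalty.  */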
17559 if (op1
17560 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17561 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17562 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17563 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17564 && SUBREG_BYTE (op1) == 0
17565 && (GET_CODE (op2) == CONST_VECTOR
17566 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17567 && SUBREG_BYTE (op2) == 0))
17568 && can_create_pseudo_p ())
17569 {
17570 rtx dst;
17571 switch (GET_MODE (SUBREG_REG (op1)))
17572 {
17573 case V4SFmode:
17574 case V8SFmode:
17575 case V16SFmode:
17576 case V2DFmode:
17577 case V4DFmode:
17578 case V8DFmode:
17579 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17580 if (GET_CODE (op2) == CONST_VECTOR)
17581 {
17582 op2 = gen_lowpart (GET_MODE (dst), op2);
17583 op2 = force_reg (GET_MODE (dst), op2);
17584 }
17585 else
17586 {
17587 op1 = operands[1];
17588 op2 = SUBREG_REG (operands[2]);
17589 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17590 op2 = force_reg (GET_MODE (dst), op2);
17591 }
17592 op1 = SUBREG_REG (op1);
17593 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17594 op1 = force_reg (GET_MODE (dst), op1);
17595 emit_insn (gen_rtx_SET (VOIDmode, dst,
17596 gen_rtx_fmt_ee (code, GET_MODE (dst),
17597 op1, op2)));
17598 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17599 return;
17600 default:
17601 break;
17602 }
17603 }
17604 if (!nonimmediate_operand (operands[1], mode))
17605 operands[1] = force_reg (mode, operands[1]);
17606 if (!nonimmediate_operand (operands[2], mode))
17607 operands[2] = force_reg (mode, operands[2]);
17608 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17609 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17610 gen_rtx_fmt_ee (code, mode, operands[1],
17611 operands[2])));
17612 }
17613
17614 /* Return TRUE or FALSE depending on whether the binary operator meets the
17615 appropriate constraints. */
17616
17617 bool
17618 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17619 rtx operands[3])
17620 {
17621 rtx dst = operands[0];
17622 rtx src1 = operands[1];
17623 rtx src2 = operands[2];
17624
17625 /* The source operands cannot both be in memory. */
17626 if (MEM_P (src1) && MEM_P (src2))
17627 return false;
17628
17629 /* Canonicalize operand order for commutative operators. */
17630 if (ix86_swap_binary_operands_p (code, mode, operands))
17631 {
17632 rtx temp = src1;
17633 src1 = src2;
17634 src2 = temp;
17635 }
17636
17637 /* If the destination is memory, we must have a matching source operand. */
17638 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17639 return false;
17640
17641 /* Source 1 cannot be a constant. */
17642 if (CONSTANT_P (src1))
17643 return false;
17644
17645 /* Source 1 cannot be a non-matching memory. */
17646 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17647 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17648 return (code == AND
17649 && (mode == HImode
17650 || mode == SImode
17651 || (TARGET_64BIT && mode == DImode))
17652 && satisfies_constraint_L (src2));
17653
17654 return true;
17655 }
17656
17657 /* Attempt to expand a unary operator. Make the expansion closer to the
17658 actual machine than just general_operand, which would allow 2 separate
17659 memory references (one output, one input) in a single insn. */
17660
17661 void
17662 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17663 rtx operands[])
17664 {
17665 int matching_memory;
17666 rtx src, dst, op, clob;
17667
17668 dst = operands[0];
17669 src = operands[1];
17670
17671 /* If the destination is memory, and we do not have matching source
17672 operands, do things in registers. */
17673 matching_memory = 0;
17674 if (MEM_P (dst))
17675 {
17676 if (rtx_equal_p (dst, src))
17677 matching_memory = 1;
17678 else
17679 dst = gen_reg_rtx (mode);
17680 }
17681
17682 /* When source operand is memory, destination must match. */
17683 if (MEM_P (src) && !matching_memory)
17684 src = force_reg (mode, src);
17685
17686 /* Emit the instruction. */
17687
17688 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17689 if (reload_in_progress || code == NOT)
17690 {
17691 /* Reload doesn't know about the flags register, and doesn't know that
17692 it doesn't want to clobber it. */
17693 gcc_assert (code == NOT);
17694 emit_insn (op);
17695 }
17696 else
17697 {
17698 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17699 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17700 }
17701
17702 /* Fix up the destination if needed. */
17703 if (dst != operands[0])
17704 emit_move_insn (operands[0], dst);
17705 }
17706
17707 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17708 divisor are within the range [0-255]. */
17709
17710 void
17711 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17712 bool signed_p)
17713 {
17714 rtx_code_label *end_label, *qimode_label;
17715 rtx insn, div, mod;
17716 rtx scratch, tmp0, tmp1, tmp2;
17717 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17718 rtx (*gen_zero_extend) (rtx, rtx);
17719 rtx (*gen_test_ccno_1) (rtx, rtx);
17720
17721 switch (mode)
17722 {
17723 case SImode:
17724 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17725 gen_test_ccno_1 = gen_testsi_ccno_1;
17726 gen_zero_extend = gen_zero_extendqisi2;
17727 break;
17728 case DImode:
17729 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17730 gen_test_ccno_1 = gen_testdi_ccno_1;
17731 gen_zero_extend = gen_zero_extendqidi2;
17732 break;
17733 default:
17734 gcc_unreachable ();
17735 }
17736
17737 end_label = gen_label_rtx ();
17738 qimode_label = gen_label_rtx ();
17739
17740 scratch = gen_reg_rtx (mode);
17741
17742 /* Use 8bit unsigned divmod if dividend and divisor are within
17743 the range [0-255]. */
17744 emit_move_insn (scratch, operands[2]);
17745 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17746 scratch, 1, OPTAB_DIRECT);
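  /* scratch = dividend | divisor; testing it against -0x100 (~0xFF) sets
     ZF only when neither value has bits above bit 7, i.e. both fit in
     8 bits, and the EQ branch below then takes the 8bit path.  */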
17747 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17748 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17749 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17750 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17751 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17752 pc_rtx);
17753 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17754 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17755 JUMP_LABEL (insn) = qimode_label;
17756
17757 /* Generate the original signed/unsigned divmod. */
17758 div = gen_divmod4_1 (operands[0], operands[1],
17759 operands[2], operands[3]);
17760 emit_insn (div);
17761
17762 /* Branch to the end. */
17763 emit_jump_insn (gen_jump (end_label));
17764 emit_barrier ();
17765
17766 /* Generate 8bit unsigned divide. */
17767 emit_label (qimode_label);
17768 /* Don't use operands[0] for result of 8bit divide since not all
17769 registers support QImode ZERO_EXTRACT. */
17770 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17771 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17772 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17773 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17774
17775 if (signed_p)
17776 {
17777 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17778 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17779 }
17780 else
17781 {
17782 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17783 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17784 }
17785
17786 /* Extract remainder from AH. */
17787 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17788 if (REG_P (operands[1]))
17789 insn = emit_move_insn (operands[1], tmp1);
17790 else
17791 {
17792 /* Need a new scratch register since the old one has result
17793 of 8bit divide. */
17794 scratch = gen_reg_rtx (mode);
17795 emit_move_insn (scratch, tmp1);
17796 insn = emit_move_insn (operands[1], scratch);
17797 }
17798 set_unique_reg_note (insn, REG_EQUAL, mod);
17799
17800 /* Zero extend quotient from AL. */
17801 tmp1 = gen_lowpart (QImode, tmp0);
17802 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17803 set_unique_reg_note (insn, REG_EQUAL, div);
17804
17805 emit_label (end_label);
17806 }
17807
17808 #define LEA_MAX_STALL (3)
17809 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17810
17811 /* Increase the given DISTANCE in half-cycles according to
17812 dependencies between PREV and NEXT instructions.
17813 Add 1 half-cycle if there is no dependency and
17814 go to the next cycle if there is some dependency. */
17815
17816 static unsigned int
17817 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
17818 {
17819 df_ref def, use;
17820
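  /* "Go to the next cycle" is implemented as rounding DISTANCE up to an
     even number of half-cycles (a cycle boundary) and then adding one full
     cycle (2); independent insns just add one half-cycle below.  */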
17821 if (!prev || !next)
17822 return distance + (distance & 1) + 2;
17823
17824 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17825 return distance + 1;
17826
17827 FOR_EACH_INSN_USE (use, next)
17828 FOR_EACH_INSN_DEF (def, prev)
17829 if (!DF_REF_IS_ARTIFICIAL (def)
17830 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
17831 return distance + (distance & 1) + 2;
17832
17833 return distance + 1;
17834 }
17835
17836 /* Check whether instruction INSN defines register number
17837 REGNO1 or REGNO2. */
17838
17839 static bool
17840 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17841 rtx insn)
17842 {
17843 df_ref def;
17844
17845 FOR_EACH_INSN_DEF (def, insn)
17846 if (DF_REF_REG_DEF_P (def)
17847 && !DF_REF_IS_ARTIFICIAL (def)
17848 && (regno1 == DF_REF_REGNO (def)
17849 || regno2 == DF_REF_REGNO (def)))
17850 return true;
17851
17852 return false;
17853 }
17854
17855 /* Check whether instruction INSN uses register number
17856 REGNO as part of an address expression. */
17857
17858 static bool
17859 insn_uses_reg_mem (unsigned int regno, rtx insn)
17860 {
17861 df_ref use;
17862
17863 FOR_EACH_INSN_USE (use, insn)
17864 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
17865 return true;
17866
17867 return false;
17868 }
17869
17870 /* Search backward for a non-AGU definition of register number REGNO1
17871 or register number REGNO2 in the basic block, starting from instruction
17872 START up to the head of the basic block or instruction INSN.
17873 
17874 Set *FOUND to true if a definition was found and to false
17875 otherwise.
17876 
17877 The distance in half-cycles between START and the found instruction,
17878 or the head of the BB, is added to DISTANCE and returned. */
17879
17880 static int
17881 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17882 rtx_insn *insn, int distance,
17883 rtx_insn *start, bool *found)
17884 {
17885 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17886 rtx_insn *prev = start;
17887 rtx_insn *next = NULL;
17888
17889 *found = false;
17890
17891 while (prev
17892 && prev != insn
17893 && distance < LEA_SEARCH_THRESHOLD)
17894 {
17895 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17896 {
17897 distance = increase_distance (prev, next, distance);
17898 if (insn_defines_reg (regno1, regno2, prev))
17899 {
17900 if (recog_memoized (prev) < 0
17901 || get_attr_type (prev) != TYPE_LEA)
17902 {
17903 *found = true;
17904 return distance;
17905 }
17906 }
17907
17908 next = prev;
17909 }
17910 if (prev == BB_HEAD (bb))
17911 break;
17912
17913 prev = PREV_INSN (prev);
17914 }
17915
17916 return distance;
17917 }
17918
17919 /* Search backward for a non-AGU definition of register number REGNO1
17920 or register number REGNO2 in INSN's basic block until we
17921 1. pass LEA_SEARCH_THRESHOLD instructions, or
17922 2. reach the boundary of a neighbouring BB, or
17923 3. reach an AGU definition.
17924 Return the distance between the non-AGU definition point and INSN.
17925 If there is no definition point, return -1. */
17926
17927 static int
17928 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17929 rtx_insn *insn)
17930 {
17931 basic_block bb = BLOCK_FOR_INSN (insn);
17932 int distance = 0;
17933 bool found = false;
17934
17935 if (insn != BB_HEAD (bb))
17936 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17937 distance, PREV_INSN (insn),
17938 &found);
17939
17940 if (!found && distance < LEA_SEARCH_THRESHOLD)
17941 {
17942 edge e;
17943 edge_iterator ei;
17944 bool simple_loop = false;
17945
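  /* A predecessor edge from the block to itself means BB is a simple
     self-loop; in that case continue the backward search from the end
     of the same block.  */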
17946 FOR_EACH_EDGE (e, ei, bb->preds)
17947 if (e->src == bb)
17948 {
17949 simple_loop = true;
17950 break;
17951 }
17952
17953 if (simple_loop)
17954 distance = distance_non_agu_define_in_bb (regno1, regno2,
17955 insn, distance,
17956 BB_END (bb), &found);
17957 else
17958 {
17959 int shortest_dist = -1;
17960 bool found_in_bb = false;
17961
17962 FOR_EACH_EDGE (e, ei, bb->preds)
17963 {
17964 int bb_dist
17965 = distance_non_agu_define_in_bb (regno1, regno2,
17966 insn, distance,
17967 BB_END (e->src),
17968 &found_in_bb);
17969 if (found_in_bb)
17970 {
17971 if (shortest_dist < 0)
17972 shortest_dist = bb_dist;
17973 else if (bb_dist > 0)
17974 shortest_dist = MIN (bb_dist, shortest_dist);
17975
17976 found = true;
17977 }
17978 }
17979
17980 distance = shortest_dist;
17981 }
17982 }
17983
17984 /* get_attr_type may modify recog data. We want to make sure
17985 that recog data is valid for instruction INSN, on which
17986 distance_non_agu_define is called. INSN is unchanged here. */
17987 extract_insn_cached (insn);
17988
17989 if (!found)
17990 return -1;
17991
17992 return distance >> 1;
17993 }
17994
17995 /* Return the distance in half-cycles between INSN and the next
17996 insn that uses register number REGNO in a memory address, added
17997 to DISTANCE. Return -1 if REGNO is set (redefined) first.
17998 
17999 Set *FOUND to true if a register usage was found and to
18000 false otherwise.
18001 Set *REDEFINED to true if a register redefinition was
18002 found and to false otherwise. */
18003
18004 static int
18005 distance_agu_use_in_bb (unsigned int regno,
18006 rtx_insn *insn, int distance, rtx_insn *start,
18007 bool *found, bool *redefined)
18008 {
18009 basic_block bb = NULL;
18010 rtx_insn *next = start;
18011 rtx_insn *prev = NULL;
18012
18013 *found = false;
18014 *redefined = false;
18015
18016 if (start != NULL_RTX)
18017 {
18018 bb = BLOCK_FOR_INSN (start);
18019 if (start != BB_HEAD (bb))
18020 /* If insn and start belong to the same bb, set prev to insn,
18021 so the call to increase_distance will increase the distance
18022 between insns by 1. */
18023 prev = insn;
18024 }
18025
18026 while (next
18027 && next != insn
18028 && distance < LEA_SEARCH_THRESHOLD)
18029 {
18030 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18031 {
18032 distance = increase_distance (prev, next, distance);
18033 if (insn_uses_reg_mem (regno, next))
18034 {
18035 /* Return DISTANCE if OP0 is used in memory
18036 address in NEXT. */
18037 *found = true;
18038 return distance;
18039 }
18040
18041 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18042 {
18043 /* Return -1 if OP0 is set in NEXT. */
18044 *redefined = true;
18045 return -1;
18046 }
18047
18048 prev = next;
18049 }
18050
18051 if (next == BB_END (bb))
18052 break;
18053
18054 next = NEXT_INSN (next);
18055 }
18056
18057 return distance;
18058 }
18059
18060 /* Return the distance between INSN and the next insn that uses
18061 register number REGNO0 in a memory address. Return -1 if no such
18062 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18063
18064 static int
18065 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18066 {
18067 basic_block bb = BLOCK_FOR_INSN (insn);
18068 int distance = 0;
18069 bool found = false;
18070 bool redefined = false;
18071
18072 if (insn != BB_END (bb))
18073 distance = distance_agu_use_in_bb (regno0, insn, distance,
18074 NEXT_INSN (insn),
18075 &found, &redefined);
18076
18077 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18078 {
18079 edge e;
18080 edge_iterator ei;
18081 bool simple_loop = false;
18082
18083 FOR_EACH_EDGE (e, ei, bb->succs)
18084 if (e->dest == bb)
18085 {
18086 simple_loop = true;
18087 break;
18088 }
18089
18090 if (simple_loop)
18091 distance = distance_agu_use_in_bb (regno0, insn,
18092 distance, BB_HEAD (bb),
18093 &found, &redefined);
18094 else
18095 {
18096 int shortest_dist = -1;
18097 bool found_in_bb = false;
18098 bool redefined_in_bb = false;
18099
18100 FOR_EACH_EDGE (e, ei, bb->succs)
18101 {
18102 int bb_dist
18103 = distance_agu_use_in_bb (regno0, insn,
18104 distance, BB_HEAD (e->dest),
18105 &found_in_bb, &redefined_in_bb);
18106 if (found_in_bb)
18107 {
18108 if (shortest_dist < 0)
18109 shortest_dist = bb_dist;
18110 else if (bb_dist > 0)
18111 shortest_dist = MIN (bb_dist, shortest_dist);
18112
18113 found = true;
18114 }
18115 }
18116
18117 distance = shortest_dist;
18118 }
18119 }
18120
18121 if (!found || redefined)
18122 return -1;
18123
18124 return distance >> 1;
18125 }
18126
18127 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18128 there is a choice between LEA and ADD.
18129 Negative value: ADD is preferred over LEA
18130 Zero: Neutral
18131 Positive value: LEA is preferred over ADD */
18132 #define IX86_LEA_PRIORITY 0
18133
18134 /* Return true if using the lea INSN has a performance advantage
18135 over a sequence of instructions. The instruction sequence has
18136 SPLIT_COST cycles higher latency than the lea latency. */
18137
18138 static bool
18139 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18140 unsigned int regno2, int split_cost, bool has_scale)
18141 {
18142 int dist_define, dist_use;
18143
18144 /* For Silvermont, the use of LEA is justified if it is a 2-source
18145 or 3-source LEA used for a non-destructive destination, or if
18146 the ability to use SCALE is wanted. */
18147 if (TARGET_SILVERMONT || TARGET_INTEL)
18148 {
18149 if (has_scale)
18150 return true;
18151 if (split_cost < 1)
18152 return false;
18153 if (regno0 == regno1 || regno0 == regno2)
18154 return false;
18155 return true;
18156 }
18157
18158 dist_define = distance_non_agu_define (regno1, regno2, insn);
18159 dist_use = distance_agu_use (regno0, insn);
18160
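  /* A negative DIST_DEFINE means no defining insn was found; a distance of
     at least LEA_MAX_STALL means the inputs are ready early enough that the
     AGU will not stall waiting on an ALU result.  */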
18161 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18162 {
18163 /* If there is no non-AGU operand definition, no AGU
18164 operand usage and the split cost is 0, then both the lea
18165 and non-lea variants have the same priority. Currently
18166 we prefer lea for 64-bit code and non-lea for 32-bit
18167 code. */
18168 if (dist_use < 0 && split_cost == 0)
18169 return TARGET_64BIT || IX86_LEA_PRIORITY;
18170 else
18171 return true;
18172 }
18173
18174 /* The longer the definition distance, the more preferable lea is.
18175 Here we adjust it to take into account the splitting cost and
18176 the lea priority. */
18177 dist_define += split_cost + IX86_LEA_PRIORITY;
18178
18179 /* If there is no use in a memory address then we just check
18180 that the split cost exceeds the AGU stall. */
18181 if (dist_use < 0)
18182 return dist_define > LEA_MAX_STALL;
18183
18184 /* If this insn has both a backward non-AGU dependence and a forward
18185 AGU dependence, the one with the shorter distance takes effect. */
18186 return dist_define >= dist_use;
18187 }
18188
18189 /* Return true if it is legal to clobber flags by INSN and
18190 false otherwise. */
18191
18192 static bool
18193 ix86_ok_to_clobber_flags (rtx_insn *insn)
18194 {
18195 basic_block bb = BLOCK_FOR_INSN (insn);
18196 df_ref use;
18197 bitmap live;
18198
18199 while (insn)
18200 {
18201 if (NONDEBUG_INSN_P (insn))
18202 {
18203 FOR_EACH_INSN_USE (use, insn)
18204 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18205 return false;
18206
18207 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18208 return true;
18209 }
18210
18211 if (insn == BB_END (bb))
18212 break;
18213
18214 insn = NEXT_INSN (insn);
18215 }
18216
18217 live = df_get_live_out (bb);
18218 return !REGNO_REG_SET_P (live, FLAGS_REG);
18219 }
18220
18221 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18222 move and add to avoid AGU stalls. */
18223
18224 bool
18225 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18226 {
18227 unsigned int regno0, regno1, regno2;
18228
18229 /* Check if we need to optimize. */
18230 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18231 return false;
18232
18233 /* Check that it is correct to split here. */
18234 if (!ix86_ok_to_clobber_flags (insn))
18235 return false;
18236
18237 regno0 = true_regnum (operands[0]);
18238 regno1 = true_regnum (operands[1]);
18239 regno2 = true_regnum (operands[2]);
18240
18241 /* We need to split only adds with a non-destructive
18242 destination operand. */
18243 if (regno0 == regno1 || regno0 == regno2)
18244 return false;
18245 else
18246 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18247 }
18248
18249 /* Return true if we should emit an lea instruction instead of a mov
18250 instruction. */
18251
18252 bool
18253 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18254 {
18255 unsigned int regno0, regno1;
18256
18257 /* Check if we need to optimize. */
18258 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18259 return false;
18260
18261 /* Use lea for reg to reg moves only. */
18262 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18263 return false;
18264
18265 regno0 = true_regnum (operands[0]);
18266 regno1 = true_regnum (operands[1]);
18267
18268 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18269 }
18270
18271 /* Return true if we need to split lea into a sequence of
18272 instructions to avoid AGU stalls. */
18273
18274 bool
18275 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18276 {
18277 unsigned int regno0, regno1, regno2;
18278 int split_cost;
18279 struct ix86_address parts;
18280 int ok;
18281
18282 /* Check we need to optimize. */
18283 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18284 return false;
18285
18286 /* The "at least two components" test below might not catch simple
18287 move or zero extension insns if parts.base is non-NULL and parts.disp
18288 is const0_rtx as the only components in the address, e.g. if the
18289 register is %rbp or %r13. As this test is much cheaper and moves or
18290 zero extensions are the common case, do this check first. */
18291 if (REG_P (operands[1])
18292 || (SImode_address_operand (operands[1], VOIDmode)
18293 && REG_P (XEXP (operands[1], 0))))
18294 return false;
18295
18296 /* Check if it is OK to split here. */
18297 if (!ix86_ok_to_clobber_flags (insn))
18298 return false;
18299
18300 ok = ix86_decompose_address (operands[1], &parts);
18301 gcc_assert (ok);
18302
18303 /* There should be at least two components in the address. */
18304 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18305 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18306 return false;
18307
18308 /* We should not split into an add if a non-legitimate PIC
18309 operand is used as the displacement. */
18310 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18311 return false;
18312
18313 regno0 = true_regnum (operands[0]);
18314 regno1 = INVALID_REGNUM;
18315 regno2 = INVALID_REGNUM;
18316
18317 if (parts.base)
18318 regno1 = true_regnum (parts.base);
18319 if (parts.index)
18320 regno2 = true_regnum (parts.index);
18321
18322 split_cost = 0;
18323
18324 /* Compute how many cycles we will add to the execution time
18325 if we split the lea into a sequence of instructions. */
18326 if (parts.base || parts.index)
18327 {
18328 /* Have to use a mov instruction if the non-destructive
18329 destination form is used. */
18330 if (regno1 != regno0 && regno2 != regno0)
18331 split_cost += 1;
18332
18333 /* Have to add index to base if both exist. */
18334 if (parts.base && parts.index)
18335 split_cost += 1;
18336
18337 /* Have to use shift and adds if scale is 2 or greater. */
18338 if (parts.scale > 1)
18339 {
18340 if (regno0 != regno1)
18341 split_cost += 1;
18342 else if (regno2 == regno0)
18343 split_cost += 4;
18344 else
18345 split_cost += parts.scale;
18346 }
18347
18348 /* Have to use an add instruction with an immediate if
18349 disp is nonzero. */
18350 if (parts.disp && parts.disp != const0_rtx)
18351 split_cost += 1;
18352
18353 /* Subtract the price of lea. */
18354 split_cost -= 1;
18355 }
18356
18357 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18358 parts.scale > 1);
18359 }
18360
18361 /* Emit the x86 binary operator CODE in mode MODE, where the first operand
18362 matches the destination. The RTX includes a clobber of FLAGS_REG. */
18363
18364 static void
18365 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18366 rtx dst, rtx src)
18367 {
18368 rtx op, clob;
18369
18370 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18371 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18372
18373 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18374 }
18375
18376 /* Return true if the definition of REGNO1 is closer to INSN than that of REGNO2. */
18377
18378 static bool
18379 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18380 {
18381 rtx_insn *prev = insn;
18382 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18383
18384 if (insn == start)
18385 return false;
18386 while (prev && prev != start)
18387 {
18388 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18389 {
18390 prev = PREV_INSN (prev);
18391 continue;
18392 }
18393 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18394 return true;
18395 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18396 return false;
18397 prev = PREV_INSN (prev);
18398 }
18399
18400 /* None of the regs is defined in the bb. */
18401 return false;
18402 }
18403
18404 /* Split an lea instruction into a sequence of instructions
18405 which are executed on the ALU to avoid AGU stalls.
18406 It is assumed that it is allowed to clobber the flags register
18407 at the lea position. */
18408
18409 void
18410 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], enum machine_mode mode)
18411 {
18412 unsigned int regno0, regno1, regno2;
18413 struct ix86_address parts;
18414 rtx target, tmp;
18415 int ok, adds;
18416
18417 ok = ix86_decompose_address (operands[1], &parts);
18418 gcc_assert (ok);
18419
18420 target = gen_lowpart (mode, operands[0]);
18421
18422 regno0 = true_regnum (target);
18423 regno1 = INVALID_REGNUM;
18424 regno2 = INVALID_REGNUM;
18425
18426 if (parts.base)
18427 {
18428 parts.base = gen_lowpart (mode, parts.base);
18429 regno1 = true_regnum (parts.base);
18430 }
18431
18432 if (parts.index)
18433 {
18434 parts.index = gen_lowpart (mode, parts.index);
18435 regno2 = true_regnum (parts.index);
18436 }
18437
18438 if (parts.disp)
18439 parts.disp = gen_lowpart (mode, parts.disp);
18440
18441 if (parts.scale > 1)
18442 {
18443 /* Case r1 = r1 + ... */
18444 if (regno1 == regno0)
18445 {
18446 /* If we have a case r1 = r1 + C * r2 then we
18447 would have to use multiplication, which is very
18448 expensive. Assume the cost model is wrong if such
18449 a case reaches here. */
18450 gcc_assert (regno2 != regno0);
18451
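  /* Compute r1 = r1 + scale * r2 as PARTS.SCALE separate additions of
     the index, avoiding a multiply.  */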
18452 for (adds = parts.scale; adds > 0; adds--)
18453 ix86_emit_binop (PLUS, mode, target, parts.index);
18454 }
18455 else
18456 {
18457 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18458 if (regno0 != regno2)
18459 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18460
18461 /* Use shift for scaling. */
18462 ix86_emit_binop (ASHIFT, mode, target,
18463 GEN_INT (exact_log2 (parts.scale)));
18464
18465 if (parts.base)
18466 ix86_emit_binop (PLUS, mode, target, parts.base);
18467
18468 if (parts.disp && parts.disp != const0_rtx)
18469 ix86_emit_binop (PLUS, mode, target, parts.disp);
18470 }
18471 }
18472 else if (!parts.base && !parts.index)
18473 {
18474 gcc_assert (parts.disp);
18475 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18476 }
18477 else
18478 {
18479 if (!parts.base)
18480 {
18481 if (regno0 != regno2)
18482 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18483 }
18484 else if (!parts.index)
18485 {
18486 if (regno0 != regno1)
18487 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18488 }
18489 else
18490 {
18491 if (regno0 == regno1)
18492 tmp = parts.index;
18493 else if (regno0 == regno2)
18494 tmp = parts.base;
18495 else
18496 {
18497 rtx tmp1;
18498
18499 /* Find the better operand for the SET instruction, depending
18500 on which definition is farther from the insn. */
18501 if (find_nearest_reg_def (insn, regno1, regno2))
18502 tmp = parts.index, tmp1 = parts.base;
18503 else
18504 tmp = parts.base, tmp1 = parts.index;
18505
18506 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18507
18508 if (parts.disp && parts.disp != const0_rtx)
18509 ix86_emit_binop (PLUS, mode, target, parts.disp);
18510
18511 ix86_emit_binop (PLUS, mode, target, tmp1);
18512 return;
18513 }
18514
18515 ix86_emit_binop (PLUS, mode, target, tmp);
18516 }
18517
18518 if (parts.disp && parts.disp != const0_rtx)
18519 ix86_emit_binop (PLUS, mode, target, parts.disp);
18520 }
18521 }
18522
18523 /* Return true if it is ok to optimize an ADD operation to an LEA
18524 operation to avoid flag register consumption. For most processors,
18525 ADD is faster than LEA. For processors like BONNELL, if the
18526 destination register of the LEA holds an actual address which will be
18527 used soon, LEA is better; otherwise ADD is better. */
18528
18529 bool
18530 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
18531 {
18532 unsigned int regno0 = true_regnum (operands[0]);
18533 unsigned int regno1 = true_regnum (operands[1]);
18534 unsigned int regno2 = true_regnum (operands[2]);
18535
18536 /* If a = b + c, (a!=b && a!=c), we must use the lea form. */
18537 if (regno0 != regno1 && regno0 != regno2)
18538 return true;
18539
18540 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18541 return false;
18542
18543 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18544 }
18545
18546 /* Return true if destination reg of SET_BODY is shift count of
18547 USE_BODY. */
18548
18549 static bool
18550 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18551 {
18552 rtx set_dest;
18553 rtx shift_rtx;
18554 int i;
18555
18556 /* Retrieve destination of SET_BODY. */
18557 switch (GET_CODE (set_body))
18558 {
18559 case SET:
18560 set_dest = SET_DEST (set_body);
18561 if (!set_dest || !REG_P (set_dest))
18562 return false;
18563 break;
18564 case PARALLEL:
18565 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18566 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18567 use_body))
18568 return true;
18569 default:
18570 return false;
18571 break;
18572 }
18573
18574 /* Retrieve shift count of USE_BODY. */
18575 switch (GET_CODE (use_body))
18576 {
18577 case SET:
18578 shift_rtx = XEXP (use_body, 1);
18579 break;
18580 case PARALLEL:
18581 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18582 if (ix86_dep_by_shift_count_body (set_body,
18583 XVECEXP (use_body, 0, i)))
18584 return true;
18585 default:
18586 return false;
18587 break;
18588 }
18589
18590 if (shift_rtx
18591 && (GET_CODE (shift_rtx) == ASHIFT
18592 || GET_CODE (shift_rtx) == LSHIFTRT
18593 || GET_CODE (shift_rtx) == ASHIFTRT
18594 || GET_CODE (shift_rtx) == ROTATE
18595 || GET_CODE (shift_rtx) == ROTATERT))
18596 {
18597 rtx shift_count = XEXP (shift_rtx, 1);
18598
18599 /* Return true if shift count is dest of SET_BODY. */
18600 if (REG_P (shift_count))
18601 {
18602 /* Add this check since it can be invoked before register
18603 allocation by the pre-reload scheduler. */
18604 if (reload_completed
18605 && true_regnum (set_dest) == true_regnum (shift_count))
18606 return true;
18607 else if (REGNO (set_dest) == REGNO (shift_count))
18608 return true;
18609 }
18610 }
18611
18612 return false;
18613 }
18614
18615 /* Return true if destination reg of SET_INSN is shift count of
18616 USE_INSN. */
18617
18618 bool
18619 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18620 {
18621 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18622 PATTERN (use_insn));
18623 }
18624
18625 /* Return TRUE or FALSE depending on whether the unary operator meets the
18626 appropriate constraints. */
18627
18628 bool
18629 ix86_unary_operator_ok (enum rtx_code,
18630 enum machine_mode,
18631 rtx operands[2])
18632 {
18633 /* If one of operands is memory, source and destination must match. */
18634 if ((MEM_P (operands[0])
18635 || MEM_P (operands[1]))
18636 && ! rtx_equal_p (operands[0], operands[1]))
18637 return false;
18638 return true;
18639 }
18640
18641 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18642 are ok, keeping in mind the possible movddup alternative. */
18643
18644 bool
18645 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18646 {
18647 if (MEM_P (operands[0]))
18648 return rtx_equal_p (operands[0], operands[1 + high]);
18649 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18650 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18651 return true;
18652 }
18653
18654 /* Post-reload splitter for converting an SF or DFmode value in an
18655 SSE register into an unsigned SImode. */
18656
18657 void
18658 ix86_split_convert_uns_si_sse (rtx operands[])
18659 {
18660 enum machine_mode vecmode;
18661 rtx value, large, zero_or_two31, input, two31, x;
18662
18663 large = operands[1];
18664 zero_or_two31 = operands[2];
18665 input = operands[3];
18666 two31 = operands[4];
18667 vecmode = GET_MODE (large);
18668 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18669
18670 /* Load up the value into the low element. We must ensure that the other
18671 elements are valid floats -- zero is the easiest such value. */
18672 if (MEM_P (input))
18673 {
18674 if (vecmode == V4SFmode)
18675 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18676 else
18677 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18678 }
18679 else
18680 {
18681 input = gen_rtx_REG (vecmode, REGNO (input));
18682 emit_move_insn (value, CONST0_RTX (vecmode));
18683 if (vecmode == V4SFmode)
18684 emit_insn (gen_sse_movss (value, value, input));
18685 else
18686 emit_insn (gen_sse2_movsd (value, value, input));
18687 }
18688
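  /* LARGE becomes an all-ones mask for lanes where value >= 2^31; those
     lanes have 2^31 subtracted before the signed truncation, and the sign
     bit (LARGE << 31) is xored back into the integer result below.  */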
18689 emit_move_insn (large, two31);
18690 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18691
18692 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18693 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18694
18695 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18696 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18697
18698 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18699 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18700
18701 large = gen_rtx_REG (V4SImode, REGNO (large));
18702 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18703
18704 x = gen_rtx_REG (V4SImode, REGNO (value));
18705 if (vecmode == V4SFmode)
18706 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18707 else
18708 emit_insn (gen_sse2_cvttpd2dq (x, value));
18709 value = x;
18710
18711 emit_insn (gen_xorv4si3 (value, value, large));
18712 }
18713
18714 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18715 Expects the 64-bit DImode to be supplied in a pair of integral
18716 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18717 -mfpmath=sse, !optimize_size only. */
18718
18719 void
18720 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18721 {
18722 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18723 rtx int_xmm, fp_xmm;
18724 rtx biases, exponents;
18725 rtx x;
18726
18727 int_xmm = gen_reg_rtx (V4SImode);
18728 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18729 emit_insn (gen_movdi_to_sse (int_xmm, input));
18730 else if (TARGET_SSE_SPLIT_REGS)
18731 {
18732 emit_clobber (int_xmm);
18733 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18734 }
18735 else
18736 {
18737 x = gen_reg_rtx (V2DImode);
18738 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18739 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18740 }
18741
18742 x = gen_rtx_CONST_VECTOR (V4SImode,
18743 gen_rtvec (4, GEN_INT (0x43300000UL),
18744 GEN_INT (0x45300000UL),
18745 const0_rtx, const0_rtx));
18746 exponents = validize_mem (force_const_mem (V4SImode, x));
18747
18748 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18749 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18750
18751 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18752 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18753 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18754 (0x1.0p84 + double(fp_value_hi_xmm)).
18755 Note these exponents differ by 32. */
18756
18757 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18758
18759 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18760 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18761 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18762 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18763 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18764 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18765 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18766 biases = validize_mem (force_const_mem (V2DFmode, biases));
18767 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18768
18769 /* Add the upper and lower DFmode values together. */
18770 if (TARGET_SSE3)
18771 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18772 else
18773 {
18774 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18775 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18776 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18777 }
18778
18779 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18780 }
18781
18782 /* Not used, but eases macroization of patterns. */
18783 void
18784 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
18785 {
18786 gcc_unreachable ();
18787 }
18788
18789 /* Convert an unsigned SImode value into a DFmode. Only currently used
18790 for SSE, but applicable anywhere. */
18791
18792 void
18793 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18794 {
18795 REAL_VALUE_TYPE TWO31r;
18796 rtx x, fp;
18797
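  /* Bias the unsigned input by -2^31 so it fits in signed SImode, convert
     with the signed int-to-double pattern, then add 2^31.0 back in DFmode.  */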
18798 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18799 NULL, 1, OPTAB_DIRECT);
18800
18801 fp = gen_reg_rtx (DFmode);
18802 emit_insn (gen_floatsidf2 (fp, x));
18803
18804 real_ldexp (&TWO31r, &dconst1, 31);
18805 x = const_double_from_real_value (TWO31r, DFmode);
18806
18807 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18808 if (x != target)
18809 emit_move_insn (target, x);
18810 }
18811
18812 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18813 32-bit mode; otherwise we have a direct convert instruction. */
18814
18815 void
18816 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18817 {
18818 REAL_VALUE_TYPE TWO32r;
18819 rtx fp_lo, fp_hi, x;
18820
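  /* Compute (double) (signed) high_part * 2^32 + (double) (unsigned) low_part.  */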
18821 fp_lo = gen_reg_rtx (DFmode);
18822 fp_hi = gen_reg_rtx (DFmode);
18823
18824 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18825
18826 real_ldexp (&TWO32r, &dconst1, 32);
18827 x = const_double_from_real_value (TWO32r, DFmode);
18828 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18829
18830 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18831
18832 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18833 0, OPTAB_DIRECT);
18834 if (x != target)
18835 emit_move_insn (target, x);
18836 }
18837
18838 /* Convert an unsigned SImode value into an SFmode, using only SSE.
18839 For x86_32, -mfpmath=sse, !optimize_size only. */
18840 void
18841 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18842 {
18843 REAL_VALUE_TYPE ONE16r;
18844 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18845
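  /* Split the input into 16-bit halves, each of which converts exactly
     to SFmode, and recombine as fp_hi * 2^16 + fp_lo.  */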
18846 real_ldexp (&ONE16r, &dconst1, 16);
18847 x = const_double_from_real_value (ONE16r, SFmode);
18848 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
18849 NULL, 0, OPTAB_DIRECT);
18850 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
18851 NULL, 0, OPTAB_DIRECT);
18852 fp_hi = gen_reg_rtx (SFmode);
18853 fp_lo = gen_reg_rtx (SFmode);
18854 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18855 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18856 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18857 0, OPTAB_DIRECT);
18858 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18859 0, OPTAB_DIRECT);
18860 if (!rtx_equal_p (target, fp_hi))
18861 emit_move_insn (target, fp_hi);
18862 }
18863
18864 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18865 a vector of unsigned ints VAL to a vector of floats TARGET. */
18866
18867 void
18868 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18869 {
18870 rtx tmp[8];
18871 REAL_VALUE_TYPE TWO16r;
18872 enum machine_mode intmode = GET_MODE (val);
18873 enum machine_mode fltmode = GET_MODE (target);
18874 rtx (*cvt) (rtx, rtx);
18875
18876 if (intmode == V4SImode)
18877 cvt = gen_floatv4siv4sf2;
18878 else
18879 cvt = gen_floatv8siv8sf2;
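  /* As in the scalar case, convert the low and high 16-bit halves
     separately (each is exact in SFmode) and recombine the result as
     tmp_hi * 2^16 + tmp_lo.  */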
18880 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18881 tmp[0] = force_reg (intmode, tmp[0]);
18882 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18883 OPTAB_DIRECT);
18884 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18885 NULL_RTX, 1, OPTAB_DIRECT);
18886 tmp[3] = gen_reg_rtx (fltmode);
18887 emit_insn (cvt (tmp[3], tmp[1]));
18888 tmp[4] = gen_reg_rtx (fltmode);
18889 emit_insn (cvt (tmp[4], tmp[2]));
18890 real_ldexp (&TWO16r, &dconst1, 16);
18891 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18892 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18893 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18894 OPTAB_DIRECT);
18895 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18896 OPTAB_DIRECT);
18897 if (tmp[7] != target)
18898 emit_move_insn (target, tmp[7]);
18899 }
18900
18901 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18902 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18903 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18904 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
18905
18906 rtx
18907 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18908 {
18909 REAL_VALUE_TYPE TWO31r;
18910 rtx two31r, tmp[4];
18911 enum machine_mode mode = GET_MODE (val);
18912 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18913 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18914 rtx (*cmp) (rtx, rtx, rtx, rtx);
18915 int i;
18916
18917 for (i = 0; i < 3; i++)
18918 tmp[i] = gen_reg_rtx (mode);
18919 real_ldexp (&TWO31r, &dconst1, 31);
18920 two31r = const_double_from_real_value (TWO31r, scalarmode);
18921 two31r = ix86_build_const_vector (mode, 1, two31r);
18922 two31r = force_reg (mode, two31r);
18923 switch (mode)
18924 {
18925 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18926 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18927 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18928 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18929 default: gcc_unreachable ();
18930 }
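  /* tmp[0] is an all-ones lane mask where 2^31 <= val; it selects both the
     2^31 to subtract before the signed conversion and the 0x80000000 bit
     returned in *XORP.  */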
18931 tmp[3] = gen_rtx_LE (mode, two31r, val);
18932 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18933 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18934 0, OPTAB_DIRECT);
18935 if (intmode == V4SImode || TARGET_AVX2)
18936 *xorp = expand_simple_binop (intmode, ASHIFT,
18937 gen_lowpart (intmode, tmp[0]),
18938 GEN_INT (31), NULL_RTX, 0,
18939 OPTAB_DIRECT);
18940 else
18941 {
18942 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18943 two31 = ix86_build_const_vector (intmode, 1, two31);
18944 *xorp = expand_simple_binop (intmode, AND,
18945 gen_lowpart (intmode, tmp[0]),
18946 two31, NULL_RTX, 0,
18947 OPTAB_DIRECT);
18948 }
18949 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18950 0, OPTAB_DIRECT);
18951 }
18952
18953 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18954 then replicate the value for all elements of the vector
18955 register. */
18956
18957 rtx
18958 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18959 {
18960 int i, n_elt;
18961 rtvec v;
18962 enum machine_mode scalar_mode;
18963
18964 switch (mode)
18965 {
18966 case V64QImode:
18967 case V32QImode:
18968 case V16QImode:
18969 case V32HImode:
18970 case V16HImode:
18971 case V8HImode:
18972 case V16SImode:
18973 case V8SImode:
18974 case V4SImode:
18975 case V8DImode:
18976 case V4DImode:
18977 case V2DImode:
18978 gcc_assert (vect);
18979 case V16SFmode:
18980 case V8SFmode:
18981 case V4SFmode:
18982 case V8DFmode:
18983 case V4DFmode:
18984 case V2DFmode:
18985 n_elt = GET_MODE_NUNITS (mode);
18986 v = rtvec_alloc (n_elt);
18987 scalar_mode = GET_MODE_INNER (mode);
18988
18989 RTVEC_ELT (v, 0) = value;
18990
18991 for (i = 1; i < n_elt; ++i)
18992 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18993
18994 return gen_rtx_CONST_VECTOR (mode, v);
18995
18996 default:
18997 gcc_unreachable ();
18998 }
18999 }
19000
19001 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19002 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19003 for an SSE register. If VECT is true, then replicate the mask for
19004 all elements of the vector register. If INVERT is true, then create
19005 a mask excluding the sign bit. */
19006
19007 rtx
19008 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
19009 {
19010 enum machine_mode vec_mode, imode;
19011 HOST_WIDE_INT hi, lo;
19012 int shift = 63;
19013 rtx v;
19014 rtx mask;
19015
19016 /* Find the sign bit, sign extended to 2*HWI. */
19017 switch (mode)
19018 {
19019 case V16SImode:
19020 case V16SFmode:
19021 case V8SImode:
19022 case V4SImode:
19023 case V8SFmode:
19024 case V4SFmode:
19025 vec_mode = mode;
19026 mode = GET_MODE_INNER (mode);
19027 imode = SImode;
19028 lo = 0x80000000, hi = lo < 0;
19029 break;
19030
19031 case V8DImode:
19032 case V4DImode:
19033 case V2DImode:
19034 case V8DFmode:
19035 case V4DFmode:
19036 case V2DFmode:
19037 vec_mode = mode;
19038 mode = GET_MODE_INNER (mode);
19039 imode = DImode;
19040 if (HOST_BITS_PER_WIDE_INT >= 64)
19041 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19042 else
19043 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19044 break;
19045
19046 case TImode:
19047 case TFmode:
19048 vec_mode = VOIDmode;
19049 if (HOST_BITS_PER_WIDE_INT >= 64)
19050 {
19051 imode = TImode;
19052 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19053 }
19054 else
19055 {
19056 rtvec vec;
19057
19058 imode = DImode;
19059 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19060
19061 if (invert)
19062 {
19063 lo = ~lo, hi = ~hi;
19064 v = constm1_rtx;
19065 }
19066 else
19067 v = const0_rtx;
19068
19069 mask = immed_double_const (lo, hi, imode);
19070
19071 vec = gen_rtvec (2, v, mask);
19072 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19073 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19074
19075 return v;
19076 }
19077 break;
19078
19079 default:
19080 gcc_unreachable ();
19081 }
19082
19083 if (invert)
19084 lo = ~lo, hi = ~hi;
19085
19086 /* Force this value into the low part of a fp vector constant. */
19087 mask = immed_double_const (lo, hi, imode);
19088 mask = gen_lowpart (mode, mask);
19089
19090 if (vec_mode == VOIDmode)
19091 return force_reg (mode, mask);
19092
19093 v = ix86_build_const_vector (vec_mode, vect, mask);
19094 return force_reg (vec_mode, v);
19095 }
19096
19097 /* Generate code for floating point ABS or NEG. */
19098
19099 void
19100 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19101 rtx operands[])
19102 {
19103 rtx mask, set, dst, src;
19104 bool use_sse = false;
19105 bool vector_mode = VECTOR_MODE_P (mode);
19106 enum machine_mode vmode = mode;
19107
19108 if (vector_mode)
19109 use_sse = true;
19110 else if (mode == TFmode)
19111 use_sse = true;
19112 else if (TARGET_SSE_MATH)
19113 {
19114 use_sse = SSE_FLOAT_MODE_P (mode);
19115 if (mode == SFmode)
19116 vmode = V4SFmode;
19117 else if (mode == DFmode)
19118 vmode = V2DFmode;
19119 }
19120
19121 /* NEG and ABS performed with SSE use bitwise mask operations.
19122 Create the appropriate mask now. */
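  /* For ABS the mask excludes the sign bit (it is ANDed in); for NEG the
     mask contains only the sign bit (it is XORed in).  */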
19123 if (use_sse)
19124 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19125 else
19126 mask = NULL_RTX;
19127
19128 dst = operands[0];
19129 src = operands[1];
19130
19131 set = gen_rtx_fmt_e (code, mode, src);
19132 set = gen_rtx_SET (VOIDmode, dst, set);
19133
19134 if (mask)
19135 {
19136 rtx use, clob;
19137 rtvec par;
19138
19139 use = gen_rtx_USE (VOIDmode, mask);
19140 if (vector_mode)
19141 par = gen_rtvec (2, set, use);
19142 else
19143 {
19144 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19145 par = gen_rtvec (3, set, use, clob);
19146 }
19147 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19148 }
19149 else
19150 emit_insn (set);
19151 }
19152
19153 /* Expand a copysign operation. Special case operand 0 being a constant. */
19154
19155 void
19156 ix86_expand_copysign (rtx operands[])
19157 {
19158 enum machine_mode mode, vmode;
19159 rtx dest, op0, op1, mask, nmask;
19160
19161 dest = operands[0];
19162 op0 = operands[1];
19163 op1 = operands[2];
19164
19165 mode = GET_MODE (dest);
19166
19167 if (mode == SFmode)
19168 vmode = V4SFmode;
19169 else if (mode == DFmode)
19170 vmode = V2DFmode;
19171 else
19172 vmode = mode;
19173
19174 if (GET_CODE (op0) == CONST_DOUBLE)
19175 {
19176 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19177
19178 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19179 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19180
19181 if (mode == SFmode || mode == DFmode)
19182 {
19183 if (op0 == CONST0_RTX (mode))
19184 op0 = CONST0_RTX (vmode);
19185 else
19186 {
19187 rtx v = ix86_build_const_vector (vmode, false, op0);
19188
19189 op0 = force_reg (vmode, v);
19190 }
19191 }
19192 else if (op0 != CONST0_RTX (mode))
19193 op0 = force_reg (mode, op0);
19194
19195 mask = ix86_build_signbit_mask (vmode, 0, 0);
19196
19197 if (mode == SFmode)
19198 copysign_insn = gen_copysignsf3_const;
19199 else if (mode == DFmode)
19200 copysign_insn = gen_copysigndf3_const;
19201 else
19202 copysign_insn = gen_copysigntf3_const;
19203
19204 emit_insn (copysign_insn (dest, op0, op1, mask));
19205 }
19206 else
19207 {
19208 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19209
19210 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19211 mask = ix86_build_signbit_mask (vmode, 0, 0);
19212
19213 if (mode == SFmode)
19214 copysign_insn = gen_copysignsf3_var;
19215 else if (mode == DFmode)
19216 copysign_insn = gen_copysigndf3_var;
19217 else
19218 copysign_insn = gen_copysigntf3_var;
19219
19220 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19221 }
19222 }
19223
19224 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19225 be a constant, and so has already been expanded into a vector constant. */
19226
19227 void
19228 ix86_split_copysign_const (rtx operands[])
19229 {
19230 enum machine_mode mode, vmode;
19231 rtx dest, op0, mask, x;
19232
19233 dest = operands[0];
19234 op0 = operands[1];
19235 mask = operands[3];
19236
19237 mode = GET_MODE (dest);
19238 vmode = GET_MODE (mask);
19239
19240 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19241 x = gen_rtx_AND (vmode, dest, mask);
19242 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19243
19244 if (op0 != CONST0_RTX (vmode))
19245 {
19246 x = gen_rtx_IOR (vmode, dest, op0);
19247 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19248 }
19249 }
19250
19251 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19252 so we have to do two masks. */
19253
19254 void
19255 ix86_split_copysign_var (rtx operands[])
19256 {
19257 enum machine_mode mode, vmode;
19258 rtx dest, scratch, op0, op1, mask, nmask, x;
19259
19260 dest = operands[0];
19261 scratch = operands[1];
19262 op0 = operands[2];
19263 op1 = operands[3];
19264 nmask = operands[4];
19265 mask = operands[5];
19266
19267 mode = GET_MODE (dest);
19268 vmode = GET_MODE (mask);
19269
19270 if (rtx_equal_p (op0, op1))
19271 {
19272 /* Shouldn't happen often (it's useless, obviously), but when it does
19273 we'd generate incorrect code if we continue below. */
19274 emit_move_insn (dest, op0);
19275 return;
19276 }
19277
19278 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19279 {
19280 gcc_assert (REGNO (op1) == REGNO (scratch));
19281
19282 x = gen_rtx_AND (vmode, scratch, mask);
19283 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19284
19285 dest = mask;
19286 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19287 x = gen_rtx_NOT (vmode, dest);
19288 x = gen_rtx_AND (vmode, x, op0);
19289 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19290 }
19291 else
19292 {
19293 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19294 {
19295 x = gen_rtx_AND (vmode, scratch, mask);
19296 }
19297 else /* alternative 2,4 */
19298 {
19299 gcc_assert (REGNO (mask) == REGNO (scratch));
19300 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19301 x = gen_rtx_AND (vmode, scratch, op1);
19302 }
19303 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19304
19305 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19306 {
19307 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19308 x = gen_rtx_AND (vmode, dest, nmask);
19309 }
19310 else /* alternative 3,4 */
19311 {
19312 gcc_assert (REGNO (nmask) == REGNO (dest));
19313 dest = nmask;
19314 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19315 x = gen_rtx_AND (vmode, dest, op0);
19316 }
19317 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19318 }
19319
19320 x = gen_rtx_IOR (vmode, dest, scratch);
19321 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19322 }
19323
19324 /* Return TRUE or FALSE depending on whether the first SET in INSN
19325 has source and destination with matching CC modes and whether the
19326 CC mode is at least as constrained as REQ_MODE. */
19327
19328 bool
19329 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19330 {
19331 rtx set;
19332 enum machine_mode set_mode;
19333
19334 set = PATTERN (insn);
19335 if (GET_CODE (set) == PARALLEL)
19336 set = XVECEXP (set, 0, 0);
19337 gcc_assert (GET_CODE (set) == SET);
19338 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19339
19340 set_mode = GET_MODE (SET_DEST (set));
19341 switch (set_mode)
19342 {
19343 case CCNOmode:
19344 if (req_mode != CCNOmode
19345 && (req_mode != CCmode
19346 || XEXP (SET_SRC (set), 1) != const0_rtx))
19347 return false;
19348 break;
19349 case CCmode:
19350 if (req_mode == CCGCmode)
19351 return false;
19352 /* FALLTHRU */
19353 case CCGCmode:
19354 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19355 return false;
19356 /* FALLTHRU */
19357 case CCGOCmode:
19358 if (req_mode == CCZmode)
19359 return false;
19360 /* FALLTHRU */
19361 case CCZmode:
19362 break;
19363
19364 case CCAmode:
19365 case CCCmode:
19366 case CCOmode:
19367 case CCSmode:
19368 if (set_mode != req_mode)
19369 return false;
19370 break;
19371
19372 default:
19373 gcc_unreachable ();
19374 }
19375
19376 return GET_MODE (SET_SRC (set)) == set_mode;
19377 }
19378
19379 /* Generate insn patterns to do an integer compare of OPERANDS. */
19380
19381 static rtx
19382 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19383 {
19384 enum machine_mode cmpmode;
19385 rtx tmp, flags;
19386
19387 cmpmode = SELECT_CC_MODE (code, op0, op1);
19388 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19389
19390 /* This is very simple, but making the interface the same as in the
19391 FP case makes the rest of the code easier. */
19392 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19393 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19394
19395 /* Return the test that should be put into the flags user, i.e.
19396 the bcc, scc, or cmov instruction. */
19397 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19398 }
19399
19400 /* Figure out whether to use ordered or unordered fp comparisons.
19401 Return the appropriate mode to use. */
19402
19403 enum machine_mode
19404 ix86_fp_compare_mode (enum rtx_code)
19405 {
19406 /* ??? In order to make all comparisons reversible, we do all comparisons
19407 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19408 all forms of trapping and nontrapping comparisons, we can make inequality
19409 comparisons trapping again, since it results in better code when using
19410 FCOM based compares. */
19411 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19412 }
19413
19414 enum machine_mode
19415 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19416 {
19417 enum machine_mode mode = GET_MODE (op0);
19418
19419 if (SCALAR_FLOAT_MODE_P (mode))
19420 {
19421 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19422 return ix86_fp_compare_mode (code);
19423 }
19424
19425 switch (code)
19426 {
19427 /* Only zero flag is needed. */
19428 case EQ: /* ZF=0 */
19429 case NE: /* ZF!=0 */
19430 return CCZmode;
19431 /* Codes needing carry flag. */
19432 case GEU: /* CF=0 */
19433 case LTU: /* CF=1 */
19434 /* Detect overflow checks. They need just the carry flag. */
19435 if (GET_CODE (op0) == PLUS
19436 && rtx_equal_p (op1, XEXP (op0, 0)))
19437 return CCCmode;
19438 else
19439 return CCmode;
19440 case GTU: /* CF=0 & ZF=0 */
19441 case LEU: /* CF=1 | ZF=1 */
19442 return CCmode;
19443 /* Codes possibly doable only with sign flag when
19444 comparing against zero. */
19445 case GE: /* SF=OF or SF=0 */
19446 case LT: /* SF<>OF or SF=1 */
19447 if (op1 == const0_rtx)
19448 return CCGOCmode;
19449 else
19450 /* For other cases Carry flag is not required. */
19451 return CCGCmode;
19452 /* Codes doable only with the sign flag when comparing
19453 against zero, but we lack a jump instruction for it,
19454 so we need to use relational tests against overflow,
19455 which thus needs to be zero. */
19456 case GT: /* ZF=0 & SF=OF */
19457 case LE: /* ZF=1 | SF<>OF */
19458 if (op1 == const0_rtx)
19459 return CCNOmode;
19460 else
19461 return CCGCmode;
19462 /* The strcmp pattern does (use flags) and combine may ask us for a
19463 proper mode. */
19464 case USE:
19465 return CCmode;
19466 default:
19467 gcc_unreachable ();
19468 }
19469 }
19470
19471 /* Return the fixed registers used for condition codes. */
19472
19473 static bool
19474 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19475 {
19476 *p1 = FLAGS_REG;
19477 *p2 = FPSR_REG;
19478 return true;
19479 }
19480
19481 /* If two condition code modes are compatible, return a condition code
19482 mode which is compatible with both. Otherwise, return
19483 VOIDmode. */
19484
19485 static enum machine_mode
19486 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19487 {
19488 if (m1 == m2)
19489 return m1;
19490
19491 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19492 return VOIDmode;
19493
19494 if ((m1 == CCGCmode && m2 == CCGOCmode)
19495 || (m1 == CCGOCmode && m2 == CCGCmode))
19496 return CCGCmode;
19497
19498 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19499 return m2;
19500 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19501 return m1;
19502
19503 switch (m1)
19504 {
19505 default:
19506 gcc_unreachable ();
19507
19508 case CCmode:
19509 case CCGCmode:
19510 case CCGOCmode:
19511 case CCNOmode:
19512 case CCAmode:
19513 case CCCmode:
19514 case CCOmode:
19515 case CCSmode:
19516 case CCZmode:
19517 switch (m2)
19518 {
19519 default:
19520 return VOIDmode;
19521
19522 case CCmode:
19523 case CCGCmode:
19524 case CCGOCmode:
19525 case CCNOmode:
19526 case CCAmode:
19527 case CCCmode:
19528 case CCOmode:
19529 case CCSmode:
19530 case CCZmode:
19531 return CCmode;
19532 }
19533
19534 case CCFPmode:
19535 case CCFPUmode:
19536 /* These are only compatible with themselves, which we already
19537 checked above. */
19538 return VOIDmode;
19539 }
19540 }
19541
19542
19543 /* Return a comparison we can do that is equivalent to
19544    swap_condition (code), apart possibly from orderedness.
19545    Never change orderedness if TARGET_IEEE_FP, returning
19546    UNKNOWN in that case if necessary.  */
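/* For example, the exact swap of GT would be LT, but we return UNLT, which
   differs from LT only when an operand is unordered (a NaN); with
   TARGET_IEEE_FP that change of orderedness is not allowed, so UNKNOWN is
   returned instead.  */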
19547
19548 static enum rtx_code
19549 ix86_fp_swap_condition (enum rtx_code code)
19550 {
19551 switch (code)
19552 {
19553 case GT: /* GTU - CF=0 & ZF=0 */
19554 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19555 case GE: /* GEU - CF=0 */
19556 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19557 case UNLT: /* LTU - CF=1 */
19558 return TARGET_IEEE_FP ? UNKNOWN : GT;
19559 case UNLE: /* LEU - CF=1 | ZF=1 */
19560 return TARGET_IEEE_FP ? UNKNOWN : GE;
19561 default:
19562 return swap_condition (code);
19563 }
19564 }
19565
19566 /* Return cost of comparison CODE using the best strategy for performance.
19567    All following functions use the number of instructions as the cost metric.
19568    In the future this should be tweaked to compute bytes for optimize_size and
19569 take into account performance of various instructions on various CPUs. */
19570
19571 static int
19572 ix86_fp_comparison_cost (enum rtx_code code)
19573 {
19574 int arith_cost;
19575
19576 /* The cost of code using bit-twiddling on %ah. */
19577 switch (code)
19578 {
19579 case UNLE:
19580 case UNLT:
19581 case LTGT:
19582 case GT:
19583 case GE:
19584 case UNORDERED:
19585 case ORDERED:
19586 case UNEQ:
19587 arith_cost = 4;
19588 break;
19589 case LT:
19590 case NE:
19591 case EQ:
19592 case UNGE:
19593 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19594 break;
19595 case LE:
19596 case UNGT:
19597 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19598 break;
19599 default:
19600 gcc_unreachable ();
19601 }
19602
19603 switch (ix86_fp_comparison_strategy (code))
19604 {
19605 case IX86_FPCMP_COMI:
19606 return arith_cost > 4 ? 3 : 2;
19607 case IX86_FPCMP_SAHF:
19608 return arith_cost > 4 ? 4 : 3;
19609 default:
19610 return arith_cost;
19611 }
19612 }
19613
19614 /* Return the strategy to use for a floating-point comparison.  We assume that
19615    fcomi is always preferable where available, since that is also true when
19616    looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */
19617
19618 enum ix86_fpcmp_strategy
19619 ix86_fp_comparison_strategy (enum rtx_code)
19620 {
19621 /* Do fcomi/sahf based test when profitable. */
19622
19623 if (TARGET_CMOVE)
19624 return IX86_FPCMP_COMI;
19625
19626 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19627 return IX86_FPCMP_SAHF;
19628
19629 return IX86_FPCMP_ARITH;
19630 }
19631
19632 /* Swap, force into registers, or otherwise massage the two operands
19633    to an fp comparison.  The operands are updated in place; the new
19634 comparison code is returned. */
19635
19636 static enum rtx_code
19637 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19638 {
19639 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19640 rtx op0 = *pop0, op1 = *pop1;
19641 enum machine_mode op_mode = GET_MODE (op0);
19642 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19643
19644 /* All of the unordered compare instructions only work on registers.
19645 The same is true of the fcomi compare instructions. The XFmode
19646 compare instructions require registers except when comparing
19647 against zero or when converting operand 1 from fixed point to
19648 floating point. */
19649
19650 if (!is_sse
19651 && (fpcmp_mode == CCFPUmode
19652 || (op_mode == XFmode
19653 && ! (standard_80387_constant_p (op0) == 1
19654 || standard_80387_constant_p (op1) == 1)
19655 && GET_CODE (op1) != FLOAT)
19656 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19657 {
19658 op0 = force_reg (op_mode, op0);
19659 op1 = force_reg (op_mode, op1);
19660 }
19661 else
19662 {
19663 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19664 things around if they appear profitable, otherwise force op0
19665 into a register. */
19666
19667 if (standard_80387_constant_p (op0) == 0
19668 || (MEM_P (op0)
19669 && ! (standard_80387_constant_p (op1) == 0
19670 || MEM_P (op1))))
19671 {
19672 enum rtx_code new_code = ix86_fp_swap_condition (code);
19673 if (new_code != UNKNOWN)
19674 {
19675 rtx tmp;
19676 tmp = op0, op0 = op1, op1 = tmp;
19677 code = new_code;
19678 }
19679 }
19680
19681 if (!REG_P (op0))
19682 op0 = force_reg (op_mode, op0);
19683
19684 if (CONSTANT_P (op1))
19685 {
19686 int tmp = standard_80387_constant_p (op1);
19687 if (tmp == 0)
19688 op1 = validize_mem (force_const_mem (op_mode, op1));
19689 else if (tmp == 1)
19690 {
19691 if (TARGET_CMOVE)
19692 op1 = force_reg (op_mode, op1);
19693 }
19694 else
19695 op1 = force_reg (op_mode, op1);
19696 }
19697 }
19698
19699 /* Try to rearrange the comparison to make it cheaper. */
19700 if (ix86_fp_comparison_cost (code)
19701 > ix86_fp_comparison_cost (swap_condition (code))
19702 && (REG_P (op1) || can_create_pseudo_p ()))
19703 {
19704 rtx tmp;
19705 tmp = op0, op0 = op1, op1 = tmp;
19706 code = swap_condition (code);
19707 if (!REG_P (op0))
19708 op0 = force_reg (op_mode, op0);
19709 }
19710
19711 *pop0 = op0;
19712 *pop1 = op1;
19713 return code;
19714 }
19715
19716 /* Convert the comparison codes we use to represent FP comparisons to the
19717    integer code that will result in a proper branch.  Return UNKNOWN if no
19718    such code is available.  */
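/* For reference: after a comi/ucomi style compare, "greater" leaves CF and ZF
   clear, "less" sets CF, "equal" sets ZF, and "unordered" sets CF, ZF and PF,
   which is why the FP codes map onto the unsigned flag tests below
   (GT -> GTU, GE -> GEU, UNLT -> LTU, UNLE -> LEU).  */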
19719
19720 enum rtx_code
19721 ix86_fp_compare_code_to_integer (enum rtx_code code)
19722 {
19723 switch (code)
19724 {
19725 case GT:
19726 return GTU;
19727 case GE:
19728 return GEU;
19729 case ORDERED:
19730 case UNORDERED:
19731 return code;
19732 break;
19733 case UNEQ:
19734 return EQ;
19735 break;
19736 case UNLT:
19737 return LTU;
19738 break;
19739 case UNLE:
19740 return LEU;
19741 break;
19742 case LTGT:
19743 return NE;
19744 break;
19745 default:
19746 return UNKNOWN;
19747 }
19748 }
19749
19750 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19751
19752 static rtx
19753 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19754 {
19755 enum machine_mode fpcmp_mode, intcmp_mode;
19756 rtx tmp, tmp2;
19757
19758 fpcmp_mode = ix86_fp_compare_mode (code);
19759 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19760
19761 /* Do fcomi/sahf based test when profitable. */
19762 switch (ix86_fp_comparison_strategy (code))
19763 {
19764 case IX86_FPCMP_COMI:
19765 intcmp_mode = fpcmp_mode;
19766 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19767 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19768 tmp);
19769 emit_insn (tmp);
19770 break;
19771
19772 case IX86_FPCMP_SAHF:
19773 intcmp_mode = fpcmp_mode;
19774 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19775 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19776 tmp);
19777
19778 if (!scratch)
19779 scratch = gen_reg_rtx (HImode);
19780 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19781 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19782 break;
19783
19784 case IX86_FPCMP_ARITH:
19785 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19786 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19787 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19788 if (!scratch)
19789 scratch = gen_reg_rtx (HImode);
19790 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19791
19792 /* In the unordered case, we have to check C2 for NaN's, which
19793 doesn't happen to work out to anything nice combination-wise.
19794 So do some bit twiddling on the value we've got in AH to come
19795 up with an appropriate set of condition codes. */
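      /* After fnstsw, AH holds status word bits 8-15, so there C0 is 0x01,
	 C2 is 0x04 and C3 is 0x40; the mask 0x45 used below therefore
	 selects C3|C2|C0.  */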
19796
19797 intcmp_mode = CCNOmode;
19798 switch (code)
19799 {
19800 case GT:
19801 case UNGT:
19802 if (code == GT || !TARGET_IEEE_FP)
19803 {
19804 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19805 code = EQ;
19806 }
19807 else
19808 {
19809 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19810 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19811 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19812 intcmp_mode = CCmode;
19813 code = GEU;
19814 }
19815 break;
19816 case LT:
19817 case UNLT:
19818 if (code == LT && TARGET_IEEE_FP)
19819 {
19820 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19821 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19822 intcmp_mode = CCmode;
19823 code = EQ;
19824 }
19825 else
19826 {
19827 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19828 code = NE;
19829 }
19830 break;
19831 case GE:
19832 case UNGE:
19833 if (code == GE || !TARGET_IEEE_FP)
19834 {
19835 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19836 code = EQ;
19837 }
19838 else
19839 {
19840 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19841 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19842 code = NE;
19843 }
19844 break;
19845 case LE:
19846 case UNLE:
19847 if (code == LE && TARGET_IEEE_FP)
19848 {
19849 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19850 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19851 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19852 intcmp_mode = CCmode;
19853 code = LTU;
19854 }
19855 else
19856 {
19857 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19858 code = NE;
19859 }
19860 break;
19861 case EQ:
19862 case UNEQ:
19863 if (code == EQ && TARGET_IEEE_FP)
19864 {
19865 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19866 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19867 intcmp_mode = CCmode;
19868 code = EQ;
19869 }
19870 else
19871 {
19872 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19873 code = NE;
19874 }
19875 break;
19876 case NE:
19877 case LTGT:
19878 if (code == NE && TARGET_IEEE_FP)
19879 {
19880 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19881 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19882 GEN_INT (0x40)));
19883 code = NE;
19884 }
19885 else
19886 {
19887 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19888 code = EQ;
19889 }
19890 break;
19891
19892 case UNORDERED:
19893 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19894 code = NE;
19895 break;
19896 case ORDERED:
19897 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19898 code = EQ;
19899 break;
19900
19901 default:
19902 gcc_unreachable ();
19903 }
19904 break;
19905
19906 default:
19907 gcc_unreachable();
19908 }
19909
19910 /* Return the test that should be put into the flags user, i.e.
19911 the bcc, scc, or cmov instruction. */
19912 return gen_rtx_fmt_ee (code, VOIDmode,
19913 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19914 const0_rtx);
19915 }
19916
19917 static rtx
19918 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19919 {
19920 rtx ret;
19921
19922 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19923 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19924
19925 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19926 {
19927 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19928 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19929 }
19930 else
19931 ret = ix86_expand_int_compare (code, op0, op1);
19932
19933 return ret;
19934 }
19935
19936 void
19937 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19938 {
19939 enum machine_mode mode = GET_MODE (op0);
19940 rtx tmp;
19941
19942 switch (mode)
19943 {
19944 case SFmode:
19945 case DFmode:
19946 case XFmode:
19947 case QImode:
19948 case HImode:
19949 case SImode:
19950 simple:
19951 tmp = ix86_expand_compare (code, op0, op1);
19952 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19953 gen_rtx_LABEL_REF (VOIDmode, label),
19954 pc_rtx);
19955 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19956 return;
19957
19958 case DImode:
19959 if (TARGET_64BIT)
19960 goto simple;
19961 case TImode:
19962 /* Expand DImode branch into multiple compare+branch. */
19963 {
19964 rtx lo[2], hi[2];
19965 rtx_code_label *label2;
19966 enum rtx_code code1, code2, code3;
19967 enum machine_mode submode;
19968
19969 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19970 {
19971 tmp = op0, op0 = op1, op1 = tmp;
19972 code = swap_condition (code);
19973 }
19974
19975 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19976 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19977
19978 submode = mode == DImode ? SImode : DImode;
19979
19980 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19981 avoid two branches. This costs one extra insn, so disable when
19982 optimizing for size. */
19983
19984 if ((code == EQ || code == NE)
19985 && (!optimize_insn_for_size_p ()
19986 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19987 {
19988 rtx xor0, xor1;
19989
19990 xor1 = hi[0];
19991 if (hi[1] != const0_rtx)
19992 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19993 NULL_RTX, 0, OPTAB_WIDEN);
19994
19995 xor0 = lo[0];
19996 if (lo[1] != const0_rtx)
19997 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19998 NULL_RTX, 0, OPTAB_WIDEN);
19999
20000 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20001 NULL_RTX, 0, OPTAB_WIDEN);
20002
20003 ix86_expand_branch (code, tmp, const0_rtx, label);
20004 return;
20005 }
20006
20007 	  /* Otherwise, if we are doing a less-than or greater-or-equal
20008 	     comparison, op1 is a constant, and its low word is zero, then we
20009 	     can just examine the high word.  Similarly for a low word of -1
20010 	     and less-or-equal or greater-than.  */
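	  /* For instance, on a 32-bit target the signed test x < 0x500000000LL
	     reduces to hi(x) < 5, because the constant's low word is zero.  */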
20011
20012 if (CONST_INT_P (hi[1]))
20013 switch (code)
20014 {
20015 case LT: case LTU: case GE: case GEU:
20016 if (lo[1] == const0_rtx)
20017 {
20018 ix86_expand_branch (code, hi[0], hi[1], label);
20019 return;
20020 }
20021 break;
20022 case LE: case LEU: case GT: case GTU:
20023 if (lo[1] == constm1_rtx)
20024 {
20025 ix86_expand_branch (code, hi[0], hi[1], label);
20026 return;
20027 }
20028 break;
20029 default:
20030 break;
20031 }
20032
20033 /* Otherwise, we need two or three jumps. */
20034
20035 label2 = gen_label_rtx ();
20036
20037 code1 = code;
20038 code2 = swap_condition (code);
20039 code3 = unsigned_condition (code);
20040
20041 switch (code)
20042 {
20043 case LT: case GT: case LTU: case GTU:
20044 break;
20045
20046 case LE: code1 = LT; code2 = GT; break;
20047 case GE: code1 = GT; code2 = LT; break;
20048 case LEU: code1 = LTU; code2 = GTU; break;
20049 case GEU: code1 = GTU; code2 = LTU; break;
20050
20051 case EQ: code1 = UNKNOWN; code2 = NE; break;
20052 case NE: code2 = UNKNOWN; break;
20053
20054 default:
20055 gcc_unreachable ();
20056 }
20057
20058 /*
20059 * a < b =>
20060 * if (hi(a) < hi(b)) goto true;
20061 * if (hi(a) > hi(b)) goto false;
20062 * if (lo(a) < lo(b)) goto true;
20063 * false:
20064 */
20065
20066 if (code1 != UNKNOWN)
20067 ix86_expand_branch (code1, hi[0], hi[1], label);
20068 if (code2 != UNKNOWN)
20069 ix86_expand_branch (code2, hi[0], hi[1], label2);
20070
20071 ix86_expand_branch (code3, lo[0], lo[1], label);
20072
20073 if (code2 != UNKNOWN)
20074 emit_label (label2);
20075 return;
20076 }
20077
20078 default:
20079 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20080 goto simple;
20081 }
20082 }
20083
20084 /* Split branch based on floating point condition. */
20085 void
20086 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20087 rtx target1, rtx target2, rtx tmp)
20088 {
20089 rtx condition;
20090 rtx i;
20091
20092 if (target2 != pc_rtx)
20093 {
20094 rtx tmp = target2;
20095 code = reverse_condition_maybe_unordered (code);
20096 target2 = target1;
20097 target1 = tmp;
20098 }
20099
20100 condition = ix86_expand_fp_compare (code, op1, op2,
20101 tmp);
20102
20103 i = emit_jump_insn (gen_rtx_SET
20104 (VOIDmode, pc_rtx,
20105 gen_rtx_IF_THEN_ELSE (VOIDmode,
20106 condition, target1, target2)));
20107 if (split_branch_probability >= 0)
20108 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20109 }
20110
20111 void
20112 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20113 {
20114 rtx ret;
20115
20116 gcc_assert (GET_MODE (dest) == QImode);
20117
20118 ret = ix86_expand_compare (code, op0, op1);
20119 PUT_MODE (ret, QImode);
20120 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20121 }
20122
20123 /* Expand a comparison setting or clearing the carry flag.  Return true when
20124    successful, and set *POP to the comparison for the operation.  */
20125 static bool
20126 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20127 {
20128 enum machine_mode mode =
20129 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20130
20131   /* Do not handle double-mode compares that go through the special path.  */
20132 if (mode == (TARGET_64BIT ? TImode : DImode))
20133 return false;
20134
20135 if (SCALAR_FLOAT_MODE_P (mode))
20136 {
20137 rtx compare_op;
20138 rtx_insn *compare_seq;
20139
20140 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20141
20142       /* Shortcut:  the following common codes never translate
20143 	 into carry-flag compares.  */
20144 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20145 || code == ORDERED || code == UNORDERED)
20146 return false;
20147
20148       /* These comparisons require the zero flag; swap the operands so they don't.  */
20149 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20150 && !TARGET_IEEE_FP)
20151 {
20152 rtx tmp = op0;
20153 op0 = op1;
20154 op1 = tmp;
20155 code = swap_condition (code);
20156 }
20157
20158       /* Try to expand the comparison and verify that we end up with
20159 	 a carry-flag-based comparison.  This fails only when we decide
20160 	 to expand the comparison using arithmetic, which is not a
20161 	 common scenario.  */
20162 start_sequence ();
20163 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20164 compare_seq = get_insns ();
20165 end_sequence ();
20166
20167 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20168 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20169 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20170 else
20171 code = GET_CODE (compare_op);
20172
20173 if (code != LTU && code != GEU)
20174 return false;
20175
20176 emit_insn (compare_seq);
20177 *pop = compare_op;
20178 return true;
20179 }
20180
20181 if (!INTEGRAL_MODE_P (mode))
20182 return false;
20183
20184 switch (code)
20185 {
20186 case LTU:
20187 case GEU:
20188 break;
20189
20190 /* Convert a==0 into (unsigned)a<1. */
20191 case EQ:
20192 case NE:
20193 if (op1 != const0_rtx)
20194 return false;
20195 op1 = const1_rtx;
20196 code = (code == EQ ? LTU : GEU);
20197 break;
20198
20199       /* Convert a>b into b<a or a>=b+1.  */
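      /* E.g. the unsigned test a > 5 becomes a >= 6 and a <= 5 becomes a < 6;
	 for a non-constant bound we swap the operands instead.  */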
20200 case GTU:
20201 case LEU:
20202 if (CONST_INT_P (op1))
20203 {
20204 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20205 	  /* Bail out on overflow.  We could still swap the operands, but that
20206 	     would force loading of the constant into a register.  */
20207 if (op1 == const0_rtx
20208 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20209 return false;
20210 code = (code == GTU ? GEU : LTU);
20211 }
20212 else
20213 {
20214 rtx tmp = op1;
20215 op1 = op0;
20216 op0 = tmp;
20217 code = (code == GTU ? LTU : GEU);
20218 }
20219 break;
20220
20221 /* Convert a>=0 into (unsigned)a<0x80000000. */
20222 case LT:
20223 case GE:
20224 if (mode == DImode || op1 != const0_rtx)
20225 return false;
20226 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20227 code = (code == LT ? GEU : LTU);
20228 break;
20229 case LE:
20230 case GT:
20231 if (mode == DImode || op1 != constm1_rtx)
20232 return false;
20233 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20234 code = (code == LE ? GEU : LTU);
20235 break;
20236
20237 default:
20238 return false;
20239 }
20240   /* Swapping operands may cause a constant to appear as the first operand.  */
20241 if (!nonimmediate_operand (op0, VOIDmode))
20242 {
20243 if (!can_create_pseudo_p ())
20244 return false;
20245 op0 = force_reg (mode, op0);
20246 }
20247 *pop = ix86_expand_compare (code, op0, op1);
20248 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20249 return true;
20250 }
20251
20252 bool
20253 ix86_expand_int_movcc (rtx operands[])
20254 {
20255 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20256 rtx_insn *compare_seq;
20257 rtx compare_op;
20258 enum machine_mode mode = GET_MODE (operands[0]);
20259 bool sign_bit_compare_p = false;
20260 rtx op0 = XEXP (operands[1], 0);
20261 rtx op1 = XEXP (operands[1], 1);
20262
20263 if (GET_MODE (op0) == TImode
20264 || (GET_MODE (op0) == DImode
20265 && !TARGET_64BIT))
20266 return false;
20267
20268 start_sequence ();
20269 compare_op = ix86_expand_compare (code, op0, op1);
20270 compare_seq = get_insns ();
20271 end_sequence ();
20272
20273 compare_code = GET_CODE (compare_op);
20274
20275 if ((op1 == const0_rtx && (code == GE || code == LT))
20276 || (op1 == constm1_rtx && (code == GT || code == LE)))
20277 sign_bit_compare_p = true;
20278
20279 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20280 HImode insns, we'd be swallowed in word prefix ops. */
20281
20282 if ((mode != HImode || TARGET_FAST_PREFIX)
20283 && (mode != (TARGET_64BIT ? TImode : DImode))
20284 && CONST_INT_P (operands[2])
20285 && CONST_INT_P (operands[3]))
20286 {
20287 rtx out = operands[0];
20288 HOST_WIDE_INT ct = INTVAL (operands[2]);
20289 HOST_WIDE_INT cf = INTVAL (operands[3]);
20290 HOST_WIDE_INT diff;
20291
20292 diff = ct - cf;
20293       /* Sign-bit compares are better done using shifts than by
20294 	 using sbb.  */
20295 if (sign_bit_compare_p
20296 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20297 {
20298 /* Detect overlap between destination and compare sources. */
20299 rtx tmp = out;
20300
20301 if (!sign_bit_compare_p)
20302 {
20303 rtx flags;
20304 bool fpcmp = false;
20305
20306 compare_code = GET_CODE (compare_op);
20307
20308 flags = XEXP (compare_op, 0);
20309
20310 if (GET_MODE (flags) == CCFPmode
20311 || GET_MODE (flags) == CCFPUmode)
20312 {
20313 fpcmp = true;
20314 compare_code
20315 = ix86_fp_compare_code_to_integer (compare_code);
20316 }
20317
20318 	  /* To simplify the rest of the code, restrict to the GEU case.  */
20319 if (compare_code == LTU)
20320 {
20321 HOST_WIDE_INT tmp = ct;
20322 ct = cf;
20323 cf = tmp;
20324 compare_code = reverse_condition (compare_code);
20325 code = reverse_condition (code);
20326 }
20327 else
20328 {
20329 if (fpcmp)
20330 PUT_CODE (compare_op,
20331 reverse_condition_maybe_unordered
20332 (GET_CODE (compare_op)));
20333 else
20334 PUT_CODE (compare_op,
20335 reverse_condition (GET_CODE (compare_op)));
20336 }
20337 diff = ct - cf;
20338
20339 if (reg_overlap_mentioned_p (out, op0)
20340 || reg_overlap_mentioned_p (out, op1))
20341 tmp = gen_reg_rtx (mode);
20342
20343 if (mode == DImode)
20344 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20345 else
20346 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20347 flags, compare_op));
20348 }
20349 else
20350 {
20351 if (code == GT || code == GE)
20352 code = reverse_condition (code);
20353 else
20354 {
20355 HOST_WIDE_INT tmp = ct;
20356 ct = cf;
20357 cf = tmp;
20358 diff = ct - cf;
20359 }
20360 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20361 }
20362
20363 if (diff == 1)
20364 {
20365 /*
20366 * cmpl op0,op1
20367 * sbbl dest,dest
20368 * [addl dest, ct]
20369 *
20370 * Size 5 - 8.
20371 */
20372 if (ct)
20373 tmp = expand_simple_binop (mode, PLUS,
20374 tmp, GEN_INT (ct),
20375 copy_rtx (tmp), 1, OPTAB_DIRECT);
20376 }
20377 else if (cf == -1)
20378 {
20379 /*
20380 * cmpl op0,op1
20381 * sbbl dest,dest
20382 * orl $ct, dest
20383 *
20384 * Size 8.
20385 */
20386 tmp = expand_simple_binop (mode, IOR,
20387 tmp, GEN_INT (ct),
20388 copy_rtx (tmp), 1, OPTAB_DIRECT);
20389 }
20390 else if (diff == -1 && ct)
20391 {
20392 /*
20393 * cmpl op0,op1
20394 * sbbl dest,dest
20395 * notl dest
20396 * [addl dest, cf]
20397 *
20398 * Size 8 - 11.
20399 */
20400 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20401 if (cf)
20402 tmp = expand_simple_binop (mode, PLUS,
20403 copy_rtx (tmp), GEN_INT (cf),
20404 copy_rtx (tmp), 1, OPTAB_DIRECT);
20405 }
20406 else
20407 {
20408 /*
20409 * cmpl op0,op1
20410 * sbbl dest,dest
20411 * [notl dest]
20412 * andl cf - ct, dest
20413 * [addl dest, ct]
20414 *
20415 * Size 8 - 11.
20416 */
20417
20418 if (cf == 0)
20419 {
20420 cf = ct;
20421 ct = 0;
20422 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20423 }
20424
20425 tmp = expand_simple_binop (mode, AND,
20426 copy_rtx (tmp),
20427 gen_int_mode (cf - ct, mode),
20428 copy_rtx (tmp), 1, OPTAB_DIRECT);
20429 if (ct)
20430 tmp = expand_simple_binop (mode, PLUS,
20431 copy_rtx (tmp), GEN_INT (ct),
20432 copy_rtx (tmp), 1, OPTAB_DIRECT);
20433 }
20434
20435 if (!rtx_equal_p (tmp, out))
20436 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20437
20438 return true;
20439 }
20440
20441 if (diff < 0)
20442 {
20443 enum machine_mode cmp_mode = GET_MODE (op0);
20444
20445 HOST_WIDE_INT tmp;
20446 tmp = ct, ct = cf, cf = tmp;
20447 diff = -diff;
20448
20449 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20450 {
20451 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20452
20453 	      /* We may be reversing an unordered compare to a normal compare,
20454 	         which is not valid in general (we may convert a non-trapping
20455 	         condition to a trapping one); however, on i386 we currently
20456 	         emit all comparisons unordered.  */
20457 compare_code = reverse_condition_maybe_unordered (compare_code);
20458 code = reverse_condition_maybe_unordered (code);
20459 }
20460 else
20461 {
20462 compare_code = reverse_condition (compare_code);
20463 code = reverse_condition (code);
20464 }
20465 }
20466
20467 compare_code = UNKNOWN;
20468 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20469 && CONST_INT_P (op1))
20470 {
20471 if (op1 == const0_rtx
20472 && (code == LT || code == GE))
20473 compare_code = code;
20474 else if (op1 == constm1_rtx)
20475 {
20476 if (code == LE)
20477 compare_code = LT;
20478 else if (code == GT)
20479 compare_code = GE;
20480 }
20481 }
20482
20483 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20484 if (compare_code != UNKNOWN
20485 && GET_MODE (op0) == GET_MODE (out)
20486 && (cf == -1 || ct == -1))
20487 {
20488 	      /* If the lea code below could be used, only optimize
20489 		 if it results in a 2-insn sequence.  */
20490
20491 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20492 || diff == 3 || diff == 5 || diff == 9)
20493 || (compare_code == LT && ct == -1)
20494 || (compare_code == GE && cf == -1))
20495 {
20496 /*
20497 * notl op1 (if necessary)
20498 * sarl $31, op1
20499 * orl cf, op1
20500 */
20501 if (ct != -1)
20502 {
20503 cf = ct;
20504 ct = -1;
20505 code = reverse_condition (code);
20506 }
20507
20508 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20509
20510 out = expand_simple_binop (mode, IOR,
20511 out, GEN_INT (cf),
20512 out, 1, OPTAB_DIRECT);
20513 if (out != operands[0])
20514 emit_move_insn (operands[0], out);
20515
20516 return true;
20517 }
20518 }
20519
20520
20521 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20522 || diff == 3 || diff == 5 || diff == 9)
20523 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20524 && (mode != DImode
20525 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20526 {
20527 /*
20528 * xorl dest,dest
20529 * cmpl op1,op2
20530 * setcc dest
20531 * lea cf(dest*(ct-cf)),dest
20532 *
20533 * Size 14.
20534 *
20535 * This also catches the degenerate setcc-only case.
20536 */
20537
20538 rtx tmp;
20539 int nops;
20540
20541 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20542
20543 nops = 0;
20544 	  /* On x86_64 the lea instruction operates on Pmode, so we need
20545 	     to get the arithmetic done in the proper mode to match.  */
20546 if (diff == 1)
20547 tmp = copy_rtx (out);
20548 else
20549 {
20550 rtx out1;
20551 out1 = copy_rtx (out);
20552 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20553 nops++;
20554 if (diff & 1)
20555 {
20556 tmp = gen_rtx_PLUS (mode, tmp, out1);
20557 nops++;
20558 }
20559 }
20560 if (cf != 0)
20561 {
20562 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20563 nops++;
20564 }
20565 if (!rtx_equal_p (tmp, out))
20566 {
20567 if (nops == 1)
20568 out = force_operand (tmp, copy_rtx (out));
20569 else
20570 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20571 }
20572 if (!rtx_equal_p (out, operands[0]))
20573 emit_move_insn (operands[0], copy_rtx (out));
20574
20575 return true;
20576 }
20577
20578 /*
20579 * General case: Jumpful:
20580 * xorl dest,dest cmpl op1, op2
20581 * cmpl op1, op2 movl ct, dest
20582 * setcc dest jcc 1f
20583 * decl dest movl cf, dest
20584 * andl (cf-ct),dest 1:
20585 * addl ct,dest
20586 *
20587 * Size 20. Size 14.
20588 *
20589 * This is reasonably steep, but branch mispredict costs are
20590 * high on modern cpus, so consider failing only if optimizing
20591 * for space.
20592 */
20593
20594 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20595 && BRANCH_COST (optimize_insn_for_speed_p (),
20596 false) >= 2)
20597 {
20598 if (cf == 0)
20599 {
20600 enum machine_mode cmp_mode = GET_MODE (op0);
20601
20602 cf = ct;
20603 ct = 0;
20604
20605 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20606 {
20607 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20608
20609 	      /* We may be reversing an unordered compare to a normal compare,
20610 	         which is not valid in general (we may convert a non-trapping
20611 	         condition to a trapping one); however, on i386 we currently
20612 	         emit all comparisons unordered.  */
20613 code = reverse_condition_maybe_unordered (code);
20614 }
20615 else
20616 {
20617 code = reverse_condition (code);
20618 if (compare_code != UNKNOWN)
20619 compare_code = reverse_condition (compare_code);
20620 }
20621 }
20622
20623 if (compare_code != UNKNOWN)
20624 {
20625 /* notl op1 (if needed)
20626 sarl $31, op1
20627 andl (cf-ct), op1
20628 addl ct, op1
20629
20630 For x < 0 (resp. x <= -1) there will be no notl,
20631 so if possible swap the constants to get rid of the
20632 complement.
20633 True/false will be -1/0 while code below (store flag
20634 followed by decrement) is 0/-1, so the constants need
20635 to be exchanged once more. */
20636
20637 if (compare_code == GE || !cf)
20638 {
20639 code = reverse_condition (code);
20640 compare_code = LT;
20641 }
20642 else
20643 {
20644 HOST_WIDE_INT tmp = cf;
20645 cf = ct;
20646 ct = tmp;
20647 }
20648
20649 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20650 }
20651 else
20652 {
20653 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20654
20655 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20656 constm1_rtx,
20657 copy_rtx (out), 1, OPTAB_DIRECT);
20658 }
20659
20660 out = expand_simple_binop (mode, AND, copy_rtx (out),
20661 gen_int_mode (cf - ct, mode),
20662 copy_rtx (out), 1, OPTAB_DIRECT);
20663 if (ct)
20664 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20665 copy_rtx (out), 1, OPTAB_DIRECT);
20666 if (!rtx_equal_p (out, operands[0]))
20667 emit_move_insn (operands[0], copy_rtx (out));
20668
20669 return true;
20670 }
20671 }
20672
20673 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20674 {
20675 /* Try a few things more with specific constants and a variable. */
20676
20677 optab op;
20678 rtx var, orig_out, out, tmp;
20679
20680 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20681 return false;
20682
20683     /* If one of the two operands is an interesting constant (0 or -1), load
20684        a 0/-1 mask with the code above and mask the variable in with a logical operation.  */
20685
20686 if (CONST_INT_P (operands[2]))
20687 {
20688 var = operands[3];
20689 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20690 operands[3] = constm1_rtx, op = and_optab;
20691 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20692 operands[3] = const0_rtx, op = ior_optab;
20693 else
20694 return false;
20695 }
20696 else if (CONST_INT_P (operands[3]))
20697 {
20698 var = operands[2];
20699 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20700 operands[2] = constm1_rtx, op = and_optab;
20701 	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
20702 operands[2] = const0_rtx, op = ior_optab;
20703 else
20704 return false;
20705 }
20706 else
20707 return false;
20708
20709 orig_out = operands[0];
20710 tmp = gen_reg_rtx (mode);
20711 operands[0] = tmp;
20712
20713 /* Recurse to get the constant loaded. */
20714 if (ix86_expand_int_movcc (operands) == 0)
20715 return false;
20716
20717 /* Mask in the interesting variable. */
20718 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20719 OPTAB_WIDEN);
20720 if (!rtx_equal_p (out, orig_out))
20721 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20722
20723 return true;
20724 }
20725
20726 /*
20727 * For comparison with above,
20728 *
20729 * movl cf,dest
20730 * movl ct,tmp
20731 * cmpl op1,op2
20732 * cmovcc tmp,dest
20733 *
20734 * Size 15.
20735 */
20736
20737 if (! nonimmediate_operand (operands[2], mode))
20738 operands[2] = force_reg (mode, operands[2]);
20739 if (! nonimmediate_operand (operands[3], mode))
20740 operands[3] = force_reg (mode, operands[3]);
20741
20742 if (! register_operand (operands[2], VOIDmode)
20743 && (mode == QImode
20744 || ! register_operand (operands[3], VOIDmode)))
20745 operands[2] = force_reg (mode, operands[2]);
20746
20747 if (mode == QImode
20748 && ! register_operand (operands[3], VOIDmode))
20749 operands[3] = force_reg (mode, operands[3]);
20750
20751 emit_insn (compare_seq);
20752 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20753 gen_rtx_IF_THEN_ELSE (mode,
20754 compare_op, operands[2],
20755 operands[3])));
20756 return true;
20757 }
20758
20759 /* Swap, force into registers, or otherwise massage the two operands
20760 to an sse comparison with a mask result. Thus we differ a bit from
20761 ix86_prepare_fp_compare_args which expects to produce a flags result.
20762
20763 The DEST operand exists to help determine whether to commute commutative
20764 operators. The POP0/POP1 operands are updated in place. The new
20765 comparison code is returned, or UNKNOWN if not implementable. */
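/* For instance, before AVX a GT comparison is rewritten here as LT with the
   operands swapped, since only LT/LE/UNGT/UNGE are supported directly by the
   compare instructions.  */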
20766
20767 static enum rtx_code
20768 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20769 rtx *pop0, rtx *pop1)
20770 {
20771 rtx tmp;
20772
20773 switch (code)
20774 {
20775 case LTGT:
20776 case UNEQ:
20777 /* AVX supports all the needed comparisons. */
20778 if (TARGET_AVX)
20779 break;
20780 /* We have no LTGT as an operator. We could implement it with
20781 NE & ORDERED, but this requires an extra temporary. It's
20782 not clear that it's worth it. */
20783 return UNKNOWN;
20784
20785 case LT:
20786 case LE:
20787 case UNGT:
20788 case UNGE:
20789 /* These are supported directly. */
20790 break;
20791
20792 case EQ:
20793 case NE:
20794 case UNORDERED:
20795 case ORDERED:
20796 /* AVX has 3 operand comparisons, no need to swap anything. */
20797 if (TARGET_AVX)
20798 break;
20799 /* For commutative operators, try to canonicalize the destination
20800 operand to be first in the comparison - this helps reload to
20801 avoid extra moves. */
20802 if (!dest || !rtx_equal_p (dest, *pop1))
20803 break;
20804 /* FALLTHRU */
20805
20806 case GE:
20807 case GT:
20808 case UNLE:
20809 case UNLT:
20810 /* These are not supported directly before AVX, and furthermore
20811 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20812 comparison operands to transform into something that is
20813 supported. */
20814 tmp = *pop0;
20815 *pop0 = *pop1;
20816 *pop1 = tmp;
20817 code = swap_condition (code);
20818 break;
20819
20820 default:
20821 gcc_unreachable ();
20822 }
20823
20824 return code;
20825 }
20826
20827 /* Detect conditional moves that exactly match min/max operational
20828 semantics. Note that this is IEEE safe, as long as we don't
20829 interchange the operands.
20830
20831 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20832 and TRUE if the operation is successful and instructions are emitted. */
20833
20834 static bool
20835 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20836 rtx cmp_op1, rtx if_true, rtx if_false)
20837 {
20838 enum machine_mode mode;
20839 bool is_min;
20840 rtx tmp;
20841
20842 if (code == LT)
20843 ;
20844 else if (code == UNGE)
20845 {
20846 tmp = if_true;
20847 if_true = if_false;
20848 if_false = tmp;
20849 }
20850 else
20851 return false;
20852
20853 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20854 is_min = true;
20855 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20856 is_min = false;
20857 else
20858 return false;
20859
20860 mode = GET_MODE (dest);
20861
20862 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20863 but MODE may be a vector mode and thus not appropriate. */
20864 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20865 {
20866 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20867 rtvec v;
20868
20869 if_true = force_reg (mode, if_true);
20870 v = gen_rtvec (2, if_true, if_false);
20871 tmp = gen_rtx_UNSPEC (mode, v, u);
20872 }
20873 else
20874 {
20875 code = is_min ? SMIN : SMAX;
20876 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20877 }
20878
20879 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20880 return true;
20881 }
20882
20883 /* Expand an sse vector comparison. Return the register with the result. */
20884
20885 static rtx
20886 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20887 rtx op_true, rtx op_false)
20888 {
20889 enum machine_mode mode = GET_MODE (dest);
20890 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20891
20892   /* In the general case the result of the comparison can differ from the operands' type.  */
20893 enum machine_mode cmp_mode;
20894
20895 /* In AVX512F the result of comparison is an integer mask. */
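  /* E.g. comparing two V16SImode operands under AVX512F yields a 16-bit mask,
     i.e. an HImode value, rather than another V16SImode vector.  */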
20896 bool maskcmp = false;
20897 rtx x;
20898
20899 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20900 {
20901 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20902 gcc_assert (cmp_mode != BLKmode);
20903
20904 maskcmp = true;
20905 }
20906 else
20907 cmp_mode = cmp_ops_mode;
20908
20909
20910 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20911 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20912 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20913
20914 if (optimize
20915 || reg_overlap_mentioned_p (dest, op_true)
20916 || reg_overlap_mentioned_p (dest, op_false))
20917 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20918
20919 /* Compare patterns for int modes are unspec in AVX512F only. */
20920 if (maskcmp && (code == GT || code == EQ))
20921 {
20922 rtx (*gen)(rtx, rtx, rtx);
20923
20924 switch (cmp_ops_mode)
20925 {
20926 case V16SImode:
20927 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20928 break;
20929 case V8DImode:
20930 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20931 break;
20932 default:
20933 gen = NULL;
20934 }
20935
20936 if (gen)
20937 {
20938 emit_insn (gen (dest, cmp_op0, cmp_op1));
20939 return dest;
20940 }
20941 }
20942 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20943
20944 if (cmp_mode != mode && !maskcmp)
20945 {
20946 x = force_reg (cmp_ops_mode, x);
20947 convert_move (dest, x, false);
20948 }
20949 else
20950 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20951
20952 return dest;
20953 }
20954
20955 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20956 operations. This is used for both scalar and vector conditional moves. */
20957
20958 static void
20959 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20960 {
20961 enum machine_mode mode = GET_MODE (dest);
20962 enum machine_mode cmpmode = GET_MODE (cmp);
20963
20964 /* In AVX512F the result of comparison is an integer mask. */
20965 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20966
20967 rtx t2, t3, x;
20968
20969 if (vector_all_ones_operand (op_true, mode)
20970 && rtx_equal_p (op_false, CONST0_RTX (mode))
20971 && !maskcmp)
20972 {
20973 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20974 }
20975 else if (op_false == CONST0_RTX (mode)
20976 && !maskcmp)
20977 {
20978 op_true = force_reg (mode, op_true);
20979 x = gen_rtx_AND (mode, cmp, op_true);
20980 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20981 }
20982 else if (op_true == CONST0_RTX (mode)
20983 && !maskcmp)
20984 {
20985 op_false = force_reg (mode, op_false);
20986 x = gen_rtx_NOT (mode, cmp);
20987 x = gen_rtx_AND (mode, x, op_false);
20988 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20989 }
20990 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20991 && !maskcmp)
20992 {
20993 op_false = force_reg (mode, op_false);
20994 x = gen_rtx_IOR (mode, cmp, op_false);
20995 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20996 }
20997 else if (TARGET_XOP
20998 && !maskcmp)
20999 {
21000 op_true = force_reg (mode, op_true);
21001
21002 if (!nonimmediate_operand (op_false, mode))
21003 op_false = force_reg (mode, op_false);
21004
21005 emit_insn (gen_rtx_SET (mode, dest,
21006 gen_rtx_IF_THEN_ELSE (mode, cmp,
21007 op_true,
21008 op_false)));
21009 }
21010 else
21011 {
21012 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21013 rtx d = dest;
21014
21015 if (!nonimmediate_operand (op_true, mode))
21016 op_true = force_reg (mode, op_true);
21017
21018 op_false = force_reg (mode, op_false);
21019
21020 switch (mode)
21021 {
21022 case V4SFmode:
21023 if (TARGET_SSE4_1)
21024 gen = gen_sse4_1_blendvps;
21025 break;
21026 case V2DFmode:
21027 if (TARGET_SSE4_1)
21028 gen = gen_sse4_1_blendvpd;
21029 break;
21030 case V16QImode:
21031 case V8HImode:
21032 case V4SImode:
21033 case V2DImode:
21034 if (TARGET_SSE4_1)
21035 {
21036 gen = gen_sse4_1_pblendvb;
21037 if (mode != V16QImode)
21038 d = gen_reg_rtx (V16QImode);
21039 op_false = gen_lowpart (V16QImode, op_false);
21040 op_true = gen_lowpart (V16QImode, op_true);
21041 cmp = gen_lowpart (V16QImode, cmp);
21042 }
21043 break;
21044 case V8SFmode:
21045 if (TARGET_AVX)
21046 gen = gen_avx_blendvps256;
21047 break;
21048 case V4DFmode:
21049 if (TARGET_AVX)
21050 gen = gen_avx_blendvpd256;
21051 break;
21052 case V32QImode:
21053 case V16HImode:
21054 case V8SImode:
21055 case V4DImode:
21056 if (TARGET_AVX2)
21057 {
21058 gen = gen_avx2_pblendvb;
21059 if (mode != V32QImode)
21060 d = gen_reg_rtx (V32QImode);
21061 op_false = gen_lowpart (V32QImode, op_false);
21062 op_true = gen_lowpart (V32QImode, op_true);
21063 cmp = gen_lowpart (V32QImode, cmp);
21064 }
21065 break;
21066
21067 case V64QImode:
21068 gen = gen_avx512bw_blendmv64qi;
21069 break;
21070 case V32HImode:
21071 gen = gen_avx512bw_blendmv32hi;
21072 break;
21073 case V16SImode:
21074 gen = gen_avx512f_blendmv16si;
21075 break;
21076 case V8DImode:
21077 gen = gen_avx512f_blendmv8di;
21078 break;
21079 case V8DFmode:
21080 gen = gen_avx512f_blendmv8df;
21081 break;
21082 case V16SFmode:
21083 gen = gen_avx512f_blendmv16sf;
21084 break;
21085
21086 default:
21087 break;
21088 }
21089
21090 if (gen != NULL)
21091 {
21092 emit_insn (gen (d, op_false, op_true, cmp));
21093 if (d != dest)
21094 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21095 }
21096 else
21097 {
21098 op_true = force_reg (mode, op_true);
21099
21100 t2 = gen_reg_rtx (mode);
21101 if (optimize)
21102 t3 = gen_reg_rtx (mode);
21103 else
21104 t3 = dest;
21105
21106 x = gen_rtx_AND (mode, op_true, cmp);
21107 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21108
21109 x = gen_rtx_NOT (mode, cmp);
21110 x = gen_rtx_AND (mode, x, op_false);
21111 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21112
21113 x = gen_rtx_IOR (mode, t3, t2);
21114 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21115 }
21116 }
21117 }
21118
21119 /* Expand a floating-point conditional move. Return true if successful. */
21120
21121 bool
21122 ix86_expand_fp_movcc (rtx operands[])
21123 {
21124 enum machine_mode mode = GET_MODE (operands[0]);
21125 enum rtx_code code = GET_CODE (operands[1]);
21126 rtx tmp, compare_op;
21127 rtx op0 = XEXP (operands[1], 0);
21128 rtx op1 = XEXP (operands[1], 1);
21129
21130 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21131 {
21132 enum machine_mode cmode;
21133
21134 /* Since we've no cmove for sse registers, don't force bad register
21135 allocation just to gain access to it. Deny movcc when the
21136 comparison mode doesn't match the move mode. */
21137 cmode = GET_MODE (op0);
21138 if (cmode == VOIDmode)
21139 cmode = GET_MODE (op1);
21140 if (cmode != mode)
21141 return false;
21142
21143 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21144 if (code == UNKNOWN)
21145 return false;
21146
21147 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21148 operands[2], operands[3]))
21149 return true;
21150
21151 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21152 operands[2], operands[3]);
21153 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21154 return true;
21155 }
21156
21157 if (GET_MODE (op0) == TImode
21158 || (GET_MODE (op0) == DImode
21159 && !TARGET_64BIT))
21160 return false;
21161
21162 /* The floating point conditional move instructions don't directly
21163 support conditions resulting from a signed integer comparison. */
21164
21165 compare_op = ix86_expand_compare (code, op0, op1);
21166 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21167 {
21168 tmp = gen_reg_rtx (QImode);
21169 ix86_expand_setcc (tmp, code, op0, op1);
21170
21171 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21172 }
21173
21174 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21175 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21176 operands[2], operands[3])));
21177
21178 return true;
21179 }
21180
21181 /* Expand a floating-point vector conditional move; a vcond operation
21182 rather than a movcc operation. */
21183
21184 bool
21185 ix86_expand_fp_vcond (rtx operands[])
21186 {
21187 enum rtx_code code = GET_CODE (operands[3]);
21188 rtx cmp;
21189
21190 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21191 &operands[4], &operands[5]);
21192 if (code == UNKNOWN)
21193 {
21194 rtx temp;
21195 switch (GET_CODE (operands[3]))
21196 {
21197 case LTGT:
21198 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21199 operands[5], operands[0], operands[0]);
21200 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21201 operands[5], operands[1], operands[2]);
21202 code = AND;
21203 break;
21204 case UNEQ:
21205 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21206 operands[5], operands[0], operands[0]);
21207 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21208 operands[5], operands[1], operands[2]);
21209 code = IOR;
21210 break;
21211 default:
21212 gcc_unreachable ();
21213 }
21214 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21215 OPTAB_DIRECT);
21216 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21217 return true;
21218 }
21219
21220 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21221 operands[5], operands[1], operands[2]))
21222 return true;
21223
21224 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21225 operands[1], operands[2]);
21226 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21227 return true;
21228 }
21229
21230 /* Expand a signed/unsigned integral vector conditional move. */
21231
21232 bool
21233 ix86_expand_int_vcond (rtx operands[])
21234 {
21235 enum machine_mode data_mode = GET_MODE (operands[0]);
21236 enum machine_mode mode = GET_MODE (operands[4]);
21237 enum rtx_code code = GET_CODE (operands[3]);
21238 bool negate = false;
21239 rtx x, cop0, cop1;
21240
21241 cop0 = operands[4];
21242 cop1 = operands[5];
21243
21244 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21245 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21246 if ((code == LT || code == GE)
21247 && data_mode == mode
21248 && cop1 == CONST0_RTX (mode)
21249 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21250 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21251 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21252 && (GET_MODE_SIZE (data_mode) == 16
21253 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21254 {
21255 rtx negop = operands[2 - (code == LT)];
21256 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21257 if (negop == CONST1_RTX (data_mode))
21258 {
21259 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21260 operands[0], 1, OPTAB_DIRECT);
21261 if (res != operands[0])
21262 emit_move_insn (operands[0], res);
21263 return true;
21264 }
21265 else if (GET_MODE_INNER (data_mode) != DImode
21266 && vector_all_ones_operand (negop, data_mode))
21267 {
21268 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21269 operands[0], 0, OPTAB_DIRECT);
21270 if (res != operands[0])
21271 emit_move_insn (operands[0], res);
21272 return true;
21273 }
21274 }
21275
21276 if (!nonimmediate_operand (cop1, mode))
21277 cop1 = force_reg (mode, cop1);
21278 if (!general_operand (operands[1], data_mode))
21279 operands[1] = force_reg (data_mode, operands[1]);
21280 if (!general_operand (operands[2], data_mode))
21281 operands[2] = force_reg (data_mode, operands[2]);
21282
21283 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21284 if (TARGET_XOP
21285 && (mode == V16QImode || mode == V8HImode
21286 || mode == V4SImode || mode == V2DImode))
21287 ;
21288 else
21289 {
21290 /* Canonicalize the comparison to EQ, GT, GTU. */
21291 switch (code)
21292 {
21293 case EQ:
21294 case GT:
21295 case GTU:
21296 break;
21297
21298 case NE:
21299 case LE:
21300 case LEU:
21301 code = reverse_condition (code);
21302 negate = true;
21303 break;
21304
21305 case GE:
21306 case GEU:
21307 code = reverse_condition (code);
21308 negate = true;
21309 /* FALLTHRU */
21310
21311 case LT:
21312 case LTU:
21313 code = swap_condition (code);
21314 x = cop0, cop0 = cop1, cop1 = x;
21315 break;
21316
21317 default:
21318 gcc_unreachable ();
21319 }
21320
21321 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21322 if (mode == V2DImode)
21323 {
21324 switch (code)
21325 {
21326 case EQ:
21327 /* SSE4.1 supports EQ. */
21328 if (!TARGET_SSE4_1)
21329 return false;
21330 break;
21331
21332 case GT:
21333 case GTU:
21334 /* SSE4.2 supports GT/GTU. */
21335 if (!TARGET_SSE4_2)
21336 return false;
21337 break;
21338
21339 default:
21340 gcc_unreachable ();
21341 }
21342 }
21343
21344 /* Unsigned parallel compare is not supported by the hardware.
21345 Play some tricks to turn this into a signed comparison
21346 against 0. */
21347 if (code == GTU)
21348 {
21349 cop0 = force_reg (mode, cop0);
21350
21351 switch (mode)
21352 {
21353 case V16SImode:
21354 case V8DImode:
21355 case V8SImode:
21356 case V4DImode:
21357 case V4SImode:
21358 case V2DImode:
21359 {
21360 rtx t1, t2, mask;
21361 rtx (*gen_sub3) (rtx, rtx, rtx);
21362
21363 switch (mode)
21364 {
21365 case V16SImode: gen_sub3 = gen_subv16si3; break;
21366 case V8DImode: gen_sub3 = gen_subv8di3; break;
21367 case V8SImode: gen_sub3 = gen_subv8si3; break;
21368 case V4DImode: gen_sub3 = gen_subv4di3; break;
21369 case V4SImode: gen_sub3 = gen_subv4si3; break;
21370 case V2DImode: gen_sub3 = gen_subv2di3; break;
21371 default:
21372 gcc_unreachable ();
21373 }
21374 /* Subtract (-(INT MAX) - 1) from both operands to make
21375 them signed. */
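		/* Flipping the sign bit this way preserves the ordering:
		   x >u y iff (x - 0x80..0) >s (y - 0x80..0), so the GTU test
		   collapses to the signed GT emitted below.  */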
21376 mask = ix86_build_signbit_mask (mode, true, false);
21377 t1 = gen_reg_rtx (mode);
21378 emit_insn (gen_sub3 (t1, cop0, mask));
21379
21380 t2 = gen_reg_rtx (mode);
21381 emit_insn (gen_sub3 (t2, cop1, mask));
21382
21383 cop0 = t1;
21384 cop1 = t2;
21385 code = GT;
21386 }
21387 break;
21388
21389 case V64QImode:
21390 case V32HImode:
21391 case V32QImode:
21392 case V16HImode:
21393 case V16QImode:
21394 case V8HImode:
21395 /* Perform a parallel unsigned saturating subtraction. */
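	  /* x >u y exactly when the saturating difference x -us y is nonzero,
	     so we test it for equality with zero and let NEGATE flip the
	     sense back.  */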
21396 x = gen_reg_rtx (mode);
21397 emit_insn (gen_rtx_SET (VOIDmode, x,
21398 gen_rtx_US_MINUS (mode, cop0, cop1)));
21399
21400 cop0 = x;
21401 cop1 = CONST0_RTX (mode);
21402 code = EQ;
21403 negate = !negate;
21404 break;
21405
21406 default:
21407 gcc_unreachable ();
21408 }
21409 }
21410 }
21411
21412 /* Allow the comparison to be done in one mode, but the movcc to
21413 happen in another mode. */
21414 if (data_mode == mode)
21415 {
21416 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21417 operands[1+negate], operands[2-negate]);
21418 }
21419 else
21420 {
21421 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21422 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21423 operands[1+negate], operands[2-negate]);
21424 if (GET_MODE (x) == mode)
21425 x = gen_lowpart (data_mode, x);
21426 }
21427
21428 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21429 operands[2-negate]);
21430 return true;
21431 }
21432
21433 /* AVX512F does support 64-byte integer vector operations,
21434 thus the longest vector we are faced with is V64QImode. */
21435 #define MAX_VECT_LEN 64
21436
21437 struct expand_vec_perm_d
21438 {
21439 rtx target, op0, op1;
21440 unsigned char perm[MAX_VECT_LEN];
21441 enum machine_mode vmode;
21442 unsigned char nelt;
21443 bool one_operand_p;
21444 bool testing_p;
21445 };
21446
21447 static bool
21448 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21449 struct expand_vec_perm_d *d)
21450 {
21451 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21452      expanders, so the arguments are either in D, or in OP0, OP1, etc.  */
21453 enum machine_mode mode = GET_MODE (d ? d->op0 : op0);
21454 enum machine_mode maskmode = mode;
21455 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21456
21457 switch (mode)
21458 {
21459 case V8HImode:
21460 if (TARGET_AVX512VL && TARGET_AVX512BW)
21461 gen = gen_avx512vl_vpermi2varv8hi3;
21462 break;
21463 case V16HImode:
21464 if (TARGET_AVX512VL && TARGET_AVX512BW)
21465 gen = gen_avx512vl_vpermi2varv16hi3;
21466 break;
21467 case V32HImode:
21468 if (TARGET_AVX512BW)
21469 gen = gen_avx512bw_vpermi2varv32hi3;
21470 break;
21471 case V4SImode:
21472 if (TARGET_AVX512VL)
21473 gen = gen_avx512vl_vpermi2varv4si3;
21474 break;
21475 case V8SImode:
21476 if (TARGET_AVX512VL)
21477 gen = gen_avx512vl_vpermi2varv8si3;
21478 break;
21479 case V16SImode:
21480 if (TARGET_AVX512F)
21481 gen = gen_avx512f_vpermi2varv16si3;
21482 break;
21483 case V4SFmode:
21484 if (TARGET_AVX512VL)
21485 {
21486 gen = gen_avx512vl_vpermi2varv4sf3;
21487 maskmode = V4SImode;
21488 }
21489 break;
21490 case V8SFmode:
21491 if (TARGET_AVX512VL)
21492 {
21493 gen = gen_avx512vl_vpermi2varv8sf3;
21494 maskmode = V8SImode;
21495 }
21496 break;
21497 case V16SFmode:
21498 if (TARGET_AVX512F)
21499 {
21500 gen = gen_avx512f_vpermi2varv16sf3;
21501 maskmode = V16SImode;
21502 }
21503 break;
21504 case V2DImode:
21505 if (TARGET_AVX512VL)
21506 gen = gen_avx512vl_vpermi2varv2di3;
21507 break;
21508 case V4DImode:
21509 if (TARGET_AVX512VL)
21510 gen = gen_avx512vl_vpermi2varv4di3;
21511 break;
21512 case V8DImode:
21513 if (TARGET_AVX512F)
21514 gen = gen_avx512f_vpermi2varv8di3;
21515 break;
21516 case V2DFmode:
21517 if (TARGET_AVX512VL)
21518 {
21519 gen = gen_avx512vl_vpermi2varv2df3;
21520 maskmode = V2DImode;
21521 }
21522 break;
21523 case V4DFmode:
21524 if (TARGET_AVX512VL)
21525 {
21526 gen = gen_avx512vl_vpermi2varv4df3;
21527 maskmode = V4DImode;
21528 }
21529 break;
21530 case V8DFmode:
21531 if (TARGET_AVX512F)
21532 {
21533 gen = gen_avx512f_vpermi2varv8df3;
21534 maskmode = V8DImode;
21535 }
21536 break;
21537 default:
21538 break;
21539 }
21540
21541 if (gen == NULL)
21542 return false;
21543
21544 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21545      expanders, so the arguments are either in D, or in OP0, OP1, etc.  */
21546 if (d)
21547 {
21548 rtx vec[64];
21549 target = d->target;
21550 op0 = d->op0;
21551 op1 = d->op1;
21552 for (int i = 0; i < d->nelt; ++i)
21553 vec[i] = GEN_INT (d->perm[i]);
21554 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
21555 }
21556
21557 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
21558 return true;
21559 }
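/* Informally, the vpermi2var patterns used above implement a full
   two-source permutation: element I of TARGET becomes OP0[MASK[I]] when
   the (masked) index is below NELT and OP1[MASK[I] - NELT] otherwise.
   For example (illustrative values), a V4SImode mask of { 0, 5, 2, 7 }
   yields { op0[0], op1[1], op0[2], op1[3] }.  */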
21560
21561 /* Expand a variable vector permutation. */
21562
21563 void
21564 ix86_expand_vec_perm (rtx operands[])
21565 {
21566 rtx target = operands[0];
21567 rtx op0 = operands[1];
21568 rtx op1 = operands[2];
21569 rtx mask = operands[3];
21570 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21571 enum machine_mode mode = GET_MODE (op0);
21572 enum machine_mode maskmode = GET_MODE (mask);
21573 int w, e, i;
21574 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21575
21576 /* Number of elements in the vector. */
21577 w = GET_MODE_NUNITS (mode);
21578 e = GET_MODE_UNIT_SIZE (mode);
21579 gcc_assert (w <= 64);
21580
21581 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
21582 return;
21583
21584 if (TARGET_AVX2)
21585 {
21586 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21587 {
21588 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21589 a constant shuffle operand. With a tiny bit of effort we can
21590 use VPERMD instead. A re-interpretation stall for V4DFmode is
21591 unfortunate but there's no avoiding it.
21592 Similarly for V16HImode we don't have instructions for variable
21593 shuffling, while for V32QImode, after preparing suitable masks,
21594 we can use vpshufb; vpshufb; vpermq; vpor. */
21595
21596 if (mode == V16HImode)
21597 {
21598 maskmode = mode = V32QImode;
21599 w = 32;
21600 e = 1;
21601 }
21602 else
21603 {
21604 maskmode = mode = V8SImode;
21605 w = 8;
21606 e = 4;
21607 }
21608 t1 = gen_reg_rtx (maskmode);
21609
21610 /* Replicate the low bits of the V4DImode mask into V8SImode:
21611 mask = { A B C D }
21612 t1 = { A A B B C C D D }. */
21613 for (i = 0; i < w / 2; ++i)
21614 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21615 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21616 vt = force_reg (maskmode, vt);
21617 mask = gen_lowpart (maskmode, mask);
21618 if (maskmode == V8SImode)
21619 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21620 else
21621 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21622
21623 /* Multiply the shuffle indices by two. */
21624 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21625 OPTAB_DIRECT);
21626
21627 /* Add one to the odd shuffle indices:
21628 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21629 for (i = 0; i < w / 2; ++i)
21630 {
21631 vec[i * 2] = const0_rtx;
21632 vec[i * 2 + 1] = const1_rtx;
21633 }
21634 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21635 vt = validize_mem (force_const_mem (maskmode, vt));
21636 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21637 OPTAB_DIRECT);
21638
21639 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21640 operands[3] = mask = t1;
21641 target = gen_reg_rtx (mode);
21642 op0 = gen_lowpart (mode, op0);
21643 op1 = gen_lowpart (mode, op1);
21644 }
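/* A worked example of the mask transformation above (the selector values
   are illustrative): a V4DImode selector { 1 3 0 2 } is first replicated
   to { 1 1 3 3 0 0 2 2 }, doubled to { 2 2 6 6 0 0 4 4 } and then turned
   into { 2 3 6 7 0 1 4 5 }, which makes the V8SImode VPERMD move the same
   64-bit elements as pairs of 32-bit elements.  */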
21645
21646 switch (mode)
21647 {
21648 case V8SImode:
21649 /* The VPERMD and VPERMPS instructions already properly ignore
21650 the high bits of the shuffle elements. No need for us to
21651 perform an AND ourselves. */
21652 if (one_operand_shuffle)
21653 {
21654 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21655 if (target != operands[0])
21656 emit_move_insn (operands[0],
21657 gen_lowpart (GET_MODE (operands[0]), target));
21658 }
21659 else
21660 {
21661 t1 = gen_reg_rtx (V8SImode);
21662 t2 = gen_reg_rtx (V8SImode);
21663 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21664 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21665 goto merge_two;
21666 }
21667 return;
21668
21669 case V8SFmode:
21670 mask = gen_lowpart (V8SImode, mask);
21671 if (one_operand_shuffle)
21672 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21673 else
21674 {
21675 t1 = gen_reg_rtx (V8SFmode);
21676 t2 = gen_reg_rtx (V8SFmode);
21677 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21678 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21679 goto merge_two;
21680 }
21681 return;
21682
21683 case V4SImode:
21684 /* By combining the two 128-bit input vectors into one 256-bit
21685 input vector, we can use VPERMD and VPERMPS for the full
21686 two-operand shuffle. */
21687 t1 = gen_reg_rtx (V8SImode);
21688 t2 = gen_reg_rtx (V8SImode);
21689 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21690 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21691 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21692 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21693 return;
21694
21695 case V4SFmode:
21696 t1 = gen_reg_rtx (V8SFmode);
21697 t2 = gen_reg_rtx (V8SImode);
21698 mask = gen_lowpart (V4SImode, mask);
21699 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21700 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21701 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21702 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21703 return;
21704
21705 case V32QImode:
21706 t1 = gen_reg_rtx (V32QImode);
21707 t2 = gen_reg_rtx (V32QImode);
21708 t3 = gen_reg_rtx (V32QImode);
21709 vt2 = GEN_INT (-128);
21710 for (i = 0; i < 32; i++)
21711 vec[i] = vt2;
21712 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21713 vt = force_reg (V32QImode, vt);
21714 for (i = 0; i < 32; i++)
21715 vec[i] = i < 16 ? vt2 : const0_rtx;
21716 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21717 vt2 = force_reg (V32QImode, vt2);
21718 /* From mask create two adjusted masks, which contain the same
21719 bits as mask in the low 7 bits of each vector element.
21720 The first mask will have the most significant bit clear
21721 if it requests element from the same 128-bit lane
21722 and MSB set if it requests element from the other 128-bit lane.
21723 The second mask will have the opposite values of the MSB,
21724 and additionally will have its 128-bit lanes swapped.
21725 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21726 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21727 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21728 stands for the other 12 bytes. */
21729 /* The bit that tells whether an element is from the same lane or the
21730 other lane is bit 4, so shift it up by 3 to the MSB position. */
21731 t5 = gen_reg_rtx (V4DImode);
21732 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21733 GEN_INT (3)));
21734 /* Clear MSB bits from the mask just in case it had them set. */
21735 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21736 /* After this t1 will have MSB set for elements from other lane. */
21737 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21738 /* Clear bits other than MSB. */
21739 emit_insn (gen_andv32qi3 (t1, t1, vt));
21740 /* Or in the lower bits from mask into t3. */
21741 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21742 /* And invert MSB bits in t1, so MSB is set for elements from the same
21743 lane. */
21744 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21745 /* Swap 128-bit lanes in t3. */
21746 t6 = gen_reg_rtx (V4DImode);
21747 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21748 const2_rtx, GEN_INT (3),
21749 const0_rtx, const1_rtx));
21750 /* And or in the lower bits from mask into t1. */
21751 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21752 if (one_operand_shuffle)
21753 {
21754 /* Each of these shuffles will put 0s in places where
21755 element from the other 128-bit lane is needed, otherwise
21756 will shuffle in the requested value. */
21757 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21758 gen_lowpart (V32QImode, t6)));
21759 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21760 /* For t3 the 128-bit lanes are swapped again. */
21761 t7 = gen_reg_rtx (V4DImode);
21762 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21763 const2_rtx, GEN_INT (3),
21764 const0_rtx, const1_rtx));
21765 /* And oring both together leads to the result. */
21766 emit_insn (gen_iorv32qi3 (target, t1,
21767 gen_lowpart (V32QImode, t7)));
21768 if (target != operands[0])
21769 emit_move_insn (operands[0],
21770 gen_lowpart (GET_MODE (operands[0]), target));
21771 return;
21772 }
21773
21774 t4 = gen_reg_rtx (V32QImode);
21775 /* Similarly to the above one_operand_shuffle code,
21776 just repeated twice for each operand. The merge_two:
21777 code will merge the two results together. */
21778 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21779 gen_lowpart (V32QImode, t6)));
21780 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21781 gen_lowpart (V32QImode, t6)));
21782 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21783 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21784 t7 = gen_reg_rtx (V4DImode);
21785 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21786 const2_rtx, GEN_INT (3),
21787 const0_rtx, const1_rtx));
21788 t8 = gen_reg_rtx (V4DImode);
21789 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21790 const2_rtx, GEN_INT (3),
21791 const0_rtx, const1_rtx));
21792 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21793 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21794 t1 = t4;
21795 t2 = t3;
21796 goto merge_two;
21797
21798 default:
21799 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21800 break;
21801 }
21802 }
21803
21804 if (TARGET_XOP)
21805 {
21806 /* The XOP VPPERM insn supports three inputs. By ignoring the
21807 one_operand_shuffle special case, we avoid creating another
21808 set of constant vectors in memory. */
21809 one_operand_shuffle = false;
21810
21811 /* mask = mask & {2*w-1, ...} */
21812 vt = GEN_INT (2*w - 1);
21813 }
21814 else
21815 {
21816 /* mask = mask & {w-1, ...} */
21817 vt = GEN_INT (w - 1);
21818 }
21819
21820 for (i = 0; i < w; i++)
21821 vec[i] = vt;
21822 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21823 mask = expand_simple_binop (maskmode, AND, mask, vt,
21824 NULL_RTX, 0, OPTAB_DIRECT);
21825
21826 /* For non-QImode operations, convert the word permutation control
21827 into a byte permutation control. */
21828 if (mode != V16QImode)
21829 {
21830 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21831 GEN_INT (exact_log2 (e)),
21832 NULL_RTX, 0, OPTAB_DIRECT);
21833
21834 /* Convert mask to vector of chars. */
21835 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21836
21837 /* Replicate each of the input bytes into byte positions:
21838 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21839 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21840 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21841 for (i = 0; i < 16; ++i)
21842 vec[i] = GEN_INT (i/e * e);
21843 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21844 vt = validize_mem (force_const_mem (V16QImode, vt));
21845 if (TARGET_XOP)
21846 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21847 else
21848 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21849
21850 /* Convert it into the byte positions by doing
21851 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21852 for (i = 0; i < 16; ++i)
21853 vec[i] = GEN_INT (i % e);
21854 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21855 vt = validize_mem (force_const_mem (V16QImode, vt));
21856 emit_insn (gen_addv16qi3 (mask, mask, vt));
21857 }
21858
21859 /* The actual shuffle operations all operate on V16QImode. */
21860 op0 = gen_lowpart (V16QImode, op0);
21861 op1 = gen_lowpart (V16QImode, op1);
21862
21863 if (TARGET_XOP)
21864 {
21865 if (GET_MODE (target) != V16QImode)
21866 target = gen_reg_rtx (V16QImode);
21867 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21868 if (target != operands[0])
21869 emit_move_insn (operands[0],
21870 gen_lowpart (GET_MODE (operands[0]), target));
21871 }
21872 else if (one_operand_shuffle)
21873 {
21874 if (GET_MODE (target) != V16QImode)
21875 target = gen_reg_rtx (V16QImode);
21876 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21877 if (target != operands[0])
21878 emit_move_insn (operands[0],
21879 gen_lowpart (GET_MODE (operands[0]), target));
21880 }
21881 else
21882 {
21883 rtx xops[6];
21884 bool ok;
21885
21886 /* Shuffle the two input vectors independently. */
21887 t1 = gen_reg_rtx (V16QImode);
21888 t2 = gen_reg_rtx (V16QImode);
21889 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21890 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21891
21892 merge_two:
21893 /* Then merge them together. The key is whether any given control
21894 element contained a bit set that indicates the second word. */
21895 mask = operands[3];
21896 vt = GEN_INT (w);
21897 if (maskmode == V2DImode && !TARGET_SSE4_1)
21898 {
21899 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21900 more shuffle to convert the V2DI input mask into a V4SI
21901 input mask. At that point the masking that expand_int_vcond
21902 performs will work as desired. */
21903 rtx t3 = gen_reg_rtx (V4SImode);
21904 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21905 const0_rtx, const0_rtx,
21906 const2_rtx, const2_rtx));
21907 mask = t3;
21908 maskmode = V4SImode;
21909 e = w = 4;
21910 }
21911
21912 for (i = 0; i < w; i++)
21913 vec[i] = vt;
21914 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21915 vt = force_reg (maskmode, vt);
21916 mask = expand_simple_binop (maskmode, AND, mask, vt,
21917 NULL_RTX, 0, OPTAB_DIRECT);
21918
21919 if (GET_MODE (target) != mode)
21920 target = gen_reg_rtx (mode);
21921 xops[0] = target;
21922 xops[1] = gen_lowpart (mode, t2);
21923 xops[2] = gen_lowpart (mode, t1);
21924 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21925 xops[4] = mask;
21926 xops[5] = vt;
21927 ok = ix86_expand_int_vcond (xops);
21928 gcc_assert (ok);
21929 if (target != operands[0])
21930 emit_move_insn (operands[0],
21931 gen_lowpart (GET_MODE (operands[0]), target));
21932 }
21933 }
21934
21935 /* Unpack SRC into DEST, which has the next wider integer vector type.
21936 UNSIGNED_P is true if we should do zero extension, else sign extension.
21937 HIGH_P is true if we want the N/2 high elements, else the low elements. */
21938
21939 void
21940 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21941 {
21942 enum machine_mode imode = GET_MODE (src);
21943 rtx tmp;
21944
21945 if (TARGET_SSE4_1)
21946 {
21947 rtx (*unpack)(rtx, rtx);
21948 rtx (*extract)(rtx, rtx) = NULL;
21949 enum machine_mode halfmode = BLKmode;
21950
21951 switch (imode)
21952 {
21953 case V64QImode:
21954 if (unsigned_p)
21955 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
21956 else
21957 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
21958 halfmode = V32QImode;
21959 extract
21960 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
21961 break;
21962 case V32QImode:
21963 if (unsigned_p)
21964 unpack = gen_avx2_zero_extendv16qiv16hi2;
21965 else
21966 unpack = gen_avx2_sign_extendv16qiv16hi2;
21967 halfmode = V16QImode;
21968 extract
21969 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21970 break;
21971 case V32HImode:
21972 if (unsigned_p)
21973 unpack = gen_avx512f_zero_extendv16hiv16si2;
21974 else
21975 unpack = gen_avx512f_sign_extendv16hiv16si2;
21976 halfmode = V16HImode;
21977 extract
21978 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21979 break;
21980 case V16HImode:
21981 if (unsigned_p)
21982 unpack = gen_avx2_zero_extendv8hiv8si2;
21983 else
21984 unpack = gen_avx2_sign_extendv8hiv8si2;
21985 halfmode = V8HImode;
21986 extract
21987 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21988 break;
21989 case V16SImode:
21990 if (unsigned_p)
21991 unpack = gen_avx512f_zero_extendv8siv8di2;
21992 else
21993 unpack = gen_avx512f_sign_extendv8siv8di2;
21994 halfmode = V8SImode;
21995 extract
21996 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21997 break;
21998 case V8SImode:
21999 if (unsigned_p)
22000 unpack = gen_avx2_zero_extendv4siv4di2;
22001 else
22002 unpack = gen_avx2_sign_extendv4siv4di2;
22003 halfmode = V4SImode;
22004 extract
22005 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22006 break;
22007 case V16QImode:
22008 if (unsigned_p)
22009 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22010 else
22011 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22012 break;
22013 case V8HImode:
22014 if (unsigned_p)
22015 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22016 else
22017 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22018 break;
22019 case V4SImode:
22020 if (unsigned_p)
22021 unpack = gen_sse4_1_zero_extendv2siv2di2;
22022 else
22023 unpack = gen_sse4_1_sign_extendv2siv2di2;
22024 break;
22025 default:
22026 gcc_unreachable ();
22027 }
22028
22029 if (GET_MODE_SIZE (imode) >= 32)
22030 {
22031 tmp = gen_reg_rtx (halfmode);
22032 emit_insn (extract (tmp, src));
22033 }
22034 else if (high_p)
22035 {
22036 /* Shift higher 8 bytes to lower 8 bytes. */
22037 tmp = gen_reg_rtx (V1TImode);
22038 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22039 GEN_INT (64)));
22040 tmp = gen_lowpart (imode, tmp);
22041 }
22042 else
22043 tmp = src;
22044
22045 emit_insn (unpack (dest, tmp));
22046 }
22047 else
22048 {
22049 rtx (*unpack)(rtx, rtx, rtx);
22050
22051 switch (imode)
22052 {
22053 case V16QImode:
22054 if (high_p)
22055 unpack = gen_vec_interleave_highv16qi;
22056 else
22057 unpack = gen_vec_interleave_lowv16qi;
22058 break;
22059 case V8HImode:
22060 if (high_p)
22061 unpack = gen_vec_interleave_highv8hi;
22062 else
22063 unpack = gen_vec_interleave_lowv8hi;
22064 break;
22065 case V4SImode:
22066 if (high_p)
22067 unpack = gen_vec_interleave_highv4si;
22068 else
22069 unpack = gen_vec_interleave_lowv4si;
22070 break;
22071 default:
22072 gcc_unreachable ();
22073 }
22074
22075 if (unsigned_p)
22076 tmp = force_reg (imode, CONST0_RTX (imode));
22077 else
22078 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22079 src, pc_rtx, pc_rtx);
22080
22081 rtx tmp2 = gen_reg_rtx (imode);
22082 emit_insn (unpack (tmp2, src, tmp));
22083 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22084 }
22085 }
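/* For example, sign-extending the high half of a V16QImode SRC
   (UNSIGNED_P false, HIGH_P true) on an SSE4.1 target shifts the upper
   8 bytes down with psrldq and then widens them with pmovsxbw; without
   SSE4.1 the same result is obtained by interleaving SRC with a vector
   of its sign bits computed via pcmpgtb.  */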
22086
22087 /* Expand conditional increment or decrement using adc/sbb instructions.
22088 The default case using setcc followed by a conditional move can be
22089 done by generic code. */
22090 bool
22091 ix86_expand_int_addcc (rtx operands[])
22092 {
22093 enum rtx_code code = GET_CODE (operands[1]);
22094 rtx flags;
22095 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22096 rtx compare_op;
22097 rtx val = const0_rtx;
22098 bool fpcmp = false;
22099 enum machine_mode mode;
22100 rtx op0 = XEXP (operands[1], 0);
22101 rtx op1 = XEXP (operands[1], 1);
22102
22103 if (operands[3] != const1_rtx
22104 && operands[3] != constm1_rtx)
22105 return false;
22106 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22107 return false;
22108 code = GET_CODE (compare_op);
22109
22110 flags = XEXP (compare_op, 0);
22111
22112 if (GET_MODE (flags) == CCFPmode
22113 || GET_MODE (flags) == CCFPUmode)
22114 {
22115 fpcmp = true;
22116 code = ix86_fp_compare_code_to_integer (code);
22117 }
22118
22119 if (code != LTU)
22120 {
22121 val = constm1_rtx;
22122 if (fpcmp)
22123 PUT_CODE (compare_op,
22124 reverse_condition_maybe_unordered
22125 (GET_CODE (compare_op)));
22126 else
22127 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22128 }
22129
22130 mode = GET_MODE (operands[0]);
22131
22132 /* Construct either adc or sbb insn. */
22133 if ((code == LTU) == (operands[3] == constm1_rtx))
22134 {
22135 switch (mode)
22136 {
22137 case QImode:
22138 insn = gen_subqi3_carry;
22139 break;
22140 case HImode:
22141 insn = gen_subhi3_carry;
22142 break;
22143 case SImode:
22144 insn = gen_subsi3_carry;
22145 break;
22146 case DImode:
22147 insn = gen_subdi3_carry;
22148 break;
22149 default:
22150 gcc_unreachable ();
22151 }
22152 }
22153 else
22154 {
22155 switch (mode)
22156 {
22157 case QImode:
22158 insn = gen_addqi3_carry;
22159 break;
22160 case HImode:
22161 insn = gen_addhi3_carry;
22162 break;
22163 case SImode:
22164 insn = gen_addsi3_carry;
22165 break;
22166 case DImode:
22167 insn = gen_adddi3_carry;
22168 break;
22169 default:
22170 gcc_unreachable ();
22171 }
22172 }
22173 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22174
22175 return true;
22176 }
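/* For example, for the unsigned sequence "if (a < b) x++;" this expands
   to a compare that leaves the carry flag set when a < b, followed by
   "adc x, 0", so the increment happens without a setcc or a conditional
   move.  The decrement case uses sbb in the same way.  */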
22177
22178
22179 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22180 but works for floating point parameters and non-offsettable memories.
22181 For pushes, it returns just stack offsets; the values will be saved
22182 in the right order. At most four parts are generated. */
22183
22184 static int
22185 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
22186 {
22187 int size;
22188
22189 if (!TARGET_64BIT)
22190 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22191 else
22192 size = (GET_MODE_SIZE (mode) + 4) / 8;
22193
22194 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22195 gcc_assert (size >= 2 && size <= 4);
22196
22197 /* Optimize constant pool reference to immediates. This is used by fp
22198 moves, which force all constants to memory to allow combining. */
22199 if (MEM_P (operand) && MEM_READONLY_P (operand))
22200 {
22201 rtx tmp = maybe_get_pool_constant (operand);
22202 if (tmp)
22203 operand = tmp;
22204 }
22205
22206 if (MEM_P (operand) && !offsettable_memref_p (operand))
22207 {
22208 /* The only non-offsettable memories we handle are pushes. */
22209 int ok = push_operand (operand, VOIDmode);
22210
22211 gcc_assert (ok);
22212
22213 operand = copy_rtx (operand);
22214 PUT_MODE (operand, word_mode);
22215 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22216 return size;
22217 }
22218
22219 if (GET_CODE (operand) == CONST_VECTOR)
22220 {
22221 enum machine_mode imode = int_mode_for_mode (mode);
22222 /* Caution: if we looked through a constant pool memory above,
22223 the operand may actually have a different mode now. That's
22224 ok, since we want to pun this all the way back to an integer. */
22225 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22226 gcc_assert (operand != NULL);
22227 mode = imode;
22228 }
22229
22230 if (!TARGET_64BIT)
22231 {
22232 if (mode == DImode)
22233 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22234 else
22235 {
22236 int i;
22237
22238 if (REG_P (operand))
22239 {
22240 gcc_assert (reload_completed);
22241 for (i = 0; i < size; i++)
22242 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22243 }
22244 else if (offsettable_memref_p (operand))
22245 {
22246 operand = adjust_address (operand, SImode, 0);
22247 parts[0] = operand;
22248 for (i = 1; i < size; i++)
22249 parts[i] = adjust_address (operand, SImode, 4 * i);
22250 }
22251 else if (GET_CODE (operand) == CONST_DOUBLE)
22252 {
22253 REAL_VALUE_TYPE r;
22254 long l[4];
22255
22256 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22257 switch (mode)
22258 {
22259 case TFmode:
22260 real_to_target (l, &r, mode);
22261 parts[3] = gen_int_mode (l[3], SImode);
22262 parts[2] = gen_int_mode (l[2], SImode);
22263 break;
22264 case XFmode:
22265 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22266 long double may not be 80-bit. */
22267 real_to_target (l, &r, mode);
22268 parts[2] = gen_int_mode (l[2], SImode);
22269 break;
22270 case DFmode:
22271 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22272 break;
22273 default:
22274 gcc_unreachable ();
22275 }
22276 parts[1] = gen_int_mode (l[1], SImode);
22277 parts[0] = gen_int_mode (l[0], SImode);
22278 }
22279 else
22280 gcc_unreachable ();
22281 }
22282 }
22283 else
22284 {
22285 if (mode == TImode)
22286 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22287 if (mode == XFmode || mode == TFmode)
22288 {
22289 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22290 if (REG_P (operand))
22291 {
22292 gcc_assert (reload_completed);
22293 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22294 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22295 }
22296 else if (offsettable_memref_p (operand))
22297 {
22298 operand = adjust_address (operand, DImode, 0);
22299 parts[0] = operand;
22300 parts[1] = adjust_address (operand, upper_mode, 8);
22301 }
22302 else if (GET_CODE (operand) == CONST_DOUBLE)
22303 {
22304 REAL_VALUE_TYPE r;
22305 long l[4];
22306
22307 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22308 real_to_target (l, &r, mode);
22309
22310 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22311 if (HOST_BITS_PER_WIDE_INT >= 64)
22312 parts[0]
22313 = gen_int_mode
22314 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22315 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22316 DImode);
22317 else
22318 parts[0] = immed_double_const (l[0], l[1], DImode);
22319
22320 if (upper_mode == SImode)
22321 parts[1] = gen_int_mode (l[2], SImode);
22322 else if (HOST_BITS_PER_WIDE_INT >= 64)
22323 parts[1]
22324 = gen_int_mode
22325 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22326 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22327 DImode);
22328 else
22329 parts[1] = immed_double_const (l[2], l[3], DImode);
22330 }
22331 else
22332 gcc_unreachable ();
22333 }
22334 }
22335
22336 return size;
22337 }
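/* For example, on a 32-bit target a DFmode constant is converted with
   REAL_VALUE_TO_TARGET_DOUBLE and split into two SImode immediates
   holding the low and high words of the IEEE double representation.  */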
22338
22339 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22340 All required insns are emitted here; nothing is left for the caller.
22341 Operands 2-5 receive the destination parts in the correct order;
22342 operands 6-9 receive the source parts. */
22343
22344 void
22345 ix86_split_long_move (rtx operands[])
22346 {
22347 rtx part[2][4];
22348 int nparts, i, j;
22349 int push = 0;
22350 int collisions = 0;
22351 enum machine_mode mode = GET_MODE (operands[0]);
22352 bool collisionparts[4];
22353
22354 /* The DFmode expanders may ask us to move a double.
22355 For a 64-bit target this is a single move. By hiding that fact
22356 here we simplify the i386.md splitters. */
22357 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22358 {
22359 /* Optimize constant pool reference to immediates. This is used by
22360 fp moves, which force all constants to memory to allow combining. */
22361
22362 if (MEM_P (operands[1])
22363 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22364 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22365 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22366 if (push_operand (operands[0], VOIDmode))
22367 {
22368 operands[0] = copy_rtx (operands[0]);
22369 PUT_MODE (operands[0], word_mode);
22370 }
22371 else
22372 operands[0] = gen_lowpart (DImode, operands[0]);
22373 operands[1] = gen_lowpart (DImode, operands[1]);
22374 emit_move_insn (operands[0], operands[1]);
22375 return;
22376 }
22377
22378 /* The only non-offsettable memory we handle is push. */
22379 if (push_operand (operands[0], VOIDmode))
22380 push = 1;
22381 else
22382 gcc_assert (!MEM_P (operands[0])
22383 || offsettable_memref_p (operands[0]));
22384
22385 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22386 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22387
22388 /* When emitting a push, take care of source operands on the stack. */
22389 if (push && MEM_P (operands[1])
22390 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22391 {
22392 rtx src_base = XEXP (part[1][nparts - 1], 0);
22393
22394 /* Compensate for the stack decrement by 4. */
22395 if (!TARGET_64BIT && nparts == 3
22396 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22397 src_base = plus_constant (Pmode, src_base, 4);
22398
22399 /* src_base refers to the stack pointer and is
22400 automatically decreased by emitted push. */
22401 for (i = 0; i < nparts; i++)
22402 part[1][i] = change_address (part[1][i],
22403 GET_MODE (part[1][i]), src_base);
22404 }
22405
22406 /* We need to do the copy in the right order in case an address register
22407 of the source overlaps the destination. */
22408 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22409 {
22410 rtx tmp;
22411
22412 for (i = 0; i < nparts; i++)
22413 {
22414 collisionparts[i]
22415 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22416 if (collisionparts[i])
22417 collisions++;
22418 }
22419
22420 /* Collision in the middle part can be handled by reordering. */
22421 if (collisions == 1 && nparts == 3 && collisionparts [1])
22422 {
22423 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22424 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22425 }
22426 else if (collisions == 1
22427 && nparts == 4
22428 && (collisionparts [1] || collisionparts [2]))
22429 {
22430 if (collisionparts [1])
22431 {
22432 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22433 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22434 }
22435 else
22436 {
22437 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22438 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22439 }
22440 }
22441
22442 /* If there are more collisions, we can't handle it by reordering.
22443 Do an lea to the last part and use only one colliding move. */
22444 else if (collisions > 1)
22445 {
22446 rtx base;
22447
22448 collisions = 1;
22449
22450 base = part[0][nparts - 1];
22451
22452 /* Handle the case when the last part isn't valid for lea.
22453 Happens in 64-bit mode storing the 12-byte XFmode. */
22454 if (GET_MODE (base) != Pmode)
22455 base = gen_rtx_REG (Pmode, REGNO (base));
22456
22457 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22458 part[1][0] = replace_equiv_address (part[1][0], base);
22459 for (i = 1; i < nparts; i++)
22460 {
22461 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22462 part[1][i] = replace_equiv_address (part[1][i], tmp);
22463 }
22464 }
22465 }
22466
22467 if (push)
22468 {
22469 if (!TARGET_64BIT)
22470 {
22471 if (nparts == 3)
22472 {
22473 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22474 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22475 stack_pointer_rtx, GEN_INT (-4)));
22476 emit_move_insn (part[0][2], part[1][2]);
22477 }
22478 else if (nparts == 4)
22479 {
22480 emit_move_insn (part[0][3], part[1][3]);
22481 emit_move_insn (part[0][2], part[1][2]);
22482 }
22483 }
22484 else
22485 {
22486 /* In 64-bit mode we don't have a 32-bit push available. If this is a
22487 register, that is OK - we will just use the larger counterpart. We also
22488 retype memory - these accesses come from an attempt to avoid a REX prefix
22489 when moving the second half of a TFmode value. */
22490 if (GET_MODE (part[1][1]) == SImode)
22491 {
22492 switch (GET_CODE (part[1][1]))
22493 {
22494 case MEM:
22495 part[1][1] = adjust_address (part[1][1], DImode, 0);
22496 break;
22497
22498 case REG:
22499 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22500 break;
22501
22502 default:
22503 gcc_unreachable ();
22504 }
22505
22506 if (GET_MODE (part[1][0]) == SImode)
22507 part[1][0] = part[1][1];
22508 }
22509 }
22510 emit_move_insn (part[0][1], part[1][1]);
22511 emit_move_insn (part[0][0], part[1][0]);
22512 return;
22513 }
22514
22515 /* Choose correct order to not overwrite the source before it is copied. */
22516 if ((REG_P (part[0][0])
22517 && REG_P (part[1][1])
22518 && (REGNO (part[0][0]) == REGNO (part[1][1])
22519 || (nparts == 3
22520 && REGNO (part[0][0]) == REGNO (part[1][2]))
22521 || (nparts == 4
22522 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22523 || (collisions > 0
22524 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22525 {
22526 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22527 {
22528 operands[2 + i] = part[0][j];
22529 operands[6 + i] = part[1][j];
22530 }
22531 }
22532 else
22533 {
22534 for (i = 0; i < nparts; i++)
22535 {
22536 operands[2 + i] = part[0][i];
22537 operands[6 + i] = part[1][i];
22538 }
22539 }
22540
22541 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22542 if (optimize_insn_for_size_p ())
22543 {
22544 for (j = 0; j < nparts - 1; j++)
22545 if (CONST_INT_P (operands[6 + j])
22546 && operands[6 + j] != const0_rtx
22547 && REG_P (operands[2 + j]))
22548 for (i = j; i < nparts - 1; i++)
22549 if (CONST_INT_P (operands[7 + i])
22550 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22551 operands[7 + i] = operands[2 + j];
22552 }
22553
22554 for (i = 0; i < nparts; i++)
22555 emit_move_insn (operands[2 + i], operands[6 + i]);
22556
22557 return;
22558 }
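/* As an example of the ordering logic above: moving a DImode value from
   a memory operand addressed by %eax into the %eax:%edx pair must load
   the high word (into %edx) first, since loading the low word would
   clobber the address register; the reversed operand order arranged
   above takes care of that.  */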
22559
22560 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22561 left shift by a constant, either using a single shift or
22562 a sequence of add instructions. */
22563
22564 static void
22565 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22566 {
22567 rtx (*insn)(rtx, rtx, rtx);
22568
22569 if (count == 1
22570 || (count * ix86_cost->add <= ix86_cost->shift_const
22571 && !optimize_insn_for_size_p ()))
22572 {
22573 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22574 while (count-- > 0)
22575 emit_insn (insn (operand, operand, operand));
22576 }
22577 else
22578 {
22579 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22580 emit_insn (insn (operand, operand, GEN_INT (count)));
22581 }
22582 }
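/* For example, a left shift by 2 on a hot path where two additions are
   no more expensive than a constant shift is emitted as two
   "add reg, reg" instructions (each one doubling the value); otherwise
   a single constant shift instruction is used.  */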
22583
22584 void
22585 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22586 {
22587 rtx (*gen_ashl3)(rtx, rtx, rtx);
22588 rtx (*gen_shld)(rtx, rtx, rtx);
22589 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22590
22591 rtx low[2], high[2];
22592 int count;
22593
22594 if (CONST_INT_P (operands[2]))
22595 {
22596 split_double_mode (mode, operands, 2, low, high);
22597 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22598
22599 if (count >= half_width)
22600 {
22601 emit_move_insn (high[0], low[1]);
22602 emit_move_insn (low[0], const0_rtx);
22603
22604 if (count > half_width)
22605 ix86_expand_ashl_const (high[0], count - half_width, mode);
22606 }
22607 else
22608 {
22609 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22610
22611 if (!rtx_equal_p (operands[0], operands[1]))
22612 emit_move_insn (operands[0], operands[1]);
22613
22614 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22615 ix86_expand_ashl_const (low[0], count, mode);
22616 }
22617 return;
22618 }
22619
22620 split_double_mode (mode, operands, 1, low, high);
22621
22622 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22623
22624 if (operands[1] == const1_rtx)
22625 {
22626 /* Assuming we've chosen QImode-capable registers, 1 << N
22627 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22628 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22629 {
22630 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22631
22632 ix86_expand_clear (low[0]);
22633 ix86_expand_clear (high[0]);
22634 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22635
22636 d = gen_lowpart (QImode, low[0]);
22637 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22638 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22639 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22640
22641 d = gen_lowpart (QImode, high[0]);
22642 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22643 s = gen_rtx_NE (QImode, flags, const0_rtx);
22644 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22645 }
22646
22647 /* Otherwise, we can get the same results by manually performing
22648 a bit extract operation on bit 5/6, and then performing the two
22649 shifts. The two methods of getting 0/1 into low/high are exactly
22650 the same size. Avoiding the shift in the bit extract case helps
22651 pentium4 a bit; no one else seems to care much either way. */
22652 else
22653 {
22654 enum machine_mode half_mode;
22655 rtx (*gen_lshr3)(rtx, rtx, rtx);
22656 rtx (*gen_and3)(rtx, rtx, rtx);
22657 rtx (*gen_xor3)(rtx, rtx, rtx);
22658 HOST_WIDE_INT bits;
22659 rtx x;
22660
22661 if (mode == DImode)
22662 {
22663 half_mode = SImode;
22664 gen_lshr3 = gen_lshrsi3;
22665 gen_and3 = gen_andsi3;
22666 gen_xor3 = gen_xorsi3;
22667 bits = 5;
22668 }
22669 else
22670 {
22671 half_mode = DImode;
22672 gen_lshr3 = gen_lshrdi3;
22673 gen_and3 = gen_anddi3;
22674 gen_xor3 = gen_xordi3;
22675 bits = 6;
22676 }
22677
22678 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22679 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22680 else
22681 x = gen_lowpart (half_mode, operands[2]);
22682 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22683
22684 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22685 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22686 emit_move_insn (low[0], high[0]);
22687 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22688 }
22689
22690 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22691 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22692 return;
22693 }
22694
22695 if (operands[1] == constm1_rtx)
22696 {
22697 /* For -1 << N, we can avoid the shld instruction, because we
22698 know that we're shifting 0...31/63 ones into a -1. */
22699 emit_move_insn (low[0], constm1_rtx);
22700 if (optimize_insn_for_size_p ())
22701 emit_move_insn (high[0], low[0]);
22702 else
22703 emit_move_insn (high[0], constm1_rtx);
22704 }
22705 else
22706 {
22707 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22708
22709 if (!rtx_equal_p (operands[0], operands[1]))
22710 emit_move_insn (operands[0], operands[1]);
22711
22712 split_double_mode (mode, operands, 1, low, high);
22713 emit_insn (gen_shld (high[0], low[0], operands[2]));
22714 }
22715
22716 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22717
22718 if (TARGET_CMOVE && scratch)
22719 {
22720 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22721 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22722
22723 ix86_expand_clear (scratch);
22724 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22725 }
22726 else
22727 {
22728 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22729 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22730
22731 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22732 }
22733 }
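/* For example, splitting a DImode left shift by the constant 40 on a
   32-bit target yields: high = low_src; low = 0; high <<= 8 - i.e. a
   move, a clear and one narrow shift instead of a double-word shift.  */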
22734
22735 void
22736 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22737 {
22738 rtx (*gen_ashr3)(rtx, rtx, rtx)
22739 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22740 rtx (*gen_shrd)(rtx, rtx, rtx);
22741 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22742
22743 rtx low[2], high[2];
22744 int count;
22745
22746 if (CONST_INT_P (operands[2]))
22747 {
22748 split_double_mode (mode, operands, 2, low, high);
22749 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22750
22751 if (count == GET_MODE_BITSIZE (mode) - 1)
22752 {
22753 emit_move_insn (high[0], high[1]);
22754 emit_insn (gen_ashr3 (high[0], high[0],
22755 GEN_INT (half_width - 1)));
22756 emit_move_insn (low[0], high[0]);
22757
22758 }
22759 else if (count >= half_width)
22760 {
22761 emit_move_insn (low[0], high[1]);
22762 emit_move_insn (high[0], low[0]);
22763 emit_insn (gen_ashr3 (high[0], high[0],
22764 GEN_INT (half_width - 1)));
22765
22766 if (count > half_width)
22767 emit_insn (gen_ashr3 (low[0], low[0],
22768 GEN_INT (count - half_width)));
22769 }
22770 else
22771 {
22772 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22773
22774 if (!rtx_equal_p (operands[0], operands[1]))
22775 emit_move_insn (operands[0], operands[1]);
22776
22777 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22778 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22779 }
22780 }
22781 else
22782 {
22783 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22784
22785 if (!rtx_equal_p (operands[0], operands[1]))
22786 emit_move_insn (operands[0], operands[1]);
22787
22788 split_double_mode (mode, operands, 1, low, high);
22789
22790 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22791 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22792
22793 if (TARGET_CMOVE && scratch)
22794 {
22795 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22796 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22797
22798 emit_move_insn (scratch, high[0]);
22799 emit_insn (gen_ashr3 (scratch, scratch,
22800 GEN_INT (half_width - 1)));
22801 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22802 scratch));
22803 }
22804 else
22805 {
22806 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22807 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22808
22809 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22810 }
22811 }
22812 }
22813
22814 void
22815 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22816 {
22817 rtx (*gen_lshr3)(rtx, rtx, rtx)
22818 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22819 rtx (*gen_shrd)(rtx, rtx, rtx);
22820 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22821
22822 rtx low[2], high[2];
22823 int count;
22824
22825 if (CONST_INT_P (operands[2]))
22826 {
22827 split_double_mode (mode, operands, 2, low, high);
22828 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22829
22830 if (count >= half_width)
22831 {
22832 emit_move_insn (low[0], high[1]);
22833 ix86_expand_clear (high[0]);
22834
22835 if (count > half_width)
22836 emit_insn (gen_lshr3 (low[0], low[0],
22837 GEN_INT (count - half_width)));
22838 }
22839 else
22840 {
22841 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22842
22843 if (!rtx_equal_p (operands[0], operands[1]))
22844 emit_move_insn (operands[0], operands[1]);
22845
22846 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22847 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22848 }
22849 }
22850 else
22851 {
22852 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22853
22854 if (!rtx_equal_p (operands[0], operands[1]))
22855 emit_move_insn (operands[0], operands[1]);
22856
22857 split_double_mode (mode, operands, 1, low, high);
22858
22859 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22860 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22861
22862 if (TARGET_CMOVE && scratch)
22863 {
22864 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22865 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22866
22867 ix86_expand_clear (scratch);
22868 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22869 scratch));
22870 }
22871 else
22872 {
22873 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22874 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22875
22876 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22877 }
22878 }
22879 }
22880
22881 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
22882 static void
22883 predict_jump (int prob)
22884 {
22885 rtx insn = get_last_insn ();
22886 gcc_assert (JUMP_P (insn));
22887 add_int_reg_note (insn, REG_BR_PROB, prob);
22888 }
22889
22890 /* Helper function for the string operations below. Test VARIABLE whether
22891 it is aligned to VALUE bytes. If so, jump to the returned label. */
22892 static rtx_code_label *
22893 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22894 {
22895 rtx_code_label *label = gen_label_rtx ();
22896 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22897 if (GET_MODE (variable) == DImode)
22898 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22899 else
22900 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22901 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22902 1, label);
22903 if (epilogue)
22904 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22905 else
22906 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22907 return label;
22908 }
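/* Typical use: the copy epilogues below call, e.g.,
   ix86_expand_aligntest (count, 2, true), emit the 2-byte copy, and then
   emit the returned label, so the jump generated here skips the copy
   whenever bit 1 of COUNT is clear.  */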
22909
22910 /* Decrease COUNTREG by VALUE. */
22911 static void
22912 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22913 {
22914 rtx (*gen_add)(rtx, rtx, rtx)
22915 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22916
22917 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22918 }
22919
22920 /* Zero extend possibly SImode EXP to Pmode register. */
22921 rtx
22922 ix86_zero_extend_to_Pmode (rtx exp)
22923 {
22924 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22925 }
22926
22927 /* Divide COUNTREG by SCALE. */
22928 static rtx
22929 scale_counter (rtx countreg, int scale)
22930 {
22931 rtx sc;
22932
22933 if (scale == 1)
22934 return countreg;
22935 if (CONST_INT_P (countreg))
22936 return GEN_INT (INTVAL (countreg) / scale);
22937 gcc_assert (REG_P (countreg));
22938
22939 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22940 GEN_INT (exact_log2 (scale)),
22941 NULL, 1, OPTAB_DIRECT);
22942 return sc;
22943 }
22944
22945 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22946 DImode for constant loop counts. */
22947
22948 static enum machine_mode
22949 counter_mode (rtx count_exp)
22950 {
22951 if (GET_MODE (count_exp) != VOIDmode)
22952 return GET_MODE (count_exp);
22953 if (!CONST_INT_P (count_exp))
22954 return Pmode;
22955 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22956 return DImode;
22957 return SImode;
22958 }
22959
22960 /* Copy the address to a Pmode register. This is used for x32 to
22961 truncate DImode TLS address to a SImode register. */
22962
22963 static rtx
22964 ix86_copy_addr_to_reg (rtx addr)
22965 {
22966 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
22967 return copy_addr_to_reg (addr);
22968 else
22969 {
22970 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22971 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22972 }
22973 }
22974
22975 /* When ISSETMEM is FALSE, output a simple loop to copy memory from SRCPTR
22976 to DESTPTR via chunks of MODE unrolled UNROLL times; the overall size is
22977 COUNT, specified in bytes. When ISSETMEM is TRUE, output the equivalent
22978 loop to set memory to VALUE (supposed to be in MODE).
22979
22980 The size is rounded down to a whole number of chunks moved at once.
22981 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
22982
22983
22984 static void
22985 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22986 rtx destptr, rtx srcptr, rtx value,
22987 rtx count, enum machine_mode mode, int unroll,
22988 int expected_size, bool issetmem)
22989 {
22990 rtx_code_label *out_label, *top_label;
22991 rtx iter, tmp;
22992 enum machine_mode iter_mode = counter_mode (count);
22993 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22994 rtx piece_size = GEN_INT (piece_size_n);
22995 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22996 rtx size;
22997 int i;
22998
22999 top_label = gen_label_rtx ();
23000 out_label = gen_label_rtx ();
23001 iter = gen_reg_rtx (iter_mode);
23002
23003 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23004 NULL, 1, OPTAB_DIRECT);
23005 /* Those two should combine. */
23006 if (piece_size == const1_rtx)
23007 {
23008 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23009 true, out_label);
23010 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23011 }
23012 emit_move_insn (iter, const0_rtx);
23013
23014 emit_label (top_label);
23015
23016 tmp = convert_modes (Pmode, iter_mode, iter, true);
23017
23018 /* This assert could be relaxed - in that case we'll need to compute
23019 the smallest power of two containing PIECE_SIZE_N and pass it to
23020 offset_address. */
23021 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23022 destmem = offset_address (destmem, tmp, piece_size_n);
23023 destmem = adjust_address (destmem, mode, 0);
23024
23025 if (!issetmem)
23026 {
23027 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23028 srcmem = adjust_address (srcmem, mode, 0);
23029
23030 /* When unrolling for chips that reorder memory reads and writes,
23031 we can save registers by using a single temporary.
23032 Also, using 4 temporaries is overkill in 32-bit mode. */
23033 if (!TARGET_64BIT && 0)
23034 {
23035 for (i = 0; i < unroll; i++)
23036 {
23037 if (i)
23038 {
23039 destmem =
23040 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23041 srcmem =
23042 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23043 }
23044 emit_move_insn (destmem, srcmem);
23045 }
23046 }
23047 else
23048 {
23049 rtx tmpreg[4];
23050 gcc_assert (unroll <= 4);
23051 for (i = 0; i < unroll; i++)
23052 {
23053 tmpreg[i] = gen_reg_rtx (mode);
23054 if (i)
23055 {
23056 srcmem =
23057 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23058 }
23059 emit_move_insn (tmpreg[i], srcmem);
23060 }
23061 for (i = 0; i < unroll; i++)
23062 {
23063 if (i)
23064 {
23065 destmem =
23066 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23067 }
23068 emit_move_insn (destmem, tmpreg[i]);
23069 }
23070 }
23071 }
23072 else
23073 for (i = 0; i < unroll; i++)
23074 {
23075 if (i)
23076 destmem =
23077 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23078 emit_move_insn (destmem, value);
23079 }
23080
23081 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23082 true, OPTAB_LIB_WIDEN);
23083 if (tmp != iter)
23084 emit_move_insn (iter, tmp);
23085
23086 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23087 true, top_label);
23088 if (expected_size != -1)
23089 {
23090 expected_size /= GET_MODE_SIZE (mode) * unroll;
23091 if (expected_size == 0)
23092 predict_jump (0);
23093 else if (expected_size > REG_BR_PROB_BASE)
23094 predict_jump (REG_BR_PROB_BASE - 1);
23095 else
23096 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23097 }
23098 else
23099 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23100 iter = ix86_zero_extend_to_Pmode (iter);
23101 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23102 true, OPTAB_LIB_WIDEN);
23103 if (tmp != destptr)
23104 emit_move_insn (destptr, tmp);
23105 if (!issetmem)
23106 {
23107 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23108 true, OPTAB_LIB_WIDEN);
23109 if (tmp != srcptr)
23110 emit_move_insn (srcptr, tmp);
23111 }
23112 emit_label (out_label);
23113 }
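/* For example, with MODE = SImode and UNROLL = 4 the loop body above
   copies 16 bytes per iteration through four SImode temporaries; SIZE
   is COUNT rounded down to a multiple of 16, and whatever remains is
   left for the epilogue code to handle.  */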
23114
23115 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23116 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23117 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23118 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23119 ORIG_VALUE is the original value passed to memset to fill the memory with.
23120 Other arguments have the same meaning as for the previous function. */
23121
23122 static void
23123 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23124 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23125 rtx count,
23126 enum machine_mode mode, bool issetmem)
23127 {
23128 rtx destexp;
23129 rtx srcexp;
23130 rtx countreg;
23131 HOST_WIDE_INT rounded_count;
23132
23133 /* If possible, it is shorter to use rep movs.
23134 TODO: Maybe it is better to move this logic to decide_alg. */
23135 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23136 && (!issetmem || orig_value == const0_rtx))
23137 mode = SImode;
23138
23139 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23140 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23141
23142 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23143 GET_MODE_SIZE (mode)));
23144 if (mode != QImode)
23145 {
23146 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23147 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23148 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23149 }
23150 else
23151 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23152 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23153 {
23154 rounded_count = (INTVAL (count)
23155 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23156 destmem = shallow_copy_rtx (destmem);
23157 set_mem_size (destmem, rounded_count);
23158 }
23159 else if (MEM_SIZE_KNOWN_P (destmem))
23160 clear_mem_size (destmem);
23161
23162 if (issetmem)
23163 {
23164 value = force_reg (mode, gen_lowpart (mode, value));
23165 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23166 }
23167 else
23168 {
23169 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23170 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23171 if (mode != QImode)
23172 {
23173 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23174 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23175 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23176 }
23177 else
23178 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23179 if (CONST_INT_P (count))
23180 {
23181 rounded_count = (INTVAL (count)
23182 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23183 srcmem = shallow_copy_rtx (srcmem);
23184 set_mem_size (srcmem, rounded_count);
23185 }
23186 else
23187 {
23188 if (MEM_SIZE_KNOWN_P (srcmem))
23189 clear_mem_size (srcmem);
23190 }
23191 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23192 destexp, srcexp));
23193 }
23194 }
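/* For example, a copy whose COUNT is known to be a multiple of 4 is
   emitted with MODE = SImode: COUNTREG becomes COUNT / 4 and a single
   "rep movsd" (or "rep stosd" for the setmem case with a zero value)
   transfers the whole block.  */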
23195
23196 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23197 DESTMEM.
23198 SRCMEM is passed by pointer and is updated on return.
23199 The return value is the updated DESTMEM. */
23200 static rtx
23201 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23202 HOST_WIDE_INT size_to_move)
23203 {
23204 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23205 enum insn_code code;
23206 enum machine_mode move_mode;
23207 int piece_size, i;
23208
23209 /* Find the widest mode in which we could perform moves.
23210 Start with the biggest power of 2 less than SIZE_TO_MOVE and halve
23211 it until a move of such size is supported. */
23212 piece_size = 1 << floor_log2 (size_to_move);
23213 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23214 code = optab_handler (mov_optab, move_mode);
23215 while (code == CODE_FOR_nothing && piece_size > 1)
23216 {
23217 piece_size >>= 1;
23218 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23219 code = optab_handler (mov_optab, move_mode);
23220 }
23221
23222 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23223 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23224 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23225 {
23226 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23227 move_mode = mode_for_vector (word_mode, nunits);
23228 code = optab_handler (mov_optab, move_mode);
23229 if (code == CODE_FOR_nothing)
23230 {
23231 move_mode = word_mode;
23232 piece_size = GET_MODE_SIZE (move_mode);
23233 code = optab_handler (mov_optab, move_mode);
23234 }
23235 }
23236 gcc_assert (code != CODE_FOR_nothing);
23237
23238 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23239 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23240
23241 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23242 gcc_assert (size_to_move % piece_size == 0);
23243 adjust = GEN_INT (piece_size);
23244 for (i = 0; i < size_to_move; i += piece_size)
23245 {
23246 /* We move from memory to memory, so we'll need to do it via
23247 a temporary register. */
23248 tempreg = gen_reg_rtx (move_mode);
23249 emit_insn (GEN_FCN (code) (tempreg, src));
23250 emit_insn (GEN_FCN (code) (dst, tempreg));
23251
23252 emit_move_insn (destptr,
23253 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23254 emit_move_insn (srcptr,
23255 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23256
23257 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23258 piece_size);
23259 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23260 piece_size);
23261 }
23262
23263 /* Update DST and SRC rtx. */
23264 *srcmem = src;
23265 return dst;
23266 }
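/* For example, with SIZE_TO_MOVE = 8 on a 64-bit target this picks
   DImode, copies the 8 bytes through a fresh temporary register and
   advances both DESTPTR and SRCPTR by 8, returning the adjusted
   destination MEM.  */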
23267
23268 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23269 static void
23270 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23271 rtx destptr, rtx srcptr, rtx count, int max_size)
23272 {
23273 rtx src, dest;
23274 if (CONST_INT_P (count))
23275 {
23276 HOST_WIDE_INT countval = INTVAL (count);
23277 HOST_WIDE_INT epilogue_size = countval % max_size;
23278 int i;
23279
23280 /* For now MAX_SIZE should be a power of 2. This assert could be
23281 relaxed, but it would require a somewhat more complicated epilogue
23282 expansion. */
23283 gcc_assert ((max_size & (max_size - 1)) == 0);
23284 for (i = max_size; i >= 1; i >>= 1)
23285 {
23286 if (epilogue_size & i)
23287 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23288 }
23289 return;
23290 }
23291 if (max_size > 8)
23292 {
23293 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23294 count, 1, OPTAB_DIRECT);
23295 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23296 count, QImode, 1, 4, false);
23297 return;
23298 }
23299
23300 /* When single string operations are available, we can cheaply increase the
23301 dest and src pointers. Otherwise we save code size by maintaining an offset
23302 (zero is readily available from the preceding rep operation) and using x86
23303 addressing modes. */
23304 if (TARGET_SINGLE_STRINGOP)
23305 {
23306 if (max_size > 4)
23307 {
23308 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23309 src = change_address (srcmem, SImode, srcptr);
23310 dest = change_address (destmem, SImode, destptr);
23311 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23312 emit_label (label);
23313 LABEL_NUSES (label) = 1;
23314 }
23315 if (max_size > 2)
23316 {
23317 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23318 src = change_address (srcmem, HImode, srcptr);
23319 dest = change_address (destmem, HImode, destptr);
23320 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23321 emit_label (label);
23322 LABEL_NUSES (label) = 1;
23323 }
23324 if (max_size > 1)
23325 {
23326 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23327 src = change_address (srcmem, QImode, srcptr);
23328 dest = change_address (destmem, QImode, destptr);
23329 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23330 emit_label (label);
23331 LABEL_NUSES (label) = 1;
23332 }
23333 }
23334 else
23335 {
23336 rtx offset = force_reg (Pmode, const0_rtx);
23337 rtx tmp;
23338
23339 if (max_size > 4)
23340 {
23341 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23342 src = change_address (srcmem, SImode, srcptr);
23343 dest = change_address (destmem, SImode, destptr);
23344 emit_move_insn (dest, src);
23345 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23346 true, OPTAB_LIB_WIDEN);
23347 if (tmp != offset)
23348 emit_move_insn (offset, tmp);
23349 emit_label (label);
23350 LABEL_NUSES (label) = 1;
23351 }
23352 if (max_size > 2)
23353 {
23354 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23355 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23356 src = change_address (srcmem, HImode, tmp);
23357 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23358 dest = change_address (destmem, HImode, tmp);
23359 emit_move_insn (dest, src);
23360 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23361 true, OPTAB_LIB_WIDEN);
23362 if (tmp != offset)
23363 emit_move_insn (offset, tmp);
23364 emit_label (label);
23365 LABEL_NUSES (label) = 1;
23366 }
23367 if (max_size > 1)
23368 {
23369 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23370 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23371 src = change_address (srcmem, QImode, tmp);
23372 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23373 dest = change_address (destmem, QImode, tmp);
23374 emit_move_insn (dest, src);
23375 emit_label (label);
23376 LABEL_NUSES (label) = 1;
23377 }
23378 }
23379 }
23380
23381 /* This function emits stores to fill SIZE_TO_MOVE bytes starting at DESTMEM
23382 with value PROMOTED_VAL.
23383 Unlike emit_memmov, there is no source operand to update.
23384 Return value is the updated DST. */
23385 static rtx
23386 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23387 HOST_WIDE_INT size_to_move)
23388 {
23389 rtx dst = destmem, adjust;
23390 enum insn_code code;
23391 enum machine_mode move_mode;
23392 int piece_size, i;
23393
23394 /* Find the widest mode in which we could perform moves.
23395 Start with the biggest power of 2 less than SIZE_TO_MOVE and halve
23396 it until a move of such size is supported. */
23397 move_mode = GET_MODE (promoted_val);
23398 if (move_mode == VOIDmode)
23399 move_mode = QImode;
23400 if (size_to_move < GET_MODE_SIZE (move_mode))
23401 {
23402 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23403 promoted_val = gen_lowpart (move_mode, promoted_val);
23404 }
23405 piece_size = GET_MODE_SIZE (move_mode);
23406 code = optab_handler (mov_optab, move_mode);
23407 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23408
23409 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23410
23411 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23412 gcc_assert (size_to_move % piece_size == 0);
23413 adjust = GEN_INT (piece_size);
23414 for (i = 0; i < size_to_move; i += piece_size)
23415 {
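/* Pieces no wider than a word use the strset string-op pattern, which
   also advances DESTPTR; wider (vector) pieces are stored with a plain
   move followed by an explicit DESTPTR update below.  */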
23416 if (piece_size <= GET_MODE_SIZE (word_mode))
23417 {
23418 emit_insn (gen_strset (destptr, dst, promoted_val));
23419 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23420 piece_size);
23421 continue;
23422 }
23423
23424 emit_insn (GEN_FCN (code) (dst, promoted_val));
23425
23426 emit_move_insn (destptr,
23427 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23428
23429 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23430 piece_size);
23431 }
23432
23433 /* Update DST rtx. */
23434 return dst;
23435 }
23436 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23437 static void
23438 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23439 rtx count, int max_size)
23440 {
23441 count =
23442 expand_simple_binop (counter_mode (count), AND, count,
23443 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23444 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23445 gen_lowpart (QImode, value), count, QImode,
23446 1, max_size / 2, true);
23447 }
23448
23449 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23450 static void
23451 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23452 rtx count, int max_size)
23453 {
23454 rtx dest;
23455
23456 if (CONST_INT_P (count))
23457 {
23458 HOST_WIDE_INT countval = INTVAL (count);
23459 HOST_WIDE_INT epilogue_size = countval % max_size;
23460 int i;
23461
23462 /* For now MAX_SIZE should be a power of 2. This assert could be
23463 relaxed, but it'll require a bit more complicated epilogue
23464 expanding. */
23465 gcc_assert ((max_size & (max_size - 1)) == 0);
23466 for (i = max_size; i >= 1; i >>= 1)
23467 {
23468 if (epilogue_size & i)
23469 {
23470 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23471 destmem = emit_memset (destmem, destptr, vec_value, i);
23472 else
23473 destmem = emit_memset (destmem, destptr, value, i);
23474 }
23475 }
23476 return;
23477 }
23478 if (max_size > 32)
23479 {
23480 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23481 return;
23482 }
23483 if (max_size > 16)
23484 {
23485 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23486 if (TARGET_64BIT)
23487 {
23488 dest = change_address (destmem, DImode, destptr);
23489 emit_insn (gen_strset (destptr, dest, value));
23490 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23491 emit_insn (gen_strset (destptr, dest, value));
23492 }
23493 else
23494 {
23495 dest = change_address (destmem, SImode, destptr);
23496 emit_insn (gen_strset (destptr, dest, value));
23497 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23498 emit_insn (gen_strset (destptr, dest, value));
23499 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23500 emit_insn (gen_strset (destptr, dest, value));
23501 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23502 emit_insn (gen_strset (destptr, dest, value));
23503 }
23504 emit_label (label);
23505 LABEL_NUSES (label) = 1;
23506 }
23507 if (max_size > 8)
23508 {
23509 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
23510 if (TARGET_64BIT)
23511 {
23512 dest = change_address (destmem, DImode, destptr);
23513 emit_insn (gen_strset (destptr, dest, value));
23514 }
23515 else
23516 {
23517 dest = change_address (destmem, SImode, destptr);
23518 emit_insn (gen_strset (destptr, dest, value));
23519 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23520 emit_insn (gen_strset (destptr, dest, value));
23521 }
23522 emit_label (label);
23523 LABEL_NUSES (label) = 1;
23524 }
23525 if (max_size > 4)
23526 {
23527 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23528 dest = change_address (destmem, SImode, destptr);
23529 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23530 emit_label (label);
23531 LABEL_NUSES (label) = 1;
23532 }
23533 if (max_size > 2)
23534 {
23535 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23536 dest = change_address (destmem, HImode, destptr);
23537 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23538 emit_label (label);
23539 LABEL_NUSES (label) = 1;
23540 }
23541 if (max_size > 1)
23542 {
23543 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23544 dest = change_address (destmem, QImode, destptr);
23545 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23546 emit_label (label);
23547 LABEL_NUSES (label) = 1;
23548 }
23549 }
23550
23551 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
23552 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23553 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23554 ignored.
23555 Return value is updated DESTMEM. */
23556 static rtx
23557 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23558 rtx destptr, rtx srcptr, rtx value,
23559 rtx vec_value, rtx count, int align,
23560 int desired_alignment, bool issetmem)
23561 {
23562 int i;
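/* For every power-of-two chunk size I below DESIRED_ALIGNMENT that the
   known alignment does not already cover, conditionally copy or set I
   bytes so that DESTPTR becomes aligned to 2*I, adjusting COUNT as we
   go.  */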
23563 for (i = 1; i < desired_alignment; i <<= 1)
23564 {
23565 if (align <= i)
23566 {
23567 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
23568 if (issetmem)
23569 {
23570 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23571 destmem = emit_memset (destmem, destptr, vec_value, i);
23572 else
23573 destmem = emit_memset (destmem, destptr, value, i);
23574 }
23575 else
23576 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23577 ix86_adjust_counter (count, i);
23578 emit_label (label);
23579 LABEL_NUSES (label) = 1;
23580 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23581 }
23582 }
23583 return destmem;
23584 }
23585
23586 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
23587 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23588 and jump to DONE_LABEL. */
23589 static void
23590 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23591 rtx destptr, rtx srcptr,
23592 rtx value, rtx vec_value,
23593 rtx count, int size,
23594 rtx done_label, bool issetmem)
23595 {
23596 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
23597 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23598 rtx modesize;
23599 int n;
23600
23601 /* If we do not have a vector value to copy, we must reduce the size. */
23602 if (issetmem)
23603 {
23604 if (!vec_value)
23605 {
23606 if (GET_MODE (value) == VOIDmode && size > 8)
23607 mode = Pmode;
23608 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23609 mode = GET_MODE (value);
23610 }
23611 else
23612 mode = GET_MODE (vec_value), value = vec_value;
23613 }
23614 else
23615 {
23616 /* Choose appropriate vector mode. */
23617 if (size >= 32)
23618 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23619 else if (size >= 16)
23620 mode = TARGET_SSE ? V16QImode : DImode;
23621 srcmem = change_address (srcmem, mode, srcptr);
23622 }
23623 destmem = change_address (destmem, mode, destptr);
23624 modesize = GEN_INT (GET_MODE_SIZE (mode));
23625 gcc_assert (GET_MODE_SIZE (mode) <= size);
23626 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23627 {
23628 if (issetmem)
23629 emit_move_insn (destmem, gen_lowpart (mode, value));
23630 else
23631 {
23632 emit_move_insn (destmem, srcmem);
23633 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23634 }
23635 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23636 }
23637
23638 destmem = offset_address (destmem, count, 1);
23639 destmem = offset_address (destmem, GEN_INT (-2 * size),
23640 GET_MODE_SIZE (mode));
23641 if (!issetmem)
23642 {
23643 srcmem = offset_address (srcmem, count, 1);
23644 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23645 GET_MODE_SIZE (mode));
23646 }
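/* Now copy or set the last SIZE bytes: together with the first SIZE
   bytes handled above this covers any length in SIZE..2*SIZE-1, the two
   regions possibly overlapping.  */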
23647 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23648 {
23649 if (issetmem)
23650 emit_move_insn (destmem, gen_lowpart (mode, value));
23651 else
23652 {
23653 emit_move_insn (destmem, srcmem);
23654 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23655 }
23656 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23657 }
23658 emit_jump_insn (gen_jump (done_label));
23659 emit_barrier ();
23660
23661 emit_label (label);
23662 LABEL_NUSES (label) = 1;
23663 }
23664
23665 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
23666 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
23667 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way that lets us
23668 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
23669 DONE_LABEL is a label after the whole copying sequence. The label is created
23670 on demand if *DONE_LABEL is NULL.
23671 MIN_SIZE is the minimal size of the block copied. This value gets adjusted for new
23672 bounds after the initial copies.
23673
23674 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23675 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
23676 we will dispatch to a library call for large blocks.
23677
23678 In pseudocode we do:
23679
23680 if (COUNT < SIZE)
23681 {
23682 Assume that SIZE is 4. Bigger sizes are handled analogously
23683 if (COUNT & 4)
23684 {
23685 copy 4 bytes from SRCPTR to DESTPTR
23686 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23687 goto done_label
23688 }
23689 if (!COUNT)
23690 goto done_label;
23691 copy 1 byte from SRCPTR to DESTPTR
23692 if (COUNT & 2)
23693 {
23694 copy 2 bytes from SRCPTR to DESTPTR
23695 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23696 }
23697 }
23698 else
23699 {
23700 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23701 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
23702
23703 OLD_DESTPTR = DESTPTR;
23704 Align DESTPTR up to DESIRED_ALIGN
23705 SRCPTR += DESTPTR - OLD_DESTPTR
23706 COUNT -= DESTPTR - OLD_DESTPTR
23707 if (DYNAMIC_CHECK)
23708 Round COUNT down to multiple of SIZE
23709 << optional caller supplied zero size guard is here >>
23710 << optional caller supplied dynamic check is here >>
23711 << caller supplied main copy loop is here >>
23712 }
23713 done_label:
23714 */
23715 static void
23716 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23717 rtx *destptr, rtx *srcptr,
23718 enum machine_mode mode,
23719 rtx value, rtx vec_value,
23720 rtx *count,
23721 rtx_code_label **done_label,
23722 int size,
23723 int desired_align,
23724 int align,
23725 unsigned HOST_WIDE_INT *min_size,
23726 bool dynamic_check,
23727 bool issetmem)
23728 {
23729 rtx_code_label *loop_label = NULL, *label;
23730 int n;
23731 rtx modesize;
23732 int prolog_size = 0;
23733 rtx mode_value;
23734
23735 /* Choose the proper value to copy. */
23736 if (issetmem && VECTOR_MODE_P (mode))
23737 mode_value = vec_value;
23738 else
23739 mode_value = value;
23740 gcc_assert (GET_MODE_SIZE (mode) <= size);
23741
23742 /* See if block is big or small, handle small blocks. */
23743 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23744 {
23745 int size2 = size;
23746 loop_label = gen_label_rtx ();
23747
23748 if (!*done_label)
23749 *done_label = gen_label_rtx ();
23750
23751 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23752 1, loop_label);
23753 size2 >>= 1;
23754
23755 /* Handle sizes > 3. */
23756 for (;size2 > 2; size2 >>= 1)
23757 expand_small_movmem_or_setmem (destmem, srcmem,
23758 *destptr, *srcptr,
23759 value, vec_value,
23760 *count,
23761 size2, *done_label, issetmem);
23762 /* Nothing to copy? Jump to DONE_LABEL if so. */
23763 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23764 1, *done_label);
23765
23766 /* Do a byte copy. */
23767 destmem = change_address (destmem, QImode, *destptr);
23768 if (issetmem)
23769 emit_move_insn (destmem, gen_lowpart (QImode, value));
23770 else
23771 {
23772 srcmem = change_address (srcmem, QImode, *srcptr);
23773 emit_move_insn (destmem, srcmem);
23774 }
23775
23776 /* Handle sizes 2 and 3. */
23777 label = ix86_expand_aligntest (*count, 2, false);
23778 destmem = change_address (destmem, HImode, *destptr);
23779 destmem = offset_address (destmem, *count, 1);
23780 destmem = offset_address (destmem, GEN_INT (-2), 2);
23781 if (issetmem)
23782 emit_move_insn (destmem, gen_lowpart (HImode, value));
23783 else
23784 {
23785 srcmem = change_address (srcmem, HImode, *srcptr);
23786 srcmem = offset_address (srcmem, *count, 1);
23787 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23788 emit_move_insn (destmem, srcmem);
23789 }
23790
23791 emit_label (label);
23792 LABEL_NUSES (label) = 1;
23793 emit_jump_insn (gen_jump (*done_label));
23794 emit_barrier ();
23795 }
23796 else
23797 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23798 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23799
23800 /* Start memcpy for COUNT >= SIZE. */
23801 if (loop_label)
23802 {
23803 emit_label (loop_label);
23804 LABEL_NUSES (loop_label) = 1;
23805 }
23806
23807 /* Copy the first DESIRED_ALIGN - ALIGN bytes (rounded up to the mode size). */
23808 if (!issetmem)
23809 srcmem = change_address (srcmem, mode, *srcptr);
23810 destmem = change_address (destmem, mode, *destptr);
23811 modesize = GEN_INT (GET_MODE_SIZE (mode));
23812 for (n = 0; prolog_size < desired_align - align; n++)
23813 {
23814 if (issetmem)
23815 emit_move_insn (destmem, mode_value);
23816 else
23817 {
23818 emit_move_insn (destmem, srcmem);
23819 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23820 }
23821 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23822 prolog_size += GET_MODE_SIZE (mode);
23823 }
23824
23825
23826 /* Copy last SIZE bytes. */
23827 destmem = offset_address (destmem, *count, 1);
23828 destmem = offset_address (destmem,
23829 GEN_INT (-size - prolog_size),
23830 1);
23831 if (issetmem)
23832 emit_move_insn (destmem, mode_value);
23833 else
23834 {
23835 srcmem = offset_address (srcmem, *count, 1);
23836 srcmem = offset_address (srcmem,
23837 GEN_INT (-size - prolog_size),
23838 1);
23839 emit_move_insn (destmem, srcmem);
23840 }
23841 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23842 {
23843 destmem = offset_address (destmem, modesize, 1);
23844 if (issetmem)
23845 emit_move_insn (destmem, mode_value);
23846 else
23847 {
23848 srcmem = offset_address (srcmem, modesize, 1);
23849 emit_move_insn (destmem, srcmem);
23850 }
23851 }
23852
23853 /* Align destination. */
23854 if (desired_align > 1 && desired_align > align)
23855 {
23856 rtx saveddest = *destptr;
23857
23858 gcc_assert (desired_align <= size);
23859 /* Align destptr up, place it in a new register. */
23860 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23861 GEN_INT (prolog_size),
23862 NULL_RTX, 1, OPTAB_DIRECT);
23863 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23864 GEN_INT (-desired_align),
23865 *destptr, 1, OPTAB_DIRECT);
23866 /* See how many bytes we skipped. */
23867 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23868 *destptr,
23869 saveddest, 1, OPTAB_DIRECT);
23870 /* Adjust srcptr and count. */
23871 if (!issetmem)
23872 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23873 *srcptr, 1, OPTAB_DIRECT);
23874 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23875 saveddest, *count, 1, OPTAB_DIRECT);
23876 /* We copied at most size + prolog_size. */
23877 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23878 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23879 else
23880 *min_size = 0;
23881
23882 /* Our loops always round down the block size, but for dispatch to a library
23883 call we need the precise value. */
23884 if (dynamic_check)
23885 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23886 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23887 }
23888 else
23889 {
23890 gcc_assert (prolog_size == 0);
23891 /* Decrease count, so we won't end up copying the last word twice. */
23892 if (!CONST_INT_P (*count))
23893 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23894 constm1_rtx, *count, 1, OPTAB_DIRECT);
23895 else
23896 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23897 if (*min_size)
23898 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
23899 }
23900 }
23901
23902
23903 /* This function is like the previous one, except here we know how many bytes
23904 need to be copied. That allows us to update alignment not only of DST, which
23905 is returned, but also of SRC, which is passed as a pointer for that
23906 reason. */
23907 static rtx
23908 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23909 rtx srcreg, rtx value, rtx vec_value,
23910 int desired_align, int align_bytes,
23911 bool issetmem)
23912 {
23913 rtx src = NULL;
23914 rtx orig_dst = dst;
23915 rtx orig_src = NULL;
23916 int piece_size = 1;
23917 int copied_bytes = 0;
23918
23919 if (!issetmem)
23920 {
23921 gcc_assert (srcp != NULL);
23922 src = *srcp;
23923 orig_src = src;
23924 }
23925
23926 for (piece_size = 1;
23927 piece_size <= desired_align && copied_bytes < align_bytes;
23928 piece_size <<= 1)
23929 {
23930 if (align_bytes & piece_size)
23931 {
23932 if (issetmem)
23933 {
23934 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23935 dst = emit_memset (dst, destreg, vec_value, piece_size);
23936 else
23937 dst = emit_memset (dst, destreg, value, piece_size);
23938 }
23939 else
23940 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23941 copied_bytes += piece_size;
23942 }
23943 }
23944 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23945 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23946 if (MEM_SIZE_KNOWN_P (orig_dst))
23947 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23948
23949 if (!issetmem)
23950 {
23951 int src_align_bytes = get_mem_align_offset (src, desired_align
23952 * BITS_PER_UNIT);
23953 if (src_align_bytes >= 0)
23954 src_align_bytes = desired_align - src_align_bytes;
23955 if (src_align_bytes >= 0)
23956 {
23957 unsigned int src_align;
23958 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23959 {
23960 if ((src_align_bytes & (src_align - 1))
23961 == (align_bytes & (src_align - 1)))
23962 break;
23963 }
23964 if (src_align > (unsigned int) desired_align)
23965 src_align = desired_align;
23966 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23967 set_mem_align (src, src_align * BITS_PER_UNIT);
23968 }
23969 if (MEM_SIZE_KNOWN_P (orig_src))
23970 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23971 *srcp = src;
23972 }
23973
23974 return dst;
23975 }
23976
23977 /* Return true if ALG can be used in current context.
23978 Assume we expand memset if MEMSET is true. */
23979 static bool
23980 alg_usable_p (enum stringop_alg alg, bool memset)
23981 {
23982 if (alg == no_stringop)
23983 return false;
23984 if (alg == vector_loop)
23985 return TARGET_SSE || TARGET_AVX;
23986 /* Algorithms using the rep prefix want at least edi and ecx;
23987 additionally, memset wants eax and memcpy wants esi. Don't
23988 consider such algorithms if the user has appropriated those
23989 registers for their own purposes. */
23990 if (alg == rep_prefix_1_byte
23991 || alg == rep_prefix_4_byte
23992 || alg == rep_prefix_8_byte)
23993 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23994 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23995 return true;
23996 }
23997
23998 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
23999 static enum stringop_alg
24000 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24001 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24002 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24003 {
24004 const struct stringop_algs * algs;
24005 bool optimize_for_speed;
24006 int max = 0;
24007 const struct processor_costs *cost;
24008 int i;
24009 bool any_alg_usable_p = false;
24010
24011 *noalign = false;
24012 *dynamic_check = -1;
24013
24014 /* Even if the string operation call is cold, we still might spend a lot
24015 of time processing large blocks. */
24016 if (optimize_function_for_size_p (cfun)
24017 || (optimize_insn_for_size_p ()
24018 && (max_size < 256
24019 || (expected_size != -1 && expected_size < 256))))
24020 optimize_for_speed = false;
24021 else
24022 optimize_for_speed = true;
24023
24024 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24025 if (memset)
24026 algs = &cost->memset[TARGET_64BIT != 0];
24027 else
24028 algs = &cost->memcpy[TARGET_64BIT != 0];
24029
24030 /* Find the maximal size for which a usable non-libcall algorithm is defined in the cost table. */
24031 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24032 {
24033 enum stringop_alg candidate = algs->size[i].alg;
24034 bool usable = alg_usable_p (candidate, memset);
24035 any_alg_usable_p |= usable;
24036
24037 if (candidate != libcall && candidate && usable)
24038 max = algs->size[i].max;
24039 }
24040
24041 /* If the expected size is not known but the max size is small enough
24042 so that the inline version is a win, set the expected size into
24043 the range. */
24044 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24045 && expected_size == -1)
24046 expected_size = min_size / 2 + max_size / 2;
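/* E.g. for a block known to be between 0 and 64 bytes, assume an
   expected size of 32.  */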
24047
24048 /* If the user specified the algorithm, honor it if possible. */
24049 if (ix86_stringop_alg != no_stringop
24050 && alg_usable_p (ix86_stringop_alg, memset))
24051 return ix86_stringop_alg;
24052 /* rep; movq or rep; movl is the smallest variant. */
24053 else if (!optimize_for_speed)
24054 {
24055 *noalign = true;
24056 if (!count || (count & 3) || (memset && !zero_memset))
24057 return alg_usable_p (rep_prefix_1_byte, memset)
24058 ? rep_prefix_1_byte : loop_1_byte;
24059 else
24060 return alg_usable_p (rep_prefix_4_byte, memset)
24061 ? rep_prefix_4_byte : loop;
24062 }
24063 /* Very tiny blocks are best handled via the loop; REP is expensive to
24064 set up. */
24065 else if (expected_size != -1 && expected_size < 4)
24066 return loop_1_byte;
24067 else if (expected_size != -1)
24068 {
24069 enum stringop_alg alg = libcall;
24070 bool alg_noalign = false;
24071 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24072 {
24073 /* We get here if the algorithms that were not libcall-based
24074 were rep-prefix based and we are unable to use rep prefixes
24075 based on global register usage. Break out of the loop and
24076 use the heuristic below. */
24077 if (algs->size[i].max == 0)
24078 break;
24079 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24080 {
24081 enum stringop_alg candidate = algs->size[i].alg;
24082
24083 if (candidate != libcall && alg_usable_p (candidate, memset))
24084 {
24085 alg = candidate;
24086 alg_noalign = algs->size[i].noalign;
24087 }
24088 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24089 last non-libcall inline algorithm. */
24090 if (TARGET_INLINE_ALL_STRINGOPS)
24091 {
24092 /* When the current size is best to be copied by a libcall,
24093 but we are still forced to inline, run the heuristic below
24094 that will pick code for medium sized blocks. */
24095 if (alg != libcall)
24096 {
24097 *noalign = alg_noalign;
24098 return alg;
24099 }
24100 break;
24101 }
24102 else if (alg_usable_p (candidate, memset))
24103 {
24104 *noalign = algs->size[i].noalign;
24105 return candidate;
24106 }
24107 }
24108 }
24109 }
24110 /* When asked to inline the call anyway, try to pick a meaningful choice.
24111 We look for the maximal size of a block that is faster to copy by hand and
24112 take blocks of at most that size, guessing that the average size will
24113 be roughly half of the maximum.
24114
24115 If this turns out to be bad, we might simply specify the preferred
24116 choice in ix86_costs. */
24117 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24118 && (algs->unknown_size == libcall
24119 || !alg_usable_p (algs->unknown_size, memset)))
24120 {
24121 enum stringop_alg alg;
24122
24123 /* If there aren't any usable algorithms, then recursing on
24124 smaller sizes isn't going to find anything. Just return the
24125 simple byte-at-a-time copy loop. */
24126 if (!any_alg_usable_p)
24127 {
24128 /* Pick something reasonable. */
24129 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24130 *dynamic_check = 128;
24131 return loop_1_byte;
24132 }
24133 if (max <= 0)
24134 max = 4096;
24135 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24136 zero_memset, dynamic_check, noalign);
24137 gcc_assert (*dynamic_check == -1);
24138 gcc_assert (alg != libcall);
24139 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24140 *dynamic_check = max;
24141 return alg;
24142 }
24143 return (alg_usable_p (algs->unknown_size, memset)
24144 ? algs->unknown_size : libcall);
24145 }
24146
24147 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24148 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24149 static int
24150 decide_alignment (int align,
24151 enum stringop_alg alg,
24152 int expected_size,
24153 enum machine_mode move_mode)
24154 {
24155 int desired_align = 0;
24156
24157 gcc_assert (alg != no_stringop);
24158
24159 if (alg == libcall)
24160 return 0;
24161 if (move_mode == VOIDmode)
24162 return 0;
24163
24164 desired_align = GET_MODE_SIZE (move_mode);
24165 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
24166 copying a whole cache line at once. */
24167 if (TARGET_PENTIUMPRO
24168 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24169 desired_align = 8;
24170
24171 if (optimize_size)
24172 desired_align = 1;
24173 if (desired_align < align)
24174 desired_align = align;
24175 if (expected_size != -1 && expected_size < 4)
24176 desired_align = align;
24177
24178 return desired_align;
24179 }
24180
24181
24182 /* Helper function for memset. For a QImode value 0xXY produce
24183 0xXYXYXYXY of the width specified by MODE. This is essentially
24184 a * 0x01010101, but we can do slightly better than
24185 synth_mult by unwinding the sequence by hand on CPUs with
24186 slow multiply. */
24187 static rtx
24188 promote_duplicated_reg (enum machine_mode mode, rtx val)
24189 {
24190 enum machine_mode valmode = GET_MODE (val);
24191 rtx tmp;
24192 int nops = mode == DImode ? 3 : 2;
24193
24194 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24195 if (val == const0_rtx)
24196 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24197 if (CONST_INT_P (val))
24198 {
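/* Replicate the low byte across the whole word:
   0xXY -> 0xXYXY -> 0xXYXYXYXY (-> 0xXYXYXYXYXYXYXYXY for DImode).  */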
24199 HOST_WIDE_INT v = INTVAL (val) & 255;
24200
24201 v |= v << 8;
24202 v |= v << 16;
24203 if (mode == DImode)
24204 v |= (v << 16) << 16;
24205 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24206 }
24207
24208 if (valmode == VOIDmode)
24209 valmode = QImode;
24210 if (valmode != QImode)
24211 val = gen_lowpart (QImode, val);
24212 if (mode == QImode)
24213 return val;
24214 if (!TARGET_PARTIAL_REG_STALL)
24215 nops--;
24216 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24217 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24218 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24219 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24220 {
24221 rtx reg = convert_modes (mode, QImode, val, true);
24222 tmp = promote_duplicated_reg (mode, const1_rtx);
24223 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24224 OPTAB_DIRECT);
24225 }
24226 else
24227 {
24228 rtx reg = convert_modes (mode, QImode, val, true);
24229
24230 if (!TARGET_PARTIAL_REG_STALL)
24231 if (mode == SImode)
24232 emit_insn (gen_movsi_insv_1 (reg, reg));
24233 else
24234 emit_insn (gen_movdi_insv_1 (reg, reg));
24235 else
24236 {
24237 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24238 NULL, 1, OPTAB_DIRECT);
24239 reg =
24240 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24241 }
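/* Widen the replicated value from 16 to 32 bits (and to 64 bits for
   DImode) by further shift-and-OR steps.  */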
24242 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24243 NULL, 1, OPTAB_DIRECT);
24244 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24245 if (mode == SImode)
24246 return reg;
24247 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24248 NULL, 1, OPTAB_DIRECT);
24249 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24250 return reg;
24251 }
24252 }
24253
24254 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
24255 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
24256 alignment from ALIGN to DESIRED_ALIGN. */
24257 static rtx
24258 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24259 int align)
24260 {
24261 rtx promoted_val;
24262
24263 if (TARGET_64BIT
24264 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24265 promoted_val = promote_duplicated_reg (DImode, val);
24266 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24267 promoted_val = promote_duplicated_reg (SImode, val);
24268 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24269 promoted_val = promote_duplicated_reg (HImode, val);
24270 else
24271 promoted_val = val;
24272
24273 return promoted_val;
24274 }
24275
24276 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24277 operations when profitable. The code depends upon architecture, block size
24278 and alignment, but always has one of the following overall structures:
24279
24280 Aligned move sequence:
24281
24282 1) Prologue guard: Conditional that jumps up to epilogues for small
24283 blocks that can be handled by epilogue alone. This is faster
24284 but also needed for correctness, since the prologue assumes the block
24285 is larger than the desired alignment.
24286
24287 Optional dynamic check for size and libcall for large
24288 blocks is emitted here too, with -minline-stringops-dynamically.
24289
24290 2) Prologue: copy first few bytes in order to get destination
24291 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24292 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24293 copied. We emit either a jump tree on power of two sized
24294 blocks, or a byte loop.
24295
24296 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24297 with specified algorithm.
24298
24299 4) Epilogue: code copying tail of the block that is too small to be
24300 handled by main body (or up to size guarded by prologue guard).
24301
24302 Misaligned move sequence:
24303
24304 1) Misaligned move prologue/epilogue containing:
24305 a) Prologue handling small memory blocks and jumping to done_label
24306 (skipped if blocks are known to be large enough)
24307 b) A single, possibly misaligned, move copying the first
24308 DESIRED_ALIGN-ALIGN bytes if alignment is needed
24309 (skipped if alignment is not needed)
24310 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24311
24312 2) Zero size guard dispatching to done_label, if needed
24313
24314 3) Dispatch to a library call, if needed.
24315
24316 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24317 with specified algorithm. */
24318 bool
24319 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24320 rtx align_exp, rtx expected_align_exp,
24321 rtx expected_size_exp, rtx min_size_exp,
24322 rtx max_size_exp, rtx probable_max_size_exp,
24323 bool issetmem)
24324 {
24325 rtx destreg;
24326 rtx srcreg = NULL;
24327 rtx_code_label *label = NULL;
24328 rtx tmp;
24329 rtx_code_label *jump_around_label = NULL;
24330 HOST_WIDE_INT align = 1;
24331 unsigned HOST_WIDE_INT count = 0;
24332 HOST_WIDE_INT expected_size = -1;
24333 int size_needed = 0, epilogue_size_needed;
24334 int desired_align = 0, align_bytes = 0;
24335 enum stringop_alg alg;
24336 rtx promoted_val = NULL;
24337 rtx vec_promoted_val = NULL;
24338 bool force_loopy_epilogue = false;
24339 int dynamic_check;
24340 bool need_zero_guard = false;
24341 bool noalign;
24342 enum machine_mode move_mode = VOIDmode;
24343 int unroll_factor = 1;
24344 /* TODO: Once value ranges are available, fill in proper data. */
24345 unsigned HOST_WIDE_INT min_size = 0;
24346 unsigned HOST_WIDE_INT max_size = -1;
24347 unsigned HOST_WIDE_INT probable_max_size = -1;
24348 bool misaligned_prologue_used = false;
24349
24350 if (CONST_INT_P (align_exp))
24351 align = INTVAL (align_exp);
24352 /* i386 can do misaligned access at a reasonably increased cost. */
24353 if (CONST_INT_P (expected_align_exp)
24354 && INTVAL (expected_align_exp) > align)
24355 align = INTVAL (expected_align_exp);
24356 /* ALIGN is the minimum of destination and source alignment, but we care here
24357 just about destination alignment. */
24358 else if (!issetmem
24359 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24360 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24361
24362 if (CONST_INT_P (count_exp))
24363 {
24364 min_size = max_size = probable_max_size = count = expected_size
24365 = INTVAL (count_exp);
24366 /* When COUNT is 0, there is nothing to do. */
24367 if (!count)
24368 return true;
24369 }
24370 else
24371 {
24372 if (min_size_exp)
24373 min_size = INTVAL (min_size_exp);
24374 if (max_size_exp)
24375 max_size = INTVAL (max_size_exp);
24376 if (probable_max_size_exp)
24377 probable_max_size = INTVAL (probable_max_size_exp);
24378 if (CONST_INT_P (expected_size_exp))
24379 expected_size = INTVAL (expected_size_exp);
24380 }
24381
24382 /* Make sure we don't need to care about overflow later on. */
24383 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24384 return false;
24385
24386 /* Step 0: Decide on preferred algorithm, desired alignment and
24387 size of chunks to be copied by main loop. */
24388 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24389 issetmem,
24390 issetmem && val_exp == const0_rtx,
24391 &dynamic_check, &noalign);
24392 if (alg == libcall)
24393 return false;
24394 gcc_assert (alg != no_stringop);
24395
24396 /* For now the vector version of memset is generated only for memory zeroing, as
24397 creating the promoted vector value is very cheap in this case. */
24398 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24399 alg = unrolled_loop;
24400
24401 if (!count)
24402 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24403 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24404 if (!issetmem)
24405 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24406
24407 unroll_factor = 1;
24408 move_mode = word_mode;
24409 switch (alg)
24410 {
24411 case libcall:
24412 case no_stringop:
24413 case last_alg:
24414 gcc_unreachable ();
24415 case loop_1_byte:
24416 need_zero_guard = true;
24417 move_mode = QImode;
24418 break;
24419 case loop:
24420 need_zero_guard = true;
24421 break;
24422 case unrolled_loop:
24423 need_zero_guard = true;
24424 unroll_factor = (TARGET_64BIT ? 4 : 2);
24425 break;
24426 case vector_loop:
24427 need_zero_guard = true;
24428 unroll_factor = 4;
24429 /* Find the widest supported mode. */
24430 move_mode = word_mode;
24431 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24432 != CODE_FOR_nothing)
24433 move_mode = GET_MODE_WIDER_MODE (move_mode);
24434
24435 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24436 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24437 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24438 {
24439 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24440 move_mode = mode_for_vector (word_mode, nunits);
24441 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24442 move_mode = word_mode;
24443 }
24444 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24445 break;
24446 case rep_prefix_8_byte:
24447 move_mode = DImode;
24448 break;
24449 case rep_prefix_4_byte:
24450 move_mode = SImode;
24451 break;
24452 case rep_prefix_1_byte:
24453 move_mode = QImode;
24454 break;
24455 }
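/* SIZE_NEEDED is the number of bytes copied or set by one iteration of
   the main loop.  */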
24456 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24457 epilogue_size_needed = size_needed;
24458
24459 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24460 if (!TARGET_ALIGN_STRINGOPS || noalign)
24461 align = desired_align;
24462
24463 /* Step 1: Prologue guard. */
24464
24465 /* Alignment code needs count to be in register. */
24466 if (CONST_INT_P (count_exp) && desired_align > align)
24467 {
24468 if (INTVAL (count_exp) > desired_align
24469 && INTVAL (count_exp) > size_needed)
24470 {
24471 align_bytes
24472 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24473 if (align_bytes <= 0)
24474 align_bytes = 0;
24475 else
24476 align_bytes = desired_align - align_bytes;
24477 }
24478 if (align_bytes == 0)
24479 count_exp = force_reg (counter_mode (count_exp), count_exp);
24480 }
24481 gcc_assert (desired_align >= 1 && align >= 1);
24482
24483 /* Misaligned move sequences handle both prologue and epilogue at once.
24484 Default code generation results in smaller code for large alignments
24485 and also avoids redundant work when sizes are known precisely. */
24486 misaligned_prologue_used
24487 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24488 && MAX (desired_align, epilogue_size_needed) <= 32
24489 && desired_align <= epilogue_size_needed
24490 && ((desired_align > align && !align_bytes)
24491 || (!count && epilogue_size_needed > 1)));
24492
24493 /* Do the cheap promotion to allow better CSE across the
24494 main loop and epilogue (i.e. one load of the big constant in
24495 front of all code).
24496 For now the misaligned move sequences do not have a fast path
24497 without broadcasting. */
24498 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24499 {
24500 if (alg == vector_loop)
24501 {
24502 gcc_assert (val_exp == const0_rtx);
24503 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24504 promoted_val = promote_duplicated_reg_to_size (val_exp,
24505 GET_MODE_SIZE (word_mode),
24506 desired_align, align);
24507 }
24508 else
24509 {
24510 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24511 desired_align, align);
24512 }
24513 }
24514 /* Misaligned move sequences handle both prologues and epilogues at once.
24515 Default code generation results in smaller code for large alignments and
24516 also avoids redundant work when sizes are known precisely. */
24517 if (misaligned_prologue_used)
24518 {
24519 /* Misaligned move prologue handles small blocks by itself. */
24520 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24521 (dst, src, &destreg, &srcreg,
24522 move_mode, promoted_val, vec_promoted_val,
24523 &count_exp,
24524 &jump_around_label,
24525 desired_align < align
24526 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24527 desired_align, align, &min_size, dynamic_check, issetmem);
24528 if (!issetmem)
24529 src = change_address (src, BLKmode, srcreg);
24530 dst = change_address (dst, BLKmode, destreg);
24531 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24532 epilogue_size_needed = 0;
24533 if (need_zero_guard && !min_size)
24534 {
24535 /* It is possible that we copied enough so the main loop will not
24536 execute. */
24537 gcc_assert (size_needed > 1);
24538 if (jump_around_label == NULL_RTX)
24539 jump_around_label = gen_label_rtx ();
24540 emit_cmp_and_jump_insns (count_exp,
24541 GEN_INT (size_needed),
24542 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24543 if (expected_size == -1
24544 || expected_size < (desired_align - align) / 2 + size_needed)
24545 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24546 else
24547 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24548 }
24549 }
24550 /* Ensure that alignment prologue won't copy past end of block. */
24551 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24552 {
24553 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24554 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
24555 Make sure it is a power of 2. */
24556 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
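/* This rounds up to the smallest power of two strictly greater than the
   requirement, e.g. 15 -> 16 and 16 -> 32.  */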
24557
24558 /* To improve performance of small blocks, we jump around the VAL
24559 promotion. This means that if the promoted VAL is not constant,
24560 we might not use it in the epilogue and have to use the byte
24561 loop variant. */
24562 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24563 force_loopy_epilogue = true;
24564 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24565 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24566 {
24567 /* If main algorithm works on QImode, no epilogue is needed.
24568 For small sizes just don't align anything. */
24569 if (size_needed == 1)
24570 desired_align = align;
24571 else
24572 goto epilogue;
24573 }
24574 else if (!count
24575 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24576 {
24577 label = gen_label_rtx ();
24578 emit_cmp_and_jump_insns (count_exp,
24579 GEN_INT (epilogue_size_needed),
24580 LTU, 0, counter_mode (count_exp), 1, label);
24581 if (expected_size == -1 || expected_size < epilogue_size_needed)
24582 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24583 else
24584 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24585 }
24586 }
24587
24588 /* Emit code to decide on runtime whether library call or inline should be
24589 used. */
24590 if (dynamic_check != -1)
24591 {
24592 if (!issetmem && CONST_INT_P (count_exp))
24593 {
24594 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24595 {
24596 emit_block_move_via_libcall (dst, src, count_exp, false);
24597 count_exp = const0_rtx;
24598 goto epilogue;
24599 }
24600 }
24601 else
24602 {
24603 rtx_code_label *hot_label = gen_label_rtx ();
24604 if (jump_around_label == NULL_RTX)
24605 jump_around_label = gen_label_rtx ();
24606 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24607 LEU, 0, counter_mode (count_exp),
24608 1, hot_label);
24609 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24610 if (issetmem)
24611 set_storage_via_libcall (dst, count_exp, val_exp, false);
24612 else
24613 emit_block_move_via_libcall (dst, src, count_exp, false);
24614 emit_jump (jump_around_label);
24615 emit_label (hot_label);
24616 }
24617 }
24618
24619 /* Step 2: Alignment prologue. */
24620 /* Do the expensive promotion once we branched off the small blocks. */
24621 if (issetmem && !promoted_val)
24622 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24623 desired_align, align);
24624
24625 if (desired_align > align && !misaligned_prologue_used)
24626 {
24627 if (align_bytes == 0)
24628 {
24629 /* Except for the first move in the prologue, we no longer know
24630 the constant offset in the aliasing info. It doesn't seem worth
24631 the pain to maintain it for the first move, so throw away
24632 the info early. */
24633 dst = change_address (dst, BLKmode, destreg);
24634 if (!issetmem)
24635 src = change_address (src, BLKmode, srcreg);
24636 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24637 promoted_val, vec_promoted_val,
24638 count_exp, align, desired_align,
24639 issetmem);
24640 /* At most desired_align - align bytes are copied. */
24641 if (min_size < (unsigned)(desired_align - align))
24642 min_size = 0;
24643 else
24644 min_size -= desired_align - align;
24645 }
24646 else
24647 {
24648 /* If we know how many bytes need to be stored before dst is
24649 sufficiently aligned, maintain aliasing info accurately. */
24650 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24651 srcreg,
24652 promoted_val,
24653 vec_promoted_val,
24654 desired_align,
24655 align_bytes,
24656 issetmem);
24657
24658 count_exp = plus_constant (counter_mode (count_exp),
24659 count_exp, -align_bytes);
24660 count -= align_bytes;
24661 min_size -= align_bytes;
24662 max_size -= align_bytes;
24663 }
24664 if (need_zero_guard
24665 && !min_size
24666 && (count < (unsigned HOST_WIDE_INT) size_needed
24667 || (align_bytes == 0
24668 && count < ((unsigned HOST_WIDE_INT) size_needed
24669 + desired_align - align))))
24670 {
24671 /* It is possible that we copied enough so the main loop will not
24672 execute. */
24673 gcc_assert (size_needed > 1);
24674 if (label == NULL_RTX)
24675 label = gen_label_rtx ();
24676 emit_cmp_and_jump_insns (count_exp,
24677 GEN_INT (size_needed),
24678 LTU, 0, counter_mode (count_exp), 1, label);
24679 if (expected_size == -1
24680 || expected_size < (desired_align - align) / 2 + size_needed)
24681 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24682 else
24683 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24684 }
24685 }
24686 if (label && size_needed == 1)
24687 {
24688 emit_label (label);
24689 LABEL_NUSES (label) = 1;
24690 label = NULL;
24691 epilogue_size_needed = 1;
24692 if (issetmem)
24693 promoted_val = val_exp;
24694 }
24695 else if (label == NULL_RTX && !misaligned_prologue_used)
24696 epilogue_size_needed = size_needed;
24697
24698 /* Step 3: Main loop. */
24699
24700 switch (alg)
24701 {
24702 case libcall:
24703 case no_stringop:
24704 case last_alg:
24705 gcc_unreachable ();
24706 case loop_1_byte:
24707 case loop:
24708 case unrolled_loop:
24709 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24710 count_exp, move_mode, unroll_factor,
24711 expected_size, issetmem);
24712 break;
24713 case vector_loop:
24714 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24715 vec_promoted_val, count_exp, move_mode,
24716 unroll_factor, expected_size, issetmem);
24717 break;
24718 case rep_prefix_8_byte:
24719 case rep_prefix_4_byte:
24720 case rep_prefix_1_byte:
24721 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24722 val_exp, count_exp, move_mode, issetmem);
24723 break;
24724 }
24725 /* Properly adjust the offset of the src and dest memory for aliasing. */
24726 if (CONST_INT_P (count_exp))
24727 {
24728 if (!issetmem)
24729 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24730 (count / size_needed) * size_needed);
24731 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24732 (count / size_needed) * size_needed);
24733 }
24734 else
24735 {
24736 if (!issetmem)
24737 src = change_address (src, BLKmode, srcreg);
24738 dst = change_address (dst, BLKmode, destreg);
24739 }
24740
24741 /* Step 4: Epilogue to copy the remaining bytes. */
24742 epilogue:
24743 if (label)
24744 {
24745 /* When the main loop is done, COUNT_EXP might hold the original count,
24746 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
24747 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
24748 bytes. Compensate if needed. */
24749
24750 if (size_needed < epilogue_size_needed)
24751 {
24752 tmp =
24753 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24754 GEN_INT (size_needed - 1), count_exp, 1,
24755 OPTAB_DIRECT);
24756 if (tmp != count_exp)
24757 emit_move_insn (count_exp, tmp);
24758 }
24759 emit_label (label);
24760 LABEL_NUSES (label) = 1;
24761 }
24762
24763 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24764 {
24765 if (force_loopy_epilogue)
24766 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24767 epilogue_size_needed);
24768 else
24769 {
24770 if (issetmem)
24771 expand_setmem_epilogue (dst, destreg, promoted_val,
24772 vec_promoted_val, count_exp,
24773 epilogue_size_needed);
24774 else
24775 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24776 epilogue_size_needed);
24777 }
24778 }
24779 if (jump_around_label)
24780 emit_label (jump_around_label);
24781 return true;
24782 }
24783
24784
24785 /* Expand the appropriate insns for doing strlen if not just doing
24786 repnz; scasb
24787
24788 out = result, initialized with the start address
24789 align_rtx = alignment of the address.
24790 scratch = scratch register, initialized with the start address when
24791 not aligned, otherwise undefined
24792
24793 This is just the body. It needs the initializations mentioned above and
24794 some address computing at the end. These things are done in i386.md. */
24795
24796 static void
24797 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24798 {
24799 int align;
24800 rtx tmp;
24801 rtx_code_label *align_2_label = NULL;
24802 rtx_code_label *align_3_label = NULL;
24803 rtx_code_label *align_4_label = gen_label_rtx ();
24804 rtx_code_label *end_0_label = gen_label_rtx ();
24805 rtx mem;
24806 rtx tmpreg = gen_reg_rtx (SImode);
24807 rtx scratch = gen_reg_rtx (SImode);
24808 rtx cmp;
24809
24810 align = 0;
24811 if (CONST_INT_P (align_rtx))
24812 align = INTVAL (align_rtx);
24813
24814 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24815
24816 /* Is there a known alignment and is it less than 4? */
24817 if (align < 4)
24818 {
24819 rtx scratch1 = gen_reg_rtx (Pmode);
24820 emit_move_insn (scratch1, out);
24821 /* Is there a known alignment and is it not 2? */
24822 if (align != 2)
24823 {
24824 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24825 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24826
24827 /* Leave just the 3 lower bits. */
24828 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24829 NULL_RTX, 0, OPTAB_WIDEN);
24830
24831 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24832 Pmode, 1, align_4_label);
24833 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24834 Pmode, 1, align_2_label);
24835 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24836 Pmode, 1, align_3_label);
24837 }
24838 else
24839 {
24840 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24841 check whether it is aligned to a 4-byte boundary. */
24842
24843 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24844 NULL_RTX, 0, OPTAB_WIDEN);
24845
24846 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24847 Pmode, 1, align_4_label);
24848 }
24849
24850 mem = change_address (src, QImode, out);
24851
24852 /* Now compare the bytes. */
24853
24854 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
24855 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24856 QImode, 1, end_0_label);
24857
24858 /* Increment the address. */
24859 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24860
24861 /* Not needed with an alignment of 2. */
24862 if (align != 2)
24863 {
24864 emit_label (align_2_label);
24865
24866 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24867 end_0_label);
24868
24869 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24870
24871 emit_label (align_3_label);
24872 }
24873
24874 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24875 end_0_label);
24876
24877 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24878 }
24879
24880 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
24881 align this loop; it only makes the program larger and does not help to
24882 speed it up. */
24883 emit_label (align_4_label);
24884
24885 mem = change_address (src, SImode, out);
24886 emit_move_insn (scratch, mem);
24887 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24888
24889 /* This formula yields a nonzero result iff one of the bytes is zero.
24890 This saves three branches inside the loop and many cycles. */
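/* Concretely, with X being the word just loaded, the sequence below
   computes (X - 0x01010101) & ~X & 0x80808080, which is nonzero exactly
   when some byte of X is zero.  */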
24891
24892 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24893 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24894 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24895 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24896 gen_int_mode (0x80808080, SImode)));
24897 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24898 align_4_label);
24899
24900 if (TARGET_CMOVE)
24901 {
24902 rtx reg = gen_reg_rtx (SImode);
24903 rtx reg2 = gen_reg_rtx (Pmode);
24904 emit_move_insn (reg, tmpreg);
24905 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24906
24907 /* If zero is not in the first two bytes, move two bytes forward. */
24908 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24909 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24910 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24911 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24912 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24913 reg,
24914 tmpreg)));
24915 /* Emit lea manually to avoid clobbering of flags. */
24916 emit_insn (gen_rtx_SET (SImode, reg2,
24917 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24918
24919 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24920 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24921 emit_insn (gen_rtx_SET (VOIDmode, out,
24922 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24923 reg2,
24924 out)));
24925 }
24926 else
24927 {
24928 rtx_code_label *end_2_label = gen_label_rtx ();
24929 /* Is zero in the first two bytes? */
24930
24931 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24932 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24933 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24934 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24935 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24936 pc_rtx);
24937 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24938 JUMP_LABEL (tmp) = end_2_label;
24939
24940 /* Not in the first two. Move two bytes forward. */
24941 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24942 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24943
24944 emit_label (end_2_label);
24945
24946 }
24947
24948 /* Avoid branch in fixing the byte. */
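  /* Here OUT points 3 or 4 bytes past the zero byte, and the low byte of
     TMPREG is 0x80 exactly when the zero byte is the earlier of the two
     remaining candidates.  Doubling that byte moves the 0x80 into the
     carry flag, so the subtract-with-borrow of 3 below yields the exact
     address of the zero byte without a branch.  */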
24949 tmpreg = gen_lowpart (QImode, tmpreg);
24950 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24951 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24952 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24953 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24954
24955 emit_label (end_0_label);
24956 }
24957
24958 /* Expand strlen. */
24959
24960 bool
24961 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24962 {
24963 rtx addr, scratch1, scratch2, scratch3, scratch4;
24964
24965 /* The generic case of the strlen expander is long. Avoid expanding it
24966 unless TARGET_INLINE_ALL_STRINGOPS. */
24967
24968 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24969 && !TARGET_INLINE_ALL_STRINGOPS
24970 && !optimize_insn_for_size_p ()
24971 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24972 return false;
24973
24974 addr = force_reg (Pmode, XEXP (src, 0));
24975 scratch1 = gen_reg_rtx (Pmode);
24976
24977 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24978 && !optimize_insn_for_size_p ())
24979 {
24980 /* Well it seems that some optimizer does not combine a call like
24981 foo(strlen(bar), strlen(bar));
24982 when the move and the subtraction are done here. It does calculate
24983 the length just once when these instructions are done inside of
24984 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
24985 often used and I use one fewer register for the lifetime of
24986 output_strlen_unroll() this is better. */
24987
24988 emit_move_insn (out, addr);
24989
24990 ix86_expand_strlensi_unroll_1 (out, src, align);
24991
24992 /* strlensi_unroll_1 returns the address of the zero at the end of
24993 the string, like memchr(), so compute the length by subtracting
24994 the start address. */
24995 emit_insn (ix86_gen_sub3 (out, out, addr));
24996 }
24997 else
24998 {
24999 rtx unspec;
25000
25001 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25002 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25003 return false;
25004
25005 scratch2 = gen_reg_rtx (Pmode);
25006 scratch3 = gen_reg_rtx (Pmode);
25007 scratch4 = force_reg (Pmode, constm1_rtx);
25008
25009 emit_move_insn (scratch3, addr);
25010 eoschar = force_reg (QImode, eoschar);
25011
25012 src = replace_equiv_address_nv (src, scratch3);
25013
25014 /* If .md starts supporting :P, this can be done in .md. */
25015 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25016 scratch4), UNSPEC_SCAS);
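      /* Note: the pattern below scans with repnz scasb and a count register
	 preloaded with -1, so afterwards SCRATCH1 holds -(length + 2); the
	 one's complement plus the add of -1 below recover the length.  */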
25017 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25018 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25019 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25020 }
25021 return true;
25022 }
25023
25024 /* For a given symbol (function), construct code to compute the address of
25025 its PLT entry in the large x86-64 PIC model. */
25026 static rtx
25027 construct_plt_address (rtx symbol)
25028 {
25029 rtx tmp, unspec;
25030
25031 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25032 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25033 gcc_assert (Pmode == DImode);
25034
25035 tmp = gen_reg_rtx (Pmode);
25036 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25037
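  /* The two insns below amount to loading the 64-bit symbol@PLTOFF constant
     into TMP (a movabs) and adding the PIC base register, leaving the
     address of the symbol's PLT entry in TMP.  */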
25038 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25039 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25040 return tmp;
25041 }
25042
25043 rtx
25044 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25045 rtx callarg2,
25046 rtx pop, bool sibcall)
25047 {
25048 rtx vec[3];
25049 rtx use = NULL, call;
25050 unsigned int vec_len = 0;
25051
25052 if (pop == const0_rtx)
25053 pop = NULL;
25054 gcc_assert (!TARGET_64BIT || !pop);
25055
25056 if (TARGET_MACHO && !TARGET_64BIT)
25057 {
25058 #if TARGET_MACHO
25059 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25060 fnaddr = machopic_indirect_call_target (fnaddr);
25061 #endif
25062 }
25063 else
25064 {
25065 /* Static functions and indirect calls don't need the pic register. */
25066 if (flag_pic
25067 && (!TARGET_64BIT
25068 || (ix86_cmodel == CM_LARGE_PIC
25069 && DEFAULT_ABI != MS_ABI))
25070 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25071 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25072 {
25073 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25074 if (ix86_use_pseudo_pic_reg ())
25075 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25076 pic_offset_table_rtx);
25077 }
25078 }
25079
25080 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
25081 {
25082 rtx al = gen_rtx_REG (QImode, AX_REG);
25083 emit_move_insn (al, callarg2);
25084 use_reg (&use, al);
25085 }
25086
25087 if (ix86_cmodel == CM_LARGE_PIC
25088 && !TARGET_PECOFF
25089 && MEM_P (fnaddr)
25090 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25091 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25092 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25093 else if (sibcall
25094 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25095 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25096 {
25097 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25098 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25099 }
25100
25101 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25102 if (retval)
25103 call = gen_rtx_SET (VOIDmode, retval, call);
25104 vec[vec_len++] = call;
25105
25106 if (pop)
25107 {
25108 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25109 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25110 vec[vec_len++] = pop;
25111 }
25112
25113 if (TARGET_64BIT_MS_ABI
25114 && (!callarg2 || INTVAL (callarg2) != -2))
25115 {
25116 int const cregs_size
25117 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25118 int i;
25119
25120 for (i = 0; i < cregs_size; i++)
25121 {
25122 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25123 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25124
25125 clobber_reg (&use, gen_rtx_REG (mode, regno));
25126 }
25127 }
25128
25129 if (vec_len > 1)
25130 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25131 call = emit_call_insn (call);
25132 if (use)
25133 CALL_INSN_FUNCTION_USAGE (call) = use;
25134
25135 return call;
25136 }
25137
25138 /* Output the assembly for a call instruction. */
25139
25140 const char *
25141 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25142 {
25143 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25144 bool seh_nop_p = false;
25145 const char *xasm;
25146
25147 if (SIBLING_CALL_P (insn))
25148 {
25149 if (direct_p)
25150 xasm = "jmp\t%P0";
25151 /* SEH epilogue detection requires the indirect branch case
25152 to include REX.W. */
25153 else if (TARGET_SEH)
25154 xasm = "rex.W jmp %A0";
25155 else
25156 xasm = "jmp\t%A0";
25157
25158 output_asm_insn (xasm, &call_op);
25159 return "";
25160 }
25161
25162 /* SEH unwinding can require an extra nop to be emitted in several
25163 circumstances. Determine if we have one of those. */
25164 if (TARGET_SEH)
25165 {
25166 rtx_insn *i;
25167
25168 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25169 {
25170 /* If we get to another real insn, we don't need the nop. */
25171 if (INSN_P (i))
25172 break;
25173
25174 /* If we get to the epilogue note, prevent a catch region from
25175 being adjacent to the standard epilogue sequence. If non-
25176 call-exceptions, we'll have done this during epilogue emission. */
25177 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25178 && !flag_non_call_exceptions
25179 && !can_throw_internal (insn))
25180 {
25181 seh_nop_p = true;
25182 break;
25183 }
25184 }
25185
25186 /* If we didn't find a real insn following the call, prevent the
25187 unwinder from looking into the next function. */
25188 if (i == NULL)
25189 seh_nop_p = true;
25190 }
25191
25192 if (direct_p)
25193 xasm = "call\t%P0";
25194 else
25195 xasm = "call\t%A0";
25196
25197 output_asm_insn (xasm, &call_op);
25198
25199 if (seh_nop_p)
25200 return "nop";
25201
25202 return "";
25203 }
25204 \f
25205 /* Clear stack slot assignments remembered from previous functions.
25206 This is called from INIT_EXPANDERS once before RTL is emitted for each
25207 function. */
25208
25209 static struct machine_function *
25210 ix86_init_machine_status (void)
25211 {
25212 struct machine_function *f;
25213
25214 f = ggc_cleared_alloc<machine_function> ();
25215 f->use_fast_prologue_epilogue_nregs = -1;
25216 f->call_abi = ix86_abi;
25217
25218 return f;
25219 }
25220
25221 /* Return a MEM corresponding to a stack slot with mode MODE.
25222 Allocate a new slot if necessary.
25223
25224 The RTL for a function can have several slots available: N is
25225 which slot to use. */
25226
25227 rtx
25228 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25229 {
25230 struct stack_local_entry *s;
25231
25232 gcc_assert (n < MAX_386_STACK_LOCALS);
25233
25234 for (s = ix86_stack_locals; s; s = s->next)
25235 if (s->mode == mode && s->n == n)
25236 return validize_mem (copy_rtx (s->rtl));
25237
25238 s = ggc_alloc<stack_local_entry> ();
25239 s->n = n;
25240 s->mode = mode;
25241 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25242
25243 s->next = ix86_stack_locals;
25244 ix86_stack_locals = s;
25245 return validize_mem (copy_rtx (s->rtl));
25246 }
25247
25248 static void
25249 ix86_instantiate_decls (void)
25250 {
25251 struct stack_local_entry *s;
25252
25253 for (s = ix86_stack_locals; s; s = s->next)
25254 if (s->rtl != NULL_RTX)
25255 instantiate_decl_rtl (s->rtl);
25256 }
25257 \f
25258 /* Check whether x86 address PARTS is a pc-relative address. */
25259
25260 static bool
25261 rip_relative_addr_p (struct ix86_address *parts)
25262 {
25263 rtx base, index, disp;
25264
25265 base = parts->base;
25266 index = parts->index;
25267 disp = parts->disp;
25268
25269 if (disp && !base && !index)
25270 {
25271 if (TARGET_64BIT)
25272 {
25273 rtx symbol = disp;
25274
25275 if (GET_CODE (disp) == CONST)
25276 symbol = XEXP (disp, 0);
25277 if (GET_CODE (symbol) == PLUS
25278 && CONST_INT_P (XEXP (symbol, 1)))
25279 symbol = XEXP (symbol, 0);
25280
25281 if (GET_CODE (symbol) == LABEL_REF
25282 || (GET_CODE (symbol) == SYMBOL_REF
25283 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25284 || (GET_CODE (symbol) == UNSPEC
25285 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25286 || XINT (symbol, 1) == UNSPEC_PCREL
25287 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25288 return true;
25289 }
25290 }
25291 return false;
25292 }
25293
25294 /* Calculate the length of the memory address in the instruction encoding.
25295 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25296 or other prefixes. We never generate addr32 prefix for LEA insn. */
25297
25298 int
25299 memory_address_length (rtx addr, bool lea)
25300 {
25301 struct ix86_address parts;
25302 rtx base, index, disp;
25303 int len;
25304 int ok;
25305
25306 if (GET_CODE (addr) == PRE_DEC
25307 || GET_CODE (addr) == POST_INC
25308 || GET_CODE (addr) == PRE_MODIFY
25309 || GET_CODE (addr) == POST_MODIFY)
25310 return 0;
25311
25312 ok = ix86_decompose_address (addr, &parts);
25313 gcc_assert (ok);
25314
25315 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25316
25317 /* If this is not LEA instruction, add the length of addr32 prefix. */
25318 if (TARGET_64BIT && !lea
25319 && (SImode_address_operand (addr, VOIDmode)
25320 || (parts.base && GET_MODE (parts.base) == SImode)
25321 || (parts.index && GET_MODE (parts.index) == SImode)))
25322 len++;
25323
25324 base = parts.base;
25325 index = parts.index;
25326 disp = parts.disp;
25327
25328 if (base && GET_CODE (base) == SUBREG)
25329 base = SUBREG_REG (base);
25330 if (index && GET_CODE (index) == SUBREG)
25331 index = SUBREG_REG (index);
25332
25333 gcc_assert (base == NULL_RTX || REG_P (base));
25334 gcc_assert (index == NULL_RTX || REG_P (index));
25335
25336 /* Rule of thumb:
25337 - esp as the base always wants an index,
25338 - ebp as the base always wants a displacement,
25339 - r12 as the base always wants an index,
25340 - r13 as the base always wants a displacement. */
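  /* For illustration of the rules below (32-bit examples, ignoring segment
     and addr32 prefixes): (%eax) adds nothing, (%esp) and (%ebp) each add
     one byte (a SIB byte and a zero disp8, respectively), and
     disp8(%eax,%ebx) adds two (the disp8 plus the SIB byte required by the
     index); the modrm byte itself is never counted here.  */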
25341
25342 /* Register Indirect. */
25343 if (base && !index && !disp)
25344 {
25345 /* esp (for its index) and ebp (for its displacement) need
25346 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25347 code. */
25348 if (base == arg_pointer_rtx
25349 || base == frame_pointer_rtx
25350 || REGNO (base) == SP_REG
25351 || REGNO (base) == BP_REG
25352 || REGNO (base) == R12_REG
25353 || REGNO (base) == R13_REG)
25354 len++;
25355 }
25356
25357 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25358 is not disp32, but disp32(%rip), so for disp32
25359 SIB byte is needed, unless print_operand_address
25360 optimizes it into disp32(%rip) or (%rip) is implied
25361 by UNSPEC. */
25362 else if (disp && !base && !index)
25363 {
25364 len += 4;
25365 if (rip_relative_addr_p (&parts))
25366 len++;
25367 }
25368 else
25369 {
25370 /* Find the length of the displacement constant. */
25371 if (disp)
25372 {
25373 if (base && satisfies_constraint_K (disp))
25374 len += 1;
25375 else
25376 len += 4;
25377 }
25378 /* ebp always wants a displacement. Similarly r13. */
25379 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25380 len++;
25381
25382 /* An index requires the two-byte modrm form.... */
25383 if (index
25384 /* ...like esp (or r12), which always wants an index. */
25385 || base == arg_pointer_rtx
25386 || base == frame_pointer_rtx
25387 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25388 len++;
25389 }
25390
25391 return len;
25392 }
25393
25394 /* Compute the default value for the "length_immediate" attribute. When
25395 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
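/* For instance, with SHORTFORM set an SImode immediate of 5 counts as one
   byte, while an immediate of 300 counts as four.  */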
25396 int
25397 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25398 {
25399 int len = 0;
25400 int i;
25401 extract_insn_cached (insn);
25402 for (i = recog_data.n_operands - 1; i >= 0; --i)
25403 if (CONSTANT_P (recog_data.operand[i]))
25404 {
25405 enum attr_mode mode = get_attr_mode (insn);
25406
25407 gcc_assert (!len);
25408 if (shortform && CONST_INT_P (recog_data.operand[i]))
25409 {
25410 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25411 switch (mode)
25412 {
25413 case MODE_QI:
25414 len = 1;
25415 continue;
25416 case MODE_HI:
25417 ival = trunc_int_for_mode (ival, HImode);
25418 break;
25419 case MODE_SI:
25420 ival = trunc_int_for_mode (ival, SImode);
25421 break;
25422 default:
25423 break;
25424 }
25425 if (IN_RANGE (ival, -128, 127))
25426 {
25427 len = 1;
25428 continue;
25429 }
25430 }
25431 switch (mode)
25432 {
25433 case MODE_QI:
25434 len = 1;
25435 break;
25436 case MODE_HI:
25437 len = 2;
25438 break;
25439 case MODE_SI:
25440 len = 4;
25441 break;
25442 /* Immediates for DImode instructions are encoded
25443 as 32-bit sign-extended values. */
25444 case MODE_DI:
25445 len = 4;
25446 break;
25447 default:
25448 fatal_insn ("unknown insn mode", insn);
25449 }
25450 }
25451 return len;
25452 }
25453
25454 /* Compute default value for "length_address" attribute. */
25455 int
25456 ix86_attr_length_address_default (rtx_insn *insn)
25457 {
25458 int i;
25459
25460 if (get_attr_type (insn) == TYPE_LEA)
25461 {
25462 rtx set = PATTERN (insn), addr;
25463
25464 if (GET_CODE (set) == PARALLEL)
25465 set = XVECEXP (set, 0, 0);
25466
25467 gcc_assert (GET_CODE (set) == SET);
25468
25469 addr = SET_SRC (set);
25470
25471 return memory_address_length (addr, true);
25472 }
25473
25474 extract_insn_cached (insn);
25475 for (i = recog_data.n_operands - 1; i >= 0; --i)
25476 if (MEM_P (recog_data.operand[i]))
25477 {
25478 constrain_operands_cached (insn, reload_completed);
25479 if (which_alternative != -1)
25480 {
25481 const char *constraints = recog_data.constraints[i];
25482 int alt = which_alternative;
25483
25484 while (*constraints == '=' || *constraints == '+')
25485 constraints++;
25486 while (alt-- > 0)
25487 while (*constraints++ != ',')
25488 ;
25489 /* Skip ignored operands. */
25490 if (*constraints == 'X')
25491 continue;
25492 }
25493 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25494 }
25495 return 0;
25496 }
25497
25498 /* Compute default value for "length_vex" attribute. It includes
25499 2 or 3 byte VEX prefix and 1 opcode byte. */
25500
25501 int
25502 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
25503 bool has_vex_w)
25504 {
25505 int i;
25506
25507 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX.W bit
25508 requires the 3-byte VEX prefix. */
25509 if (!has_0f_opcode || has_vex_w)
25510 return 3 + 1;
25511
25512 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
25513 if (!TARGET_64BIT)
25514 return 2 + 1;
25515
25516 extract_insn_cached (insn);
25517
25518 for (i = recog_data.n_operands - 1; i >= 0; --i)
25519 if (REG_P (recog_data.operand[i]))
25520 {
25521 /* REX.W bit uses 3 byte VEX prefix. */
25522 if (GET_MODE (recog_data.operand[i]) == DImode
25523 && GENERAL_REG_P (recog_data.operand[i]))
25524 return 3 + 1;
25525 }
25526 else
25527 {
25528 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25529 if (MEM_P (recog_data.operand[i])
25530 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25531 return 3 + 1;
25532 }
25533
25534 return 2 + 1;
25535 }
25536 \f
25537 /* Return the maximum number of instructions a cpu can issue. */
25538
25539 static int
25540 ix86_issue_rate (void)
25541 {
25542 switch (ix86_tune)
25543 {
25544 case PROCESSOR_PENTIUM:
25545 case PROCESSOR_BONNELL:
25546 case PROCESSOR_SILVERMONT:
25547 case PROCESSOR_INTEL:
25548 case PROCESSOR_K6:
25549 case PROCESSOR_BTVER2:
25550 case PROCESSOR_PENTIUM4:
25551 case PROCESSOR_NOCONA:
25552 return 2;
25553
25554 case PROCESSOR_PENTIUMPRO:
25555 case PROCESSOR_ATHLON:
25556 case PROCESSOR_K8:
25557 case PROCESSOR_AMDFAM10:
25558 case PROCESSOR_GENERIC:
25559 case PROCESSOR_BTVER1:
25560 return 3;
25561
25562 case PROCESSOR_BDVER1:
25563 case PROCESSOR_BDVER2:
25564 case PROCESSOR_BDVER3:
25565 case PROCESSOR_BDVER4:
25566 case PROCESSOR_CORE2:
25567 case PROCESSOR_NEHALEM:
25568 case PROCESSOR_SANDYBRIDGE:
25569 case PROCESSOR_HASWELL:
25570 return 4;
25571
25572 default:
25573 return 1;
25574 }
25575 }
25576
25577 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags
25578 set by DEP_INSN and nothing else set by DEP_INSN. */
25579
25580 static bool
25581 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
25582 {
25583 rtx set, set2;
25584
25585 /* Simplify the test for uninteresting insns. */
25586 if (insn_type != TYPE_SETCC
25587 && insn_type != TYPE_ICMOV
25588 && insn_type != TYPE_FCMOV
25589 && insn_type != TYPE_IBR)
25590 return false;
25591
25592 if ((set = single_set (dep_insn)) != 0)
25593 {
25594 set = SET_DEST (set);
25595 set2 = NULL_RTX;
25596 }
25597 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25598 && XVECLEN (PATTERN (dep_insn), 0) == 2
25599 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25600 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25601 {
25602 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25603 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25604 }
25605 else
25606 return false;
25607
25608 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25609 return false;
25610
25611 /* This test is true if the dependent insn reads the flags but
25612 not any other potentially set register. */
25613 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25614 return false;
25615
25616 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25617 return false;
25618
25619 return true;
25620 }
25621
25622 /* Return true iff USE_INSN has a memory address with operands set by
25623 SET_INSN. */
25624
25625 bool
25626 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
25627 {
25628 int i;
25629 extract_insn_cached (use_insn);
25630 for (i = recog_data.n_operands - 1; i >= 0; --i)
25631 if (MEM_P (recog_data.operand[i]))
25632 {
25633 rtx addr = XEXP (recog_data.operand[i], 0);
25634 return modified_in_p (addr, set_insn) != 0;
25635 }
25636 return false;
25637 }
25638
25639 /* Helper function for exact_store_load_dependency.
25640 Return true if addr is found in insn. */
25641 static bool
25642 exact_dependency_1 (rtx addr, rtx insn)
25643 {
25644 enum rtx_code code;
25645 const char *format_ptr;
25646 int i, j;
25647
25648 code = GET_CODE (insn);
25649 switch (code)
25650 {
25651 case MEM:
25652 if (rtx_equal_p (addr, insn))
25653 return true;
25654 break;
25655 case REG:
25656 CASE_CONST_ANY:
25657 case SYMBOL_REF:
25658 case CODE_LABEL:
25659 case PC:
25660 case CC0:
25661 case EXPR_LIST:
25662 return false;
25663 default:
25664 break;
25665 }
25666
25667 format_ptr = GET_RTX_FORMAT (code);
25668 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25669 {
25670 switch (*format_ptr++)
25671 {
25672 case 'e':
25673 if (exact_dependency_1 (addr, XEXP (insn, i)))
25674 return true;
25675 break;
25676 case 'E':
25677 for (j = 0; j < XVECLEN (insn, i); j++)
25678 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25679 return true;
25680 break;
25681 }
25682 }
25683 return false;
25684 }
25685
25686 /* Return true if there exists exact dependency for store & load, i.e.
25687 the same memory address is used in them. */
25688 static bool
25689 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
25690 {
25691 rtx set1, set2;
25692
25693 set1 = single_set (store);
25694 if (!set1)
25695 return false;
25696 if (!MEM_P (SET_DEST (set1)))
25697 return false;
25698 set2 = single_set (load);
25699 if (!set2)
25700 return false;
25701 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25702 return true;
25703 return false;
25704 }
25705
25706 static int
25707 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
25708 {
25709 enum attr_type insn_type, dep_insn_type;
25710 enum attr_memory memory;
25711 rtx set, set2;
25712 int dep_insn_code_number;
25713
25714 /* Anti and output dependencies have zero cost on all CPUs. */
25715 if (REG_NOTE_KIND (link) != 0)
25716 return 0;
25717
25718 dep_insn_code_number = recog_memoized (dep_insn);
25719
25720 /* If we can't recognize the insns, we can't really do anything. */
25721 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25722 return cost;
25723
25724 insn_type = get_attr_type (insn);
25725 dep_insn_type = get_attr_type (dep_insn);
25726
25727 switch (ix86_tune)
25728 {
25729 case PROCESSOR_PENTIUM:
25730 /* Address Generation Interlock adds a cycle of latency. */
25731 if (insn_type == TYPE_LEA)
25732 {
25733 rtx addr = PATTERN (insn);
25734
25735 if (GET_CODE (addr) == PARALLEL)
25736 addr = XVECEXP (addr, 0, 0);
25737
25738 gcc_assert (GET_CODE (addr) == SET);
25739
25740 addr = SET_SRC (addr);
25741 if (modified_in_p (addr, dep_insn))
25742 cost += 1;
25743 }
25744 else if (ix86_agi_dependent (dep_insn, insn))
25745 cost += 1;
25746
25747 /* ??? Compares pair with jump/setcc. */
25748 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25749 cost = 0;
25750
25751 /* Floating point stores require the value to be ready one cycle earlier. */
25752 if (insn_type == TYPE_FMOV
25753 && get_attr_memory (insn) == MEMORY_STORE
25754 && !ix86_agi_dependent (dep_insn, insn))
25755 cost += 1;
25756 break;
25757
25758 case PROCESSOR_PENTIUMPRO:
25759 /* INT->FP conversion is expensive. */
25760 if (get_attr_fp_int_src (dep_insn))
25761 cost += 5;
25762
25763 /* There is one cycle extra latency between an FP op and a store. */
25764 if (insn_type == TYPE_FMOV
25765 && (set = single_set (dep_insn)) != NULL_RTX
25766 && (set2 = single_set (insn)) != NULL_RTX
25767 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25768 && MEM_P (SET_DEST (set2)))
25769 cost += 1;
25770
25771 memory = get_attr_memory (insn);
25772
25773 /* Show ability of reorder buffer to hide latency of load by executing
25774 in parallel with previous instruction in case
25775 previous instruction is not needed to compute the address. */
25776 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25777 && !ix86_agi_dependent (dep_insn, insn))
25778 {
25779 /* Claim moves to take one cycle, as the core can issue one load
25780 at a time and the next load can start a cycle later. */
25781 if (dep_insn_type == TYPE_IMOV
25782 || dep_insn_type == TYPE_FMOV)
25783 cost = 1;
25784 else if (cost > 1)
25785 cost--;
25786 }
25787 break;
25788
25789 case PROCESSOR_K6:
25790 /* The esp dependency is resolved before
25791 the instruction is really finished. */
25792 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25793 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25794 return 1;
25795
25796 /* INT->FP conversion is expensive. */
25797 if (get_attr_fp_int_src (dep_insn))
25798 cost += 5;
25799
25800 memory = get_attr_memory (insn);
25801
25802 /* Show ability of reorder buffer to hide latency of load by executing
25803 in parallel with previous instruction in case
25804 previous instruction is not needed to compute the address. */
25805 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25806 && !ix86_agi_dependent (dep_insn, insn))
25807 {
25808 /* Claim moves to take one cycle, as the core can issue one load
25809 at a time and the next load can start a cycle later. */
25810 if (dep_insn_type == TYPE_IMOV
25811 || dep_insn_type == TYPE_FMOV)
25812 cost = 1;
25813 else if (cost > 2)
25814 cost -= 2;
25815 else
25816 cost = 1;
25817 }
25818 break;
25819
25820 case PROCESSOR_AMDFAM10:
25821 case PROCESSOR_BDVER1:
25822 case PROCESSOR_BDVER2:
25823 case PROCESSOR_BDVER3:
25824 case PROCESSOR_BDVER4:
25825 case PROCESSOR_BTVER1:
25826 case PROCESSOR_BTVER2:
25827 case PROCESSOR_GENERIC:
25828 /* The stack engine allows push&pop instructions to execute in parallel. */
25829 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25830 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25831 return 0;
25832 /* FALLTHRU */
25833
25834 case PROCESSOR_ATHLON:
25835 case PROCESSOR_K8:
25836 memory = get_attr_memory (insn);
25837
25838 /* Show ability of reorder buffer to hide latency of load by executing
25839 in parallel with previous instruction in case
25840 previous instruction is not needed to compute the address. */
25841 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25842 && !ix86_agi_dependent (dep_insn, insn))
25843 {
25844 enum attr_unit unit = get_attr_unit (insn);
25845 int loadcost = 3;
25846
25847 /* Because of the difference between the length of integer and
25848 floating unit pipeline preparation stages, the memory operands
25849 for floating point are cheaper.
25850
25851 ??? For Athlon the difference is most probably 2. */
25852 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25853 loadcost = 3;
25854 else
25855 loadcost = TARGET_ATHLON ? 2 : 0;
25856
25857 if (cost >= loadcost)
25858 cost -= loadcost;
25859 else
25860 cost = 0;
25861 }
25862 break;
25863
25864 case PROCESSOR_CORE2:
25865 case PROCESSOR_NEHALEM:
25866 case PROCESSOR_SANDYBRIDGE:
25867 case PROCESSOR_HASWELL:
25868 /* The stack engine allows push&pop instructions to execute in parallel. */
25869 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25870 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25871 return 0;
25872
25873 memory = get_attr_memory (insn);
25874
25875 /* Show ability of reorder buffer to hide latency of load by executing
25876 in parallel with previous instruction in case
25877 previous instruction is not needed to compute the address. */
25878 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25879 && !ix86_agi_dependent (dep_insn, insn))
25880 {
25881 if (cost >= 4)
25882 cost -= 4;
25883 else
25884 cost = 0;
25885 }
25886 break;
25887
25888 case PROCESSOR_SILVERMONT:
25889 case PROCESSOR_INTEL:
25890 if (!reload_completed)
25891 return cost;
25892
25893 /* Increase cost of integer loads. */
25894 memory = get_attr_memory (dep_insn);
25895 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25896 {
25897 enum attr_unit unit = get_attr_unit (dep_insn);
25898 if (unit == UNIT_INTEGER && cost == 1)
25899 {
25900 if (memory == MEMORY_LOAD)
25901 cost = 3;
25902 else
25903 {
25904 /* Increase cost of ld/st for short int types only
25905 because of store forwarding issue. */
25906 rtx set = single_set (dep_insn);
25907 if (set && (GET_MODE (SET_DEST (set)) == QImode
25908 || GET_MODE (SET_DEST (set)) == HImode))
25909 {
25910 /* Increase cost of store/load insn if exact
25911 dependence exists and it is load insn. */
25912 enum attr_memory insn_memory = get_attr_memory (insn);
25913 if (insn_memory == MEMORY_LOAD
25914 && exact_store_load_dependency (dep_insn, insn))
25915 cost = 3;
25916 }
25917 }
25918 }
25919 }
25920
25921 default:
25922 break;
25923 }
25924
25925 return cost;
25926 }
25927
25928 /* How many alternative schedules to try. This should be as wide as the
25929 scheduling freedom in the DFA, but no wider. Making this value too
25930 large results in extra work for the scheduler. */
25931
25932 static int
25933 ia32_multipass_dfa_lookahead (void)
25934 {
25935 switch (ix86_tune)
25936 {
25937 case PROCESSOR_PENTIUM:
25938 return 2;
25939
25940 case PROCESSOR_PENTIUMPRO:
25941 case PROCESSOR_K6:
25942 return 1;
25943
25944 case PROCESSOR_BDVER1:
25945 case PROCESSOR_BDVER2:
25946 case PROCESSOR_BDVER3:
25947 case PROCESSOR_BDVER4:
25948 /* We use lookahead value 4 for BD both before and after reload
25949 schedules. The plan is to have value 8 included for O3. */
25950 return 4;
25951
25952 case PROCESSOR_CORE2:
25953 case PROCESSOR_NEHALEM:
25954 case PROCESSOR_SANDYBRIDGE:
25955 case PROCESSOR_HASWELL:
25956 case PROCESSOR_BONNELL:
25957 case PROCESSOR_SILVERMONT:
25958 case PROCESSOR_INTEL:
25959 /* Generally, we want haifa-sched:max_issue() to look ahead as far
25960 as the number of instructions that can be executed in a cycle,
25961 i.e., issue_rate. I wonder why tuning for many CPUs does not do this. */
25962 if (reload_completed)
25963 return ix86_issue_rate ();
25964 /* Don't use lookahead for pre-reload schedule to save compile time. */
25965 return 0;
25966
25967 default:
25968 return 0;
25969 }
25970 }
25971
25972 /* Return true if target platform supports macro-fusion. */
25973
25974 static bool
25975 ix86_macro_fusion_p ()
25976 {
25977 return TARGET_FUSE_CMP_AND_BRANCH;
25978 }
25979
25980 /* Check whether the current microarchitecture supports macro fusion
25981 for insn pair "CONDGEN + CONDJMP". Refer to
25982 "Intel Architectures Optimization Reference Manual". */
25983
25984 static bool
25985 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
25986 {
25987 rtx src, dest;
25988 enum rtx_code ccode;
25989 rtx compare_set = NULL_RTX, test_if, cond;
25990 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25991
25992 if (!any_condjump_p (condjmp))
25993 return false;
25994
25995 if (get_attr_type (condgen) != TYPE_TEST
25996 && get_attr_type (condgen) != TYPE_ICMP
25997 && get_attr_type (condgen) != TYPE_INCDEC
25998 && get_attr_type (condgen) != TYPE_ALU)
25999 return false;
26000
26001 compare_set = single_set (condgen);
26002 if (compare_set == NULL_RTX
26003 && !TARGET_FUSE_ALU_AND_BRANCH)
26004 return false;
26005
26006 if (compare_set == NULL_RTX)
26007 {
26008 int i;
26009 rtx pat = PATTERN (condgen);
26010 for (i = 0; i < XVECLEN (pat, 0); i++)
26011 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26012 {
26013 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26014 if (GET_CODE (set_src) == COMPARE)
26015 compare_set = XVECEXP (pat, 0, i);
26016 else
26017 alu_set = XVECEXP (pat, 0, i);
26018 }
26019 }
26020 if (compare_set == NULL_RTX)
26021 return false;
26022 src = SET_SRC (compare_set);
26023 if (GET_CODE (src) != COMPARE)
26024 return false;
26025
26026 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26027 supported. */
26028 if ((MEM_P (XEXP (src, 0))
26029 && CONST_INT_P (XEXP (src, 1)))
26030 || (MEM_P (XEXP (src, 1))
26031 && CONST_INT_P (XEXP (src, 0))))
26032 return false;
26033
26034 /* No fusion for RIP-relative address. */
26035 if (MEM_P (XEXP (src, 0)))
26036 addr = XEXP (XEXP (src, 0), 0);
26037 else if (MEM_P (XEXP (src, 1)))
26038 addr = XEXP (XEXP (src, 1), 0);
26039
26040 if (addr) {
26041 ix86_address parts;
26042 int ok = ix86_decompose_address (addr, &parts);
26043 gcc_assert (ok);
26044
26045 if (rip_relative_addr_p (&parts))
26046 return false;
26047 }
26048
26049 test_if = SET_SRC (pc_set (condjmp));
26050 cond = XEXP (test_if, 0);
26051 ccode = GET_CODE (cond);
26052 /* Check whether the conditional jump uses the Sign or Overflow flags. */
26053 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26054 && (ccode == GE
26055 || ccode == GT
26056 || ccode == LE
26057 || ccode == LT))
26058 return false;
26059
26060 /* Return true for TYPE_TEST and TYPE_ICMP. */
26061 if (get_attr_type (condgen) == TYPE_TEST
26062 || get_attr_type (condgen) == TYPE_ICMP)
26063 return true;
26064
26065 /* The following handles the case of macro-fusion for alu + jmp. */
26066 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26067 return false;
26068
26069 /* No fusion for alu op with memory destination operand. */
26070 dest = SET_DEST (alu_set);
26071 if (MEM_P (dest))
26072 return false;
26073
26074 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26075 supported. */
26076 if (get_attr_type (condgen) == TYPE_INCDEC
26077 && (ccode == GEU
26078 || ccode == GTU
26079 || ccode == LEU
26080 || ccode == LTU))
26081 return false;
26082
26083 return true;
26084 }
26085
26086 /* Try to reorder the ready list to take advantage of Atom pipelined IMUL
26087 execution. It is applied if
26088 (1) an IMUL instruction is on the top of the list;
26089 (2) there is exactly one producer of an independent IMUL instruction in
26090 the ready list.
26091 Return the index of the IMUL producer if it was found and -1 otherwise. */
26092 static int
26093 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26094 {
26095 rtx_insn *insn;
26096 rtx set, insn1, insn2;
26097 sd_iterator_def sd_it;
26098 dep_t dep;
26099 int index = -1;
26100 int i;
26101
26102 if (!TARGET_BONNELL)
26103 return index;
26104
26105 /* Check that IMUL instruction is on the top of ready list. */
26106 insn = ready[n_ready - 1];
26107 set = single_set (insn);
26108 if (!set)
26109 return index;
26110 if (!(GET_CODE (SET_SRC (set)) == MULT
26111 && GET_MODE (SET_SRC (set)) == SImode))
26112 return index;
26113
26114 /* Search for producer of independent IMUL instruction. */
26115 for (i = n_ready - 2; i >= 0; i--)
26116 {
26117 insn = ready[i];
26118 if (!NONDEBUG_INSN_P (insn))
26119 continue;
26120 /* Skip IMUL instruction. */
26121 insn2 = PATTERN (insn);
26122 if (GET_CODE (insn2) == PARALLEL)
26123 insn2 = XVECEXP (insn2, 0, 0);
26124 if (GET_CODE (insn2) == SET
26125 && GET_CODE (SET_SRC (insn2)) == MULT
26126 && GET_MODE (SET_SRC (insn2)) == SImode)
26127 continue;
26128
26129 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26130 {
26131 rtx con;
26132 con = DEP_CON (dep);
26133 if (!NONDEBUG_INSN_P (con))
26134 continue;
26135 insn1 = PATTERN (con);
26136 if (GET_CODE (insn1) == PARALLEL)
26137 insn1 = XVECEXP (insn1, 0, 0);
26138
26139 if (GET_CODE (insn1) == SET
26140 && GET_CODE (SET_SRC (insn1)) == MULT
26141 && GET_MODE (SET_SRC (insn1)) == SImode)
26142 {
26143 sd_iterator_def sd_it1;
26144 dep_t dep1;
26145 /* Check that INSN is the only producer for this IMUL. */
26146 index = i;
26147 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26148 {
26149 rtx pro;
26150 pro = DEP_PRO (dep1);
26151 if (!NONDEBUG_INSN_P (pro))
26152 continue;
26153 if (pro != insn)
26154 index = -1;
26155 }
26156 if (index >= 0)
26157 break;
26158 }
26159 }
26160 if (index >= 0)
26161 break;
26162 }
26163 return index;
26164 }
26165
26166 /* Try to find the best candidate on the top of the ready list if two insns
26167 have the same priority - the candidate is best if its dependees were
26168 scheduled earlier. Applied for Silvermont only.
26169 Return true if the top 2 insns must be interchanged. */
26170 static bool
26171 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26172 {
26173 rtx_insn *top = ready[n_ready - 1];
26174 rtx_insn *next = ready[n_ready - 2];
26175 rtx set;
26176 sd_iterator_def sd_it;
26177 dep_t dep;
26178 int clock1 = -1;
26179 int clock2 = -1;
26180 #define INSN_TICK(INSN) (HID (INSN)->tick)
26181
26182 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26183 return false;
26184
26185 if (!NONDEBUG_INSN_P (top))
26186 return false;
26187 if (!NONJUMP_INSN_P (top))
26188 return false;
26189 if (!NONDEBUG_INSN_P (next))
26190 return false;
26191 if (!NONJUMP_INSN_P (next))
26192 return false;
26193 set = single_set (top);
26194 if (!set)
26195 return false;
26196 set = single_set (next);
26197 if (!set)
26198 return false;
26199
26200 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26201 {
26202 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26203 return false;
26204 /* Determine the winner more precisely. */
26205 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26206 {
26207 rtx pro;
26208 pro = DEP_PRO (dep);
26209 if (!NONDEBUG_INSN_P (pro))
26210 continue;
26211 if (INSN_TICK (pro) > clock1)
26212 clock1 = INSN_TICK (pro);
26213 }
26214 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26215 {
26216 rtx pro;
26217 pro = DEP_PRO (dep);
26218 if (!NONDEBUG_INSN_P (pro))
26219 continue;
26220 if (INSN_TICK (pro) > clock2)
26221 clock2 = INSN_TICK (pro);
26222 }
26223
26224 if (clock1 == clock2)
26225 {
26226 /* Determine the winner - a load must win. */
26227 enum attr_memory memory1, memory2;
26228 memory1 = get_attr_memory (top);
26229 memory2 = get_attr_memory (next);
26230 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26231 return true;
26232 }
26233 return (bool) (clock2 < clock1);
26234 }
26235 return false;
26236 #undef INSN_TICK
26237 }
26238
26239 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26240 Return the issue rate. */
26241 static int
26242 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26243 int *pn_ready, int clock_var)
26244 {
26245 int issue_rate = -1;
26246 int n_ready = *pn_ready;
26247 int i;
26248 rtx_insn *insn;
26249 int index = -1;
26250
26251 /* Set up issue rate. */
26252 issue_rate = ix86_issue_rate ();
26253
26254 /* Do reordering for BONNELL/SILVERMONT only. */
26255 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26256 return issue_rate;
26257
26258 /* Nothing to do if ready list contains only 1 instruction. */
26259 if (n_ready <= 1)
26260 return issue_rate;
26261
26262 /* Do reordering for the post-reload scheduler only. */
26263 if (!reload_completed)
26264 return issue_rate;
26265
26266 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26267 {
26268 if (sched_verbose > 1)
26269 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26270 INSN_UID (ready[index]));
26271
26272 /* Put IMUL producer (ready[index]) at the top of ready list. */
26273 insn = ready[index];
26274 for (i = index; i < n_ready - 1; i++)
26275 ready[i] = ready[i + 1];
26276 ready[n_ready - 1] = insn;
26277 return issue_rate;
26278 }
26279 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26280 {
26281 if (sched_verbose > 1)
26282 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26283 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26284 /* Swap 2 top elements of ready list. */
26285 insn = ready[n_ready - 1];
26286 ready[n_ready - 1] = ready[n_ready - 2];
26287 ready[n_ready - 2] = insn;
26288 }
26289 return issue_rate;
26290 }
26291
26292 static bool
26293 ix86_class_likely_spilled_p (reg_class_t);
26294
26295 /* Return true if the lhs of INSN is a HW function argument register and set
26296 IS_SPILLED to true if it is a likely-spilled HW register. */
26297 static bool
26298 insn_is_function_arg (rtx insn, bool* is_spilled)
26299 {
26300 rtx dst;
26301
26302 if (!NONDEBUG_INSN_P (insn))
26303 return false;
26304 /* Call instructions are not movable, ignore them. */
26305 if (CALL_P (insn))
26306 return false;
26307 insn = PATTERN (insn);
26308 if (GET_CODE (insn) == PARALLEL)
26309 insn = XVECEXP (insn, 0, 0);
26310 if (GET_CODE (insn) != SET)
26311 return false;
26312 dst = SET_DEST (insn);
26313 if (REG_P (dst) && HARD_REGISTER_P (dst)
26314 && ix86_function_arg_regno_p (REGNO (dst)))
26315 {
26316 /* Is it likely spilled HW register? */
26317 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26318 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26319 *is_spilled = true;
26320 return true;
26321 }
26322 return false;
26323 }
26324
26325 /* Add output dependencies for a chain of adjacent function arguments, but
26326 only if there is a move to a likely-spilled HW register. Return the first
26327 argument if at least one dependence was added or NULL otherwise. */
26328 static rtx_insn *
26329 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26330 {
26331 rtx_insn *insn;
26332 rtx_insn *last = call;
26333 rtx_insn *first_arg = NULL;
26334 bool is_spilled = false;
26335
26336 head = PREV_INSN (head);
26337
26338 /* Find the argument-passing instruction nearest to the call. */
26339 while (true)
26340 {
26341 last = PREV_INSN (last);
26342 if (last == head)
26343 return NULL;
26344 if (!NONDEBUG_INSN_P (last))
26345 continue;
26346 if (insn_is_function_arg (last, &is_spilled))
26347 break;
26348 return NULL;
26349 }
26350
26351 first_arg = last;
26352 while (true)
26353 {
26354 insn = PREV_INSN (last);
26355 if (!INSN_P (insn))
26356 break;
26357 if (insn == head)
26358 break;
26359 if (!NONDEBUG_INSN_P (insn))
26360 {
26361 last = insn;
26362 continue;
26363 }
26364 if (insn_is_function_arg (insn, &is_spilled))
26365 {
26366 /* Add an output dependence between two function arguments if the chain
26367 of output arguments contains likely-spilled HW registers. */
26368 if (is_spilled)
26369 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26370 first_arg = last = insn;
26371 }
26372 else
26373 break;
26374 }
26375 if (!is_spilled)
26376 return NULL;
26377 return first_arg;
26378 }
26379
26380 /* Add an output or anti dependency from INSN to FIRST_ARG to restrict its
26381 code motion. */
26382 static void
26383 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26384 {
26385 rtx set;
26386 rtx tmp;
26387
26388 set = single_set (insn);
26389 if (!set)
26390 return;
26391 tmp = SET_DEST (set);
26392 if (REG_P (tmp))
26393 {
26394 /* Add output dependency to the first function argument. */
26395 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26396 return;
26397 }
26398 /* Add anti dependency. */
26399 add_dependence (first_arg, insn, REG_DEP_ANTI);
26400 }
26401
26402 /* Avoid cross-block motion of a function argument by adding a dependency
26403 from the first non-jump instruction in BB. */
26404 static void
26405 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26406 {
26407 rtx_insn *insn = BB_END (bb);
26408
26409 while (insn)
26410 {
26411 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26412 {
26413 rtx set = single_set (insn);
26414 if (set)
26415 {
26416 avoid_func_arg_motion (arg, insn);
26417 return;
26418 }
26419 }
26420 if (insn == BB_HEAD (bb))
26421 return;
26422 insn = PREV_INSN (insn);
26423 }
26424 }
26425
26426 /* Hook for pre-reload schedule - avoid motion of function arguments
26427 passed in likely spilled HW registers. */
26428 static void
26429 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26430 {
26431 rtx_insn *insn;
26432 rtx_insn *first_arg = NULL;
26433 if (reload_completed)
26434 return;
26435 while (head != tail && DEBUG_INSN_P (head))
26436 head = NEXT_INSN (head);
26437 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26438 if (INSN_P (insn) && CALL_P (insn))
26439 {
26440 first_arg = add_parameter_dependencies (insn, head);
26441 if (first_arg)
26442 {
26443 /* Add a dependee for the first argument to predecessors, but only
26444 if the region contains more than one block. */
26445 basic_block bb = BLOCK_FOR_INSN (insn);
26446 int rgn = CONTAINING_RGN (bb->index);
26447 int nr_blks = RGN_NR_BLOCKS (rgn);
26448 /* Skip trivial regions and region head blocks that can have
26449 predecessors outside of region. */
26450 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26451 {
26452 edge e;
26453 edge_iterator ei;
26454
26455 /* Regions are SCCs with the exception of selective
26456 scheduling with pipelining of outer blocks enabled.
26457 So also check that immediate predecessors of a non-head
26458 block are in the same region. */
26459 FOR_EACH_EDGE (e, ei, bb->preds)
26460 {
26461 /* Avoid creating loop-carried dependencies by using
26462 the topological ordering in the region. */
26463 if (rgn == CONTAINING_RGN (e->src->index)
26464 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26465 add_dependee_for_func_arg (first_arg, e->src);
26466 }
26467 }
26468 insn = first_arg;
26469 if (insn == head)
26470 break;
26471 }
26472 }
26473 else if (first_arg)
26474 avoid_func_arg_motion (first_arg, insn);
26475 }
26476
26477 /* Hook for pre-reload schedule - set priority of moves from likely spilled
26478 HW registers to maximum, to schedule them as soon as possible. These are
26479 moves from function argument registers at the top of the function entry
26480 and moves from function return value registers after a call. */
26481 static int
26482 ix86_adjust_priority (rtx_insn *insn, int priority)
26483 {
26484 rtx set;
26485
26486 if (reload_completed)
26487 return priority;
26488
26489 if (!NONDEBUG_INSN_P (insn))
26490 return priority;
26491
26492 set = single_set (insn);
26493 if (set)
26494 {
26495 rtx tmp = SET_SRC (set);
26496 if (REG_P (tmp)
26497 && HARD_REGISTER_P (tmp)
26498 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26499 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26500 return current_sched_info->sched_max_insns_priority;
26501 }
26502
26503 return priority;
26504 }
26505
26506 /* Model the decoder of Core 2/i7.
26507 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
26508 track the instruction fetch block boundaries and make sure that long
26509 (9+ bytes) instructions are assigned to D0. */
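/* In short: per cycle the model accepts at most
   core2i7_ifetch_block_max_insns insns totalling at most
   core2i7_ifetch_block_size bytes, and an insn longer than
   core2i7_secondary_decoder_max_insn_size bytes only as the first insn
   of the group.  */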
26510
26511 /* Maximum length of an insn that can be handled by
26512 a secondary decoder unit. '8' for Core 2/i7. */
26513 static int core2i7_secondary_decoder_max_insn_size;
26514
26515 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26516 '16' for Core 2/i7. */
26517 static int core2i7_ifetch_block_size;
26518
26519 /* Maximum number of instructions decoder can handle per cycle.
26520 '6' for Core 2/i7. */
26521 static int core2i7_ifetch_block_max_insns;
26522
26523 typedef struct ix86_first_cycle_multipass_data_ *
26524 ix86_first_cycle_multipass_data_t;
26525 typedef const struct ix86_first_cycle_multipass_data_ *
26526 const_ix86_first_cycle_multipass_data_t;
26527
26528 /* A variable to store target state across calls to max_issue within
26529 one cycle. */
26530 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26531 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26532
26533 /* Initialize DATA. */
26534 static void
26535 core2i7_first_cycle_multipass_init (void *_data)
26536 {
26537 ix86_first_cycle_multipass_data_t data
26538 = (ix86_first_cycle_multipass_data_t) _data;
26539
26540 data->ifetch_block_len = 0;
26541 data->ifetch_block_n_insns = 0;
26542 data->ready_try_change = NULL;
26543 data->ready_try_change_size = 0;
26544 }
26545
26546 /* Advancing the cycle; reset ifetch block counts. */
26547 static void
26548 core2i7_dfa_post_advance_cycle (void)
26549 {
26550 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26551
26552 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26553
26554 data->ifetch_block_len = 0;
26555 data->ifetch_block_n_insns = 0;
26556 }
26557
26558 static int min_insn_size (rtx_insn *);
26559
26560 /* Filter out insns from ready_try that the core will not be able to issue
26561 on current cycle due to decoder. */
26562 static void
26563 core2i7_first_cycle_multipass_filter_ready_try
26564 (const_ix86_first_cycle_multipass_data_t data,
26565 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
26566 {
26567 while (n_ready--)
26568 {
26569 rtx_insn *insn;
26570 int insn_size;
26571
26572 if (ready_try[n_ready])
26573 continue;
26574
26575 insn = get_ready_element (n_ready);
26576 insn_size = min_insn_size (insn);
26577
26578 if (/* If this is too long an insn for a secondary decoder ... */
26579 (!first_cycle_insn_p
26580 && insn_size > core2i7_secondary_decoder_max_insn_size)
26581 /* ... or it would not fit into the ifetch block ... */
26582 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26583 /* ... or the decoder is full already ... */
26584 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26585 /* ... mask the insn out. */
26586 {
26587 ready_try[n_ready] = 1;
26588
26589 if (data->ready_try_change)
26590 bitmap_set_bit (data->ready_try_change, n_ready);
26591 }
26592 }
26593 }
26594
26595 /* Prepare for a new round of multipass lookahead scheduling. */
26596 static void
26597 core2i7_first_cycle_multipass_begin (void *_data,
26598 signed char *ready_try, int n_ready,
26599 bool first_cycle_insn_p)
26600 {
26601 ix86_first_cycle_multipass_data_t data
26602 = (ix86_first_cycle_multipass_data_t) _data;
26603 const_ix86_first_cycle_multipass_data_t prev_data
26604 = ix86_first_cycle_multipass_data;
26605
26606 /* Restore the state from the end of the previous round. */
26607 data->ifetch_block_len = prev_data->ifetch_block_len;
26608 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26609
26610 /* Filter instructions that cannot be issued on current cycle due to
26611 decoder restrictions. */
26612 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26613 first_cycle_insn_p);
26614 }
26615
26616 /* INSN is being issued in current solution. Account for its impact on
26617 the decoder model. */
26618 static void
26619 core2i7_first_cycle_multipass_issue (void *_data,
26620 signed char *ready_try, int n_ready,
26621 rtx_insn *insn, const void *_prev_data)
26622 {
26623 ix86_first_cycle_multipass_data_t data
26624 = (ix86_first_cycle_multipass_data_t) _data;
26625 const_ix86_first_cycle_multipass_data_t prev_data
26626 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26627
26628 int insn_size = min_insn_size (insn);
26629
26630 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26631 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26632 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26633 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26634
26635 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26636 if (!data->ready_try_change)
26637 {
26638 data->ready_try_change = sbitmap_alloc (n_ready);
26639 data->ready_try_change_size = n_ready;
26640 }
26641 else if (data->ready_try_change_size < n_ready)
26642 {
26643 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26644 n_ready, 0);
26645 data->ready_try_change_size = n_ready;
26646 }
26647 bitmap_clear (data->ready_try_change);
26648
26649 /* Filter out insns from ready_try that the core will not be able to issue
26650 on current cycle due to decoder. */
26651 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26652 false);
26653 }
26654
26655 /* Revert the effect on ready_try. */
26656 static void
26657 core2i7_first_cycle_multipass_backtrack (const void *_data,
26658 signed char *ready_try,
26659 int n_ready ATTRIBUTE_UNUSED)
26660 {
26661 const_ix86_first_cycle_multipass_data_t data
26662 = (const_ix86_first_cycle_multipass_data_t) _data;
26663 unsigned int i = 0;
26664 sbitmap_iterator sbi;
26665
26666 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26667 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26668 {
26669 ready_try[i] = 0;
26670 }
26671 }
26672
26673 /* Save the result of multipass lookahead scheduling for the next round. */
26674 static void
26675 core2i7_first_cycle_multipass_end (const void *_data)
26676 {
26677 const_ix86_first_cycle_multipass_data_t data
26678 = (const_ix86_first_cycle_multipass_data_t) _data;
26679 ix86_first_cycle_multipass_data_t next_data
26680 = ix86_first_cycle_multipass_data;
26681
26682 if (data != NULL)
26683 {
26684 next_data->ifetch_block_len = data->ifetch_block_len;
26685 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26686 }
26687 }
26688
26689 /* Deallocate target data. */
26690 static void
26691 core2i7_first_cycle_multipass_fini (void *_data)
26692 {
26693 ix86_first_cycle_multipass_data_t data
26694 = (ix86_first_cycle_multipass_data_t) _data;
26695
26696 if (data->ready_try_change)
26697 {
26698 sbitmap_free (data->ready_try_change);
26699 data->ready_try_change = NULL;
26700 data->ready_try_change_size = 0;
26701 }
26702 }
26703
26704 /* Prepare for scheduling pass. */
26705 static void
26706 ix86_sched_init_global (FILE *, int, int)
26707 {
26708 /* Install scheduling hooks for current CPU. Some of these hooks are used
26709 in time-critical parts of the scheduler, so we only set them up when
26710 they are actually used. */
26711 switch (ix86_tune)
26712 {
26713 case PROCESSOR_CORE2:
26714 case PROCESSOR_NEHALEM:
26715 case PROCESSOR_SANDYBRIDGE:
26716 case PROCESSOR_HASWELL:
26717 /* Do not perform multipass scheduling for pre-reload schedule
26718 to save compile time. */
26719 if (reload_completed)
26720 {
26721 targetm.sched.dfa_post_advance_cycle
26722 = core2i7_dfa_post_advance_cycle;
26723 targetm.sched.first_cycle_multipass_init
26724 = core2i7_first_cycle_multipass_init;
26725 targetm.sched.first_cycle_multipass_begin
26726 = core2i7_first_cycle_multipass_begin;
26727 targetm.sched.first_cycle_multipass_issue
26728 = core2i7_first_cycle_multipass_issue;
26729 targetm.sched.first_cycle_multipass_backtrack
26730 = core2i7_first_cycle_multipass_backtrack;
26731 targetm.sched.first_cycle_multipass_end
26732 = core2i7_first_cycle_multipass_end;
26733 targetm.sched.first_cycle_multipass_fini
26734 = core2i7_first_cycle_multipass_fini;
26735
26736 /* Set decoder parameters. */
26737 core2i7_secondary_decoder_max_insn_size = 8;
26738 core2i7_ifetch_block_size = 16;
26739 core2i7_ifetch_block_max_insns = 6;
26740 break;
26741 }
26742 /* ... Fall through ... */
26743 default:
26744 targetm.sched.dfa_post_advance_cycle = NULL;
26745 targetm.sched.first_cycle_multipass_init = NULL;
26746 targetm.sched.first_cycle_multipass_begin = NULL;
26747 targetm.sched.first_cycle_multipass_issue = NULL;
26748 targetm.sched.first_cycle_multipass_backtrack = NULL;
26749 targetm.sched.first_cycle_multipass_end = NULL;
26750 targetm.sched.first_cycle_multipass_fini = NULL;
26751 break;
26752 }
26753 }
26754
26755 \f
26756 /* Compute the alignment given to a constant that is being placed in memory.
26757 EXP is the constant and ALIGN is the alignment that the object would
26758 ordinarily have.
26759 The value of this function is used instead of that alignment to align
26760 the object. */
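/* For example, a DFmode (double) constant with smaller alignment is raised
   to 64 bits, and a string constant of 31 or more characters is word-aligned
   unless optimizing for size.  */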
26761
26762 int
26763 ix86_constant_alignment (tree exp, int align)
26764 {
26765 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26766 || TREE_CODE (exp) == INTEGER_CST)
26767 {
26768 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26769 return 64;
26770 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26771 return 128;
26772 }
26773 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26774 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26775 return BITS_PER_WORD;
26776
26777 return align;
26778 }
26779
26780 /* Compute the alignment for a static variable.
26781 TYPE is the data type, and ALIGN is the alignment that
26782 the object would ordinarily have. The value of this function is used
26783 instead of that alignment to align the object. */
26784
26785 int
26786 ix86_data_alignment (tree type, int align, bool opt)
26787 {
26788 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26789 for symbols from other compilation units or symbols that don't need
26790 to bind locally. In order to preserve some ABI compatibility with
26791 those compilers, ensure we don't decrease alignment from what we
26792 used to assume. */
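  /* For instance, when OPT is set and not optimizing for size, an aggregate
     of 32 bytes or more is kept at no less than the 256-bit alignment those
     compilers assumed.  */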
26793
26794 int max_align_compat
26795 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26796
26797 /* A data structure equal to or greater than the size of a cache line
26798 (64 bytes in the Pentium 4 and other recent Intel processors, including
26799 processors based on the Intel Core microarchitecture) should be aligned
26800 so that its base address is a multiple of the cache line size. */
26801
26802 int max_align
26803 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26804
26805 if (max_align < BITS_PER_WORD)
26806 max_align = BITS_PER_WORD;
26807
26808 if (opt
26809 && AGGREGATE_TYPE_P (type)
26810 && TYPE_SIZE (type)
26811 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26812 {
26813 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
26814 && align < max_align_compat)
26815 align = max_align_compat;
26816 if (wi::geu_p (TYPE_SIZE (type), max_align)
26817 && align < max_align)
26818 align = max_align;
26819 }
26820
26821 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
26822 to a 16-byte boundary. */
26823 if (TARGET_64BIT)
26824 {
26825 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26826 && TYPE_SIZE (type)
26827 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26828 && wi::geu_p (TYPE_SIZE (type), 128)
26829 && align < 128)
26830 return 128;
26831 }
26832
26833 if (!opt)
26834 return align;
26835
26836 if (TREE_CODE (type) == ARRAY_TYPE)
26837 {
26838 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26839 return 64;
26840 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26841 return 128;
26842 }
26843 else if (TREE_CODE (type) == COMPLEX_TYPE)
26844 {
26845
26846 if (TYPE_MODE (type) == DCmode && align < 64)
26847 return 64;
26848 if ((TYPE_MODE (type) == XCmode
26849 || TYPE_MODE (type) == TCmode) && align < 128)
26850 return 128;
26851 }
26852 else if ((TREE_CODE (type) == RECORD_TYPE
26853 || TREE_CODE (type) == UNION_TYPE
26854 || TREE_CODE (type) == QUAL_UNION_TYPE)
26855 && TYPE_FIELDS (type))
26856 {
26857 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26858 return 64;
26859 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26860 return 128;
26861 }
26862 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26863 || TREE_CODE (type) == INTEGER_TYPE)
26864 {
26865 if (TYPE_MODE (type) == DFmode && align < 64)
26866 return 64;
26867 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26868 return 128;
26869 }
26870
26871 return align;
26872 }
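#if 0
/* A worked example, illustration only; it assumes -O2, a prefetch_block of
   64 bytes in the active cost table and a sufficiently large
   MAX_OFILE_ALIGNMENT.  For the first declaration below (512 bits) the
   first wi::geu_p test above raises the alignment to the 256-bit
   compatibility value and the second raises it to the 512-bit cache-line
   value.  The second declaration (128 bits) is only raised to the 128-bit
   minimum enforced by the x86-64 ABI check for aggregates of 16 bytes or
   more.  */
static struct { char buf[64]; } example_big;
static struct { char buf[16]; } example_small;
#endif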
26873
26874 /* Compute the alignment for a local variable or a stack slot. EXP is
26875 the data type or decl itself, MODE is the widest mode available and
26876 ALIGN is the alignment that the object would ordinarily have. The
26877 value of this macro is used instead of that alignment to align the
26878 object. */
26879
26880 unsigned int
26881 ix86_local_alignment (tree exp, enum machine_mode mode,
26882 unsigned int align)
26883 {
26884 tree type, decl;
26885
26886 if (exp && DECL_P (exp))
26887 {
26888 type = TREE_TYPE (exp);
26889 decl = exp;
26890 }
26891 else
26892 {
26893 type = exp;
26894 decl = NULL;
26895 }
26896
26897 /* Don't do dynamic stack realignment for long long objects with
26898 -mpreferred-stack-boundary=2. */
26899 if (!TARGET_64BIT
26900 && align == 64
26901 && ix86_preferred_stack_boundary < 64
26902 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26903 && (!type || !TYPE_USER_ALIGN (type))
26904 && (!decl || !DECL_USER_ALIGN (decl)))
26905 align = 32;
26906
26907 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
26908 register in MODE. We will return the larger of the alignments of
26909 XFmode and DFmode. */
26910 if (!type)
26911 {
26912 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26913 align = GET_MODE_ALIGNMENT (DFmode);
26914 return align;
26915 }
26916
26917 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
26918 to a 16-byte boundary. The exact wording is:
26919
26920 An array uses the same alignment as its elements, except that a local or
26921 global array variable of length at least 16 bytes or
26922 a C99 variable-length array variable always has alignment of at least 16 bytes.
26923
26924 This was added to allow use of aligned SSE instructions on arrays. The
26925 rule is meant for static storage (where the compiler cannot do the
26926 analysis by itself). We follow it for automatic variables only when it
26927 is convenient: we fully control everything in the function being
26928 compiled, and functions from other units cannot rely on the alignment.
26929
26930 Exclude the va_list type. It is the common case of a local array where
26931 we cannot benefit from the alignment.
26932
26933 TODO: We should probably optimize for size only when the variable does not escape. */
26934 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26935 && TARGET_SSE)
26936 {
26937 if (AGGREGATE_TYPE_P (type)
26938 && (va_list_type_node == NULL_TREE
26939 || (TYPE_MAIN_VARIANT (type)
26940 != TYPE_MAIN_VARIANT (va_list_type_node)))
26941 && TYPE_SIZE (type)
26942 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26943 && wi::geu_p (TYPE_SIZE (type), 16)
26944 && align < 128)
26945 return 128;
26946 }
26947 if (TREE_CODE (type) == ARRAY_TYPE)
26948 {
26949 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26950 return 64;
26951 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26952 return 128;
26953 }
26954 else if (TREE_CODE (type) == COMPLEX_TYPE)
26955 {
26956 if (TYPE_MODE (type) == DCmode && align < 64)
26957 return 64;
26958 if ((TYPE_MODE (type) == XCmode
26959 || TYPE_MODE (type) == TCmode) && align < 128)
26960 return 128;
26961 }
26962 else if ((TREE_CODE (type) == RECORD_TYPE
26963 || TREE_CODE (type) == UNION_TYPE
26964 || TREE_CODE (type) == QUAL_UNION_TYPE)
26965 && TYPE_FIELDS (type))
26966 {
26967 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26968 return 64;
26969 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26970 return 128;
26971 }
26972 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26973 || TREE_CODE (type) == INTEGER_TYPE)
26974 {
26975
26976 if (TYPE_MODE (type) == DFmode && align < 64)
26977 return 64;
26978 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26979 return 128;
26980 }
26981 return align;
26982 }
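/* Illustration only: on x86-64 with SSE, when optimizing for speed, a local
   aggregate of 16 bytes or more (say a "char buf[32]" automatic variable)
   passes the wi::geu_p test above and is given 128-bit stack alignment so
   that it can be accessed with aligned SSE instructions, whereas on ia32
   with -mpreferred-stack-boundary=2 a local "long long" is dropped to
   32-bit alignment near the top of the function to avoid dynamic stack
   realignment.  */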
26983
26984 /* Compute the minimum required alignment for dynamic stack realignment
26985 purposes for a local variable, parameter or a stack slot. EXP is
26986 the data type or decl itself, MODE is its mode and ALIGN is the
26987 alignment that the object would ordinarily have. */
26988
26989 unsigned int
26990 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26991 unsigned int align)
26992 {
26993 tree type, decl;
26994
26995 if (exp && DECL_P (exp))
26996 {
26997 type = TREE_TYPE (exp);
26998 decl = exp;
26999 }
27000 else
27001 {
27002 type = exp;
27003 decl = NULL;
27004 }
27005
27006 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27007 return align;
27008
27009 /* Don't do dynamic stack realignment for long long objects with
27010 -mpreferred-stack-boundary=2. */
27011 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27012 && (!type || !TYPE_USER_ALIGN (type))
27013 && (!decl || !DECL_USER_ALIGN (decl)))
27014 return 32;
27015
27016 return align;
27017 }
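/* Illustration only: with -m32 -mpreferred-stack-boundary=2, a local
   "long long" (DImode, without user-specified alignment) reports a minimum
   alignment of only 32 bits here, so by itself it does not force dynamic
   stack realignment of the enclosing frame.  */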
27018 \f
27019 /* Find a location for the static chain incoming to a nested function.
27020 This is a register, unless all free registers are used by arguments. */
27021
27022 static rtx
27023 ix86_static_chain (const_tree fndecl, bool incoming_p)
27024 {
27025 unsigned regno;
27026
27027 if (!DECL_STATIC_CHAIN (fndecl))
27028 return NULL;
27029
27030 if (TARGET_64BIT)
27031 {
27032 /* We always use R10 in 64-bit mode. */
27033 regno = R10_REG;
27034 }
27035 else
27036 {
27037 tree fntype;
27038 unsigned int ccvt;
27039
27040 /* By default in 32-bit mode we use ECX to pass the static chain. */
27041 regno = CX_REG;
27042
27043 fntype = TREE_TYPE (fndecl);
27044 ccvt = ix86_get_callcvt (fntype);
27045 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27046 {
27047 /* Fastcall functions use ecx/edx for arguments, which leaves
27048 us with EAX for the static chain.
27049 Thiscall functions use ecx for arguments, which also
27050 leaves us with EAX for the static chain. */
27051 regno = AX_REG;
27052 }
27053 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27054 {
27055 /* Thiscall functions use ecx for arguments, which leaves
27056 us with EAX and EDX for the static chain.
27057 For ABI compatibility we use EAX. */
27058 regno = AX_REG;
27059 }
27060 else if (ix86_function_regparm (fntype, fndecl) == 3)
27061 {
27062 /* For regparm 3, we have no free call-clobbered registers in
27063 which to store the static chain. In order to implement this,
27064 we have the trampoline push the static chain to the stack.
27065 However, we can't push a value below the return address when
27066 we call the nested function directly, so we have to use an
27067 alternate entry point. For this we use ESI, and have the
27068 alternate entry point push ESI, so that things appear the
27069 same once we're executing the nested function. */
27070 if (incoming_p)
27071 {
27072 if (fndecl == current_function_decl)
27073 ix86_static_chain_on_stack = true;
27074 return gen_frame_mem (SImode,
27075 plus_constant (Pmode,
27076 arg_pointer_rtx, -8));
27077 }
27078 regno = SI_REG;
27079 }
27080 }
27081
27082 return gen_rtx_REG (Pmode, regno);
27083 }
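/* Summary of the cases above, for reference: R10 in 64-bit mode; ECX by
   default on ia32; EAX for fastcall and thiscall functions; and for
   regparm(3) functions the chain lives in the stack slot just below the
   return address, stored there either by the trampoline or by an alternate
   entry point that pushes %esi (the register used for the outgoing
   chain).  */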
27084
27085 /* Emit RTL insns to initialize the variable parts of a trampoline.
27086 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27087 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27088 to be passed to the target function. */
27089
27090 static void
27091 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27092 {
27093 rtx mem, fnaddr;
27094 int opcode;
27095 int offset = 0;
27096
27097 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27098
27099 if (TARGET_64BIT)
27100 {
27101 int size;
27102
27103 /* Load the function address into r11. Try to load the address
27104 using the shorter movl instead of movabs. We may want to support
27105 movq for kernel mode, but the kernel does not use trampolines at
27106 the moment. FNADDR is a 32-bit address and may not be in
27107 DImode when ptr_mode == SImode. Always use movl in this
27108 case. */
27109 if (ptr_mode == SImode
27110 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27111 {
27112 fnaddr = copy_addr_to_reg (fnaddr);
27113
27114 mem = adjust_address (m_tramp, HImode, offset);
27115 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27116
27117 mem = adjust_address (m_tramp, SImode, offset + 2);
27118 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27119 offset += 6;
27120 }
27121 else
27122 {
27123 mem = adjust_address (m_tramp, HImode, offset);
27124 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27125
27126 mem = adjust_address (m_tramp, DImode, offset + 2);
27127 emit_move_insn (mem, fnaddr);
27128 offset += 10;
27129 }
27130
27131 /* Load the static chain into r10 using movabs. Use the shorter
27132 movl instead of movabs when ptr_mode == SImode. */
27133 if (ptr_mode == SImode)
27134 {
27135 opcode = 0xba41;
27136 size = 6;
27137 }
27138 else
27139 {
27140 opcode = 0xba49;
27141 size = 10;
27142 }
27143
27144 mem = adjust_address (m_tramp, HImode, offset);
27145 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27146
27147 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27148 emit_move_insn (mem, chain_value);
27149 offset += size;
27150
27151 /* Jump to r11; the last (unused) byte is a nop, only there to
27152 pad the write out to a single 32-bit store. */
27153 mem = adjust_address (m_tramp, SImode, offset);
27154 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27155 offset += 4;
27156 }
27157 else
27158 {
27159 rtx disp, chain;
27160
27161 /* Depending on the static chain location, either load a register
27162 with a constant, or push the constant to the stack. All of the
27163 instructions are the same size. */
27164 chain = ix86_static_chain (fndecl, true);
27165 if (REG_P (chain))
27166 {
27167 switch (REGNO (chain))
27168 {
27169 case AX_REG:
27170 opcode = 0xb8; break;
27171 case CX_REG:
27172 opcode = 0xb9; break;
27173 default:
27174 gcc_unreachable ();
27175 }
27176 }
27177 else
27178 opcode = 0x68;
27179
27180 mem = adjust_address (m_tramp, QImode, offset);
27181 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27182
27183 mem = adjust_address (m_tramp, SImode, offset + 1);
27184 emit_move_insn (mem, chain_value);
27185 offset += 5;
27186
27187 mem = adjust_address (m_tramp, QImode, offset);
27188 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27189
27190 mem = adjust_address (m_tramp, SImode, offset + 1);
27191
27192 /* Compute the offset from the end of the jmp to the target function.
27193 In the case where the trampoline stores the static chain on the
27194 stack, we need to skip the first insn, which pushes the (call-saved)
27195 static chain register; this push is 1 byte. */
27196 offset += 5;
27197 disp = expand_binop (SImode, sub_optab, fnaddr,
27198 plus_constant (Pmode, XEXP (m_tramp, 0),
27199 offset - (MEM_P (chain) ? 1 : 0)),
27200 NULL_RTX, 1, OPTAB_DIRECT);
27201 emit_move_insn (mem, disp);
27202 }
27203
27204 gcc_assert (offset <= TRAMPOLINE_SIZE);
27205
27206 #ifdef HAVE_ENABLE_EXECUTE_STACK
27207 #ifdef CHECK_EXECUTE_STACK_ENABLED
27208 if (CHECK_EXECUTE_STACK_ENABLED)
27209 #endif
27210 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27211 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27212 #endif
27213 }
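#if 0
/* For reference, an illustrative (not compiled) sketch of the bytes the
   64-bit movabs variant above assembles to; FNADDR and CHAIN stand for the
   two 64-bit immediates stored by ix86_trampoline_init.  The movl variant
   replaces the first two instructions with "41 bb <imm32>" and
   "41 ba <imm32>" (movl into %r11d and %r10d), and the ia32 trampoline is
   simply "b8", "b9" or "68" followed by a 32-bit static chain (mov or
   push), then "e9 <rel32>" jumping to the target.  */
static const unsigned char x86_64_trampoline_sketch[] = {
  0x49, 0xbb, 0, 0, 0, 0, 0, 0, 0, 0,	/* movabs $FNADDR, %r11 */
  0x49, 0xba, 0, 0, 0, 0, 0, 0, 0, 0,	/* movabs $CHAIN, %r10 */
  0x49, 0xff, 0xe3,			/* jmp *%r11 */
  0x90					/* nop, pads the final 32-bit store */
};
#endif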
27214 \f
27215 /* The following file contains several enumerations and data structures
27216 built from the definitions in i386-builtin-types.def. */
27217
27218 #include "i386-builtin-types.inc"
27219
27220 /* Table for the ix86 builtin non-function types. */
27221 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27222
27223 /* Retrieve an element from the above table, building some of
27224 the types lazily. */
27225
27226 static tree
27227 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27228 {
27229 unsigned int index;
27230 tree type, itype;
27231
27232 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27233
27234 type = ix86_builtin_type_tab[(int) tcode];
27235 if (type != NULL)
27236 return type;
27237
27238 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27239 if (tcode <= IX86_BT_LAST_VECT)
27240 {
27241 enum machine_mode mode;
27242
27243 index = tcode - IX86_BT_LAST_PRIM - 1;
27244 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27245 mode = ix86_builtin_type_vect_mode[index];
27246
27247 type = build_vector_type_for_mode (itype, mode);
27248 }
27249 else
27250 {
27251 int quals;
27252
27253 index = tcode - IX86_BT_LAST_VECT - 1;
27254 if (tcode <= IX86_BT_LAST_PTR)
27255 quals = TYPE_UNQUALIFIED;
27256 else
27257 quals = TYPE_QUAL_CONST;
27258
27259 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27260 if (quals != TYPE_UNQUALIFIED)
27261 itype = build_qualified_type (itype, quals);
27262
27263 type = build_pointer_type (itype);
27264 }
27265
27266 ix86_builtin_type_tab[(int) tcode] = type;
27267 return type;
27268 }
27269
27270 /* Table for the ix86 builtin function types. */
27271 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27272
27273 /* Retrieve an element from the above table, building some of
27274 the types lazily. */
27275
27276 static tree
27277 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27278 {
27279 tree type;
27280
27281 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27282
27283 type = ix86_builtin_func_type_tab[(int) tcode];
27284 if (type != NULL)
27285 return type;
27286
27287 if (tcode <= IX86_BT_LAST_FUNC)
27288 {
27289 unsigned start = ix86_builtin_func_start[(int) tcode];
27290 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27291 tree rtype, atype, args = void_list_node;
27292 unsigned i;
27293
27294 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27295 for (i = after - 1; i > start; --i)
27296 {
27297 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27298 args = tree_cons (NULL, atype, args);
27299 }
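/* Note (descriptive only): ix86_builtin_func_args[START] is the return
   type and the following entries are the argument types in source order;
   walking them backwards while consing onto ARGS, which starts as
   void_list_node, yields a TREE_LIST in source order terminated by void,
   as build_function_type expects.  */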
27300
27301 type = build_function_type (rtype, args);
27302 }
27303 else
27304 {
27305 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27306 enum ix86_builtin_func_type icode;
27307
27308 icode = ix86_builtin_func_alias_base[index];
27309 type = ix86_get_builtin_func_type (icode);
27310 }
27311
27312 ix86_builtin_func_type_tab[(int) tcode] = type;
27313 return type;
27314 }
27315
27316
27317 /* Codes for all the SSE/MMX builtins. */
27318 enum ix86_builtins
27319 {
27320 IX86_BUILTIN_ADDPS,
27321 IX86_BUILTIN_ADDSS,
27322 IX86_BUILTIN_DIVPS,
27323 IX86_BUILTIN_DIVSS,
27324 IX86_BUILTIN_MULPS,
27325 IX86_BUILTIN_MULSS,
27326 IX86_BUILTIN_SUBPS,
27327 IX86_BUILTIN_SUBSS,
27328
27329 IX86_BUILTIN_CMPEQPS,
27330 IX86_BUILTIN_CMPLTPS,
27331 IX86_BUILTIN_CMPLEPS,
27332 IX86_BUILTIN_CMPGTPS,
27333 IX86_BUILTIN_CMPGEPS,
27334 IX86_BUILTIN_CMPNEQPS,
27335 IX86_BUILTIN_CMPNLTPS,
27336 IX86_BUILTIN_CMPNLEPS,
27337 IX86_BUILTIN_CMPNGTPS,
27338 IX86_BUILTIN_CMPNGEPS,
27339 IX86_BUILTIN_CMPORDPS,
27340 IX86_BUILTIN_CMPUNORDPS,
27341 IX86_BUILTIN_CMPEQSS,
27342 IX86_BUILTIN_CMPLTSS,
27343 IX86_BUILTIN_CMPLESS,
27344 IX86_BUILTIN_CMPNEQSS,
27345 IX86_BUILTIN_CMPNLTSS,
27346 IX86_BUILTIN_CMPNLESS,
27347 IX86_BUILTIN_CMPORDSS,
27348 IX86_BUILTIN_CMPUNORDSS,
27349
27350 IX86_BUILTIN_COMIEQSS,
27351 IX86_BUILTIN_COMILTSS,
27352 IX86_BUILTIN_COMILESS,
27353 IX86_BUILTIN_COMIGTSS,
27354 IX86_BUILTIN_COMIGESS,
27355 IX86_BUILTIN_COMINEQSS,
27356 IX86_BUILTIN_UCOMIEQSS,
27357 IX86_BUILTIN_UCOMILTSS,
27358 IX86_BUILTIN_UCOMILESS,
27359 IX86_BUILTIN_UCOMIGTSS,
27360 IX86_BUILTIN_UCOMIGESS,
27361 IX86_BUILTIN_UCOMINEQSS,
27362
27363 IX86_BUILTIN_CVTPI2PS,
27364 IX86_BUILTIN_CVTPS2PI,
27365 IX86_BUILTIN_CVTSI2SS,
27366 IX86_BUILTIN_CVTSI642SS,
27367 IX86_BUILTIN_CVTSS2SI,
27368 IX86_BUILTIN_CVTSS2SI64,
27369 IX86_BUILTIN_CVTTPS2PI,
27370 IX86_BUILTIN_CVTTSS2SI,
27371 IX86_BUILTIN_CVTTSS2SI64,
27372
27373 IX86_BUILTIN_MAXPS,
27374 IX86_BUILTIN_MAXSS,
27375 IX86_BUILTIN_MINPS,
27376 IX86_BUILTIN_MINSS,
27377
27378 IX86_BUILTIN_LOADUPS,
27379 IX86_BUILTIN_STOREUPS,
27380 IX86_BUILTIN_MOVSS,
27381
27382 IX86_BUILTIN_MOVHLPS,
27383 IX86_BUILTIN_MOVLHPS,
27384 IX86_BUILTIN_LOADHPS,
27385 IX86_BUILTIN_LOADLPS,
27386 IX86_BUILTIN_STOREHPS,
27387 IX86_BUILTIN_STORELPS,
27388
27389 IX86_BUILTIN_MASKMOVQ,
27390 IX86_BUILTIN_MOVMSKPS,
27391 IX86_BUILTIN_PMOVMSKB,
27392
27393 IX86_BUILTIN_MOVNTPS,
27394 IX86_BUILTIN_MOVNTQ,
27395
27396 IX86_BUILTIN_LOADDQU,
27397 IX86_BUILTIN_STOREDQU,
27398
27399 IX86_BUILTIN_PACKSSWB,
27400 IX86_BUILTIN_PACKSSDW,
27401 IX86_BUILTIN_PACKUSWB,
27402
27403 IX86_BUILTIN_PADDB,
27404 IX86_BUILTIN_PADDW,
27405 IX86_BUILTIN_PADDD,
27406 IX86_BUILTIN_PADDQ,
27407 IX86_BUILTIN_PADDSB,
27408 IX86_BUILTIN_PADDSW,
27409 IX86_BUILTIN_PADDUSB,
27410 IX86_BUILTIN_PADDUSW,
27411 IX86_BUILTIN_PSUBB,
27412 IX86_BUILTIN_PSUBW,
27413 IX86_BUILTIN_PSUBD,
27414 IX86_BUILTIN_PSUBQ,
27415 IX86_BUILTIN_PSUBSB,
27416 IX86_BUILTIN_PSUBSW,
27417 IX86_BUILTIN_PSUBUSB,
27418 IX86_BUILTIN_PSUBUSW,
27419
27420 IX86_BUILTIN_PAND,
27421 IX86_BUILTIN_PANDN,
27422 IX86_BUILTIN_POR,
27423 IX86_BUILTIN_PXOR,
27424
27425 IX86_BUILTIN_PAVGB,
27426 IX86_BUILTIN_PAVGW,
27427
27428 IX86_BUILTIN_PCMPEQB,
27429 IX86_BUILTIN_PCMPEQW,
27430 IX86_BUILTIN_PCMPEQD,
27431 IX86_BUILTIN_PCMPGTB,
27432 IX86_BUILTIN_PCMPGTW,
27433 IX86_BUILTIN_PCMPGTD,
27434
27435 IX86_BUILTIN_PMADDWD,
27436
27437 IX86_BUILTIN_PMAXSW,
27438 IX86_BUILTIN_PMAXUB,
27439 IX86_BUILTIN_PMINSW,
27440 IX86_BUILTIN_PMINUB,
27441
27442 IX86_BUILTIN_PMULHUW,
27443 IX86_BUILTIN_PMULHW,
27444 IX86_BUILTIN_PMULLW,
27445
27446 IX86_BUILTIN_PSADBW,
27447 IX86_BUILTIN_PSHUFW,
27448
27449 IX86_BUILTIN_PSLLW,
27450 IX86_BUILTIN_PSLLD,
27451 IX86_BUILTIN_PSLLQ,
27452 IX86_BUILTIN_PSRAW,
27453 IX86_BUILTIN_PSRAD,
27454 IX86_BUILTIN_PSRLW,
27455 IX86_BUILTIN_PSRLD,
27456 IX86_BUILTIN_PSRLQ,
27457 IX86_BUILTIN_PSLLWI,
27458 IX86_BUILTIN_PSLLDI,
27459 IX86_BUILTIN_PSLLQI,
27460 IX86_BUILTIN_PSRAWI,
27461 IX86_BUILTIN_PSRADI,
27462 IX86_BUILTIN_PSRLWI,
27463 IX86_BUILTIN_PSRLDI,
27464 IX86_BUILTIN_PSRLQI,
27465
27466 IX86_BUILTIN_PUNPCKHBW,
27467 IX86_BUILTIN_PUNPCKHWD,
27468 IX86_BUILTIN_PUNPCKHDQ,
27469 IX86_BUILTIN_PUNPCKLBW,
27470 IX86_BUILTIN_PUNPCKLWD,
27471 IX86_BUILTIN_PUNPCKLDQ,
27472
27473 IX86_BUILTIN_SHUFPS,
27474
27475 IX86_BUILTIN_RCPPS,
27476 IX86_BUILTIN_RCPSS,
27477 IX86_BUILTIN_RSQRTPS,
27478 IX86_BUILTIN_RSQRTPS_NR,
27479 IX86_BUILTIN_RSQRTSS,
27480 IX86_BUILTIN_RSQRTF,
27481 IX86_BUILTIN_SQRTPS,
27482 IX86_BUILTIN_SQRTPS_NR,
27483 IX86_BUILTIN_SQRTSS,
27484
27485 IX86_BUILTIN_UNPCKHPS,
27486 IX86_BUILTIN_UNPCKLPS,
27487
27488 IX86_BUILTIN_ANDPS,
27489 IX86_BUILTIN_ANDNPS,
27490 IX86_BUILTIN_ORPS,
27491 IX86_BUILTIN_XORPS,
27492
27493 IX86_BUILTIN_EMMS,
27494 IX86_BUILTIN_LDMXCSR,
27495 IX86_BUILTIN_STMXCSR,
27496 IX86_BUILTIN_SFENCE,
27497
27498 IX86_BUILTIN_FXSAVE,
27499 IX86_BUILTIN_FXRSTOR,
27500 IX86_BUILTIN_FXSAVE64,
27501 IX86_BUILTIN_FXRSTOR64,
27502
27503 IX86_BUILTIN_XSAVE,
27504 IX86_BUILTIN_XRSTOR,
27505 IX86_BUILTIN_XSAVE64,
27506 IX86_BUILTIN_XRSTOR64,
27507
27508 IX86_BUILTIN_XSAVEOPT,
27509 IX86_BUILTIN_XSAVEOPT64,
27510
27511 IX86_BUILTIN_XSAVEC,
27512 IX86_BUILTIN_XSAVEC64,
27513
27514 IX86_BUILTIN_XSAVES,
27515 IX86_BUILTIN_XRSTORS,
27516 IX86_BUILTIN_XSAVES64,
27517 IX86_BUILTIN_XRSTORS64,
27518
27519 /* 3DNow! Original */
27520 IX86_BUILTIN_FEMMS,
27521 IX86_BUILTIN_PAVGUSB,
27522 IX86_BUILTIN_PF2ID,
27523 IX86_BUILTIN_PFACC,
27524 IX86_BUILTIN_PFADD,
27525 IX86_BUILTIN_PFCMPEQ,
27526 IX86_BUILTIN_PFCMPGE,
27527 IX86_BUILTIN_PFCMPGT,
27528 IX86_BUILTIN_PFMAX,
27529 IX86_BUILTIN_PFMIN,
27530 IX86_BUILTIN_PFMUL,
27531 IX86_BUILTIN_PFRCP,
27532 IX86_BUILTIN_PFRCPIT1,
27533 IX86_BUILTIN_PFRCPIT2,
27534 IX86_BUILTIN_PFRSQIT1,
27535 IX86_BUILTIN_PFRSQRT,
27536 IX86_BUILTIN_PFSUB,
27537 IX86_BUILTIN_PFSUBR,
27538 IX86_BUILTIN_PI2FD,
27539 IX86_BUILTIN_PMULHRW,
27540
27541 /* 3DNow! Athlon Extensions */
27542 IX86_BUILTIN_PF2IW,
27543 IX86_BUILTIN_PFNACC,
27544 IX86_BUILTIN_PFPNACC,
27545 IX86_BUILTIN_PI2FW,
27546 IX86_BUILTIN_PSWAPDSI,
27547 IX86_BUILTIN_PSWAPDSF,
27548
27549 /* SSE2 */
27550 IX86_BUILTIN_ADDPD,
27551 IX86_BUILTIN_ADDSD,
27552 IX86_BUILTIN_DIVPD,
27553 IX86_BUILTIN_DIVSD,
27554 IX86_BUILTIN_MULPD,
27555 IX86_BUILTIN_MULSD,
27556 IX86_BUILTIN_SUBPD,
27557 IX86_BUILTIN_SUBSD,
27558
27559 IX86_BUILTIN_CMPEQPD,
27560 IX86_BUILTIN_CMPLTPD,
27561 IX86_BUILTIN_CMPLEPD,
27562 IX86_BUILTIN_CMPGTPD,
27563 IX86_BUILTIN_CMPGEPD,
27564 IX86_BUILTIN_CMPNEQPD,
27565 IX86_BUILTIN_CMPNLTPD,
27566 IX86_BUILTIN_CMPNLEPD,
27567 IX86_BUILTIN_CMPNGTPD,
27568 IX86_BUILTIN_CMPNGEPD,
27569 IX86_BUILTIN_CMPORDPD,
27570 IX86_BUILTIN_CMPUNORDPD,
27571 IX86_BUILTIN_CMPEQSD,
27572 IX86_BUILTIN_CMPLTSD,
27573 IX86_BUILTIN_CMPLESD,
27574 IX86_BUILTIN_CMPNEQSD,
27575 IX86_BUILTIN_CMPNLTSD,
27576 IX86_BUILTIN_CMPNLESD,
27577 IX86_BUILTIN_CMPORDSD,
27578 IX86_BUILTIN_CMPUNORDSD,
27579
27580 IX86_BUILTIN_COMIEQSD,
27581 IX86_BUILTIN_COMILTSD,
27582 IX86_BUILTIN_COMILESD,
27583 IX86_BUILTIN_COMIGTSD,
27584 IX86_BUILTIN_COMIGESD,
27585 IX86_BUILTIN_COMINEQSD,
27586 IX86_BUILTIN_UCOMIEQSD,
27587 IX86_BUILTIN_UCOMILTSD,
27588 IX86_BUILTIN_UCOMILESD,
27589 IX86_BUILTIN_UCOMIGTSD,
27590 IX86_BUILTIN_UCOMIGESD,
27591 IX86_BUILTIN_UCOMINEQSD,
27592
27593 IX86_BUILTIN_MAXPD,
27594 IX86_BUILTIN_MAXSD,
27595 IX86_BUILTIN_MINPD,
27596 IX86_BUILTIN_MINSD,
27597
27598 IX86_BUILTIN_ANDPD,
27599 IX86_BUILTIN_ANDNPD,
27600 IX86_BUILTIN_ORPD,
27601 IX86_BUILTIN_XORPD,
27602
27603 IX86_BUILTIN_SQRTPD,
27604 IX86_BUILTIN_SQRTSD,
27605
27606 IX86_BUILTIN_UNPCKHPD,
27607 IX86_BUILTIN_UNPCKLPD,
27608
27609 IX86_BUILTIN_SHUFPD,
27610
27611 IX86_BUILTIN_LOADUPD,
27612 IX86_BUILTIN_STOREUPD,
27613 IX86_BUILTIN_MOVSD,
27614
27615 IX86_BUILTIN_LOADHPD,
27616 IX86_BUILTIN_LOADLPD,
27617
27618 IX86_BUILTIN_CVTDQ2PD,
27619 IX86_BUILTIN_CVTDQ2PS,
27620
27621 IX86_BUILTIN_CVTPD2DQ,
27622 IX86_BUILTIN_CVTPD2PI,
27623 IX86_BUILTIN_CVTPD2PS,
27624 IX86_BUILTIN_CVTTPD2DQ,
27625 IX86_BUILTIN_CVTTPD2PI,
27626
27627 IX86_BUILTIN_CVTPI2PD,
27628 IX86_BUILTIN_CVTSI2SD,
27629 IX86_BUILTIN_CVTSI642SD,
27630
27631 IX86_BUILTIN_CVTSD2SI,
27632 IX86_BUILTIN_CVTSD2SI64,
27633 IX86_BUILTIN_CVTSD2SS,
27634 IX86_BUILTIN_CVTSS2SD,
27635 IX86_BUILTIN_CVTTSD2SI,
27636 IX86_BUILTIN_CVTTSD2SI64,
27637
27638 IX86_BUILTIN_CVTPS2DQ,
27639 IX86_BUILTIN_CVTPS2PD,
27640 IX86_BUILTIN_CVTTPS2DQ,
27641
27642 IX86_BUILTIN_MOVNTI,
27643 IX86_BUILTIN_MOVNTI64,
27644 IX86_BUILTIN_MOVNTPD,
27645 IX86_BUILTIN_MOVNTDQ,
27646
27647 IX86_BUILTIN_MOVQ128,
27648
27649 /* SSE2 MMX */
27650 IX86_BUILTIN_MASKMOVDQU,
27651 IX86_BUILTIN_MOVMSKPD,
27652 IX86_BUILTIN_PMOVMSKB128,
27653
27654 IX86_BUILTIN_PACKSSWB128,
27655 IX86_BUILTIN_PACKSSDW128,
27656 IX86_BUILTIN_PACKUSWB128,
27657
27658 IX86_BUILTIN_PADDB128,
27659 IX86_BUILTIN_PADDW128,
27660 IX86_BUILTIN_PADDD128,
27661 IX86_BUILTIN_PADDQ128,
27662 IX86_BUILTIN_PADDSB128,
27663 IX86_BUILTIN_PADDSW128,
27664 IX86_BUILTIN_PADDUSB128,
27665 IX86_BUILTIN_PADDUSW128,
27666 IX86_BUILTIN_PSUBB128,
27667 IX86_BUILTIN_PSUBW128,
27668 IX86_BUILTIN_PSUBD128,
27669 IX86_BUILTIN_PSUBQ128,
27670 IX86_BUILTIN_PSUBSB128,
27671 IX86_BUILTIN_PSUBSW128,
27672 IX86_BUILTIN_PSUBUSB128,
27673 IX86_BUILTIN_PSUBUSW128,
27674
27675 IX86_BUILTIN_PAND128,
27676 IX86_BUILTIN_PANDN128,
27677 IX86_BUILTIN_POR128,
27678 IX86_BUILTIN_PXOR128,
27679
27680 IX86_BUILTIN_PAVGB128,
27681 IX86_BUILTIN_PAVGW128,
27682
27683 IX86_BUILTIN_PCMPEQB128,
27684 IX86_BUILTIN_PCMPEQW128,
27685 IX86_BUILTIN_PCMPEQD128,
27686 IX86_BUILTIN_PCMPGTB128,
27687 IX86_BUILTIN_PCMPGTW128,
27688 IX86_BUILTIN_PCMPGTD128,
27689
27690 IX86_BUILTIN_PMADDWD128,
27691
27692 IX86_BUILTIN_PMAXSW128,
27693 IX86_BUILTIN_PMAXUB128,
27694 IX86_BUILTIN_PMINSW128,
27695 IX86_BUILTIN_PMINUB128,
27696
27697 IX86_BUILTIN_PMULUDQ,
27698 IX86_BUILTIN_PMULUDQ128,
27699 IX86_BUILTIN_PMULHUW128,
27700 IX86_BUILTIN_PMULHW128,
27701 IX86_BUILTIN_PMULLW128,
27702
27703 IX86_BUILTIN_PSADBW128,
27704 IX86_BUILTIN_PSHUFHW,
27705 IX86_BUILTIN_PSHUFLW,
27706 IX86_BUILTIN_PSHUFD,
27707
27708 IX86_BUILTIN_PSLLDQI128,
27709 IX86_BUILTIN_PSLLWI128,
27710 IX86_BUILTIN_PSLLDI128,
27711 IX86_BUILTIN_PSLLQI128,
27712 IX86_BUILTIN_PSRAWI128,
27713 IX86_BUILTIN_PSRADI128,
27714 IX86_BUILTIN_PSRLDQI128,
27715 IX86_BUILTIN_PSRLWI128,
27716 IX86_BUILTIN_PSRLDI128,
27717 IX86_BUILTIN_PSRLQI128,
27718
27719 IX86_BUILTIN_PSLLDQ128,
27720 IX86_BUILTIN_PSLLW128,
27721 IX86_BUILTIN_PSLLD128,
27722 IX86_BUILTIN_PSLLQ128,
27723 IX86_BUILTIN_PSRAW128,
27724 IX86_BUILTIN_PSRAD128,
27725 IX86_BUILTIN_PSRLW128,
27726 IX86_BUILTIN_PSRLD128,
27727 IX86_BUILTIN_PSRLQ128,
27728
27729 IX86_BUILTIN_PUNPCKHBW128,
27730 IX86_BUILTIN_PUNPCKHWD128,
27731 IX86_BUILTIN_PUNPCKHDQ128,
27732 IX86_BUILTIN_PUNPCKHQDQ128,
27733 IX86_BUILTIN_PUNPCKLBW128,
27734 IX86_BUILTIN_PUNPCKLWD128,
27735 IX86_BUILTIN_PUNPCKLDQ128,
27736 IX86_BUILTIN_PUNPCKLQDQ128,
27737
27738 IX86_BUILTIN_CLFLUSH,
27739 IX86_BUILTIN_MFENCE,
27740 IX86_BUILTIN_LFENCE,
27741 IX86_BUILTIN_PAUSE,
27742
27743 IX86_BUILTIN_FNSTENV,
27744 IX86_BUILTIN_FLDENV,
27745 IX86_BUILTIN_FNSTSW,
27746 IX86_BUILTIN_FNCLEX,
27747
27748 IX86_BUILTIN_BSRSI,
27749 IX86_BUILTIN_BSRDI,
27750 IX86_BUILTIN_RDPMC,
27751 IX86_BUILTIN_RDTSC,
27752 IX86_BUILTIN_RDTSCP,
27753 IX86_BUILTIN_ROLQI,
27754 IX86_BUILTIN_ROLHI,
27755 IX86_BUILTIN_RORQI,
27756 IX86_BUILTIN_RORHI,
27757
27758 /* SSE3. */
27759 IX86_BUILTIN_ADDSUBPS,
27760 IX86_BUILTIN_HADDPS,
27761 IX86_BUILTIN_HSUBPS,
27762 IX86_BUILTIN_MOVSHDUP,
27763 IX86_BUILTIN_MOVSLDUP,
27764 IX86_BUILTIN_ADDSUBPD,
27765 IX86_BUILTIN_HADDPD,
27766 IX86_BUILTIN_HSUBPD,
27767 IX86_BUILTIN_LDDQU,
27768
27769 IX86_BUILTIN_MONITOR,
27770 IX86_BUILTIN_MWAIT,
27771
27772 /* SSSE3. */
27773 IX86_BUILTIN_PHADDW,
27774 IX86_BUILTIN_PHADDD,
27775 IX86_BUILTIN_PHADDSW,
27776 IX86_BUILTIN_PHSUBW,
27777 IX86_BUILTIN_PHSUBD,
27778 IX86_BUILTIN_PHSUBSW,
27779 IX86_BUILTIN_PMADDUBSW,
27780 IX86_BUILTIN_PMULHRSW,
27781 IX86_BUILTIN_PSHUFB,
27782 IX86_BUILTIN_PSIGNB,
27783 IX86_BUILTIN_PSIGNW,
27784 IX86_BUILTIN_PSIGND,
27785 IX86_BUILTIN_PALIGNR,
27786 IX86_BUILTIN_PABSB,
27787 IX86_BUILTIN_PABSW,
27788 IX86_BUILTIN_PABSD,
27789
27790 IX86_BUILTIN_PHADDW128,
27791 IX86_BUILTIN_PHADDD128,
27792 IX86_BUILTIN_PHADDSW128,
27793 IX86_BUILTIN_PHSUBW128,
27794 IX86_BUILTIN_PHSUBD128,
27795 IX86_BUILTIN_PHSUBSW128,
27796 IX86_BUILTIN_PMADDUBSW128,
27797 IX86_BUILTIN_PMULHRSW128,
27798 IX86_BUILTIN_PSHUFB128,
27799 IX86_BUILTIN_PSIGNB128,
27800 IX86_BUILTIN_PSIGNW128,
27801 IX86_BUILTIN_PSIGND128,
27802 IX86_BUILTIN_PALIGNR128,
27803 IX86_BUILTIN_PABSB128,
27804 IX86_BUILTIN_PABSW128,
27805 IX86_BUILTIN_PABSD128,
27806
27807 /* AMDFAM10 - SSE4A New Instructions. */
27808 IX86_BUILTIN_MOVNTSD,
27809 IX86_BUILTIN_MOVNTSS,
27810 IX86_BUILTIN_EXTRQI,
27811 IX86_BUILTIN_EXTRQ,
27812 IX86_BUILTIN_INSERTQI,
27813 IX86_BUILTIN_INSERTQ,
27814
27815 /* SSE4.1. */
27816 IX86_BUILTIN_BLENDPD,
27817 IX86_BUILTIN_BLENDPS,
27818 IX86_BUILTIN_BLENDVPD,
27819 IX86_BUILTIN_BLENDVPS,
27820 IX86_BUILTIN_PBLENDVB128,
27821 IX86_BUILTIN_PBLENDW128,
27822
27823 IX86_BUILTIN_DPPD,
27824 IX86_BUILTIN_DPPS,
27825
27826 IX86_BUILTIN_INSERTPS128,
27827
27828 IX86_BUILTIN_MOVNTDQA,
27829 IX86_BUILTIN_MPSADBW128,
27830 IX86_BUILTIN_PACKUSDW128,
27831 IX86_BUILTIN_PCMPEQQ,
27832 IX86_BUILTIN_PHMINPOSUW128,
27833
27834 IX86_BUILTIN_PMAXSB128,
27835 IX86_BUILTIN_PMAXSD128,
27836 IX86_BUILTIN_PMAXUD128,
27837 IX86_BUILTIN_PMAXUW128,
27838
27839 IX86_BUILTIN_PMINSB128,
27840 IX86_BUILTIN_PMINSD128,
27841 IX86_BUILTIN_PMINUD128,
27842 IX86_BUILTIN_PMINUW128,
27843
27844 IX86_BUILTIN_PMOVSXBW128,
27845 IX86_BUILTIN_PMOVSXBD128,
27846 IX86_BUILTIN_PMOVSXBQ128,
27847 IX86_BUILTIN_PMOVSXWD128,
27848 IX86_BUILTIN_PMOVSXWQ128,
27849 IX86_BUILTIN_PMOVSXDQ128,
27850
27851 IX86_BUILTIN_PMOVZXBW128,
27852 IX86_BUILTIN_PMOVZXBD128,
27853 IX86_BUILTIN_PMOVZXBQ128,
27854 IX86_BUILTIN_PMOVZXWD128,
27855 IX86_BUILTIN_PMOVZXWQ128,
27856 IX86_BUILTIN_PMOVZXDQ128,
27857
27858 IX86_BUILTIN_PMULDQ128,
27859 IX86_BUILTIN_PMULLD128,
27860
27861 IX86_BUILTIN_ROUNDSD,
27862 IX86_BUILTIN_ROUNDSS,
27863
27864 IX86_BUILTIN_ROUNDPD,
27865 IX86_BUILTIN_ROUNDPS,
27866
27867 IX86_BUILTIN_FLOORPD,
27868 IX86_BUILTIN_CEILPD,
27869 IX86_BUILTIN_TRUNCPD,
27870 IX86_BUILTIN_RINTPD,
27871 IX86_BUILTIN_ROUNDPD_AZ,
27872
27873 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27874 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27875 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27876
27877 IX86_BUILTIN_FLOORPS,
27878 IX86_BUILTIN_CEILPS,
27879 IX86_BUILTIN_TRUNCPS,
27880 IX86_BUILTIN_RINTPS,
27881 IX86_BUILTIN_ROUNDPS_AZ,
27882
27883 IX86_BUILTIN_FLOORPS_SFIX,
27884 IX86_BUILTIN_CEILPS_SFIX,
27885 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27886
27887 IX86_BUILTIN_PTESTZ,
27888 IX86_BUILTIN_PTESTC,
27889 IX86_BUILTIN_PTESTNZC,
27890
27891 IX86_BUILTIN_VEC_INIT_V2SI,
27892 IX86_BUILTIN_VEC_INIT_V4HI,
27893 IX86_BUILTIN_VEC_INIT_V8QI,
27894 IX86_BUILTIN_VEC_EXT_V2DF,
27895 IX86_BUILTIN_VEC_EXT_V2DI,
27896 IX86_BUILTIN_VEC_EXT_V4SF,
27897 IX86_BUILTIN_VEC_EXT_V4SI,
27898 IX86_BUILTIN_VEC_EXT_V8HI,
27899 IX86_BUILTIN_VEC_EXT_V2SI,
27900 IX86_BUILTIN_VEC_EXT_V4HI,
27901 IX86_BUILTIN_VEC_EXT_V16QI,
27902 IX86_BUILTIN_VEC_SET_V2DI,
27903 IX86_BUILTIN_VEC_SET_V4SF,
27904 IX86_BUILTIN_VEC_SET_V4SI,
27905 IX86_BUILTIN_VEC_SET_V8HI,
27906 IX86_BUILTIN_VEC_SET_V4HI,
27907 IX86_BUILTIN_VEC_SET_V16QI,
27908
27909 IX86_BUILTIN_VEC_PACK_SFIX,
27910 IX86_BUILTIN_VEC_PACK_SFIX256,
27911
27912 /* SSE4.2. */
27913 IX86_BUILTIN_CRC32QI,
27914 IX86_BUILTIN_CRC32HI,
27915 IX86_BUILTIN_CRC32SI,
27916 IX86_BUILTIN_CRC32DI,
27917
27918 IX86_BUILTIN_PCMPESTRI128,
27919 IX86_BUILTIN_PCMPESTRM128,
27920 IX86_BUILTIN_PCMPESTRA128,
27921 IX86_BUILTIN_PCMPESTRC128,
27922 IX86_BUILTIN_PCMPESTRO128,
27923 IX86_BUILTIN_PCMPESTRS128,
27924 IX86_BUILTIN_PCMPESTRZ128,
27925 IX86_BUILTIN_PCMPISTRI128,
27926 IX86_BUILTIN_PCMPISTRM128,
27927 IX86_BUILTIN_PCMPISTRA128,
27928 IX86_BUILTIN_PCMPISTRC128,
27929 IX86_BUILTIN_PCMPISTRO128,
27930 IX86_BUILTIN_PCMPISTRS128,
27931 IX86_BUILTIN_PCMPISTRZ128,
27932
27933 IX86_BUILTIN_PCMPGTQ,
27934
27935 /* AES instructions */
27936 IX86_BUILTIN_AESENC128,
27937 IX86_BUILTIN_AESENCLAST128,
27938 IX86_BUILTIN_AESDEC128,
27939 IX86_BUILTIN_AESDECLAST128,
27940 IX86_BUILTIN_AESIMC128,
27941 IX86_BUILTIN_AESKEYGENASSIST128,
27942
27943 /* PCLMUL instruction */
27944 IX86_BUILTIN_PCLMULQDQ128,
27945
27946 /* AVX */
27947 IX86_BUILTIN_ADDPD256,
27948 IX86_BUILTIN_ADDPS256,
27949 IX86_BUILTIN_ADDSUBPD256,
27950 IX86_BUILTIN_ADDSUBPS256,
27951 IX86_BUILTIN_ANDPD256,
27952 IX86_BUILTIN_ANDPS256,
27953 IX86_BUILTIN_ANDNPD256,
27954 IX86_BUILTIN_ANDNPS256,
27955 IX86_BUILTIN_BLENDPD256,
27956 IX86_BUILTIN_BLENDPS256,
27957 IX86_BUILTIN_BLENDVPD256,
27958 IX86_BUILTIN_BLENDVPS256,
27959 IX86_BUILTIN_DIVPD256,
27960 IX86_BUILTIN_DIVPS256,
27961 IX86_BUILTIN_DPPS256,
27962 IX86_BUILTIN_HADDPD256,
27963 IX86_BUILTIN_HADDPS256,
27964 IX86_BUILTIN_HSUBPD256,
27965 IX86_BUILTIN_HSUBPS256,
27966 IX86_BUILTIN_MAXPD256,
27967 IX86_BUILTIN_MAXPS256,
27968 IX86_BUILTIN_MINPD256,
27969 IX86_BUILTIN_MINPS256,
27970 IX86_BUILTIN_MULPD256,
27971 IX86_BUILTIN_MULPS256,
27972 IX86_BUILTIN_ORPD256,
27973 IX86_BUILTIN_ORPS256,
27974 IX86_BUILTIN_SHUFPD256,
27975 IX86_BUILTIN_SHUFPS256,
27976 IX86_BUILTIN_SUBPD256,
27977 IX86_BUILTIN_SUBPS256,
27978 IX86_BUILTIN_XORPD256,
27979 IX86_BUILTIN_XORPS256,
27980 IX86_BUILTIN_CMPSD,
27981 IX86_BUILTIN_CMPSS,
27982 IX86_BUILTIN_CMPPD,
27983 IX86_BUILTIN_CMPPS,
27984 IX86_BUILTIN_CMPPD256,
27985 IX86_BUILTIN_CMPPS256,
27986 IX86_BUILTIN_CVTDQ2PD256,
27987 IX86_BUILTIN_CVTDQ2PS256,
27988 IX86_BUILTIN_CVTPD2PS256,
27989 IX86_BUILTIN_CVTPS2DQ256,
27990 IX86_BUILTIN_CVTPS2PD256,
27991 IX86_BUILTIN_CVTTPD2DQ256,
27992 IX86_BUILTIN_CVTPD2DQ256,
27993 IX86_BUILTIN_CVTTPS2DQ256,
27994 IX86_BUILTIN_EXTRACTF128PD256,
27995 IX86_BUILTIN_EXTRACTF128PS256,
27996 IX86_BUILTIN_EXTRACTF128SI256,
27997 IX86_BUILTIN_VZEROALL,
27998 IX86_BUILTIN_VZEROUPPER,
27999 IX86_BUILTIN_VPERMILVARPD,
28000 IX86_BUILTIN_VPERMILVARPS,
28001 IX86_BUILTIN_VPERMILVARPD256,
28002 IX86_BUILTIN_VPERMILVARPS256,
28003 IX86_BUILTIN_VPERMILPD,
28004 IX86_BUILTIN_VPERMILPS,
28005 IX86_BUILTIN_VPERMILPD256,
28006 IX86_BUILTIN_VPERMILPS256,
28007 IX86_BUILTIN_VPERMIL2PD,
28008 IX86_BUILTIN_VPERMIL2PS,
28009 IX86_BUILTIN_VPERMIL2PD256,
28010 IX86_BUILTIN_VPERMIL2PS256,
28011 IX86_BUILTIN_VPERM2F128PD256,
28012 IX86_BUILTIN_VPERM2F128PS256,
28013 IX86_BUILTIN_VPERM2F128SI256,
28014 IX86_BUILTIN_VBROADCASTSS,
28015 IX86_BUILTIN_VBROADCASTSD256,
28016 IX86_BUILTIN_VBROADCASTSS256,
28017 IX86_BUILTIN_VBROADCASTPD256,
28018 IX86_BUILTIN_VBROADCASTPS256,
28019 IX86_BUILTIN_VINSERTF128PD256,
28020 IX86_BUILTIN_VINSERTF128PS256,
28021 IX86_BUILTIN_VINSERTF128SI256,
28022 IX86_BUILTIN_LOADUPD256,
28023 IX86_BUILTIN_LOADUPS256,
28024 IX86_BUILTIN_STOREUPD256,
28025 IX86_BUILTIN_STOREUPS256,
28026 IX86_BUILTIN_LDDQU256,
28027 IX86_BUILTIN_MOVNTDQ256,
28028 IX86_BUILTIN_MOVNTPD256,
28029 IX86_BUILTIN_MOVNTPS256,
28030 IX86_BUILTIN_LOADDQU256,
28031 IX86_BUILTIN_STOREDQU256,
28032 IX86_BUILTIN_MASKLOADPD,
28033 IX86_BUILTIN_MASKLOADPS,
28034 IX86_BUILTIN_MASKSTOREPD,
28035 IX86_BUILTIN_MASKSTOREPS,
28036 IX86_BUILTIN_MASKLOADPD256,
28037 IX86_BUILTIN_MASKLOADPS256,
28038 IX86_BUILTIN_MASKSTOREPD256,
28039 IX86_BUILTIN_MASKSTOREPS256,
28040 IX86_BUILTIN_MOVSHDUP256,
28041 IX86_BUILTIN_MOVSLDUP256,
28042 IX86_BUILTIN_MOVDDUP256,
28043
28044 IX86_BUILTIN_SQRTPD256,
28045 IX86_BUILTIN_SQRTPS256,
28046 IX86_BUILTIN_SQRTPS_NR256,
28047 IX86_BUILTIN_RSQRTPS256,
28048 IX86_BUILTIN_RSQRTPS_NR256,
28049
28050 IX86_BUILTIN_RCPPS256,
28051
28052 IX86_BUILTIN_ROUNDPD256,
28053 IX86_BUILTIN_ROUNDPS256,
28054
28055 IX86_BUILTIN_FLOORPD256,
28056 IX86_BUILTIN_CEILPD256,
28057 IX86_BUILTIN_TRUNCPD256,
28058 IX86_BUILTIN_RINTPD256,
28059 IX86_BUILTIN_ROUNDPD_AZ256,
28060
28061 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28062 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28063 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28064
28065 IX86_BUILTIN_FLOORPS256,
28066 IX86_BUILTIN_CEILPS256,
28067 IX86_BUILTIN_TRUNCPS256,
28068 IX86_BUILTIN_RINTPS256,
28069 IX86_BUILTIN_ROUNDPS_AZ256,
28070
28071 IX86_BUILTIN_FLOORPS_SFIX256,
28072 IX86_BUILTIN_CEILPS_SFIX256,
28073 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28074
28075 IX86_BUILTIN_UNPCKHPD256,
28076 IX86_BUILTIN_UNPCKLPD256,
28077 IX86_BUILTIN_UNPCKHPS256,
28078 IX86_BUILTIN_UNPCKLPS256,
28079
28080 IX86_BUILTIN_SI256_SI,
28081 IX86_BUILTIN_PS256_PS,
28082 IX86_BUILTIN_PD256_PD,
28083 IX86_BUILTIN_SI_SI256,
28084 IX86_BUILTIN_PS_PS256,
28085 IX86_BUILTIN_PD_PD256,
28086
28087 IX86_BUILTIN_VTESTZPD,
28088 IX86_BUILTIN_VTESTCPD,
28089 IX86_BUILTIN_VTESTNZCPD,
28090 IX86_BUILTIN_VTESTZPS,
28091 IX86_BUILTIN_VTESTCPS,
28092 IX86_BUILTIN_VTESTNZCPS,
28093 IX86_BUILTIN_VTESTZPD256,
28094 IX86_BUILTIN_VTESTCPD256,
28095 IX86_BUILTIN_VTESTNZCPD256,
28096 IX86_BUILTIN_VTESTZPS256,
28097 IX86_BUILTIN_VTESTCPS256,
28098 IX86_BUILTIN_VTESTNZCPS256,
28099 IX86_BUILTIN_PTESTZ256,
28100 IX86_BUILTIN_PTESTC256,
28101 IX86_BUILTIN_PTESTNZC256,
28102
28103 IX86_BUILTIN_MOVMSKPD256,
28104 IX86_BUILTIN_MOVMSKPS256,
28105
28106 /* AVX2 */
28107 IX86_BUILTIN_MPSADBW256,
28108 IX86_BUILTIN_PABSB256,
28109 IX86_BUILTIN_PABSW256,
28110 IX86_BUILTIN_PABSD256,
28111 IX86_BUILTIN_PACKSSDW256,
28112 IX86_BUILTIN_PACKSSWB256,
28113 IX86_BUILTIN_PACKUSDW256,
28114 IX86_BUILTIN_PACKUSWB256,
28115 IX86_BUILTIN_PADDB256,
28116 IX86_BUILTIN_PADDW256,
28117 IX86_BUILTIN_PADDD256,
28118 IX86_BUILTIN_PADDQ256,
28119 IX86_BUILTIN_PADDSB256,
28120 IX86_BUILTIN_PADDSW256,
28121 IX86_BUILTIN_PADDUSB256,
28122 IX86_BUILTIN_PADDUSW256,
28123 IX86_BUILTIN_PALIGNR256,
28124 IX86_BUILTIN_AND256I,
28125 IX86_BUILTIN_ANDNOT256I,
28126 IX86_BUILTIN_PAVGB256,
28127 IX86_BUILTIN_PAVGW256,
28128 IX86_BUILTIN_PBLENDVB256,
28129 IX86_BUILTIN_PBLENDVW256,
28130 IX86_BUILTIN_PCMPEQB256,
28131 IX86_BUILTIN_PCMPEQW256,
28132 IX86_BUILTIN_PCMPEQD256,
28133 IX86_BUILTIN_PCMPEQQ256,
28134 IX86_BUILTIN_PCMPGTB256,
28135 IX86_BUILTIN_PCMPGTW256,
28136 IX86_BUILTIN_PCMPGTD256,
28137 IX86_BUILTIN_PCMPGTQ256,
28138 IX86_BUILTIN_PHADDW256,
28139 IX86_BUILTIN_PHADDD256,
28140 IX86_BUILTIN_PHADDSW256,
28141 IX86_BUILTIN_PHSUBW256,
28142 IX86_BUILTIN_PHSUBD256,
28143 IX86_BUILTIN_PHSUBSW256,
28144 IX86_BUILTIN_PMADDUBSW256,
28145 IX86_BUILTIN_PMADDWD256,
28146 IX86_BUILTIN_PMAXSB256,
28147 IX86_BUILTIN_PMAXSW256,
28148 IX86_BUILTIN_PMAXSD256,
28149 IX86_BUILTIN_PMAXUB256,
28150 IX86_BUILTIN_PMAXUW256,
28151 IX86_BUILTIN_PMAXUD256,
28152 IX86_BUILTIN_PMINSB256,
28153 IX86_BUILTIN_PMINSW256,
28154 IX86_BUILTIN_PMINSD256,
28155 IX86_BUILTIN_PMINUB256,
28156 IX86_BUILTIN_PMINUW256,
28157 IX86_BUILTIN_PMINUD256,
28158 IX86_BUILTIN_PMOVMSKB256,
28159 IX86_BUILTIN_PMOVSXBW256,
28160 IX86_BUILTIN_PMOVSXBD256,
28161 IX86_BUILTIN_PMOVSXBQ256,
28162 IX86_BUILTIN_PMOVSXWD256,
28163 IX86_BUILTIN_PMOVSXWQ256,
28164 IX86_BUILTIN_PMOVSXDQ256,
28165 IX86_BUILTIN_PMOVZXBW256,
28166 IX86_BUILTIN_PMOVZXBD256,
28167 IX86_BUILTIN_PMOVZXBQ256,
28168 IX86_BUILTIN_PMOVZXWD256,
28169 IX86_BUILTIN_PMOVZXWQ256,
28170 IX86_BUILTIN_PMOVZXDQ256,
28171 IX86_BUILTIN_PMULDQ256,
28172 IX86_BUILTIN_PMULHRSW256,
28173 IX86_BUILTIN_PMULHUW256,
28174 IX86_BUILTIN_PMULHW256,
28175 IX86_BUILTIN_PMULLW256,
28176 IX86_BUILTIN_PMULLD256,
28177 IX86_BUILTIN_PMULUDQ256,
28178 IX86_BUILTIN_POR256,
28179 IX86_BUILTIN_PSADBW256,
28180 IX86_BUILTIN_PSHUFB256,
28181 IX86_BUILTIN_PSHUFD256,
28182 IX86_BUILTIN_PSHUFHW256,
28183 IX86_BUILTIN_PSHUFLW256,
28184 IX86_BUILTIN_PSIGNB256,
28185 IX86_BUILTIN_PSIGNW256,
28186 IX86_BUILTIN_PSIGND256,
28187 IX86_BUILTIN_PSLLDQI256,
28188 IX86_BUILTIN_PSLLWI256,
28189 IX86_BUILTIN_PSLLW256,
28190 IX86_BUILTIN_PSLLDI256,
28191 IX86_BUILTIN_PSLLD256,
28192 IX86_BUILTIN_PSLLQI256,
28193 IX86_BUILTIN_PSLLQ256,
28194 IX86_BUILTIN_PSRAWI256,
28195 IX86_BUILTIN_PSRAW256,
28196 IX86_BUILTIN_PSRADI256,
28197 IX86_BUILTIN_PSRAD256,
28198 IX86_BUILTIN_PSRLDQI256,
28199 IX86_BUILTIN_PSRLWI256,
28200 IX86_BUILTIN_PSRLW256,
28201 IX86_BUILTIN_PSRLDI256,
28202 IX86_BUILTIN_PSRLD256,
28203 IX86_BUILTIN_PSRLQI256,
28204 IX86_BUILTIN_PSRLQ256,
28205 IX86_BUILTIN_PSUBB256,
28206 IX86_BUILTIN_PSUBW256,
28207 IX86_BUILTIN_PSUBD256,
28208 IX86_BUILTIN_PSUBQ256,
28209 IX86_BUILTIN_PSUBSB256,
28210 IX86_BUILTIN_PSUBSW256,
28211 IX86_BUILTIN_PSUBUSB256,
28212 IX86_BUILTIN_PSUBUSW256,
28213 IX86_BUILTIN_PUNPCKHBW256,
28214 IX86_BUILTIN_PUNPCKHWD256,
28215 IX86_BUILTIN_PUNPCKHDQ256,
28216 IX86_BUILTIN_PUNPCKHQDQ256,
28217 IX86_BUILTIN_PUNPCKLBW256,
28218 IX86_BUILTIN_PUNPCKLWD256,
28219 IX86_BUILTIN_PUNPCKLDQ256,
28220 IX86_BUILTIN_PUNPCKLQDQ256,
28221 IX86_BUILTIN_PXOR256,
28222 IX86_BUILTIN_MOVNTDQA256,
28223 IX86_BUILTIN_VBROADCASTSS_PS,
28224 IX86_BUILTIN_VBROADCASTSS_PS256,
28225 IX86_BUILTIN_VBROADCASTSD_PD256,
28226 IX86_BUILTIN_VBROADCASTSI256,
28227 IX86_BUILTIN_PBLENDD256,
28228 IX86_BUILTIN_PBLENDD128,
28229 IX86_BUILTIN_PBROADCASTB256,
28230 IX86_BUILTIN_PBROADCASTW256,
28231 IX86_BUILTIN_PBROADCASTD256,
28232 IX86_BUILTIN_PBROADCASTQ256,
28233 IX86_BUILTIN_PBROADCASTB128,
28234 IX86_BUILTIN_PBROADCASTW128,
28235 IX86_BUILTIN_PBROADCASTD128,
28236 IX86_BUILTIN_PBROADCASTQ128,
28237 IX86_BUILTIN_VPERMVARSI256,
28238 IX86_BUILTIN_VPERMDF256,
28239 IX86_BUILTIN_VPERMVARSF256,
28240 IX86_BUILTIN_VPERMDI256,
28241 IX86_BUILTIN_VPERMTI256,
28242 IX86_BUILTIN_VEXTRACT128I256,
28243 IX86_BUILTIN_VINSERT128I256,
28244 IX86_BUILTIN_MASKLOADD,
28245 IX86_BUILTIN_MASKLOADQ,
28246 IX86_BUILTIN_MASKLOADD256,
28247 IX86_BUILTIN_MASKLOADQ256,
28248 IX86_BUILTIN_MASKSTORED,
28249 IX86_BUILTIN_MASKSTOREQ,
28250 IX86_BUILTIN_MASKSTORED256,
28251 IX86_BUILTIN_MASKSTOREQ256,
28252 IX86_BUILTIN_PSLLVV4DI,
28253 IX86_BUILTIN_PSLLVV2DI,
28254 IX86_BUILTIN_PSLLVV8SI,
28255 IX86_BUILTIN_PSLLVV4SI,
28256 IX86_BUILTIN_PSRAVV8SI,
28257 IX86_BUILTIN_PSRAVV4SI,
28258 IX86_BUILTIN_PSRLVV4DI,
28259 IX86_BUILTIN_PSRLVV2DI,
28260 IX86_BUILTIN_PSRLVV8SI,
28261 IX86_BUILTIN_PSRLVV4SI,
28262
28263 IX86_BUILTIN_GATHERSIV2DF,
28264 IX86_BUILTIN_GATHERSIV4DF,
28265 IX86_BUILTIN_GATHERDIV2DF,
28266 IX86_BUILTIN_GATHERDIV4DF,
28267 IX86_BUILTIN_GATHERSIV4SF,
28268 IX86_BUILTIN_GATHERSIV8SF,
28269 IX86_BUILTIN_GATHERDIV4SF,
28270 IX86_BUILTIN_GATHERDIV8SF,
28271 IX86_BUILTIN_GATHERSIV2DI,
28272 IX86_BUILTIN_GATHERSIV4DI,
28273 IX86_BUILTIN_GATHERDIV2DI,
28274 IX86_BUILTIN_GATHERDIV4DI,
28275 IX86_BUILTIN_GATHERSIV4SI,
28276 IX86_BUILTIN_GATHERSIV8SI,
28277 IX86_BUILTIN_GATHERDIV4SI,
28278 IX86_BUILTIN_GATHERDIV8SI,
28279
28280 /* AVX512F */
28281 IX86_BUILTIN_SI512_SI256,
28282 IX86_BUILTIN_PD512_PD256,
28283 IX86_BUILTIN_PS512_PS256,
28284 IX86_BUILTIN_SI512_SI,
28285 IX86_BUILTIN_PD512_PD,
28286 IX86_BUILTIN_PS512_PS,
28287 IX86_BUILTIN_ADDPD512,
28288 IX86_BUILTIN_ADDPS512,
28289 IX86_BUILTIN_ADDSD_ROUND,
28290 IX86_BUILTIN_ADDSS_ROUND,
28291 IX86_BUILTIN_ALIGND512,
28292 IX86_BUILTIN_ALIGNQ512,
28293 IX86_BUILTIN_BLENDMD512,
28294 IX86_BUILTIN_BLENDMPD512,
28295 IX86_BUILTIN_BLENDMPS512,
28296 IX86_BUILTIN_BLENDMQ512,
28297 IX86_BUILTIN_BROADCASTF32X4_512,
28298 IX86_BUILTIN_BROADCASTF64X4_512,
28299 IX86_BUILTIN_BROADCASTI32X4_512,
28300 IX86_BUILTIN_BROADCASTI64X4_512,
28301 IX86_BUILTIN_BROADCASTSD512,
28302 IX86_BUILTIN_BROADCASTSS512,
28303 IX86_BUILTIN_CMPD512,
28304 IX86_BUILTIN_CMPPD512,
28305 IX86_BUILTIN_CMPPS512,
28306 IX86_BUILTIN_CMPQ512,
28307 IX86_BUILTIN_CMPSD_MASK,
28308 IX86_BUILTIN_CMPSS_MASK,
28309 IX86_BUILTIN_COMIDF,
28310 IX86_BUILTIN_COMISF,
28311 IX86_BUILTIN_COMPRESSPD512,
28312 IX86_BUILTIN_COMPRESSPDSTORE512,
28313 IX86_BUILTIN_COMPRESSPS512,
28314 IX86_BUILTIN_COMPRESSPSSTORE512,
28315 IX86_BUILTIN_CVTDQ2PD512,
28316 IX86_BUILTIN_CVTDQ2PS512,
28317 IX86_BUILTIN_CVTPD2DQ512,
28318 IX86_BUILTIN_CVTPD2PS512,
28319 IX86_BUILTIN_CVTPD2UDQ512,
28320 IX86_BUILTIN_CVTPH2PS512,
28321 IX86_BUILTIN_CVTPS2DQ512,
28322 IX86_BUILTIN_CVTPS2PD512,
28323 IX86_BUILTIN_CVTPS2PH512,
28324 IX86_BUILTIN_CVTPS2UDQ512,
28325 IX86_BUILTIN_CVTSD2SS_ROUND,
28326 IX86_BUILTIN_CVTSI2SD64,
28327 IX86_BUILTIN_CVTSI2SS32,
28328 IX86_BUILTIN_CVTSI2SS64,
28329 IX86_BUILTIN_CVTSS2SD_ROUND,
28330 IX86_BUILTIN_CVTTPD2DQ512,
28331 IX86_BUILTIN_CVTTPD2UDQ512,
28332 IX86_BUILTIN_CVTTPS2DQ512,
28333 IX86_BUILTIN_CVTTPS2UDQ512,
28334 IX86_BUILTIN_CVTUDQ2PD512,
28335 IX86_BUILTIN_CVTUDQ2PS512,
28336 IX86_BUILTIN_CVTUSI2SD32,
28337 IX86_BUILTIN_CVTUSI2SD64,
28338 IX86_BUILTIN_CVTUSI2SS32,
28339 IX86_BUILTIN_CVTUSI2SS64,
28340 IX86_BUILTIN_DIVPD512,
28341 IX86_BUILTIN_DIVPS512,
28342 IX86_BUILTIN_DIVSD_ROUND,
28343 IX86_BUILTIN_DIVSS_ROUND,
28344 IX86_BUILTIN_EXPANDPD512,
28345 IX86_BUILTIN_EXPANDPD512Z,
28346 IX86_BUILTIN_EXPANDPDLOAD512,
28347 IX86_BUILTIN_EXPANDPDLOAD512Z,
28348 IX86_BUILTIN_EXPANDPS512,
28349 IX86_BUILTIN_EXPANDPS512Z,
28350 IX86_BUILTIN_EXPANDPSLOAD512,
28351 IX86_BUILTIN_EXPANDPSLOAD512Z,
28352 IX86_BUILTIN_EXTRACTF32X4,
28353 IX86_BUILTIN_EXTRACTF64X4,
28354 IX86_BUILTIN_EXTRACTI32X4,
28355 IX86_BUILTIN_EXTRACTI64X4,
28356 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28357 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28358 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28359 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28360 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28361 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28362 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28363 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28364 IX86_BUILTIN_GETEXPPD512,
28365 IX86_BUILTIN_GETEXPPS512,
28366 IX86_BUILTIN_GETEXPSD128,
28367 IX86_BUILTIN_GETEXPSS128,
28368 IX86_BUILTIN_GETMANTPD512,
28369 IX86_BUILTIN_GETMANTPS512,
28370 IX86_BUILTIN_GETMANTSD128,
28371 IX86_BUILTIN_GETMANTSS128,
28372 IX86_BUILTIN_INSERTF32X4,
28373 IX86_BUILTIN_INSERTF64X4,
28374 IX86_BUILTIN_INSERTI32X4,
28375 IX86_BUILTIN_INSERTI64X4,
28376 IX86_BUILTIN_LOADAPD512,
28377 IX86_BUILTIN_LOADAPS512,
28378 IX86_BUILTIN_LOADDQUDI512,
28379 IX86_BUILTIN_LOADDQUSI512,
28380 IX86_BUILTIN_LOADUPD512,
28381 IX86_BUILTIN_LOADUPS512,
28382 IX86_BUILTIN_MAXPD512,
28383 IX86_BUILTIN_MAXPS512,
28384 IX86_BUILTIN_MAXSD_ROUND,
28385 IX86_BUILTIN_MAXSS_ROUND,
28386 IX86_BUILTIN_MINPD512,
28387 IX86_BUILTIN_MINPS512,
28388 IX86_BUILTIN_MINSD_ROUND,
28389 IX86_BUILTIN_MINSS_ROUND,
28390 IX86_BUILTIN_MOVAPD512,
28391 IX86_BUILTIN_MOVAPS512,
28392 IX86_BUILTIN_MOVDDUP512,
28393 IX86_BUILTIN_MOVDQA32LOAD512,
28394 IX86_BUILTIN_MOVDQA32STORE512,
28395 IX86_BUILTIN_MOVDQA32_512,
28396 IX86_BUILTIN_MOVDQA64LOAD512,
28397 IX86_BUILTIN_MOVDQA64STORE512,
28398 IX86_BUILTIN_MOVDQA64_512,
28399 IX86_BUILTIN_MOVNTDQ512,
28400 IX86_BUILTIN_MOVNTDQA512,
28401 IX86_BUILTIN_MOVNTPD512,
28402 IX86_BUILTIN_MOVNTPS512,
28403 IX86_BUILTIN_MOVSHDUP512,
28404 IX86_BUILTIN_MOVSLDUP512,
28405 IX86_BUILTIN_MULPD512,
28406 IX86_BUILTIN_MULPS512,
28407 IX86_BUILTIN_MULSD_ROUND,
28408 IX86_BUILTIN_MULSS_ROUND,
28409 IX86_BUILTIN_PABSD512,
28410 IX86_BUILTIN_PABSQ512,
28411 IX86_BUILTIN_PADDD512,
28412 IX86_BUILTIN_PADDQ512,
28413 IX86_BUILTIN_PANDD512,
28414 IX86_BUILTIN_PANDND512,
28415 IX86_BUILTIN_PANDNQ512,
28416 IX86_BUILTIN_PANDQ512,
28417 IX86_BUILTIN_PBROADCASTD512,
28418 IX86_BUILTIN_PBROADCASTD512_GPR,
28419 IX86_BUILTIN_PBROADCASTMB512,
28420 IX86_BUILTIN_PBROADCASTMW512,
28421 IX86_BUILTIN_PBROADCASTQ512,
28422 IX86_BUILTIN_PBROADCASTQ512_GPR,
28423 IX86_BUILTIN_PBROADCASTQ512_MEM,
28424 IX86_BUILTIN_PCMPEQD512_MASK,
28425 IX86_BUILTIN_PCMPEQQ512_MASK,
28426 IX86_BUILTIN_PCMPGTD512_MASK,
28427 IX86_BUILTIN_PCMPGTQ512_MASK,
28428 IX86_BUILTIN_PCOMPRESSD512,
28429 IX86_BUILTIN_PCOMPRESSDSTORE512,
28430 IX86_BUILTIN_PCOMPRESSQ512,
28431 IX86_BUILTIN_PCOMPRESSQSTORE512,
28432 IX86_BUILTIN_PEXPANDD512,
28433 IX86_BUILTIN_PEXPANDD512Z,
28434 IX86_BUILTIN_PEXPANDDLOAD512,
28435 IX86_BUILTIN_PEXPANDDLOAD512Z,
28436 IX86_BUILTIN_PEXPANDQ512,
28437 IX86_BUILTIN_PEXPANDQ512Z,
28438 IX86_BUILTIN_PEXPANDQLOAD512,
28439 IX86_BUILTIN_PEXPANDQLOAD512Z,
28440 IX86_BUILTIN_PMAXSD512,
28441 IX86_BUILTIN_PMAXSQ512,
28442 IX86_BUILTIN_PMAXUD512,
28443 IX86_BUILTIN_PMAXUQ512,
28444 IX86_BUILTIN_PMINSD512,
28445 IX86_BUILTIN_PMINSQ512,
28446 IX86_BUILTIN_PMINUD512,
28447 IX86_BUILTIN_PMINUQ512,
28448 IX86_BUILTIN_PMOVDB512,
28449 IX86_BUILTIN_PMOVDB512_MEM,
28450 IX86_BUILTIN_PMOVDW512,
28451 IX86_BUILTIN_PMOVDW512_MEM,
28452 IX86_BUILTIN_PMOVQB512,
28453 IX86_BUILTIN_PMOVQB512_MEM,
28454 IX86_BUILTIN_PMOVQD512,
28455 IX86_BUILTIN_PMOVQD512_MEM,
28456 IX86_BUILTIN_PMOVQW512,
28457 IX86_BUILTIN_PMOVQW512_MEM,
28458 IX86_BUILTIN_PMOVSDB512,
28459 IX86_BUILTIN_PMOVSDB512_MEM,
28460 IX86_BUILTIN_PMOVSDW512,
28461 IX86_BUILTIN_PMOVSDW512_MEM,
28462 IX86_BUILTIN_PMOVSQB512,
28463 IX86_BUILTIN_PMOVSQB512_MEM,
28464 IX86_BUILTIN_PMOVSQD512,
28465 IX86_BUILTIN_PMOVSQD512_MEM,
28466 IX86_BUILTIN_PMOVSQW512,
28467 IX86_BUILTIN_PMOVSQW512_MEM,
28468 IX86_BUILTIN_PMOVSXBD512,
28469 IX86_BUILTIN_PMOVSXBQ512,
28470 IX86_BUILTIN_PMOVSXDQ512,
28471 IX86_BUILTIN_PMOVSXWD512,
28472 IX86_BUILTIN_PMOVSXWQ512,
28473 IX86_BUILTIN_PMOVUSDB512,
28474 IX86_BUILTIN_PMOVUSDB512_MEM,
28475 IX86_BUILTIN_PMOVUSDW512,
28476 IX86_BUILTIN_PMOVUSDW512_MEM,
28477 IX86_BUILTIN_PMOVUSQB512,
28478 IX86_BUILTIN_PMOVUSQB512_MEM,
28479 IX86_BUILTIN_PMOVUSQD512,
28480 IX86_BUILTIN_PMOVUSQD512_MEM,
28481 IX86_BUILTIN_PMOVUSQW512,
28482 IX86_BUILTIN_PMOVUSQW512_MEM,
28483 IX86_BUILTIN_PMOVZXBD512,
28484 IX86_BUILTIN_PMOVZXBQ512,
28485 IX86_BUILTIN_PMOVZXDQ512,
28486 IX86_BUILTIN_PMOVZXWD512,
28487 IX86_BUILTIN_PMOVZXWQ512,
28488 IX86_BUILTIN_PMULDQ512,
28489 IX86_BUILTIN_PMULLD512,
28490 IX86_BUILTIN_PMULUDQ512,
28491 IX86_BUILTIN_PORD512,
28492 IX86_BUILTIN_PORQ512,
28493 IX86_BUILTIN_PROLD512,
28494 IX86_BUILTIN_PROLQ512,
28495 IX86_BUILTIN_PROLVD512,
28496 IX86_BUILTIN_PROLVQ512,
28497 IX86_BUILTIN_PRORD512,
28498 IX86_BUILTIN_PRORQ512,
28499 IX86_BUILTIN_PRORVD512,
28500 IX86_BUILTIN_PRORVQ512,
28501 IX86_BUILTIN_PSHUFD512,
28502 IX86_BUILTIN_PSLLD512,
28503 IX86_BUILTIN_PSLLDI512,
28504 IX86_BUILTIN_PSLLQ512,
28505 IX86_BUILTIN_PSLLQI512,
28506 IX86_BUILTIN_PSLLVV16SI,
28507 IX86_BUILTIN_PSLLVV8DI,
28508 IX86_BUILTIN_PSRAD512,
28509 IX86_BUILTIN_PSRADI512,
28510 IX86_BUILTIN_PSRAQ512,
28511 IX86_BUILTIN_PSRAQI512,
28512 IX86_BUILTIN_PSRAVV16SI,
28513 IX86_BUILTIN_PSRAVV8DI,
28514 IX86_BUILTIN_PSRLD512,
28515 IX86_BUILTIN_PSRLDI512,
28516 IX86_BUILTIN_PSRLQ512,
28517 IX86_BUILTIN_PSRLQI512,
28518 IX86_BUILTIN_PSRLVV16SI,
28519 IX86_BUILTIN_PSRLVV8DI,
28520 IX86_BUILTIN_PSUBD512,
28521 IX86_BUILTIN_PSUBQ512,
28522 IX86_BUILTIN_PTESTMD512,
28523 IX86_BUILTIN_PTESTMQ512,
28524 IX86_BUILTIN_PTESTNMD512,
28525 IX86_BUILTIN_PTESTNMQ512,
28526 IX86_BUILTIN_PUNPCKHDQ512,
28527 IX86_BUILTIN_PUNPCKHQDQ512,
28528 IX86_BUILTIN_PUNPCKLDQ512,
28529 IX86_BUILTIN_PUNPCKLQDQ512,
28530 IX86_BUILTIN_PXORD512,
28531 IX86_BUILTIN_PXORQ512,
28532 IX86_BUILTIN_RCP14PD512,
28533 IX86_BUILTIN_RCP14PS512,
28534 IX86_BUILTIN_RCP14SD,
28535 IX86_BUILTIN_RCP14SS,
28536 IX86_BUILTIN_RNDSCALEPD,
28537 IX86_BUILTIN_RNDSCALEPS,
28538 IX86_BUILTIN_RNDSCALESD,
28539 IX86_BUILTIN_RNDSCALESS,
28540 IX86_BUILTIN_RSQRT14PD512,
28541 IX86_BUILTIN_RSQRT14PS512,
28542 IX86_BUILTIN_RSQRT14SD,
28543 IX86_BUILTIN_RSQRT14SS,
28544 IX86_BUILTIN_SCALEFPD512,
28545 IX86_BUILTIN_SCALEFPS512,
28546 IX86_BUILTIN_SCALEFSD,
28547 IX86_BUILTIN_SCALEFSS,
28548 IX86_BUILTIN_SHUFPD512,
28549 IX86_BUILTIN_SHUFPS512,
28550 IX86_BUILTIN_SHUF_F32x4,
28551 IX86_BUILTIN_SHUF_F64x2,
28552 IX86_BUILTIN_SHUF_I32x4,
28553 IX86_BUILTIN_SHUF_I64x2,
28554 IX86_BUILTIN_SQRTPD512,
28555 IX86_BUILTIN_SQRTPD512_MASK,
28556 IX86_BUILTIN_SQRTPS512_MASK,
28557 IX86_BUILTIN_SQRTPS_NR512,
28558 IX86_BUILTIN_SQRTSD_ROUND,
28559 IX86_BUILTIN_SQRTSS_ROUND,
28560 IX86_BUILTIN_STOREAPD512,
28561 IX86_BUILTIN_STOREAPS512,
28562 IX86_BUILTIN_STOREDQUDI512,
28563 IX86_BUILTIN_STOREDQUSI512,
28564 IX86_BUILTIN_STOREUPD512,
28565 IX86_BUILTIN_STOREUPS512,
28566 IX86_BUILTIN_SUBPD512,
28567 IX86_BUILTIN_SUBPS512,
28568 IX86_BUILTIN_SUBSD_ROUND,
28569 IX86_BUILTIN_SUBSS_ROUND,
28570 IX86_BUILTIN_UCMPD512,
28571 IX86_BUILTIN_UCMPQ512,
28572 IX86_BUILTIN_UNPCKHPD512,
28573 IX86_BUILTIN_UNPCKHPS512,
28574 IX86_BUILTIN_UNPCKLPD512,
28575 IX86_BUILTIN_UNPCKLPS512,
28576 IX86_BUILTIN_VCVTSD2SI32,
28577 IX86_BUILTIN_VCVTSD2SI64,
28578 IX86_BUILTIN_VCVTSD2USI32,
28579 IX86_BUILTIN_VCVTSD2USI64,
28580 IX86_BUILTIN_VCVTSS2SI32,
28581 IX86_BUILTIN_VCVTSS2SI64,
28582 IX86_BUILTIN_VCVTSS2USI32,
28583 IX86_BUILTIN_VCVTSS2USI64,
28584 IX86_BUILTIN_VCVTTSD2SI32,
28585 IX86_BUILTIN_VCVTTSD2SI64,
28586 IX86_BUILTIN_VCVTTSD2USI32,
28587 IX86_BUILTIN_VCVTTSD2USI64,
28588 IX86_BUILTIN_VCVTTSS2SI32,
28589 IX86_BUILTIN_VCVTTSS2SI64,
28590 IX86_BUILTIN_VCVTTSS2USI32,
28591 IX86_BUILTIN_VCVTTSS2USI64,
28592 IX86_BUILTIN_VFMADDPD512_MASK,
28593 IX86_BUILTIN_VFMADDPD512_MASK3,
28594 IX86_BUILTIN_VFMADDPD512_MASKZ,
28595 IX86_BUILTIN_VFMADDPS512_MASK,
28596 IX86_BUILTIN_VFMADDPS512_MASK3,
28597 IX86_BUILTIN_VFMADDPS512_MASKZ,
28598 IX86_BUILTIN_VFMADDSD3_ROUND,
28599 IX86_BUILTIN_VFMADDSS3_ROUND,
28600 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28601 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28602 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28603 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28604 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28605 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28606 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28607 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28608 IX86_BUILTIN_VFMSUBPD512_MASK3,
28609 IX86_BUILTIN_VFMSUBPS512_MASK3,
28610 IX86_BUILTIN_VFMSUBSD3_MASK3,
28611 IX86_BUILTIN_VFMSUBSS3_MASK3,
28612 IX86_BUILTIN_VFNMADDPD512_MASK,
28613 IX86_BUILTIN_VFNMADDPS512_MASK,
28614 IX86_BUILTIN_VFNMSUBPD512_MASK,
28615 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28616 IX86_BUILTIN_VFNMSUBPS512_MASK,
28617 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28618 IX86_BUILTIN_VPCLZCNTD512,
28619 IX86_BUILTIN_VPCLZCNTQ512,
28620 IX86_BUILTIN_VPCONFLICTD512,
28621 IX86_BUILTIN_VPCONFLICTQ512,
28622 IX86_BUILTIN_VPERMDF512,
28623 IX86_BUILTIN_VPERMDI512,
28624 IX86_BUILTIN_VPERMI2VARD512,
28625 IX86_BUILTIN_VPERMI2VARPD512,
28626 IX86_BUILTIN_VPERMI2VARPS512,
28627 IX86_BUILTIN_VPERMI2VARQ512,
28628 IX86_BUILTIN_VPERMILPD512,
28629 IX86_BUILTIN_VPERMILPS512,
28630 IX86_BUILTIN_VPERMILVARPD512,
28631 IX86_BUILTIN_VPERMILVARPS512,
28632 IX86_BUILTIN_VPERMT2VARD512,
28633 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28634 IX86_BUILTIN_VPERMT2VARPD512,
28635 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28636 IX86_BUILTIN_VPERMT2VARPS512,
28637 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28638 IX86_BUILTIN_VPERMT2VARQ512,
28639 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28640 IX86_BUILTIN_VPERMVARDF512,
28641 IX86_BUILTIN_VPERMVARDI512,
28642 IX86_BUILTIN_VPERMVARSF512,
28643 IX86_BUILTIN_VPERMVARSI512,
28644 IX86_BUILTIN_VTERNLOGD512_MASK,
28645 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28646 IX86_BUILTIN_VTERNLOGQ512_MASK,
28647 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28648
28649 /* Mask arithmetic operations */
28650 IX86_BUILTIN_KAND16,
28651 IX86_BUILTIN_KANDN16,
28652 IX86_BUILTIN_KNOT16,
28653 IX86_BUILTIN_KOR16,
28654 IX86_BUILTIN_KORTESTC16,
28655 IX86_BUILTIN_KORTESTZ16,
28656 IX86_BUILTIN_KUNPCKBW,
28657 IX86_BUILTIN_KXNOR16,
28658 IX86_BUILTIN_KXOR16,
28659 IX86_BUILTIN_KMOV16,
28660
28661 /* AVX512VL. */
28662 IX86_BUILTIN_PMOVUSQD256_MEM,
28663 IX86_BUILTIN_PMOVUSQD128_MEM,
28664 IX86_BUILTIN_PMOVSQD256_MEM,
28665 IX86_BUILTIN_PMOVSQD128_MEM,
28666 IX86_BUILTIN_PMOVQD256_MEM,
28667 IX86_BUILTIN_PMOVQD128_MEM,
28668 IX86_BUILTIN_PMOVUSQW256_MEM,
28669 IX86_BUILTIN_PMOVUSQW128_MEM,
28670 IX86_BUILTIN_PMOVSQW256_MEM,
28671 IX86_BUILTIN_PMOVSQW128_MEM,
28672 IX86_BUILTIN_PMOVQW256_MEM,
28673 IX86_BUILTIN_PMOVQW128_MEM,
28674 IX86_BUILTIN_PMOVUSQB256_MEM,
28675 IX86_BUILTIN_PMOVUSQB128_MEM,
28676 IX86_BUILTIN_PMOVSQB256_MEM,
28677 IX86_BUILTIN_PMOVSQB128_MEM,
28678 IX86_BUILTIN_PMOVQB256_MEM,
28679 IX86_BUILTIN_PMOVQB128_MEM,
28680 IX86_BUILTIN_PMOVUSDW256_MEM,
28681 IX86_BUILTIN_PMOVUSDW128_MEM,
28682 IX86_BUILTIN_PMOVSDW256_MEM,
28683 IX86_BUILTIN_PMOVSDW128_MEM,
28684 IX86_BUILTIN_PMOVDW256_MEM,
28685 IX86_BUILTIN_PMOVDW128_MEM,
28686 IX86_BUILTIN_PMOVUSDB256_MEM,
28687 IX86_BUILTIN_PMOVUSDB128_MEM,
28688 IX86_BUILTIN_PMOVSDB256_MEM,
28689 IX86_BUILTIN_PMOVSDB128_MEM,
28690 IX86_BUILTIN_PMOVDB256_MEM,
28691 IX86_BUILTIN_PMOVDB128_MEM,
28692 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
28693 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
28694 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
28695 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
28696 IX86_BUILTIN_MOVDQA64STORE256_MASK,
28697 IX86_BUILTIN_MOVDQA64STORE128_MASK,
28698 IX86_BUILTIN_MOVDQA32STORE256_MASK,
28699 IX86_BUILTIN_MOVDQA32STORE128_MASK,
28700 IX86_BUILTIN_LOADAPD256_MASK,
28701 IX86_BUILTIN_LOADAPD128_MASK,
28702 IX86_BUILTIN_LOADAPS256_MASK,
28703 IX86_BUILTIN_LOADAPS128_MASK,
28704 IX86_BUILTIN_STOREAPD256_MASK,
28705 IX86_BUILTIN_STOREAPD128_MASK,
28706 IX86_BUILTIN_STOREAPS256_MASK,
28707 IX86_BUILTIN_STOREAPS128_MASK,
28708 IX86_BUILTIN_LOADUPD256_MASK,
28709 IX86_BUILTIN_LOADUPD128_MASK,
28710 IX86_BUILTIN_LOADUPS256_MASK,
28711 IX86_BUILTIN_LOADUPS128_MASK,
28712 IX86_BUILTIN_STOREUPD256_MASK,
28713 IX86_BUILTIN_STOREUPD128_MASK,
28714 IX86_BUILTIN_STOREUPS256_MASK,
28715 IX86_BUILTIN_STOREUPS128_MASK,
28716 IX86_BUILTIN_LOADDQUDI256_MASK,
28717 IX86_BUILTIN_LOADDQUDI128_MASK,
28718 IX86_BUILTIN_LOADDQUSI256_MASK,
28719 IX86_BUILTIN_LOADDQUSI128_MASK,
28720 IX86_BUILTIN_LOADDQUHI256_MASK,
28721 IX86_BUILTIN_LOADDQUHI128_MASK,
28722 IX86_BUILTIN_LOADDQUQI256_MASK,
28723 IX86_BUILTIN_LOADDQUQI128_MASK,
28724 IX86_BUILTIN_STOREDQUDI256_MASK,
28725 IX86_BUILTIN_STOREDQUDI128_MASK,
28726 IX86_BUILTIN_STOREDQUSI256_MASK,
28727 IX86_BUILTIN_STOREDQUSI128_MASK,
28728 IX86_BUILTIN_STOREDQUHI256_MASK,
28729 IX86_BUILTIN_STOREDQUHI128_MASK,
28730 IX86_BUILTIN_STOREDQUQI256_MASK,
28731 IX86_BUILTIN_STOREDQUQI128_MASK,
28732 IX86_BUILTIN_COMPRESSPDSTORE256,
28733 IX86_BUILTIN_COMPRESSPDSTORE128,
28734 IX86_BUILTIN_COMPRESSPSSTORE256,
28735 IX86_BUILTIN_COMPRESSPSSTORE128,
28736 IX86_BUILTIN_PCOMPRESSQSTORE256,
28737 IX86_BUILTIN_PCOMPRESSQSTORE128,
28738 IX86_BUILTIN_PCOMPRESSDSTORE256,
28739 IX86_BUILTIN_PCOMPRESSDSTORE128,
28740 IX86_BUILTIN_EXPANDPDLOAD256,
28741 IX86_BUILTIN_EXPANDPDLOAD128,
28742 IX86_BUILTIN_EXPANDPSLOAD256,
28743 IX86_BUILTIN_EXPANDPSLOAD128,
28744 IX86_BUILTIN_PEXPANDQLOAD256,
28745 IX86_BUILTIN_PEXPANDQLOAD128,
28746 IX86_BUILTIN_PEXPANDDLOAD256,
28747 IX86_BUILTIN_PEXPANDDLOAD128,
28748 IX86_BUILTIN_EXPANDPDLOAD256Z,
28749 IX86_BUILTIN_EXPANDPDLOAD128Z,
28750 IX86_BUILTIN_EXPANDPSLOAD256Z,
28751 IX86_BUILTIN_EXPANDPSLOAD128Z,
28752 IX86_BUILTIN_PEXPANDQLOAD256Z,
28753 IX86_BUILTIN_PEXPANDQLOAD128Z,
28754 IX86_BUILTIN_PEXPANDDLOAD256Z,
28755 IX86_BUILTIN_PEXPANDDLOAD128Z,
28756 IX86_BUILTIN_PALIGNR256_MASK,
28757 IX86_BUILTIN_PALIGNR128_MASK,
28758 IX86_BUILTIN_MOVDQA64_256_MASK,
28759 IX86_BUILTIN_MOVDQA64_128_MASK,
28760 IX86_BUILTIN_MOVDQA32_256_MASK,
28761 IX86_BUILTIN_MOVDQA32_128_MASK,
28762 IX86_BUILTIN_MOVAPD256_MASK,
28763 IX86_BUILTIN_MOVAPD128_MASK,
28764 IX86_BUILTIN_MOVAPS256_MASK,
28765 IX86_BUILTIN_MOVAPS128_MASK,
28766 IX86_BUILTIN_MOVDQUHI256_MASK,
28767 IX86_BUILTIN_MOVDQUHI128_MASK,
28768 IX86_BUILTIN_MOVDQUQI256_MASK,
28769 IX86_BUILTIN_MOVDQUQI128_MASK,
28770 IX86_BUILTIN_MINPS128_MASK,
28771 IX86_BUILTIN_MAXPS128_MASK,
28772 IX86_BUILTIN_MINPD128_MASK,
28773 IX86_BUILTIN_MAXPD128_MASK,
28774 IX86_BUILTIN_MAXPD256_MASK,
28775 IX86_BUILTIN_MAXPS256_MASK,
28776 IX86_BUILTIN_MINPD256_MASK,
28777 IX86_BUILTIN_MINPS256_MASK,
28778 IX86_BUILTIN_MULPS128_MASK,
28779 IX86_BUILTIN_DIVPS128_MASK,
28780 IX86_BUILTIN_MULPD128_MASK,
28781 IX86_BUILTIN_DIVPD128_MASK,
28782 IX86_BUILTIN_DIVPD256_MASK,
28783 IX86_BUILTIN_DIVPS256_MASK,
28784 IX86_BUILTIN_MULPD256_MASK,
28785 IX86_BUILTIN_MULPS256_MASK,
28786 IX86_BUILTIN_ADDPD128_MASK,
28787 IX86_BUILTIN_ADDPD256_MASK,
28788 IX86_BUILTIN_ADDPS128_MASK,
28789 IX86_BUILTIN_ADDPS256_MASK,
28790 IX86_BUILTIN_SUBPD128_MASK,
28791 IX86_BUILTIN_SUBPD256_MASK,
28792 IX86_BUILTIN_SUBPS128_MASK,
28793 IX86_BUILTIN_SUBPS256_MASK,
28794 IX86_BUILTIN_XORPD256_MASK,
28795 IX86_BUILTIN_XORPD128_MASK,
28796 IX86_BUILTIN_XORPS256_MASK,
28797 IX86_BUILTIN_XORPS128_MASK,
28798 IX86_BUILTIN_ORPD256_MASK,
28799 IX86_BUILTIN_ORPD128_MASK,
28800 IX86_BUILTIN_ORPS256_MASK,
28801 IX86_BUILTIN_ORPS128_MASK,
28802 IX86_BUILTIN_BROADCASTF32x2_256,
28803 IX86_BUILTIN_BROADCASTI32x2_256,
28804 IX86_BUILTIN_BROADCASTI32x2_128,
28805 IX86_BUILTIN_BROADCASTF64X2_256,
28806 IX86_BUILTIN_BROADCASTI64X2_256,
28807 IX86_BUILTIN_BROADCASTF32X4_256,
28808 IX86_BUILTIN_BROADCASTI32X4_256,
28809 IX86_BUILTIN_EXTRACTF32X4_256,
28810 IX86_BUILTIN_EXTRACTI32X4_256,
28811 IX86_BUILTIN_DBPSADBW256,
28812 IX86_BUILTIN_DBPSADBW128,
28813 IX86_BUILTIN_CVTTPD2QQ256,
28814 IX86_BUILTIN_CVTTPD2QQ128,
28815 IX86_BUILTIN_CVTTPD2UQQ256,
28816 IX86_BUILTIN_CVTTPD2UQQ128,
28817 IX86_BUILTIN_CVTPD2QQ256,
28818 IX86_BUILTIN_CVTPD2QQ128,
28819 IX86_BUILTIN_CVTPD2UQQ256,
28820 IX86_BUILTIN_CVTPD2UQQ128,
28821 IX86_BUILTIN_CVTPD2UDQ256_MASK,
28822 IX86_BUILTIN_CVTPD2UDQ128_MASK,
28823 IX86_BUILTIN_CVTTPS2QQ256,
28824 IX86_BUILTIN_CVTTPS2QQ128,
28825 IX86_BUILTIN_CVTTPS2UQQ256,
28826 IX86_BUILTIN_CVTTPS2UQQ128,
28827 IX86_BUILTIN_CVTTPS2DQ256_MASK,
28828 IX86_BUILTIN_CVTTPS2DQ128_MASK,
28829 IX86_BUILTIN_CVTTPS2UDQ256,
28830 IX86_BUILTIN_CVTTPS2UDQ128,
28831 IX86_BUILTIN_CVTTPD2DQ256_MASK,
28832 IX86_BUILTIN_CVTTPD2DQ128_MASK,
28833 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
28834 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
28835 IX86_BUILTIN_CVTPD2DQ256_MASK,
28836 IX86_BUILTIN_CVTPD2DQ128_MASK,
28837 IX86_BUILTIN_CVTDQ2PD256_MASK,
28838 IX86_BUILTIN_CVTDQ2PD128_MASK,
28839 IX86_BUILTIN_CVTUDQ2PD256_MASK,
28840 IX86_BUILTIN_CVTUDQ2PD128_MASK,
28841 IX86_BUILTIN_CVTDQ2PS256_MASK,
28842 IX86_BUILTIN_CVTDQ2PS128_MASK,
28843 IX86_BUILTIN_CVTUDQ2PS256_MASK,
28844 IX86_BUILTIN_CVTUDQ2PS128_MASK,
28845 IX86_BUILTIN_CVTPS2PD256_MASK,
28846 IX86_BUILTIN_CVTPS2PD128_MASK,
28847 IX86_BUILTIN_PBROADCASTB256_MASK,
28848 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
28849 IX86_BUILTIN_PBROADCASTB128_MASK,
28850 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
28851 IX86_BUILTIN_PBROADCASTW256_MASK,
28852 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
28853 IX86_BUILTIN_PBROADCASTW128_MASK,
28854 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
28855 IX86_BUILTIN_PBROADCASTD256_MASK,
28856 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
28857 IX86_BUILTIN_PBROADCASTD128_MASK,
28858 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
28859 IX86_BUILTIN_PBROADCASTQ256_MASK,
28860 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
28861 IX86_BUILTIN_PBROADCASTQ256_MEM_MASK,
28862 IX86_BUILTIN_PBROADCASTQ128_MASK,
28863 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
28864 IX86_BUILTIN_PBROADCASTQ128_MEM_MASK,
28865 IX86_BUILTIN_BROADCASTSS256,
28866 IX86_BUILTIN_BROADCASTSS128,
28867 IX86_BUILTIN_BROADCASTSD256,
28868 IX86_BUILTIN_EXTRACTF64X2_256,
28869 IX86_BUILTIN_EXTRACTI64X2_256,
28870 IX86_BUILTIN_INSERTF32X4_256,
28871 IX86_BUILTIN_INSERTI32X4_256,
28872 IX86_BUILTIN_PMOVSXBW256_MASK,
28873 IX86_BUILTIN_PMOVSXBW128_MASK,
28874 IX86_BUILTIN_PMOVSXBD256_MASK,
28875 IX86_BUILTIN_PMOVSXBD128_MASK,
28876 IX86_BUILTIN_PMOVSXBQ256_MASK,
28877 IX86_BUILTIN_PMOVSXBQ128_MASK,
28878 IX86_BUILTIN_PMOVSXWD256_MASK,
28879 IX86_BUILTIN_PMOVSXWD128_MASK,
28880 IX86_BUILTIN_PMOVSXWQ256_MASK,
28881 IX86_BUILTIN_PMOVSXWQ128_MASK,
28882 IX86_BUILTIN_PMOVSXDQ256_MASK,
28883 IX86_BUILTIN_PMOVSXDQ128_MASK,
28884 IX86_BUILTIN_PMOVZXBW256_MASK,
28885 IX86_BUILTIN_PMOVZXBW128_MASK,
28886 IX86_BUILTIN_PMOVZXBD256_MASK,
28887 IX86_BUILTIN_PMOVZXBD128_MASK,
28888 IX86_BUILTIN_PMOVZXBQ256_MASK,
28889 IX86_BUILTIN_PMOVZXBQ128_MASK,
28890 IX86_BUILTIN_PMOVZXWD256_MASK,
28891 IX86_BUILTIN_PMOVZXWD128_MASK,
28892 IX86_BUILTIN_PMOVZXWQ256_MASK,
28893 IX86_BUILTIN_PMOVZXWQ128_MASK,
28894 IX86_BUILTIN_PMOVZXDQ256_MASK,
28895 IX86_BUILTIN_PMOVZXDQ128_MASK,
28896 IX86_BUILTIN_REDUCEPD256_MASK,
28897 IX86_BUILTIN_REDUCEPD128_MASK,
28898 IX86_BUILTIN_REDUCEPS256_MASK,
28899 IX86_BUILTIN_REDUCEPS128_MASK,
28900 IX86_BUILTIN_REDUCESD_MASK,
28901 IX86_BUILTIN_REDUCESS_MASK,
28902 IX86_BUILTIN_VPERMVARHI256_MASK,
28903 IX86_BUILTIN_VPERMVARHI128_MASK,
28904 IX86_BUILTIN_VPERMT2VARHI256,
28905 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
28906 IX86_BUILTIN_VPERMT2VARHI128,
28907 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
28908 IX86_BUILTIN_VPERMI2VARHI256,
28909 IX86_BUILTIN_VPERMI2VARHI128,
28910 IX86_BUILTIN_RCP14PD256,
28911 IX86_BUILTIN_RCP14PD128,
28912 IX86_BUILTIN_RCP14PS256,
28913 IX86_BUILTIN_RCP14PS128,
28914 IX86_BUILTIN_RSQRT14PD256_MASK,
28915 IX86_BUILTIN_RSQRT14PD128_MASK,
28916 IX86_BUILTIN_RSQRT14PS256_MASK,
28917 IX86_BUILTIN_RSQRT14PS128_MASK,
28918 IX86_BUILTIN_SQRTPD256_MASK,
28919 IX86_BUILTIN_SQRTPD128_MASK,
28920 IX86_BUILTIN_SQRTPS256_MASK,
28921 IX86_BUILTIN_SQRTPS128_MASK,
28922 IX86_BUILTIN_PADDB128_MASK,
28923 IX86_BUILTIN_PADDW128_MASK,
28924 IX86_BUILTIN_PADDD128_MASK,
28925 IX86_BUILTIN_PADDQ128_MASK,
28926 IX86_BUILTIN_PSUBB128_MASK,
28927 IX86_BUILTIN_PSUBW128_MASK,
28928 IX86_BUILTIN_PSUBD128_MASK,
28929 IX86_BUILTIN_PSUBQ128_MASK,
28930 IX86_BUILTIN_PADDSB128_MASK,
28931 IX86_BUILTIN_PADDSW128_MASK,
28932 IX86_BUILTIN_PSUBSB128_MASK,
28933 IX86_BUILTIN_PSUBSW128_MASK,
28934 IX86_BUILTIN_PADDUSB128_MASK,
28935 IX86_BUILTIN_PADDUSW128_MASK,
28936 IX86_BUILTIN_PSUBUSB128_MASK,
28937 IX86_BUILTIN_PSUBUSW128_MASK,
28938 IX86_BUILTIN_PADDB256_MASK,
28939 IX86_BUILTIN_PADDW256_MASK,
28940 IX86_BUILTIN_PADDD256_MASK,
28941 IX86_BUILTIN_PADDQ256_MASK,
28942 IX86_BUILTIN_PADDSB256_MASK,
28943 IX86_BUILTIN_PADDSW256_MASK,
28944 IX86_BUILTIN_PADDUSB256_MASK,
28945 IX86_BUILTIN_PADDUSW256_MASK,
28946 IX86_BUILTIN_PSUBB256_MASK,
28947 IX86_BUILTIN_PSUBW256_MASK,
28948 IX86_BUILTIN_PSUBD256_MASK,
28949 IX86_BUILTIN_PSUBQ256_MASK,
28950 IX86_BUILTIN_PSUBSB256_MASK,
28951 IX86_BUILTIN_PSUBSW256_MASK,
28952 IX86_BUILTIN_PSUBUSB256_MASK,
28953 IX86_BUILTIN_PSUBUSW256_MASK,
28954 IX86_BUILTIN_SHUF_F64x2_256,
28955 IX86_BUILTIN_SHUF_I64x2_256,
28956 IX86_BUILTIN_SHUF_I32x4_256,
28957 IX86_BUILTIN_SHUF_F32x4_256,
28958 IX86_BUILTIN_PMOVWB128,
28959 IX86_BUILTIN_PMOVWB256,
28960 IX86_BUILTIN_PMOVSWB128,
28961 IX86_BUILTIN_PMOVSWB256,
28962 IX86_BUILTIN_PMOVUSWB128,
28963 IX86_BUILTIN_PMOVUSWB256,
28964 IX86_BUILTIN_PMOVDB128,
28965 IX86_BUILTIN_PMOVDB256,
28966 IX86_BUILTIN_PMOVSDB128,
28967 IX86_BUILTIN_PMOVSDB256,
28968 IX86_BUILTIN_PMOVUSDB128,
28969 IX86_BUILTIN_PMOVUSDB256,
28970 IX86_BUILTIN_PMOVDW128,
28971 IX86_BUILTIN_PMOVDW256,
28972 IX86_BUILTIN_PMOVSDW128,
28973 IX86_BUILTIN_PMOVSDW256,
28974 IX86_BUILTIN_PMOVUSDW128,
28975 IX86_BUILTIN_PMOVUSDW256,
28976 IX86_BUILTIN_PMOVQB128,
28977 IX86_BUILTIN_PMOVQB256,
28978 IX86_BUILTIN_PMOVSQB128,
28979 IX86_BUILTIN_PMOVSQB256,
28980 IX86_BUILTIN_PMOVUSQB128,
28981 IX86_BUILTIN_PMOVUSQB256,
28982 IX86_BUILTIN_PMOVQW128,
28983 IX86_BUILTIN_PMOVQW256,
28984 IX86_BUILTIN_PMOVSQW128,
28985 IX86_BUILTIN_PMOVSQW256,
28986 IX86_BUILTIN_PMOVUSQW128,
28987 IX86_BUILTIN_PMOVUSQW256,
28988 IX86_BUILTIN_PMOVQD128,
28989 IX86_BUILTIN_PMOVQD256,
28990 IX86_BUILTIN_PMOVSQD128,
28991 IX86_BUILTIN_PMOVSQD256,
28992 IX86_BUILTIN_PMOVUSQD128,
28993 IX86_BUILTIN_PMOVUSQD256,
28994 IX86_BUILTIN_RANGEPD256,
28995 IX86_BUILTIN_RANGEPD128,
28996 IX86_BUILTIN_RANGEPS256,
28997 IX86_BUILTIN_RANGEPS128,
28998 IX86_BUILTIN_GETEXPPS256,
28999 IX86_BUILTIN_GETEXPPD256,
29000 IX86_BUILTIN_GETEXPPS128,
29001 IX86_BUILTIN_GETEXPPD128,
29002 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29003 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29004 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29005 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29006 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29007 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29008 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29009 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29010 IX86_BUILTIN_PABSQ256,
29011 IX86_BUILTIN_PABSQ128,
29012 IX86_BUILTIN_PABSD256_MASK,
29013 IX86_BUILTIN_PABSD128_MASK,
29014 IX86_BUILTIN_PMULHRSW256_MASK,
29015 IX86_BUILTIN_PMULHRSW128_MASK,
29016 IX86_BUILTIN_PMULHUW128_MASK,
29017 IX86_BUILTIN_PMULHUW256_MASK,
29018 IX86_BUILTIN_PMULHW256_MASK,
29019 IX86_BUILTIN_PMULHW128_MASK,
29020 IX86_BUILTIN_PMULLW256_MASK,
29021 IX86_BUILTIN_PMULLW128_MASK,
29022 IX86_BUILTIN_PMULLQ256,
29023 IX86_BUILTIN_PMULLQ128,
29024 IX86_BUILTIN_ANDPD256_MASK,
29025 IX86_BUILTIN_ANDPD128_MASK,
29026 IX86_BUILTIN_ANDPS256_MASK,
29027 IX86_BUILTIN_ANDPS128_MASK,
29028 IX86_BUILTIN_ANDNPD256_MASK,
29029 IX86_BUILTIN_ANDNPD128_MASK,
29030 IX86_BUILTIN_ANDNPS256_MASK,
29031 IX86_BUILTIN_ANDNPS128_MASK,
29032 IX86_BUILTIN_PSLLWI128_MASK,
29033 IX86_BUILTIN_PSLLDI128_MASK,
29034 IX86_BUILTIN_PSLLQI128_MASK,
29035 IX86_BUILTIN_PSLLW128_MASK,
29036 IX86_BUILTIN_PSLLD128_MASK,
29037 IX86_BUILTIN_PSLLQ128_MASK,
29038 IX86_BUILTIN_PSLLWI256_MASK,
29039 IX86_BUILTIN_PSLLW256_MASK,
29040 IX86_BUILTIN_PSLLDI256_MASK,
29041 IX86_BUILTIN_PSLLD256_MASK,
29042 IX86_BUILTIN_PSLLQI256_MASK,
29043 IX86_BUILTIN_PSLLQ256_MASK,
29044 IX86_BUILTIN_PSRADI128_MASK,
29045 IX86_BUILTIN_PSRAD128_MASK,
29046 IX86_BUILTIN_PSRADI256_MASK,
29047 IX86_BUILTIN_PSRAD256_MASK,
29048 IX86_BUILTIN_PSRAQI128_MASK,
29049 IX86_BUILTIN_PSRAQ128_MASK,
29050 IX86_BUILTIN_PSRAQI256_MASK,
29051 IX86_BUILTIN_PSRAQ256_MASK,
29052 IX86_BUILTIN_PANDD256,
29053 IX86_BUILTIN_PANDD128,
29054 IX86_BUILTIN_PSRLDI128_MASK,
29055 IX86_BUILTIN_PSRLD128_MASK,
29056 IX86_BUILTIN_PSRLDI256_MASK,
29057 IX86_BUILTIN_PSRLD256_MASK,
29058 IX86_BUILTIN_PSRLQI128_MASK,
29059 IX86_BUILTIN_PSRLQ128_MASK,
29060 IX86_BUILTIN_PSRLQI256_MASK,
29061 IX86_BUILTIN_PSRLQ256_MASK,
29062 IX86_BUILTIN_PANDQ256,
29063 IX86_BUILTIN_PANDQ128,
29064 IX86_BUILTIN_PANDND256,
29065 IX86_BUILTIN_PANDND128,
29066 IX86_BUILTIN_PANDNQ256,
29067 IX86_BUILTIN_PANDNQ128,
29068 IX86_BUILTIN_PORD256,
29069 IX86_BUILTIN_PORD128,
29070 IX86_BUILTIN_PORQ256,
29071 IX86_BUILTIN_PORQ128,
29072 IX86_BUILTIN_PXORD256,
29073 IX86_BUILTIN_PXORD128,
29074 IX86_BUILTIN_PXORQ256,
29075 IX86_BUILTIN_PXORQ128,
29076 IX86_BUILTIN_PACKSSWB256_MASK,
29077 IX86_BUILTIN_PACKSSWB128_MASK,
29078 IX86_BUILTIN_PACKUSWB256_MASK,
29079 IX86_BUILTIN_PACKUSWB128_MASK,
29080 IX86_BUILTIN_RNDSCALEPS256,
29081 IX86_BUILTIN_RNDSCALEPD256,
29082 IX86_BUILTIN_RNDSCALEPS128,
29083 IX86_BUILTIN_RNDSCALEPD128,
29084 IX86_BUILTIN_VTERNLOGQ256_MASK,
29085 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29086 IX86_BUILTIN_VTERNLOGD256_MASK,
29087 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29088 IX86_BUILTIN_VTERNLOGQ128_MASK,
29089 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29090 IX86_BUILTIN_VTERNLOGD128_MASK,
29091 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29092 IX86_BUILTIN_SCALEFPD256,
29093 IX86_BUILTIN_SCALEFPS256,
29094 IX86_BUILTIN_SCALEFPD128,
29095 IX86_BUILTIN_SCALEFPS128,
29096 IX86_BUILTIN_VFMADDPD256_MASK,
29097 IX86_BUILTIN_VFMADDPD256_MASK3,
29098 IX86_BUILTIN_VFMADDPD256_MASKZ,
29099 IX86_BUILTIN_VFMADDPD128_MASK,
29100 IX86_BUILTIN_VFMADDPD128_MASK3,
29101 IX86_BUILTIN_VFMADDPD128_MASKZ,
29102 IX86_BUILTIN_VFMADDPS256_MASK,
29103 IX86_BUILTIN_VFMADDPS256_MASK3,
29104 IX86_BUILTIN_VFMADDPS256_MASKZ,
29105 IX86_BUILTIN_VFMADDPS128_MASK,
29106 IX86_BUILTIN_VFMADDPS128_MASK3,
29107 IX86_BUILTIN_VFMADDPS128_MASKZ,
29108 IX86_BUILTIN_VFMSUBPD256_MASK3,
29109 IX86_BUILTIN_VFMSUBPD128_MASK3,
29110 IX86_BUILTIN_VFMSUBPS256_MASK3,
29111 IX86_BUILTIN_VFMSUBPS128_MASK3,
29112 IX86_BUILTIN_VFNMADDPD256_MASK,
29113 IX86_BUILTIN_VFNMADDPD128_MASK,
29114 IX86_BUILTIN_VFNMADDPS256_MASK,
29115 IX86_BUILTIN_VFNMADDPS128_MASK,
29116 IX86_BUILTIN_VFNMSUBPD256_MASK,
29117 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29118 IX86_BUILTIN_VFNMSUBPD128_MASK,
29119 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29120 IX86_BUILTIN_VFNMSUBPS256_MASK,
29121 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29122 IX86_BUILTIN_VFNMSUBPS128_MASK,
29123 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29124 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29125 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29126 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29127 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29128 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29129 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29130 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29131 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29132 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29133 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29134 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29135 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29136 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29137 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29138 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29139 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29140 IX86_BUILTIN_INSERTF64X2_256,
29141 IX86_BUILTIN_INSERTI64X2_256,
29142 IX86_BUILTIN_PSRAVV16HI,
29143 IX86_BUILTIN_PSRAVV8HI,
29144 IX86_BUILTIN_PMADDUBSW256_MASK,
29145 IX86_BUILTIN_PMADDUBSW128_MASK,
29146 IX86_BUILTIN_PMADDWD256_MASK,
29147 IX86_BUILTIN_PMADDWD128_MASK,
29148 IX86_BUILTIN_PSRLVV16HI,
29149 IX86_BUILTIN_PSRLVV8HI,
29150 IX86_BUILTIN_CVTPS2DQ256_MASK,
29151 IX86_BUILTIN_CVTPS2DQ128_MASK,
29152 IX86_BUILTIN_CVTPS2UDQ256,
29153 IX86_BUILTIN_CVTPS2UDQ128,
29154 IX86_BUILTIN_CVTPS2QQ256,
29155 IX86_BUILTIN_CVTPS2QQ128,
29156 IX86_BUILTIN_CVTPS2UQQ256,
29157 IX86_BUILTIN_CVTPS2UQQ128,
29158 IX86_BUILTIN_GETMANTPS256,
29159 IX86_BUILTIN_GETMANTPS128,
29160 IX86_BUILTIN_GETMANTPD256,
29161 IX86_BUILTIN_GETMANTPD128,
29162 IX86_BUILTIN_MOVDDUP256_MASK,
29163 IX86_BUILTIN_MOVDDUP128_MASK,
29164 IX86_BUILTIN_MOVSHDUP256_MASK,
29165 IX86_BUILTIN_MOVSHDUP128_MASK,
29166 IX86_BUILTIN_MOVSLDUP256_MASK,
29167 IX86_BUILTIN_MOVSLDUP128_MASK,
29168 IX86_BUILTIN_CVTQQ2PS256,
29169 IX86_BUILTIN_CVTQQ2PS128,
29170 IX86_BUILTIN_CVTUQQ2PS256,
29171 IX86_BUILTIN_CVTUQQ2PS128,
29172 IX86_BUILTIN_CVTQQ2PD256,
29173 IX86_BUILTIN_CVTQQ2PD128,
29174 IX86_BUILTIN_CVTUQQ2PD256,
29175 IX86_BUILTIN_CVTUQQ2PD128,
29176 IX86_BUILTIN_VPERMT2VARQ256,
29177 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29178 IX86_BUILTIN_VPERMT2VARD256,
29179 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29180 IX86_BUILTIN_VPERMI2VARQ256,
29181 IX86_BUILTIN_VPERMI2VARD256,
29182 IX86_BUILTIN_VPERMT2VARPD256,
29183 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29184 IX86_BUILTIN_VPERMT2VARPS256,
29185 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29186 IX86_BUILTIN_VPERMI2VARPD256,
29187 IX86_BUILTIN_VPERMI2VARPS256,
29188 IX86_BUILTIN_VPERMT2VARQ128,
29189 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29190 IX86_BUILTIN_VPERMT2VARD128,
29191 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29192 IX86_BUILTIN_VPERMI2VARQ128,
29193 IX86_BUILTIN_VPERMI2VARD128,
29194 IX86_BUILTIN_VPERMT2VARPD128,
29195 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29196 IX86_BUILTIN_VPERMT2VARPS128,
29197 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29198 IX86_BUILTIN_VPERMI2VARPD128,
29199 IX86_BUILTIN_VPERMI2VARPS128,
29200 IX86_BUILTIN_PSHUFB256_MASK,
29201 IX86_BUILTIN_PSHUFB128_MASK,
29202 IX86_BUILTIN_PSHUFHW256_MASK,
29203 IX86_BUILTIN_PSHUFHW128_MASK,
29204 IX86_BUILTIN_PSHUFLW256_MASK,
29205 IX86_BUILTIN_PSHUFLW128_MASK,
29206 IX86_BUILTIN_PSHUFD256_MASK,
29207 IX86_BUILTIN_PSHUFD128_MASK,
29208 IX86_BUILTIN_SHUFPD256_MASK,
29209 IX86_BUILTIN_SHUFPD128_MASK,
29210 IX86_BUILTIN_SHUFPS256_MASK,
29211 IX86_BUILTIN_SHUFPS128_MASK,
29212 IX86_BUILTIN_PROLVQ256,
29213 IX86_BUILTIN_PROLVQ128,
29214 IX86_BUILTIN_PROLQ256,
29215 IX86_BUILTIN_PROLQ128,
29216 IX86_BUILTIN_PRORVQ256,
29217 IX86_BUILTIN_PRORVQ128,
29218 IX86_BUILTIN_PRORQ256,
29219 IX86_BUILTIN_PRORQ128,
29220 IX86_BUILTIN_PSRAVQ128,
29221 IX86_BUILTIN_PSRAVQ256,
29222 IX86_BUILTIN_PSLLVV4DI_MASK,
29223 IX86_BUILTIN_PSLLVV2DI_MASK,
29224 IX86_BUILTIN_PSLLVV8SI_MASK,
29225 IX86_BUILTIN_PSLLVV4SI_MASK,
29226 IX86_BUILTIN_PSRAVV8SI_MASK,
29227 IX86_BUILTIN_PSRAVV4SI_MASK,
29228 IX86_BUILTIN_PSRLVV4DI_MASK,
29229 IX86_BUILTIN_PSRLVV2DI_MASK,
29230 IX86_BUILTIN_PSRLVV8SI_MASK,
29231 IX86_BUILTIN_PSRLVV4SI_MASK,
29232 IX86_BUILTIN_PSRAWI256_MASK,
29233 IX86_BUILTIN_PSRAW256_MASK,
29234 IX86_BUILTIN_PSRAWI128_MASK,
29235 IX86_BUILTIN_PSRAW128_MASK,
29236 IX86_BUILTIN_PSRLWI256_MASK,
29237 IX86_BUILTIN_PSRLW256_MASK,
29238 IX86_BUILTIN_PSRLWI128_MASK,
29239 IX86_BUILTIN_PSRLW128_MASK,
29240 IX86_BUILTIN_PRORVD256,
29241 IX86_BUILTIN_PROLVD256,
29242 IX86_BUILTIN_PRORD256,
29243 IX86_BUILTIN_PROLD256,
29244 IX86_BUILTIN_PRORVD128,
29245 IX86_BUILTIN_PROLVD128,
29246 IX86_BUILTIN_PRORD128,
29247 IX86_BUILTIN_PROLD128,
29248 IX86_BUILTIN_FPCLASSPD256,
29249 IX86_BUILTIN_FPCLASSPD128,
29250 IX86_BUILTIN_FPCLASSSD,
29251 IX86_BUILTIN_FPCLASSPS256,
29252 IX86_BUILTIN_FPCLASSPS128,
29253 IX86_BUILTIN_FPCLASSSS,
29254 IX86_BUILTIN_CVTB2MASK128,
29255 IX86_BUILTIN_CVTB2MASK256,
29256 IX86_BUILTIN_CVTW2MASK128,
29257 IX86_BUILTIN_CVTW2MASK256,
29258 IX86_BUILTIN_CVTD2MASK128,
29259 IX86_BUILTIN_CVTD2MASK256,
29260 IX86_BUILTIN_CVTQ2MASK128,
29261 IX86_BUILTIN_CVTQ2MASK256,
29262 IX86_BUILTIN_CVTMASK2B128,
29263 IX86_BUILTIN_CVTMASK2B256,
29264 IX86_BUILTIN_CVTMASK2W128,
29265 IX86_BUILTIN_CVTMASK2W256,
29266 IX86_BUILTIN_CVTMASK2D128,
29267 IX86_BUILTIN_CVTMASK2D256,
29268 IX86_BUILTIN_CVTMASK2Q128,
29269 IX86_BUILTIN_CVTMASK2Q256,
29270 IX86_BUILTIN_PCMPEQB128_MASK,
29271 IX86_BUILTIN_PCMPEQB256_MASK,
29272 IX86_BUILTIN_PCMPEQW128_MASK,
29273 IX86_BUILTIN_PCMPEQW256_MASK,
29274 IX86_BUILTIN_PCMPEQD128_MASK,
29275 IX86_BUILTIN_PCMPEQD256_MASK,
29276 IX86_BUILTIN_PCMPEQQ128_MASK,
29277 IX86_BUILTIN_PCMPEQQ256_MASK,
29278 IX86_BUILTIN_PCMPGTB128_MASK,
29279 IX86_BUILTIN_PCMPGTB256_MASK,
29280 IX86_BUILTIN_PCMPGTW128_MASK,
29281 IX86_BUILTIN_PCMPGTW256_MASK,
29282 IX86_BUILTIN_PCMPGTD128_MASK,
29283 IX86_BUILTIN_PCMPGTD256_MASK,
29284 IX86_BUILTIN_PCMPGTQ128_MASK,
29285 IX86_BUILTIN_PCMPGTQ256_MASK,
29286 IX86_BUILTIN_PTESTMB128,
29287 IX86_BUILTIN_PTESTMB256,
29288 IX86_BUILTIN_PTESTMW128,
29289 IX86_BUILTIN_PTESTMW256,
29290 IX86_BUILTIN_PTESTMD128,
29291 IX86_BUILTIN_PTESTMD256,
29292 IX86_BUILTIN_PTESTMQ128,
29293 IX86_BUILTIN_PTESTMQ256,
29294 IX86_BUILTIN_PTESTNMB128,
29295 IX86_BUILTIN_PTESTNMB256,
29296 IX86_BUILTIN_PTESTNMW128,
29297 IX86_BUILTIN_PTESTNMW256,
29298 IX86_BUILTIN_PTESTNMD128,
29299 IX86_BUILTIN_PTESTNMD256,
29300 IX86_BUILTIN_PTESTNMQ128,
29301 IX86_BUILTIN_PTESTNMQ256,
29302 IX86_BUILTIN_PBROADCASTMB128,
29303 IX86_BUILTIN_PBROADCASTMB256,
29304 IX86_BUILTIN_PBROADCASTMW128,
29305 IX86_BUILTIN_PBROADCASTMW256,
29306 IX86_BUILTIN_COMPRESSPD256,
29307 IX86_BUILTIN_COMPRESSPD128,
29308 IX86_BUILTIN_COMPRESSPS256,
29309 IX86_BUILTIN_COMPRESSPS128,
29310 IX86_BUILTIN_PCOMPRESSQ256,
29311 IX86_BUILTIN_PCOMPRESSQ128,
29312 IX86_BUILTIN_PCOMPRESSD256,
29313 IX86_BUILTIN_PCOMPRESSD128,
29314 IX86_BUILTIN_EXPANDPD256,
29315 IX86_BUILTIN_EXPANDPD128,
29316 IX86_BUILTIN_EXPANDPS256,
29317 IX86_BUILTIN_EXPANDPS128,
29318 IX86_BUILTIN_PEXPANDQ256,
29319 IX86_BUILTIN_PEXPANDQ128,
29320 IX86_BUILTIN_PEXPANDD256,
29321 IX86_BUILTIN_PEXPANDD128,
29322 IX86_BUILTIN_EXPANDPD256Z,
29323 IX86_BUILTIN_EXPANDPD128Z,
29324 IX86_BUILTIN_EXPANDPS256Z,
29325 IX86_BUILTIN_EXPANDPS128Z,
29326 IX86_BUILTIN_PEXPANDQ256Z,
29327 IX86_BUILTIN_PEXPANDQ128Z,
29328 IX86_BUILTIN_PEXPANDD256Z,
29329 IX86_BUILTIN_PEXPANDD128Z,
29330 IX86_BUILTIN_PMAXSD256_MASK,
29331 IX86_BUILTIN_PMINSD256_MASK,
29332 IX86_BUILTIN_PMAXUD256_MASK,
29333 IX86_BUILTIN_PMINUD256_MASK,
29334 IX86_BUILTIN_PMAXSD128_MASK,
29335 IX86_BUILTIN_PMINSD128_MASK,
29336 IX86_BUILTIN_PMAXUD128_MASK,
29337 IX86_BUILTIN_PMINUD128_MASK,
29338 IX86_BUILTIN_PMAXSQ256_MASK,
29339 IX86_BUILTIN_PMINSQ256_MASK,
29340 IX86_BUILTIN_PMAXUQ256_MASK,
29341 IX86_BUILTIN_PMINUQ256_MASK,
29342 IX86_BUILTIN_PMAXSQ128_MASK,
29343 IX86_BUILTIN_PMINSQ128_MASK,
29344 IX86_BUILTIN_PMAXUQ128_MASK,
29345 IX86_BUILTIN_PMINUQ128_MASK,
29346 IX86_BUILTIN_PMINSB256_MASK,
29347 IX86_BUILTIN_PMINUB256_MASK,
29348 IX86_BUILTIN_PMAXSB256_MASK,
29349 IX86_BUILTIN_PMAXUB256_MASK,
29350 IX86_BUILTIN_PMINSB128_MASK,
29351 IX86_BUILTIN_PMINUB128_MASK,
29352 IX86_BUILTIN_PMAXSB128_MASK,
29353 IX86_BUILTIN_PMAXUB128_MASK,
29354 IX86_BUILTIN_PMINSW256_MASK,
29355 IX86_BUILTIN_PMINUW256_MASK,
29356 IX86_BUILTIN_PMAXSW256_MASK,
29357 IX86_BUILTIN_PMAXUW256_MASK,
29358 IX86_BUILTIN_PMINSW128_MASK,
29359 IX86_BUILTIN_PMINUW128_MASK,
29360 IX86_BUILTIN_PMAXSW128_MASK,
29361 IX86_BUILTIN_PMAXUW128_MASK,
29362 IX86_BUILTIN_VPCONFLICTQ256,
29363 IX86_BUILTIN_VPCONFLICTD256,
29364 IX86_BUILTIN_VPCLZCNTQ256,
29365 IX86_BUILTIN_VPCLZCNTD256,
29366 IX86_BUILTIN_UNPCKHPD256_MASK,
29367 IX86_BUILTIN_UNPCKHPD128_MASK,
29368 IX86_BUILTIN_UNPCKHPS256_MASK,
29369 IX86_BUILTIN_UNPCKHPS128_MASK,
29370 IX86_BUILTIN_UNPCKLPD256_MASK,
29371 IX86_BUILTIN_UNPCKLPD128_MASK,
29372 IX86_BUILTIN_UNPCKLPS256_MASK,
29373 IX86_BUILTIN_VPCONFLICTQ128,
29374 IX86_BUILTIN_VPCONFLICTD128,
29375 IX86_BUILTIN_VPCLZCNTQ128,
29376 IX86_BUILTIN_VPCLZCNTD128,
29377 IX86_BUILTIN_UNPCKLPS128_MASK,
29378 IX86_BUILTIN_ALIGND256,
29379 IX86_BUILTIN_ALIGNQ256,
29380 IX86_BUILTIN_ALIGND128,
29381 IX86_BUILTIN_ALIGNQ128,
29382 IX86_BUILTIN_CVTPS2PH256_MASK,
29383 IX86_BUILTIN_CVTPS2PH_MASK,
29384 IX86_BUILTIN_CVTPH2PS_MASK,
29385 IX86_BUILTIN_CVTPH2PS256_MASK,
29386 IX86_BUILTIN_PUNPCKHDQ128_MASK,
29387 IX86_BUILTIN_PUNPCKHDQ256_MASK,
29388 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
29389 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
29390 IX86_BUILTIN_PUNPCKLDQ128_MASK,
29391 IX86_BUILTIN_PUNPCKLDQ256_MASK,
29392 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
29393 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
29394 IX86_BUILTIN_PUNPCKHBW128_MASK,
29395 IX86_BUILTIN_PUNPCKHBW256_MASK,
29396 IX86_BUILTIN_PUNPCKHWD128_MASK,
29397 IX86_BUILTIN_PUNPCKHWD256_MASK,
29398 IX86_BUILTIN_PUNPCKLBW128_MASK,
29399 IX86_BUILTIN_PUNPCKLBW256_MASK,
29400 IX86_BUILTIN_PUNPCKLWD128_MASK,
29401 IX86_BUILTIN_PUNPCKLWD256_MASK,
29402 IX86_BUILTIN_PSLLVV16HI,
29403 IX86_BUILTIN_PSLLVV8HI,
29404 IX86_BUILTIN_PACKSSDW256_MASK,
29405 IX86_BUILTIN_PACKSSDW128_MASK,
29406 IX86_BUILTIN_PACKUSDW256_MASK,
29407 IX86_BUILTIN_PACKUSDW128_MASK,
29408 IX86_BUILTIN_PAVGB256_MASK,
29409 IX86_BUILTIN_PAVGW256_MASK,
29410 IX86_BUILTIN_PAVGB128_MASK,
29411 IX86_BUILTIN_PAVGW128_MASK,
29412 IX86_BUILTIN_VPERMVARSF256_MASK,
29413 IX86_BUILTIN_VPERMVARDF256_MASK,
29414 IX86_BUILTIN_VPERMDF256_MASK,
29415 IX86_BUILTIN_PABSB256_MASK,
29416 IX86_BUILTIN_PABSB128_MASK,
29417 IX86_BUILTIN_PABSW256_MASK,
29418 IX86_BUILTIN_PABSW128_MASK,
29419 IX86_BUILTIN_VPERMILVARPD_MASK,
29420 IX86_BUILTIN_VPERMILVARPS_MASK,
29421 IX86_BUILTIN_VPERMILVARPD256_MASK,
29422 IX86_BUILTIN_VPERMILVARPS256_MASK,
29423 IX86_BUILTIN_VPERMILPD_MASK,
29424 IX86_BUILTIN_VPERMILPS_MASK,
29425 IX86_BUILTIN_VPERMILPD256_MASK,
29426 IX86_BUILTIN_VPERMILPS256_MASK,
29427 IX86_BUILTIN_BLENDMQ256,
29428 IX86_BUILTIN_BLENDMD256,
29429 IX86_BUILTIN_BLENDMPD256,
29430 IX86_BUILTIN_BLENDMPS256,
29431 IX86_BUILTIN_BLENDMQ128,
29432 IX86_BUILTIN_BLENDMD128,
29433 IX86_BUILTIN_BLENDMPD128,
29434 IX86_BUILTIN_BLENDMPS128,
29435 IX86_BUILTIN_BLENDMW256,
29436 IX86_BUILTIN_BLENDMB256,
29437 IX86_BUILTIN_BLENDMW128,
29438 IX86_BUILTIN_BLENDMB128,
29439 IX86_BUILTIN_PMULLD256_MASK,
29440 IX86_BUILTIN_PMULLD128_MASK,
29441 IX86_BUILTIN_PMULUDQ256_MASK,
29442 IX86_BUILTIN_PMULDQ256_MASK,
29443 IX86_BUILTIN_PMULDQ128_MASK,
29444 IX86_BUILTIN_PMULUDQ128_MASK,
29445 IX86_BUILTIN_CVTPD2PS256_MASK,
29446 IX86_BUILTIN_CVTPD2PS_MASK,
29447 IX86_BUILTIN_VPERMVARSI256_MASK,
29448 IX86_BUILTIN_VPERMVARDI256_MASK,
29449 IX86_BUILTIN_VPERMDI256_MASK,
29450 IX86_BUILTIN_CMPQ256,
29451 IX86_BUILTIN_CMPD256,
29452 IX86_BUILTIN_UCMPQ256,
29453 IX86_BUILTIN_UCMPD256,
29454 IX86_BUILTIN_CMPB256,
29455 IX86_BUILTIN_CMPW256,
29456 IX86_BUILTIN_UCMPB256,
29457 IX86_BUILTIN_UCMPW256,
29458 IX86_BUILTIN_CMPPD256_MASK,
29459 IX86_BUILTIN_CMPPS256_MASK,
29460 IX86_BUILTIN_CMPQ128,
29461 IX86_BUILTIN_CMPD128,
29462 IX86_BUILTIN_UCMPQ128,
29463 IX86_BUILTIN_UCMPD128,
29464 IX86_BUILTIN_CMPB128,
29465 IX86_BUILTIN_CMPW128,
29466 IX86_BUILTIN_UCMPB128,
29467 IX86_BUILTIN_UCMPW128,
29468 IX86_BUILTIN_CMPPD128_MASK,
29469 IX86_BUILTIN_CMPPS128_MASK,
29470
29471 IX86_BUILTIN_GATHER3SIV8SF,
29472 IX86_BUILTIN_GATHER3SIV4SF,
29473 IX86_BUILTIN_GATHER3SIV4DF,
29474 IX86_BUILTIN_GATHER3SIV2DF,
29475 IX86_BUILTIN_GATHER3DIV8SF,
29476 IX86_BUILTIN_GATHER3DIV4SF,
29477 IX86_BUILTIN_GATHER3DIV4DF,
29478 IX86_BUILTIN_GATHER3DIV2DF,
29479 IX86_BUILTIN_GATHER3SIV8SI,
29480 IX86_BUILTIN_GATHER3SIV4SI,
29481 IX86_BUILTIN_GATHER3SIV4DI,
29482 IX86_BUILTIN_GATHER3SIV2DI,
29483 IX86_BUILTIN_GATHER3DIV8SI,
29484 IX86_BUILTIN_GATHER3DIV4SI,
29485 IX86_BUILTIN_GATHER3DIV4DI,
29486 IX86_BUILTIN_GATHER3DIV2DI,
29487 IX86_BUILTIN_SCATTERSIV8SF,
29488 IX86_BUILTIN_SCATTERSIV4SF,
29489 IX86_BUILTIN_SCATTERSIV4DF,
29490 IX86_BUILTIN_SCATTERSIV2DF,
29491 IX86_BUILTIN_SCATTERDIV8SF,
29492 IX86_BUILTIN_SCATTERDIV4SF,
29493 IX86_BUILTIN_SCATTERDIV4DF,
29494 IX86_BUILTIN_SCATTERDIV2DF,
29495 IX86_BUILTIN_SCATTERSIV8SI,
29496 IX86_BUILTIN_SCATTERSIV4SI,
29497 IX86_BUILTIN_SCATTERSIV4DI,
29498 IX86_BUILTIN_SCATTERSIV2DI,
29499 IX86_BUILTIN_SCATTERDIV8SI,
29500 IX86_BUILTIN_SCATTERDIV4SI,
29501 IX86_BUILTIN_SCATTERDIV4DI,
29502 IX86_BUILTIN_SCATTERDIV2DI,
29503
29504 /* AVX512DQ. */
29505 IX86_BUILTIN_RANGESD128,
29506 IX86_BUILTIN_RANGESS128,
29507 IX86_BUILTIN_KUNPCKWD,
29508 IX86_BUILTIN_KUNPCKDQ,
29509 IX86_BUILTIN_BROADCASTF32x2_512,
29510 IX86_BUILTIN_BROADCASTI32x2_512,
29511 IX86_BUILTIN_BROADCASTF64X2_512,
29512 IX86_BUILTIN_BROADCASTI64X2_512,
29513 IX86_BUILTIN_BROADCASTF32X8_512,
29514 IX86_BUILTIN_BROADCASTI32X8_512,
29515 IX86_BUILTIN_EXTRACTF64X2_512,
29516 IX86_BUILTIN_EXTRACTF32X8,
29517 IX86_BUILTIN_EXTRACTI64X2_512,
29518 IX86_BUILTIN_EXTRACTI32X8,
29519 IX86_BUILTIN_REDUCEPD512_MASK,
29520 IX86_BUILTIN_REDUCEPS512_MASK,
29521 IX86_BUILTIN_PMULLQ512,
29522 IX86_BUILTIN_XORPD512,
29523 IX86_BUILTIN_XORPS512,
29524 IX86_BUILTIN_ORPD512,
29525 IX86_BUILTIN_ORPS512,
29526 IX86_BUILTIN_ANDPD512,
29527 IX86_BUILTIN_ANDPS512,
29528 IX86_BUILTIN_ANDNPD512,
29529 IX86_BUILTIN_ANDNPS512,
29530 IX86_BUILTIN_INSERTF32X8,
29531 IX86_BUILTIN_INSERTI32X8,
29532 IX86_BUILTIN_INSERTF64X2_512,
29533 IX86_BUILTIN_INSERTI64X2_512,
29534 IX86_BUILTIN_FPCLASSPD512,
29535 IX86_BUILTIN_FPCLASSPS512,
29536 IX86_BUILTIN_CVTD2MASK512,
29537 IX86_BUILTIN_CVTQ2MASK512,
29538 IX86_BUILTIN_CVTMASK2D512,
29539 IX86_BUILTIN_CVTMASK2Q512,
29540 IX86_BUILTIN_CVTPD2QQ512,
29541 IX86_BUILTIN_CVTPS2QQ512,
29542 IX86_BUILTIN_CVTPD2UQQ512,
29543 IX86_BUILTIN_CVTPS2UQQ512,
29544 IX86_BUILTIN_CVTQQ2PS512,
29545 IX86_BUILTIN_CVTUQQ2PS512,
29546 IX86_BUILTIN_CVTQQ2PD512,
29547 IX86_BUILTIN_CVTUQQ2PD512,
29548 IX86_BUILTIN_CVTTPS2QQ512,
29549 IX86_BUILTIN_CVTTPS2UQQ512,
29550 IX86_BUILTIN_CVTTPD2QQ512,
29551 IX86_BUILTIN_CVTTPD2UQQ512,
29552 IX86_BUILTIN_RANGEPS512,
29553 IX86_BUILTIN_RANGEPD512,
29554
29555 /* AVX512BW. */
29556 IX86_BUILTIN_PACKUSDW512,
29557 IX86_BUILTIN_PACKSSDW512,
29558 IX86_BUILTIN_LOADDQUHI512_MASK,
29559 IX86_BUILTIN_LOADDQUQI512_MASK,
29560 IX86_BUILTIN_PSLLDQ512,
29561 IX86_BUILTIN_PSRLDQ512,
29562 IX86_BUILTIN_STOREDQUHI512_MASK,
29563 IX86_BUILTIN_STOREDQUQI512_MASK,
29564 IX86_BUILTIN_PALIGNR512,
29565 IX86_BUILTIN_PALIGNR512_MASK,
29566 IX86_BUILTIN_MOVDQUHI512_MASK,
29567 IX86_BUILTIN_MOVDQUQI512_MASK,
29568 IX86_BUILTIN_PSADBW512,
29569 IX86_BUILTIN_DBPSADBW512,
29570 IX86_BUILTIN_PBROADCASTB512,
29571 IX86_BUILTIN_PBROADCASTB512_GPR,
29572 IX86_BUILTIN_PBROADCASTW512,
29573 IX86_BUILTIN_PBROADCASTW512_GPR,
29574 IX86_BUILTIN_PMOVSXBW512_MASK,
29575 IX86_BUILTIN_PMOVZXBW512_MASK,
29576 IX86_BUILTIN_VPERMVARHI512_MASK,
29577 IX86_BUILTIN_VPERMT2VARHI512,
29578 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
29579 IX86_BUILTIN_VPERMI2VARHI512,
29580 IX86_BUILTIN_PAVGB512,
29581 IX86_BUILTIN_PAVGW512,
29582 IX86_BUILTIN_PADDB512,
29583 IX86_BUILTIN_PSUBB512,
29584 IX86_BUILTIN_PSUBSB512,
29585 IX86_BUILTIN_PADDSB512,
29586 IX86_BUILTIN_PSUBUSB512,
29587 IX86_BUILTIN_PADDUSB512,
29588 IX86_BUILTIN_PSUBW512,
29589 IX86_BUILTIN_PADDW512,
29590 IX86_BUILTIN_PSUBSW512,
29591 IX86_BUILTIN_PADDSW512,
29592 IX86_BUILTIN_PSUBUSW512,
29593 IX86_BUILTIN_PADDUSW512,
29594 IX86_BUILTIN_PMAXUW512,
29595 IX86_BUILTIN_PMAXSW512,
29596 IX86_BUILTIN_PMINUW512,
29597 IX86_BUILTIN_PMINSW512,
29598 IX86_BUILTIN_PMAXUB512,
29599 IX86_BUILTIN_PMAXSB512,
29600 IX86_BUILTIN_PMINUB512,
29601 IX86_BUILTIN_PMINSB512,
29602 IX86_BUILTIN_PMOVWB512,
29603 IX86_BUILTIN_PMOVSWB512,
29604 IX86_BUILTIN_PMOVUSWB512,
29605 IX86_BUILTIN_PMULHRSW512_MASK,
29606 IX86_BUILTIN_PMULHUW512_MASK,
29607 IX86_BUILTIN_PMULHW512_MASK,
29608 IX86_BUILTIN_PMULLW512_MASK,
29609 IX86_BUILTIN_PSLLWI512_MASK,
29610 IX86_BUILTIN_PSLLW512_MASK,
29611 IX86_BUILTIN_PACKSSWB512,
29612 IX86_BUILTIN_PACKUSWB512,
29613 IX86_BUILTIN_PSRAVV32HI,
29614 IX86_BUILTIN_PMADDUBSW512_MASK,
29615 IX86_BUILTIN_PMADDWD512_MASK,
29616 IX86_BUILTIN_PSRLVV32HI,
29617 IX86_BUILTIN_PUNPCKHBW512,
29618 IX86_BUILTIN_PUNPCKHWD512,
29619 IX86_BUILTIN_PUNPCKLBW512,
29620 IX86_BUILTIN_PUNPCKLWD512,
29621 IX86_BUILTIN_PSHUFB512,
29622 IX86_BUILTIN_PSHUFHW512,
29623 IX86_BUILTIN_PSHUFLW512,
29624 IX86_BUILTIN_PSRAWI512,
29625 IX86_BUILTIN_PSRAW512,
29626 IX86_BUILTIN_PSRLWI512,
29627 IX86_BUILTIN_PSRLW512,
29628 IX86_BUILTIN_CVTB2MASK512,
29629 IX86_BUILTIN_CVTW2MASK512,
29630 IX86_BUILTIN_CVTMASK2B512,
29631 IX86_BUILTIN_CVTMASK2W512,
29632 IX86_BUILTIN_PCMPEQB512_MASK,
29633 IX86_BUILTIN_PCMPEQW512_MASK,
29634 IX86_BUILTIN_PCMPGTB512_MASK,
29635 IX86_BUILTIN_PCMPGTW512_MASK,
29636 IX86_BUILTIN_PTESTMB512,
29637 IX86_BUILTIN_PTESTMW512,
29638 IX86_BUILTIN_PTESTNMB512,
29639 IX86_BUILTIN_PTESTNMW512,
29640 IX86_BUILTIN_PSLLVV32HI,
29641 IX86_BUILTIN_PABSB512,
29642 IX86_BUILTIN_PABSW512,
29643 IX86_BUILTIN_BLENDMW512,
29644 IX86_BUILTIN_BLENDMB512,
29645 IX86_BUILTIN_CMPB512,
29646 IX86_BUILTIN_CMPW512,
29647 IX86_BUILTIN_UCMPB512,
29648 IX86_BUILTIN_UCMPW512,
29649
29650 /* Alternate 4 and 8 element gather/scatter for the vectorizer
29651 where all operands are 32-byte or 64-byte wide respectively. */
29652 IX86_BUILTIN_GATHERALTSIV4DF,
29653 IX86_BUILTIN_GATHERALTDIV8SF,
29654 IX86_BUILTIN_GATHERALTSIV4DI,
29655 IX86_BUILTIN_GATHERALTDIV8SI,
29656 IX86_BUILTIN_GATHER3ALTDIV16SF,
29657 IX86_BUILTIN_GATHER3ALTDIV16SI,
29658 IX86_BUILTIN_GATHER3ALTSIV4DF,
29659 IX86_BUILTIN_GATHER3ALTDIV8SF,
29660 IX86_BUILTIN_GATHER3ALTSIV4DI,
29661 IX86_BUILTIN_GATHER3ALTDIV8SI,
29662 IX86_BUILTIN_GATHER3ALTSIV8DF,
29663 IX86_BUILTIN_GATHER3ALTSIV8DI,
29664 IX86_BUILTIN_GATHER3DIV16SF,
29665 IX86_BUILTIN_GATHER3DIV16SI,
29666 IX86_BUILTIN_GATHER3DIV8DF,
29667 IX86_BUILTIN_GATHER3DIV8DI,
29668 IX86_BUILTIN_GATHER3SIV16SF,
29669 IX86_BUILTIN_GATHER3SIV16SI,
29670 IX86_BUILTIN_GATHER3SIV8DF,
29671 IX86_BUILTIN_GATHER3SIV8DI,
29672 IX86_BUILTIN_SCATTERDIV16SF,
29673 IX86_BUILTIN_SCATTERDIV16SI,
29674 IX86_BUILTIN_SCATTERDIV8DF,
29675 IX86_BUILTIN_SCATTERDIV8DI,
29676 IX86_BUILTIN_SCATTERSIV16SF,
29677 IX86_BUILTIN_SCATTERSIV16SI,
29678 IX86_BUILTIN_SCATTERSIV8DF,
29679 IX86_BUILTIN_SCATTERSIV8DI,
29680
29681 /* AVX512PF */
29682 IX86_BUILTIN_GATHERPFQPD,
29683 IX86_BUILTIN_GATHERPFDPS,
29684 IX86_BUILTIN_GATHERPFDPD,
29685 IX86_BUILTIN_GATHERPFQPS,
29686 IX86_BUILTIN_SCATTERPFDPD,
29687 IX86_BUILTIN_SCATTERPFDPS,
29688 IX86_BUILTIN_SCATTERPFQPD,
29689 IX86_BUILTIN_SCATTERPFQPS,
29690
29691 /* AVX-512ER */
29692 IX86_BUILTIN_EXP2PD_MASK,
29693 IX86_BUILTIN_EXP2PS_MASK,
29694 IX86_BUILTIN_EXP2PS,
29695 IX86_BUILTIN_RCP28PD,
29696 IX86_BUILTIN_RCP28PS,
29697 IX86_BUILTIN_RCP28SD,
29698 IX86_BUILTIN_RCP28SS,
29699 IX86_BUILTIN_RSQRT28PD,
29700 IX86_BUILTIN_RSQRT28PS,
29701 IX86_BUILTIN_RSQRT28SD,
29702 IX86_BUILTIN_RSQRT28SS,
29703
29704 /* SHA builtins. */
29705 IX86_BUILTIN_SHA1MSG1,
29706 IX86_BUILTIN_SHA1MSG2,
29707 IX86_BUILTIN_SHA1NEXTE,
29708 IX86_BUILTIN_SHA1RNDS4,
29709 IX86_BUILTIN_SHA256MSG1,
29710 IX86_BUILTIN_SHA256MSG2,
29711 IX86_BUILTIN_SHA256RNDS2,
29712
29713 /* CLFLUSHOPT instructions. */
29714 IX86_BUILTIN_CLFLUSHOPT,
29715
29716 /* TFmode support builtins. */
29717 IX86_BUILTIN_INFQ,
29718 IX86_BUILTIN_HUGE_VALQ,
29719 IX86_BUILTIN_FABSQ,
29720 IX86_BUILTIN_COPYSIGNQ,
29721
29722 /* Vectorizer support builtins. */
29723 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
29724 IX86_BUILTIN_CPYSGNPS,
29725 IX86_BUILTIN_CPYSGNPD,
29726 IX86_BUILTIN_CPYSGNPS256,
29727 IX86_BUILTIN_CPYSGNPS512,
29728 IX86_BUILTIN_CPYSGNPD256,
29729 IX86_BUILTIN_CPYSGNPD512,
29730 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
29731 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
29732
29733
29734 /* FMA4 instructions. */
29735 IX86_BUILTIN_VFMADDSS,
29736 IX86_BUILTIN_VFMADDSD,
29737 IX86_BUILTIN_VFMADDPS,
29738 IX86_BUILTIN_VFMADDPD,
29739 IX86_BUILTIN_VFMADDPS256,
29740 IX86_BUILTIN_VFMADDPD256,
29741 IX86_BUILTIN_VFMADDSUBPS,
29742 IX86_BUILTIN_VFMADDSUBPD,
29743 IX86_BUILTIN_VFMADDSUBPS256,
29744 IX86_BUILTIN_VFMADDSUBPD256,
29745
29746 /* FMA3 instructions. */
29747 IX86_BUILTIN_VFMADDSS3,
29748 IX86_BUILTIN_VFMADDSD3,
29749
29750 /* XOP instructions. */
29751 IX86_BUILTIN_VPCMOV,
29752 IX86_BUILTIN_VPCMOV_V2DI,
29753 IX86_BUILTIN_VPCMOV_V4SI,
29754 IX86_BUILTIN_VPCMOV_V8HI,
29755 IX86_BUILTIN_VPCMOV_V16QI,
29756 IX86_BUILTIN_VPCMOV_V4SF,
29757 IX86_BUILTIN_VPCMOV_V2DF,
29758 IX86_BUILTIN_VPCMOV256,
29759 IX86_BUILTIN_VPCMOV_V4DI256,
29760 IX86_BUILTIN_VPCMOV_V8SI256,
29761 IX86_BUILTIN_VPCMOV_V16HI256,
29762 IX86_BUILTIN_VPCMOV_V32QI256,
29763 IX86_BUILTIN_VPCMOV_V8SF256,
29764 IX86_BUILTIN_VPCMOV_V4DF256,
29765
29766 IX86_BUILTIN_VPPERM,
29767
29768 IX86_BUILTIN_VPMACSSWW,
29769 IX86_BUILTIN_VPMACSWW,
29770 IX86_BUILTIN_VPMACSSWD,
29771 IX86_BUILTIN_VPMACSWD,
29772 IX86_BUILTIN_VPMACSSDD,
29773 IX86_BUILTIN_VPMACSDD,
29774 IX86_BUILTIN_VPMACSSDQL,
29775 IX86_BUILTIN_VPMACSSDQH,
29776 IX86_BUILTIN_VPMACSDQL,
29777 IX86_BUILTIN_VPMACSDQH,
29778 IX86_BUILTIN_VPMADCSSWD,
29779 IX86_BUILTIN_VPMADCSWD,
29780
29781 IX86_BUILTIN_VPHADDBW,
29782 IX86_BUILTIN_VPHADDBD,
29783 IX86_BUILTIN_VPHADDBQ,
29784 IX86_BUILTIN_VPHADDWD,
29785 IX86_BUILTIN_VPHADDWQ,
29786 IX86_BUILTIN_VPHADDDQ,
29787 IX86_BUILTIN_VPHADDUBW,
29788 IX86_BUILTIN_VPHADDUBD,
29789 IX86_BUILTIN_VPHADDUBQ,
29790 IX86_BUILTIN_VPHADDUWD,
29791 IX86_BUILTIN_VPHADDUWQ,
29792 IX86_BUILTIN_VPHADDUDQ,
29793 IX86_BUILTIN_VPHSUBBW,
29794 IX86_BUILTIN_VPHSUBWD,
29795 IX86_BUILTIN_VPHSUBDQ,
29796
29797 IX86_BUILTIN_VPROTB,
29798 IX86_BUILTIN_VPROTW,
29799 IX86_BUILTIN_VPROTD,
29800 IX86_BUILTIN_VPROTQ,
29801 IX86_BUILTIN_VPROTB_IMM,
29802 IX86_BUILTIN_VPROTW_IMM,
29803 IX86_BUILTIN_VPROTD_IMM,
29804 IX86_BUILTIN_VPROTQ_IMM,
29805
29806 IX86_BUILTIN_VPSHLB,
29807 IX86_BUILTIN_VPSHLW,
29808 IX86_BUILTIN_VPSHLD,
29809 IX86_BUILTIN_VPSHLQ,
29810 IX86_BUILTIN_VPSHAB,
29811 IX86_BUILTIN_VPSHAW,
29812 IX86_BUILTIN_VPSHAD,
29813 IX86_BUILTIN_VPSHAQ,
29814
29815 IX86_BUILTIN_VFRCZSS,
29816 IX86_BUILTIN_VFRCZSD,
29817 IX86_BUILTIN_VFRCZPS,
29818 IX86_BUILTIN_VFRCZPD,
29819 IX86_BUILTIN_VFRCZPS256,
29820 IX86_BUILTIN_VFRCZPD256,
29821
29822 IX86_BUILTIN_VPCOMEQUB,
29823 IX86_BUILTIN_VPCOMNEUB,
29824 IX86_BUILTIN_VPCOMLTUB,
29825 IX86_BUILTIN_VPCOMLEUB,
29826 IX86_BUILTIN_VPCOMGTUB,
29827 IX86_BUILTIN_VPCOMGEUB,
29828 IX86_BUILTIN_VPCOMFALSEUB,
29829 IX86_BUILTIN_VPCOMTRUEUB,
29830
29831 IX86_BUILTIN_VPCOMEQUW,
29832 IX86_BUILTIN_VPCOMNEUW,
29833 IX86_BUILTIN_VPCOMLTUW,
29834 IX86_BUILTIN_VPCOMLEUW,
29835 IX86_BUILTIN_VPCOMGTUW,
29836 IX86_BUILTIN_VPCOMGEUW,
29837 IX86_BUILTIN_VPCOMFALSEUW,
29838 IX86_BUILTIN_VPCOMTRUEUW,
29839
29840 IX86_BUILTIN_VPCOMEQUD,
29841 IX86_BUILTIN_VPCOMNEUD,
29842 IX86_BUILTIN_VPCOMLTUD,
29843 IX86_BUILTIN_VPCOMLEUD,
29844 IX86_BUILTIN_VPCOMGTUD,
29845 IX86_BUILTIN_VPCOMGEUD,
29846 IX86_BUILTIN_VPCOMFALSEUD,
29847 IX86_BUILTIN_VPCOMTRUEUD,
29848
29849 IX86_BUILTIN_VPCOMEQUQ,
29850 IX86_BUILTIN_VPCOMNEUQ,
29851 IX86_BUILTIN_VPCOMLTUQ,
29852 IX86_BUILTIN_VPCOMLEUQ,
29853 IX86_BUILTIN_VPCOMGTUQ,
29854 IX86_BUILTIN_VPCOMGEUQ,
29855 IX86_BUILTIN_VPCOMFALSEUQ,
29856 IX86_BUILTIN_VPCOMTRUEUQ,
29857
29858 IX86_BUILTIN_VPCOMEQB,
29859 IX86_BUILTIN_VPCOMNEB,
29860 IX86_BUILTIN_VPCOMLTB,
29861 IX86_BUILTIN_VPCOMLEB,
29862 IX86_BUILTIN_VPCOMGTB,
29863 IX86_BUILTIN_VPCOMGEB,
29864 IX86_BUILTIN_VPCOMFALSEB,
29865 IX86_BUILTIN_VPCOMTRUEB,
29866
29867 IX86_BUILTIN_VPCOMEQW,
29868 IX86_BUILTIN_VPCOMNEW,
29869 IX86_BUILTIN_VPCOMLTW,
29870 IX86_BUILTIN_VPCOMLEW,
29871 IX86_BUILTIN_VPCOMGTW,
29872 IX86_BUILTIN_VPCOMGEW,
29873 IX86_BUILTIN_VPCOMFALSEW,
29874 IX86_BUILTIN_VPCOMTRUEW,
29875
29876 IX86_BUILTIN_VPCOMEQD,
29877 IX86_BUILTIN_VPCOMNED,
29878 IX86_BUILTIN_VPCOMLTD,
29879 IX86_BUILTIN_VPCOMLED,
29880 IX86_BUILTIN_VPCOMGTD,
29881 IX86_BUILTIN_VPCOMGED,
29882 IX86_BUILTIN_VPCOMFALSED,
29883 IX86_BUILTIN_VPCOMTRUED,
29884
29885 IX86_BUILTIN_VPCOMEQQ,
29886 IX86_BUILTIN_VPCOMNEQ,
29887 IX86_BUILTIN_VPCOMLTQ,
29888 IX86_BUILTIN_VPCOMLEQ,
29889 IX86_BUILTIN_VPCOMGTQ,
29890 IX86_BUILTIN_VPCOMGEQ,
29891 IX86_BUILTIN_VPCOMFALSEQ,
29892 IX86_BUILTIN_VPCOMTRUEQ,
29893
29894 /* LWP instructions. */
29895 IX86_BUILTIN_LLWPCB,
29896 IX86_BUILTIN_SLWPCB,
29897 IX86_BUILTIN_LWPVAL32,
29898 IX86_BUILTIN_LWPVAL64,
29899 IX86_BUILTIN_LWPINS32,
29900 IX86_BUILTIN_LWPINS64,
29901
29902 IX86_BUILTIN_CLZS,
29903
29904 /* RTM */
29905 IX86_BUILTIN_XBEGIN,
29906 IX86_BUILTIN_XEND,
29907 IX86_BUILTIN_XABORT,
29908 IX86_BUILTIN_XTEST,
29909
29910 /* BMI instructions. */
29911 IX86_BUILTIN_BEXTR32,
29912 IX86_BUILTIN_BEXTR64,
29913 IX86_BUILTIN_CTZS,
29914
29915 /* TBM instructions. */
29916 IX86_BUILTIN_BEXTRI32,
29917 IX86_BUILTIN_BEXTRI64,
29918
29919 /* BMI2 instructions. */
29920 IX86_BUILTIN_BZHI32,
29921 IX86_BUILTIN_BZHI64,
29922 IX86_BUILTIN_PDEP32,
29923 IX86_BUILTIN_PDEP64,
29924 IX86_BUILTIN_PEXT32,
29925 IX86_BUILTIN_PEXT64,
29926
29927 /* ADX instructions. */
29928 IX86_BUILTIN_ADDCARRYX32,
29929 IX86_BUILTIN_ADDCARRYX64,
29930
29931 /* SBB instructions. */
29932 IX86_BUILTIN_SBB32,
29933 IX86_BUILTIN_SBB64,
29934
29935 /* FSGSBASE instructions. */
29936 IX86_BUILTIN_RDFSBASE32,
29937 IX86_BUILTIN_RDFSBASE64,
29938 IX86_BUILTIN_RDGSBASE32,
29939 IX86_BUILTIN_RDGSBASE64,
29940 IX86_BUILTIN_WRFSBASE32,
29941 IX86_BUILTIN_WRFSBASE64,
29942 IX86_BUILTIN_WRGSBASE32,
29943 IX86_BUILTIN_WRGSBASE64,
29944
29945 /* RDRND instructions. */
29946 IX86_BUILTIN_RDRAND16_STEP,
29947 IX86_BUILTIN_RDRAND32_STEP,
29948 IX86_BUILTIN_RDRAND64_STEP,
29949
29950 /* RDSEED instructions. */
29951 IX86_BUILTIN_RDSEED16_STEP,
29952 IX86_BUILTIN_RDSEED32_STEP,
29953 IX86_BUILTIN_RDSEED64_STEP,
29954
29955 /* F16C instructions. */
29956 IX86_BUILTIN_CVTPH2PS,
29957 IX86_BUILTIN_CVTPH2PS256,
29958 IX86_BUILTIN_CVTPS2PH,
29959 IX86_BUILTIN_CVTPS2PH256,
29960
29961 /* CFString built-in for darwin */
29962 IX86_BUILTIN_CFSTRING,
29963
29964 /* Builtins to get CPU type and supported features. */
29965 IX86_BUILTIN_CPU_INIT,
29966 IX86_BUILTIN_CPU_IS,
29967 IX86_BUILTIN_CPU_SUPPORTS,
29968
29969 /* Read/write FLAGS register built-ins. */
29970 IX86_BUILTIN_READ_FLAGS,
29971 IX86_BUILTIN_WRITE_FLAGS,
29972
29973 IX86_BUILTIN_MAX
29974 };
29975
29976 /* Table for the ix86 builtin decls. */
29977 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
29978
29979 /* Table of all of the builtin functions that are possible with different ISAs
29980 but are waiting to be built until a function is declared to use that
29981 ISA. */
29982 struct builtin_isa {
29983 const char *name; /* function name */
29984 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
29985 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
29986 bool const_p; /* true if the declaration is constant */
29987 bool set_and_not_built_p;
29988 };
29989
29990 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
29991
29992
29993 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Saves the
29994 MASK of isa_flags to use in the ix86_builtins_isa array.  Stores the
29995 function decl in the ix86_builtins array.  Returns the function decl, or
29996 NULL_TREE if the builtin was not added.
29997
29998 If the front end has a special hook for builtin functions, delay adding
29999 builtin functions that aren't in the current ISA until the ISA is changed
30000 with function-specific optimization.  Doing so can save about 300K for the
30001 default compiler. When the builtin is expanded, check at that time whether
30002 it is valid.
30003
30004 If the front end doesn't have a special hook, record all builtins, even if
30005 they aren't in the current ISA, in case the user uses function-specific
30006 options for a different ISA, so that we don't get scope errors if a builtin
30007 is added in the middle of a function scope. */
30008
30009 static inline tree
30010 def_builtin (HOST_WIDE_INT mask, const char *name,
30011 enum ix86_builtin_func_type tcode,
30012 enum ix86_builtins code)
30013 {
30014 tree decl = NULL_TREE;
30015
30016 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30017 {
30018 ix86_builtins_isa[(int) code].isa = mask;
30019
30020 mask &= ~OPTION_MASK_ISA_64BIT;
30021 if (mask == 0
30022 || (mask & ix86_isa_flags) != 0
30023 || (lang_hooks.builtin_function
30024 == lang_hooks.builtin_function_ext_scope))
30026 {
30027 tree type = ix86_get_builtin_func_type (tcode);
30028 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30029 NULL, NULL_TREE);
30030 ix86_builtins[(int) code] = decl;
30031 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30032 }
30033 else
30034 {
30035 ix86_builtins[(int) code] = NULL_TREE;
30036 ix86_builtins_isa[(int) code].tcode = tcode;
30037 ix86_builtins_isa[(int) code].name = name;
30038 ix86_builtins_isa[(int) code].const_p = false;
30039 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30040 }
30041 }
30042
30043 return decl;
30044 }
30045
30046 /* Like def_builtin, but also marks the function decl "const". */
30047
30048 static inline tree
30049 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30050 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30051 {
30052 tree decl = def_builtin (mask, name, tcode, code);
30053 if (decl)
30054 TREE_READONLY (decl) = 1;
30055 else
30056 ix86_builtins_isa[(int) code].const_p = true;
30057
30058 return decl;
30059 }
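/* Illustrative sketch (not part of the original source): a typical
   registration through these helpers looks roughly like the call below.
   The builtin name, function type and enum value are hypothetical
   placeholders; the real registrations appear later in this file.

     def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
			INT_FTYPE_VOID, IX86_BUILTIN_EXAMPLE);

   When the front end provides its own builtin_function hook and MASK is
   outside the current ix86_isa_flags, such a call is only recorded in
   ix86_builtins_isa and deferred until ix86_add_new_builtins runs.  */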
30060
30061 /* Add any new builtin functions for a given ISA that may not have been
30062 declared. This saves a bit of space compared to adding all of the
30063 declarations to the tree, even if we didn't use them. */
30064
30065 static void
30066 ix86_add_new_builtins (HOST_WIDE_INT isa)
30067 {
30068 int i;
30069
30070 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30071 {
30072 if ((ix86_builtins_isa[i].isa & isa) != 0
30073 && ix86_builtins_isa[i].set_and_not_built_p)
30074 {
30075 tree decl, type;
30076
30077 /* Don't define the builtin again. */
30078 ix86_builtins_isa[i].set_and_not_built_p = false;
30079
30080 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30081 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30082 type, i, BUILT_IN_MD, NULL,
30083 NULL_TREE);
30084
30085 ix86_builtins[i] = decl;
30086 if (ix86_builtins_isa[i].const_p)
30087 TREE_READONLY (decl) = 1;
30088 }
30089 }
30090 }
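/* Illustrative sketch (not part of the original source): when the set of
   enabled ISAs grows, e.g. while a target attribute or pragma is being
   processed, the deferred entries recorded by def_builtin above are
   materialized by passing the new flag set to ix86_add_new_builtins,
   roughly:

     ix86_add_new_builtins (ix86_isa_flags);

   Each pending entry whose isa bits intersect the new flags is then
   declared once via add_builtin_function_ext_scope and its
   set_and_not_built_p flag cleared.  */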
30091
30092 /* Bits for builtin_description.flag. */
30093
30094 /* Set when we don't support the comparison natively, and should
30095 swap the comparison operands in order to support it. */
30096 #define BUILTIN_DESC_SWAP_OPERANDS 1
30097
30098 struct builtin_description
30099 {
30100 const HOST_WIDE_INT mask;
30101 const enum insn_code icode;
30102 const char *const name;
30103 const enum ix86_builtins code;
30104 const enum rtx_code comparison;
30105 const int flag;
30106 };
30107
30108 static const struct builtin_description bdesc_comi[] =
30109 {
30110 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30111 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30112 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30113 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30114 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30115 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30116 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30117 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30118 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30119 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30120 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30121 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30122 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30123 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30124 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30125 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30126 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30127 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30128 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30129 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30130 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30131 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30132 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30133 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
30134 };
30135
30136 static const struct builtin_description bdesc_pcmpestr[] =
30137 {
30138 /* SSE4.2 */
30139 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30140 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30141 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30142 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30143 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30144 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30145 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
30146 };
30147
30148 static const struct builtin_description bdesc_pcmpistr[] =
30149 {
30150 /* SSE4.2 */
30151 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30152 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30153 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30154 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30155 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30156 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30157 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30158 };
30159
30160 /* Special builtins with variable number of arguments. */
30161 static const struct builtin_description bdesc_special_args[] =
30162 {
30163 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30164 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30165 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30166
30167 /* 80387 (for use internally for atomic compound assignment). */
30168 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30169 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30170 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30171 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30172
30173 /* MMX */
30174 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30175
30176 /* 3DNow! */
30177 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30178
30179 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30180 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30181 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30182 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30183 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30184 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30185 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30186 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30187 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30188
30189 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30190 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30191 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30192 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30193 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30194 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30195 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30196 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30197
30198 /* SSE */
30199 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30200 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30201 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30202
30203 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30204 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30205 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30206 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30207
30208 /* SSE or 3DNow!A */
30209 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30210 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
30211
30212 /* SSE2 */
30213 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30214 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30215 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30216 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
30217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30218 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
30219 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
30220 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
30221 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
30222 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30223
30224 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30225 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30226
30227 /* SSE3 */
30228 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30229
30230 /* SSE4.1 */
30231 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
30232
30233 /* SSE4A */
30234 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30235 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30236
30237 /* AVX */
30238 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
30239 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
30240
30241 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30242 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30243 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30244 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
30245 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
30246
30247 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30248 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30249 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30250 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30251 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30252 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
30253 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30254
30255 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
30256 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30257 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30258
30259 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
30260 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
30261 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
30262 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
30263 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
30264 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
30265 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
30266 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
30267
30268 /* AVX2 */
30269 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
30270 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
30271 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
30272 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
30273 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
30274 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
30275 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
30276 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
30277 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
30278
30279 /* AVX512F */
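/* In the AVX-512 entries below the trailing HI/QI operand is the write
   mask: HImode masks cover 16-element vectors, QImode masks 8-element
   ones.  The _maskz variants zero the unselected destination elements
   instead of merging with the pass-through operand.  */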
30280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
30301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
30302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
30303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
30304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30316 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30318 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30320 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30321 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30322 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30323 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30324 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30325 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30326 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30327
30328 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
30329 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
30330 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
30331 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
30332 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
30333 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
30334
30335 /* FSGSBASE */
30336 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30337 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30338 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30339 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30340 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30341 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30342 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30343 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30344
30345 /* RTM */
30346 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30347 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
30348 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
30349
30350 /* AVX512BW */
30351 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
30352 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
30353 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
30354 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
30355
30356 /* AVX512VL */
30357 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
30358 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
30359 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
30360 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
30361 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30362 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30363 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30364 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30365 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30366 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30367 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30369 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30370 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30371 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30372 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30373 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30374 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30377 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30378 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30379 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30380 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30381 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30382 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30383 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30385 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30386 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30387 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30388 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30389 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30390 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30391 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30393 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
30394 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
30395 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
30396 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
30397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30398 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30399 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30400 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30401 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30402 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30403 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30404 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30405 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30406 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30407 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30408 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30409 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30415 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30416 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30417 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30418 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30419 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30420 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30421 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30422 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30423 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30424 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30425 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30426 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30427 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30428 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30429 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30430 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30431 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30432 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30433 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30434 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30435 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30436 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30437 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30439 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30440 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30441 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30442 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30443 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30447 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30448 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30449 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30450 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30451 };
30452
30453 /* Builtins with variable number of arguments. */
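/* Each entry is { ISA option mask, insn code, builtin name, builtin
   enum, comparison rtx code, (int) function prototype }, matching
   struct builtin_description.  Entries with a null name are skipped by
   the table-driven registration; those builtins are defined explicitly
   elsewhere in this file.  */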
30454 static const struct builtin_description bdesc_args[] =
30455 {
30456 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
30457 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
30458 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
30459 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
30460 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
30461 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
30462 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
30463
30464 /* MMX */
30465 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30466 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30467 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30468 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30469 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30470 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30471
30472 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30473 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30474 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30475 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30476 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30477 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30478 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30479 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30480
30481 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30482 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30483
30484 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30485 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30486 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30487 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30488
30489 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30490 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30491 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30492 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30493 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30494 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30495
30496 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30497 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30498 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30499 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30500 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30501 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30502
30503 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
30504 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
30505 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
30506
30507 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
30508
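/* Shift builtins: the *_SI_COUNT prototypes take the shift count as an
   integer, while the vector *_COUNT prototypes take it in an MMX
   register; the expander treats the final operand as a count rather
   than a full vector operand.  */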
30509 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
30510 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
30511 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
30512 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
30513 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
30514 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
30515
30516 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
30517 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
30518 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
30519 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
30520 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
30521 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
30522
30523 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
30524 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
30525 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
30526 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
30527
30528 /* 3DNow! */
30529 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
30530 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
30531 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
30532 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
30533
30534 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30535 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30536 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30537 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
30538 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
30539 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
30540 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30541 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30542 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30543 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30544 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30545 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30546 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30547 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30548 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30549
30550 /* 3DNow!A */
30551 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
30552 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
30553 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
30554 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
30555 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30556 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30557
30558 /* SSE */
30559 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
30560 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30561 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30562 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30563 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30564 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30565 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
30566 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
30567 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
30568 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
30569 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
30570 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
30571
30572 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30573
30574 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30575 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30576 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30577 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30578 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30579 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30580 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30581 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30582
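/* Compare builtins: the fifth field supplies the comparison code for
   the maskcmp pattern, and the *_SWAP prototypes swap the two source
   operands, which is how cmpgt/cmpge are implemented via LT/LE.  */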
30583 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
30584 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
30585 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
30586 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
30587 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
30588 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
30589 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
30590 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
30591 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
30592 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
30593 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
30594 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
30595 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
30596 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
30597 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
30598 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
30599 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
30600 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
30601 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
30602 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
30603
30604 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30605 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30606 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30607 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30608
30609 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30610 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30611 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30612 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30613
30614 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30615
30616 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30617 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30618 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30619 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30620 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30621
30622 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
30623 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
30624 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
30625
30626 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
30627
30628 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
30629 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
30630 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
30631
30632 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
30633 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
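30633	 /* Rows such as the two above that carry a null name string are skipped by
30633	    the generic registration loop over this table; the corresponding
30633	    builtins are declared separately elsewhere in this file (with their own
30633	    prototypes and ISA checks), and these entries only supply the insn code
30633	    and signature used when the builtin is expanded.  */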
30634
30635 /* SSE MMX or 3Dnow!A */
30636 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30637 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30638 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30639
30640 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30641 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30642 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30643 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30644
30645 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
30646 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
30647
30648 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
30649
30650 /* SSE2 */
30651 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30652
30653 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
30654 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
30655 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
30656 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
30657 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
30658
30659 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
30660 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
30661 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
30662 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
30663 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
30664
30665 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
30666
30667 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
30668 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
30669 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
30670 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
30671
30672 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
30673 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
30674 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
30675
30676 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30677 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30678 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30679 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30680 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30681 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30682 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30683 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30684
30685 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
30686 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
30687 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
30688 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
30689   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
30690 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
30691 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
30692 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
30693 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
30694 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
30695 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
30696 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
30697 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
30698 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
30699 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
30700 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
30701 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
30702 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
30703 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
30704 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
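30704	 /* The cmpgt/cmpge/cmpngt/cmpnge rows above reuse the LT/LE/UNGE/UNGT
30704	    codes together with a *_SWAP function type: the compare patterns have
30704	    no separate greater-than predicate, so the expander exchanges the two
30704	    operands before emitting the swapped comparison.  */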
30705
30706 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30707 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30708 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30709 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30710
30711 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30712 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30713 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30714 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30715
30716 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30717
30718 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30719 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30720 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30721
30722 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
30723
30724 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30725 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30726 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30727 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30728 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30729 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30730 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30731 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30732
30733 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30734 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30735 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30736 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30737 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30738 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30739 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30740 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30741
30742 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30743   { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30744
30745 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30746 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30747 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30748 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30749
30750 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30751 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30752
30753 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30754 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30755 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30756 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30757 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30758 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30759
30760 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30761 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30762 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30763 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30764
30765 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30766 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30767 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30768 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30769 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30770 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30771 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30772 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30773
30774 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
30775 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
30776 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
30777
30778 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30779 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
30780
30781 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
30782 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
30783
30784 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
30785
30786 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
30787 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
30788 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
30789 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
30790
30791 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
30792 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
30793 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
30794 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
30795 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
30796 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
30797 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
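30797	 /* The *_COUNT function types mark shift builtins whose last argument is a
30797	    shift count, letting the same insn pattern serve both the immediate
30797	    (_SI_COUNT) and the vector-register count forms.  The pslldqi/psrldqi
30797	    rows use an _INT_CONVERT type instead, since the whole-register byte
30797	    shift is implemented with a V1TImode pattern and the V2DI operands have
30797	    to be converted to the pattern's mode first.  */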
30798
30799 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
30800 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
30801 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
30802 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
30803 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
30804 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
30805 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
30806
30807 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
30808 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
30809 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
30810 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
30811
30812 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
30813 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
30814 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
30815
30816 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
30817
30818   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
30819
30820 /* SSE2 MMX */
30821 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
30822 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
30823
30824 /* SSE3 */
30825 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
30826 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30827
30828 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30829 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30830 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30831 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30832 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30833 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30834
30835 /* SSSE3 */
30836 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
30837 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
30838 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
30839 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
30840 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
30841 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
30842
30843 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30844 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30845 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30846 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30847 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30848 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30849 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30850 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30851 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30852 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30853 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30854 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30855 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
30856 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
30857 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30858 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30859 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30860 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30861 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30862 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30863 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30864 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30865 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30866 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30867
30868 /* SSSE3. */
30869 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
30870 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
30871
30872 /* SSE4.1 */
30873 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30874 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30875 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
30876 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
30877 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30878 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30879 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30880 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
30881 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
30882 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
30883
30884 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
30885 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
30886 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
30887 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
30888 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
30889 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
30890 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
30891 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
30892 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
30893 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
30894 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
30895 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
30896 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
30897
30898 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
30899 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30900 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30901 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30902 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30903 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30904 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30905 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30906 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30907 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30908 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
30909 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30910
30911 /* SSE4.1 */
30912 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
30913 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
30914 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30915 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
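30915	 /* In the rounding rows below, the slot that normally holds a comparison
30915	    code is reused (hence the casts) to carry one of the ROUND_* constants
30915	    from i386.md; the expander turns that value into the immediate
30915	    rounding-mode operand of the roundpd/roundps patterns, so e.g.
30915	    __builtin_ia32_floorpd expands as roundpd with a floor rounding
30915	    immediate.  */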
30916
30917 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
30918 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
30919 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
30920 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
30921
30922 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
30923 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
30924
30925 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
30926 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
30927
30928 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
30929 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
30930 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
30931 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
30932
30933 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
30934 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
30935
30936 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30937 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
30938
30939 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
30940 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
30941 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
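30941	 /* For the ptest rows above (and the vtest*/ptest256 rows in the AVX
30941	    section below) the rtx code selects which condition is read back from
30941	    the flags set by the test instruction: EQ gives the ZF-based "testz"
30941	    result, LTU the CF-based "testc" result, and GTU the "testnzc" result
30941	    (neither ZF nor CF set).  */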
30942
30943 /* SSE4.2 */
30944 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30945 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
30946 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
30947 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30948 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30949
30950 /* SSE4A */
30951 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
30952 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
30953 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
30954 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30955
30956 /* AES */
30957 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
30958 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
30959
30960 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30961 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30962 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30963 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30964
30965 /* PCLMUL */
30966 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
30967
30968 /* AVX */
30969 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30970 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30971 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30972 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30973 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30974 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30975 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30976 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30977 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30978 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30979 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30980 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30981 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30982 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30983 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30984 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30985 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30986 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30987 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30988 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30989 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30990 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30991 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30992 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30993 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30994 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30995
30996 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
30997 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
30998 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
30999 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31000
31001 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31002 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31003 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31004 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31005 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31006 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31007 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31008 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31009 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31010 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31011 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31012 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31013 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31014 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31015 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31016 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31017 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31018 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31019 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31020 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31021 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31022 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31023 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31024 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31025 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31026 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31027 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31028 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31029 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31030 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31031 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31032 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31033 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31034 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31035
31036 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31037 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31038 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31039
31040 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31041 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31042 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31043 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31044 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31045
31046 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31047
31048 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31049 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31050
31051 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31052 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31053 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31054 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31055
31056 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31057 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31058
31059 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31060 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31061
31062 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31063 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31064 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31065 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31066
31067 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31068 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31069
31070 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31071 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31072
31073 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31074 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31075 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31076 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31077
31078 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31079 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31080 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31081 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31082 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31083 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31084
31085 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31086 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31087 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31088 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31089 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31090 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31091 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31092 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31093 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31094 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31095 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31096 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31097 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31098 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31099 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31100
31101 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31102 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31103
31104 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31105 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31106
31107 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31108
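  /* The descriptors below follow the same layout as the entries above
     (struct builtin_description, defined earlier in this file): the ISA
     mask that must be enabled, the insn pattern to emit, the builtin's
     external name and IX86_BUILTIN_* code, an rtx code that only a few
     entries use (comparisons, rounding modes, UNKNOWN otherwise), and the
     index of the function prototype the builtin is registered with.  */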
31109 /* AVX2 */
31110 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31111 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31112 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31113 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31114 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31115 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31116 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31117 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31118 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31119 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31120 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31121 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31122 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31123 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31124 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31125 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31126 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31127 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31128 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31129 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31130 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31131 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31132 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31133 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31134 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31135 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31136 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31137 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31138 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31139 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31140 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31141 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31142 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31143 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31144 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31145 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31146 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31147 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31148 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31149 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31150 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31151 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31152 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31153 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31154 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31155 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31156 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31157 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31158 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31159 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31160 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31161 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31162 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31163 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31164 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31165 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31166 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31167 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31168 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31169 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31170 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31171 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31172 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31173 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31174 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31175 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31176 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31177 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31178 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31179 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31180 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31181 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31182 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31183 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31184 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
31185 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31186 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31187 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31188 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31189 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31190 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31191 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31192 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31193 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31194 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31195 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31196 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31197 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31198 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31199 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31200 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31201 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31202 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31203 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31204 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31205 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31206 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31207 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31208 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31209 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31210 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31211 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31212 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31213 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31214 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31215 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31216 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31217 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31218 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31219 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31220 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31221 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31222 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31223 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31224 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31225 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31226 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31227 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31228 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31229 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31230 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31231 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
31232 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
31233 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31234 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31235 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31236 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31237 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31238 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31239 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31240 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31241 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31242 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
31243 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
31244 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
31245 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
31246 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31247 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31248 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31249 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31250 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31251 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31252 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31253 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31254 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31255 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31256
31257 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31258
31259 /* BMI */
31260 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31261 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31262 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31263
31264 /* TBM */
31265 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31266 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31267
31268 /* F16C */
31269 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
31270 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
31271 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
31272 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
31273
31274 /* BMI2 */
31275 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31276 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31277 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31278 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31279 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31280 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31281
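  /* Most AVX512F descriptors use "_mask" insn patterns: besides the normal
     operands they take a pass-through source vector and a mask register
     argument (QI for the eight 64-bit lanes, HI for the sixteen 32-bit
     lanes), which is why the prototype codes end in ..._V8DI_QI or
     ..._V16SI_HI.  The "_maskz" variants zero the unselected lanes instead
     of merging them with the source.  */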
31282 /* AVX512F */
31283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
31284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
31285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
31286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
31287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
31288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
31289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
31297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
31299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
31300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
31307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
31309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
31314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
31315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
31316 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
31317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
31318 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
31319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
31320 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
31321 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31322 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31323 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31324 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31325 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31326 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31327 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31328 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31329 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31330 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31331 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31333 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31334 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31335 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31336 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31337 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
31338 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
31339 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
31340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
31341 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31342 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31344 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31345 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31346 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31347 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31348 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31349 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31350 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31351 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31352 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31353 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31354 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31355 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31356 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31357 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31358 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31359 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31360 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31361 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31362 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31363 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31364 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31365 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31366 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31367 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31368 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31369 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31370 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31371 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31372 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31373 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31374 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31375 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31376 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31377 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31378 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31379 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31380 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31381 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31382 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31383 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31384 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31385 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31386 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31387 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31388 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31389 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31390 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31391 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31392 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31393 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31394 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31395 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31396 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31397 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31398 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31399 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31400 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31401 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31402 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31403 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31404 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31405 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31406 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31407 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31408 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31409 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31410 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31411 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31412 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31413 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31414 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31415 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31416 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31417 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31418 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31419 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31420 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31421 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31422 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31423 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31424 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31425 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31426 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31427 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31428 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31429 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31430 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31431 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31432 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31433 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31434 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31435 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31436 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31437 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31438 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31439 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31440 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31441 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31442 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31443 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31444 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31445 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31446 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31447 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31448 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31449 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31450 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31451 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31452 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31453 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31454 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31455 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31456 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31457 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31458 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31459 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31460 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31461 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
31462 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31463 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31464 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31465 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31466 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
31467 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
31468 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
31469 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
31470 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31471 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31472 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31473 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31474 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31475 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31476 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
31477 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
31478 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
31479 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
31480
31481 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
31482 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
31483 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
31484 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
31485 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
31486 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
31487 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
31488 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
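      /* For reference: the unmasked entries just above (copysign, sqrt, exp2
         and the round-and-pack variants) are chiefly consumed through
         ix86_builtin_vectorized_function rather than through <immintrin.h>
         wrappers.  A minimal sketch, assuming -mavx512f and a loop the
         vectorizer widens to V8DFmode:

             for (int i = 0; i < n; i++)
               out[i] = __builtin_sqrt (in[i]);

         for which the hook hands back IX86_BUILTIN_SQRTPD512.  */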
31489
31490 /* Mask arithmetic operations */
31491 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31492 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31493 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
31494 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31495 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31496 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31497 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
31498 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31499 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31500 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
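      /* For reference: each entry above ties a 16-bit mask-register builtin
         to its insn pattern and prototype enum (HI_FTYPE_HI_HI: an HImode
         result from two HImode mask operands); ix86_expand_args_builtin does
         the expansion.  A minimal usage sketch, assuming the avx512fintrin.h
         wrappers:

             __mmask16 m = __builtin_ia32_kandhi (a, b);

         which is what the _mm512_kand intrinsic expands to.  */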
31501
31502 /* SHA */
31503 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31504 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31505 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31506 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31507 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31508 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31509 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
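      /* For reference: the SHA entries carry a null name; the user-visible
         __builtin_ia32_sha* builtins are registered separately via explicit
         def_builtin calls guarded by the SHA ISA flag, and shaintrin.h wraps
         them.  A minimal sketch, assuming the standard intrinsic names:

             __m128i r = _mm_sha1rnds4_epu32 (abcd, e_msg, 0);

         which expands to __builtin_ia32_sha1rnds4 on V4SImode operands with
         an immediate round-function selector.  */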
31510
31511 /* AVX512VL. */
31512 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
31513 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
31514 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
31515 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
31516 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
31517 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
31518 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
31519 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
31520 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
31521 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
31522 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
31523 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
31524 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
31525 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
31526 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31527 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31528 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31529 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31532 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31533 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31534 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31536 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31538 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31539 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31540 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31541 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31542 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31543 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31544 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31545 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31546 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31547 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31548 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31549 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31550 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31551 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31552 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31553 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31554 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31555 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31556 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31557 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31558 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
31559 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
31560 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
31561 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
31562 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
31563 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
31564 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
31565 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
31566 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
31567 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
31568 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
31569 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
31570 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
31571 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
31572 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
31573 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
31574 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
31575 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
31576 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
31577 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
31578 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
31579 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
31580 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
31581 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
31582 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
31583 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
31584 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
31585 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
31586 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
31587 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
31588 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
31589 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
31590 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
31591 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
31592 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
31593 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
31594 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
31595 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
31596 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
31597 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
31598 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
31599 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
31600 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
31601 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
31602 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
31603 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
31604 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
31605 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
31606 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
31607 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
31608 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
31609 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
31610 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
31611 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
31612 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
31613 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
31614 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
31615 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
31616 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
31617 { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv4di_mask, "__builtin_ia32_pbroadcastq256_mem_mask", IX86_BUILTIN_PBROADCASTQ256_MEM_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
31618 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
31619 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
31620 { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv2di_mask, "__builtin_ia32_pbroadcastq128_mem_mask", IX86_BUILTIN_PBROADCASTQ128_MEM_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
31621 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
31622 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
31623 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
31624 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
31625 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
31626 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
31627 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
31628 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
31629 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
31630 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
31631 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
31632 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
31633 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
31634 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
31635 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
31636 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
31637 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
31638 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
31639 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
31640 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
31641 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
31642 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
31643 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
31644 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
31645 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
31646 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
31647 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
31648 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
31649 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
31650 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
31651 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
31652 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
31653 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
31654 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
31655 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
31656 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31657 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31658 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31659 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31660 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31661 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31662 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31663 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31664 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31665 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31666 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
31667 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
31668 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
31669 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
31670 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
31671 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
31672 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
31673 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
31674 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
31675 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
31676 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
31677 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
31678 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
31679 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31680 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31681 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31682 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
31683 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31684 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31685 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31686 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
31687 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31688 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
31689 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31690 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
31691 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31692 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
31693 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31694 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
31695 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31696 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31697 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31698 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
31699 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31700 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
31701 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31702 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
31703 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31704 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31705 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31706 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
31707 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31708 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
31709 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31710 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
31711 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
31712 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
31713 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
31714 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
31715 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
31716 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
31717 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
31718 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
31719 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
31720 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
31721 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
31722 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
31723 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
31724 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
31725 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
31726 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
31727 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
31728 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
31729 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
31730 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
31731 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
31732 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
31733 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
31734 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
31735 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
31736 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
31737 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
31738 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
31739 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
31740 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
31741 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
31742 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
31743 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
31744 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
31745 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
31746 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
31747 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
31748 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
31749 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
31750 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
31751 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
31752 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
31753 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
31754 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
31755 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
31756 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
31757 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
31758 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
31759 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
31760 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
31761 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
31762 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
31763 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
31764 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
31765 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
31766 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
31767 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
31768 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
31769 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
31770 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31771 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31772 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31773 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31774 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31775 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31776 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31777 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31778 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31779 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31780 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31781 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31782 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31783 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31784 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31785 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31786 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31787 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31788 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
31789 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
31790 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
31791 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31792 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31793 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31794 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
31795 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
31796 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
31797 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
31798 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
31799 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
31800 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
31801 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31802 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
31803 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
31804 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
31805 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31806 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
31807 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
31808 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31809 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31810 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
31811 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31812 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
31813 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
31814 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
31815 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31816 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
31817 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
31818 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31819 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31820 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31821 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31822 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31823 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31824 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31825 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31826 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31827 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31828 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31829 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31830 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31831 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31832 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
31833 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
31834 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
31835 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
31836 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
31837 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
31838 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
31839 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
31840 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
31841 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
31842 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
31843 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
31844 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
31845 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
31846 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
31847 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
31848 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31849 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31850 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31851 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31852 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31853 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31854 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31855 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31856 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31857 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31858 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31859 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31860 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31861 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31862 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31863 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31864 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31865 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31866 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31867 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31868 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31869 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31870 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31871 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31872 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31873 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31874 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31875 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31876 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31877 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31878 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31879 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31880 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31881 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31882 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31883 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31884 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31885 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31886 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31887 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31888 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31889 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31890 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31891 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31892 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31893 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31894 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31895 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31896 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
31897 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
31898 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31899 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31900 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
31901 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
31902 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
31903 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
31904 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31905 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31906 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
31907 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
31908 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
31909 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
31910 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
31911 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
31912 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
31913 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
31914 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
31915 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
31916 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
31917 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
31918 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
31919 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
31920 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
31921 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
31922 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
31923 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
31924 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
31925 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
31926 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
31927 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
31928 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
31929 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
31930 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
31931 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
31932 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31933 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31934 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31935 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31936 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31937 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31938 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
31939 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
31940 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
31941 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
31942 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
31943 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
31944 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31945 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31946 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31947 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31948 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31949 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31950 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
31951 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
31952 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
31953 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
31954 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
31955 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
31956 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
31957 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
31958 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
31959 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
31960 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
31961 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
31962 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
31963 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
31964 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
31965 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
31966 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
31967 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
31968 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31969 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31970 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
31971 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
31972 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31973 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31974 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
31975 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
31976 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31977 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31978 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31979 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31980 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31981 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31982 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31983 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31984 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31985 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31986 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31987 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31988 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
31989 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
31990 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
31991 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31992 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
31993 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
31994 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
31995 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31996 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31997 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31998 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
31999 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32000 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32001 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32002 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32003 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32004 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32005 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32006 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32007 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32008 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32009 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32010 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32011 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32012 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32013 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32014 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32015 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32016 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32017 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32018 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32019 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32020 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32021 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32022 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32023 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32024 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32025 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
32026 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32027 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32028 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32029 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32030 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32031 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32032 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32033 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32034 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32035 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32036 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32037 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32038 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32039 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32040 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32041 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32042 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32043 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32044 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32045 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32046 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32047 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32048 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32049 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32050 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32051 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32052 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32053 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32054 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32055 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32056 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32057 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32058 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32059 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32060 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32061 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32062 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32063 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32064 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32065 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32066 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32067 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32068 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32069 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32070 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
{ OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
{ OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
{ OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
{ OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
{ OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
{ OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
{ OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
{ OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },

/* AVX512DQ. */
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
{ OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },

/* AVX512BW. */
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
{ OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
};
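
/* Each entry in the table above pairs an ISA gate (OPTION_MASK_ISA_*),
   the insn pattern that expands the builtin (CODE_FOR_*), the
   __builtin_ia32_* name seen by the front end, its IX86_BUILTIN_*
   enumerator, an optional comparison code (UNKNOWN when unused) and a
   constant describing the function prototype.  As a rough, illustrative
   sketch only (the real user-level wrappers live in the avx512*intrin.h
   headers), a masked entry such as __builtin_ia32_pmaxsd256_mask with
   prototype V8SI_FTYPE_V8SI_V8SI_V8SI_QI would be reached from user
   code along the lines of

     __m256i r = (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) a,
							  (__v8si) b,
							  (__v8si) w,
							  (__mmask8) m);

   where A and B are the sources, W supplies the pass-through elements
   for masked-off lanes and M is the write mask.  */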

/* Builtins with rounding support. */
static const struct builtin_description bdesc_round_args[] =
{
/* AVX512F */
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
{ OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
{ OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
{ OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32437 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
32438 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
32439 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
32440 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
32441 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
32442 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
32443 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
32444 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
32445 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
32446 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
32447 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
32448 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
32449 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
32450 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
32451 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
32452 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
32453 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32454 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32455 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32456 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32457 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32458 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32459 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
32460 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
32461 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32462 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32463 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32464 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32465 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32466 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32467 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32468 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32469 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32470 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32471 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32472 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32473 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32474 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32475 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32476 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32477
32478 /* AVX512ER */
32479 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32480 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32481 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32482 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32483 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32484 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32485 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32486 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32487 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32488 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32489
32490 /* AVX512DQ. */
32491 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32492 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32493 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32494 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32495 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32496 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32497 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
32498 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
32499 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
32500 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
32501 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32502 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32503 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32504 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32505 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
32506 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
32507 };
32508
32509 /* FMA4 and XOP. */
32510 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
32511 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
32512 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
32513 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
32514 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
32515 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
32516 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
32517 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
32518 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
32519 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
32520 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
32521 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
32522 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
32523 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
32524 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
32525 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
32526 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
32527 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
32528 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
32529 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
32530 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
32531 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
32532 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
32533 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
32534 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
32535 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
32536 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
32537 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
32538 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
32539 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
32540 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
32541 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
32542 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
32543 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
32544 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
32545 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
32546 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
32547 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
32548 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
32549 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
32550 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
32551 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
32552 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
32553 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
32554 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
32555 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
32556 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
32557 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
32558 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
32559 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
32560 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
32561 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
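/* In the MULTI_ARG_* aliases above, the digit is the number of vector
   operands, the suffix names the element mode (SF/DF float, QI/HI/SI/DI
   integer), a trailing '2' selects the 256-bit form, and _IMM, _CMP and _TF
   mark an extra immediate, comparison code, or PCOM_TRUE/PCOM_FALSE
   operand.  */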
32562
32563 static const struct builtin_description bdesc_multi_arg[] =
32564 {
32565 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
32566 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
32567 UNKNOWN, (int)MULTI_ARG_3_SF },
32568 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
32569 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
32570 UNKNOWN, (int)MULTI_ARG_3_DF },
32571
32572 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
32573 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
32574 UNKNOWN, (int)MULTI_ARG_3_SF },
32575 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
32576 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
32577 UNKNOWN, (int)MULTI_ARG_3_DF },
32578
32579 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
32580 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
32581 UNKNOWN, (int)MULTI_ARG_3_SF },
32582 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
32583 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
32584 UNKNOWN, (int)MULTI_ARG_3_DF },
32585 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
32586 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
32587 UNKNOWN, (int)MULTI_ARG_3_SF2 },
32588 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
32589 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
32590 UNKNOWN, (int)MULTI_ARG_3_DF2 },
32591
32592 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
32593 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
32594 UNKNOWN, (int)MULTI_ARG_3_SF },
32595 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
32596 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
32597 UNKNOWN, (int)MULTI_ARG_3_DF },
32598 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
32599 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
32600 UNKNOWN, (int)MULTI_ARG_3_SF2 },
32601 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
32602 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
32603 UNKNOWN, (int)MULTI_ARG_3_DF2 },
32604
32605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
32606 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
32607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
32608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
32609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
32610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
32611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
32612
32613 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
32614 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
32615 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
32616 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
32617 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
32618 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
32619 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
32620
32621 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
32622
32623 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
32624 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
32625 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
32626 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
32627 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
32628 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
32629 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
32630 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
32631 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
32632 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
32633 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
32634 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
32635
32636 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
32637 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
32638 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
32639 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
32640 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
32641 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
32642 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
32643 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
32644 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
32645 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
32646 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
32647 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
32648 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
32649 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
32650 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
32651 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
32652
32653 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
32654 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
32655 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
32656 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
32657 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
32658 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
32659
32660 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
32661 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
32662 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
32663 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
32664 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
32665 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
32666 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
32667 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
32668 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
32669 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
32670 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
32671 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
32672 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
32673 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
32674 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
32675
32676 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
32677 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
32678 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
32679 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
32680 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
32681 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
32682 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
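/* The __builtin_ia32_vpcomne<x> and __builtin_ia32_vpcomneq<x> pairs here and
   in the groups below are deliberate aliases: both spellings map to the same
   IX86_BUILTIN_VPCOMNE* code and expand to the same NE comparison.  */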
32683
32684 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
32685 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
32686 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
32687 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
32688 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
32689 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
32690 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
32691
32692 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
32693 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
32694 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
32695 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
32696 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
32697 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
32698 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
32699
32700 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
32701 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
32702 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
32703 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
32704 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
32705 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
32706 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
32707
32708 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
32709 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
32710 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
32711 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
32712 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
32713 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
32714 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
32715
32716 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
32717 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
32718 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
32719 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
32720 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
32721 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
32722 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
32723
32724 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
32725 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
32726 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
32727 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
32728 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
32729 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
32730 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
32731
32732 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
32733 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
32734 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
32735 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
32736 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
32737 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
32738 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
32739
32740 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
32741 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
32742 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
32743 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
32744 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
32745 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
32746 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
32747 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
32748
32749 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
32750 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
32751 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
32752 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
32753 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
32754 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
32755 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
32756 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
32757
32758 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
32759 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
32760 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
32761 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
32762
32763 };
32764 \f
32765 /* TM vector builtins. */
32766
32767 /* Reuse the existing x86-specific `struct builtin_description' because
32768    we're lazy.  Add casts to make the TM builtin codes fit. */
32769 static const struct builtin_description bdesc_tm[] =
32770 {
32771 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
32772 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
32773 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
32774 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
32775 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
32776 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
32777 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
32778
32779 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
32780 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
32781 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
32782 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
32783 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
32784 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
32785 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
32786
32787 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
32788 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
32789 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
32790 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
32791 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
32792 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
32793 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
32794
32795 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
32796 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
32797 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
32798 };
32799
32800 /* TM callbacks. */
32801
32802 /* Return the builtin decl needed to load a vector of TYPE. */
32803
32804 static tree
32805 ix86_builtin_tm_load (tree type)
32806 {
32807 if (TREE_CODE (type) == VECTOR_TYPE)
32808 {
32809 switch (tree_to_uhwi (TYPE_SIZE (type)))
32810 {
32811 case 64:
32812 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
32813 case 128:
32814 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
32815 case 256:
32816 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
32817 }
32818 }
32819 return NULL_TREE;
32820 }
32821
32822 /* Return the builtin decl needed to store a vector of TYPE. */
32823
32824 static tree
32825 ix86_builtin_tm_store (tree type)
32826 {
32827 if (TREE_CODE (type) == VECTOR_TYPE)
32828 {
32829 switch (tree_to_uhwi (TYPE_SIZE (type)))
32830 {
32831 case 64:
32832 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
32833 case 128:
32834 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
32835 case 256:
32836 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
32837 }
32838 }
32839 return NULL_TREE;
32840 }
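/* Taken together with bdesc_tm above, these two helpers map a vector type to
   the matching _ITM_ accessor by size: e.g. a 128-bit type such as V4SF
   yields BUILT_IN_TM_LOAD_M128 / BUILT_IN_TM_STORE_M128, i.e. the
   __builtin__ITM_RM128 and __builtin__ITM_WM128 entries registered below.  */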
32841 \f
32842 /* Initialize the transactional memory vector load/store builtins. */
32843
32844 static void
32845 ix86_init_tm_builtins (void)
32846 {
32847 enum ix86_builtin_func_type ftype;
32848 const struct builtin_description *d;
32849 size_t i;
32850 tree decl;
32851 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
32852 tree attrs_log, attrs_type_log;
32853
32854 if (!flag_tm)
32855 return;
32856
32857 /* If there are no builtins defined, we must be compiling in a
32858 language without trans-mem support. */
32859 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
32860 return;
32861
32862 /* Use whatever attributes a normal TM load has. */
32863 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
32864 attrs_load = DECL_ATTRIBUTES (decl);
32865 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
32866 /* Use whatever attributes a normal TM store has. */
32867 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
32868 attrs_store = DECL_ATTRIBUTES (decl);
32869 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
32870 /* Use whatever attributes a normal TM log has. */
32871 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
32872 attrs_log = DECL_ATTRIBUTES (decl);
32873 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
32874
32875 for (i = 0, d = bdesc_tm;
32876 i < ARRAY_SIZE (bdesc_tm);
32877 i++, d++)
32878 {
32879 if ((d->mask & ix86_isa_flags) != 0
32880 || (lang_hooks.builtin_function
32881 == lang_hooks.builtin_function_ext_scope))
32882 {
32883 tree type, attrs, attrs_type;
32884 enum built_in_function code = (enum built_in_function) d->code;
32885
32886 ftype = (enum ix86_builtin_func_type) d->flag;
32887 type = ix86_get_builtin_func_type (ftype);
32888
32889 if (BUILTIN_TM_LOAD_P (code))
32890 {
32891 attrs = attrs_load;
32892 attrs_type = attrs_type_load;
32893 }
32894 else if (BUILTIN_TM_STORE_P (code))
32895 {
32896 attrs = attrs_store;
32897 attrs_type = attrs_type_store;
32898 }
32899 else
32900 {
32901 attrs = attrs_log;
32902 attrs_type = attrs_type_log;
32903 }
32904 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
32905 /* The name without the "__builtin_" prefix,
32906    for calling the function directly. */
32907 d->name + strlen ("__builtin_"),
32908 attrs);
32909 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
32910 set the TYPE_ATTRIBUTES. */
32911 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
32912
32913 set_builtin_decl (code, decl, false);
32914 }
32915 }
32916 }
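/* Note that add_builtin_function is passed d->name + strlen ("__builtin_")
   as the library name, so each TM builtin is also callable under its bare
   _ITM_* spelling (e.g. _ITM_WM128 for __builtin__ITM_WM128).  */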
32917
32918 /* Set up all the MMX/SSE builtins, including builtins for instructions that
32919    are not in the current target ISA, so that the user can compile particular
32920    modules with target-specific options that differ from the command-line
32921    options. */
32922 static void
32923 ix86_init_mmx_sse_builtins (void)
32924 {
32925 const struct builtin_description * d;
32926 enum ix86_builtin_func_type ftype;
32927 size_t i;
32928
32929 /* Add all special builtins with variable number of operands. */
32930 for (i = 0, d = bdesc_special_args;
32931 i < ARRAY_SIZE (bdesc_special_args);
32932 i++, d++)
32933 {
32934 if (d->name == 0)
32935 continue;
32936
32937 ftype = (enum ix86_builtin_func_type) d->flag;
32938 def_builtin (d->mask, d->name, ftype, d->code);
32939 }
32940
32941 /* Add all builtins with variable number of operands. */
32942 for (i = 0, d = bdesc_args;
32943 i < ARRAY_SIZE (bdesc_args);
32944 i++, d++)
32945 {
32946 if (d->name == 0)
32947 continue;
32948
32949 ftype = (enum ix86_builtin_func_type) d->flag;
32950 def_builtin_const (d->mask, d->name, ftype, d->code);
32951 }
32952
32953 /* Add all builtins with rounding. */
32954 for (i = 0, d = bdesc_round_args;
32955 i < ARRAY_SIZE (bdesc_round_args);
32956 i++, d++)
32957 {
32958 if (d->name == 0)
32959 continue;
32960
32961 ftype = (enum ix86_builtin_func_type) d->flag;
32962 def_builtin_const (d->mask, d->name, ftype, d->code);
32963 }
32964
32965 /* pcmpestr[im] insns. */
32966 for (i = 0, d = bdesc_pcmpestr;
32967 i < ARRAY_SIZE (bdesc_pcmpestr);
32968 i++, d++)
32969 {
32970 if (d->code == IX86_BUILTIN_PCMPESTRM128)
32971 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
32972 else
32973 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
32974 def_builtin_const (d->mask, d->name, ftype, d->code);
32975 }
32976
32977 /* pcmpistr[im] insns. */
32978 for (i = 0, d = bdesc_pcmpistr;
32979 i < ARRAY_SIZE (bdesc_pcmpistr);
32980 i++, d++)
32981 {
32982 if (d->code == IX86_BUILTIN_PCMPISTRM128)
32983 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
32984 else
32985 ftype = INT_FTYPE_V16QI_V16QI_INT;
32986 def_builtin_const (d->mask, d->name, ftype, d->code);
32987 }
32988
32989 /* comi/ucomi insns. */
32990 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
32991 {
32992 if (d->mask == OPTION_MASK_ISA_SSE2)
32993 ftype = INT_FTYPE_V2DF_V2DF;
32994 else
32995 ftype = INT_FTYPE_V4SF_V4SF;
32996 def_builtin_const (d->mask, d->name, ftype, d->code);
32997 }
32998
32999 /* SSE */
33000 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33001 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33002 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33003 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33004
33005 /* SSE or 3DNow!A */
33006 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33007 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33008 IX86_BUILTIN_MASKMOVQ);
33009
33010 /* SSE2 */
33011 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33012 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33013
33014 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33015 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33016 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33017 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33018
33019 /* SSE3. */
33020 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33021 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33022 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33023 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33024
33025 /* AES */
33026 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33027 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33028 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33029 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33030 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33031 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33032 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33033 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33034 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33035 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33036 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33037 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33038
33039 /* PCLMUL */
33040 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33041 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33042
33043 /* RDRND */
33044 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33045 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33046 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33047 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33048 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33049 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33050 IX86_BUILTIN_RDRAND64_STEP);
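/* The rdrand*_step builtins return nonzero on success (the RDRAND carry
   flag) and store the random value through their pointer argument, which is
   why their signatures are INT_FTYPE_PUSHORT/PUNSIGNED/PULONGLONG rather
   than plain value-returning types.  */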
33051
33052 /* AVX2 */
33053 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33054 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33055 IX86_BUILTIN_GATHERSIV2DF);
33056
33057 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33058 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33059 IX86_BUILTIN_GATHERSIV4DF);
33060
33061 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33062 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33063 IX86_BUILTIN_GATHERDIV2DF);
33064
33065 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33066 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33067 IX86_BUILTIN_GATHERDIV4DF);
33068
33069 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33070 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33071 IX86_BUILTIN_GATHERSIV4SF);
33072
33073 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33074 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33075 IX86_BUILTIN_GATHERSIV8SF);
33076
33077 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33078 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33079 IX86_BUILTIN_GATHERDIV4SF);
33080
33081 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33082 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33083 IX86_BUILTIN_GATHERDIV8SF);
33084
33085 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33086 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33087 IX86_BUILTIN_GATHERSIV2DI);
33088
33089 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33090 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33091 IX86_BUILTIN_GATHERSIV4DI);
33092
33093 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33094 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33095 IX86_BUILTIN_GATHERDIV2DI);
33096
33097 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33098 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33099 IX86_BUILTIN_GATHERDIV4DI);
33100
33101 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33102 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33103 IX86_BUILTIN_GATHERSIV4SI);
33104
33105 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33106 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33107 IX86_BUILTIN_GATHERSIV8SI);
33108
33109 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33110 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33111 IX86_BUILTIN_GATHERDIV4SI);
33112
33113 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33114 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33115 IX86_BUILTIN_GATHERDIV8SI);
33116
33117 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
33118 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33119 IX86_BUILTIN_GATHERALTSIV4DF);
33120
33121 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
33122 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33123 IX86_BUILTIN_GATHERALTDIV8SF);
33124
33125 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
33126 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33127 IX86_BUILTIN_GATHERALTSIV4DI);
33128
33129 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
33130 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33131 IX86_BUILTIN_GATHERALTDIV8SI);
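/* In the gather builtin names, "siv" marks SImode (32-bit) index vectors and
   "div" DImode (64-bit) index vectors, matching the V4SI/V8SI vs. V2DI/V4DI
   index operands in the signatures; the "alt" variants pair a data vector
   with an index vector of a different element count (e.g. V4DF data with a
   V8SI index, or V8SF data with a V4DI index).  */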
33132
33133 /* AVX512F */
33134 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33135 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
33136 IX86_BUILTIN_GATHER3SIV16SF);
33137
33138 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
33139 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
33140 IX86_BUILTIN_GATHER3SIV8DF);
33141
33142 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
33143 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
33144 IX86_BUILTIN_GATHER3DIV16SF);
33145
33146 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
33147 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
33148 IX86_BUILTIN_GATHER3DIV8DF);
33149
33150 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
33151 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
33152 IX86_BUILTIN_GATHER3SIV16SI);
33153
33154 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
33155 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
33156 IX86_BUILTIN_GATHER3SIV8DI);
33157
33158 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
33159 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
33160 IX86_BUILTIN_GATHER3DIV16SI);
33161
33162 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
33163 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
33164 IX86_BUILTIN_GATHER3DIV8DI);
33165
33166 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
33167 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
33168 IX86_BUILTIN_GATHER3ALTSIV8DF);
33169
33170 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
33171 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
33172 IX86_BUILTIN_GATHER3ALTDIV16SF);
33173
33174 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
33175 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
33176 IX86_BUILTIN_GATHER3ALTSIV8DI);
33177
33178 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
33179 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
33180 IX86_BUILTIN_GATHER3ALTDIV16SI);
33181
33182 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
33183 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
33184 IX86_BUILTIN_SCATTERSIV16SF);
33185
33186 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
33187 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
33188 IX86_BUILTIN_SCATTERSIV8DF);
33189
33190 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
33191 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
33192 IX86_BUILTIN_SCATTERDIV16SF);
33193
33194 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
33195 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
33196 IX86_BUILTIN_SCATTERDIV8DF);
33197
33198 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
33199 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
33200 IX86_BUILTIN_SCATTERSIV16SI);
33201
33202 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
33203 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
33204 IX86_BUILTIN_SCATTERSIV8DI);
33205
33206 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
33207 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
33208 IX86_BUILTIN_SCATTERDIV16SI);
33209
33210 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
33211 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
33212 IX86_BUILTIN_SCATTERDIV8DI);
33213
33214 /* AVX512VL */
33215 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
33216 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
33217 IX86_BUILTIN_GATHER3SIV2DF);
33218
33219 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
33220 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
33221 IX86_BUILTIN_GATHER3SIV4DF);
33222
33223 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
33224 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
33225 IX86_BUILTIN_GATHER3DIV2DF);
33226
33227 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
33228 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
33229 IX86_BUILTIN_GATHER3DIV4DF);
33230
33231 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
33232 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
33233 IX86_BUILTIN_GATHER3SIV4SF);
33234
33235 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
33236 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
33237 IX86_BUILTIN_GATHER3SIV8SF);
33238
33239 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
33240 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
33241 IX86_BUILTIN_GATHER3DIV4SF);
33242
33243 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
33244 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
33245 IX86_BUILTIN_GATHER3DIV8SF);
33246
33247 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
33248 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
33249 IX86_BUILTIN_GATHER3SIV2DI);
33250
33251 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
33252 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
33253 IX86_BUILTIN_GATHER3SIV4DI);
33254
33255 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
33256 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
33257 IX86_BUILTIN_GATHER3DIV2DI);
33258
33259 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
33260 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
33261 IX86_BUILTIN_GATHER3DIV4DI);
33262
33263 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
33264 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
33265 IX86_BUILTIN_GATHER3SIV4SI);
33266
33267 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
33268 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
33269 IX86_BUILTIN_GATHER3SIV8SI);
33270
33271 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
33272 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
33273 IX86_BUILTIN_GATHER3DIV4SI);
33274
33275 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
33276 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
33277 IX86_BUILTIN_GATHER3DIV8SI);
33278
33279 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
33280 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
33281 IX86_BUILTIN_GATHER3ALTSIV4DF);
33282
33283 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
33284 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
33285 IX86_BUILTIN_GATHER3ALTDIV8SF);
33286
33287 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
33288 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
33289 IX86_BUILTIN_GATHER3ALTSIV4DI);
33290
33291 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
33292 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
33293 IX86_BUILTIN_GATHER3ALTDIV8SI);
33294
33295 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
33296 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
33297 IX86_BUILTIN_SCATTERSIV8SF);
33298
33299 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
33300 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
33301 IX86_BUILTIN_SCATTERSIV4SF);
33302
33303 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
33304 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
33305 IX86_BUILTIN_SCATTERSIV4DF);
33306
33307 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
33308 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
33309 IX86_BUILTIN_SCATTERSIV2DF);
33310
33311 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
33312 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
33313 IX86_BUILTIN_SCATTERDIV8SF);
33314
33315 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
33316 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
33317 IX86_BUILTIN_SCATTERDIV4SF);
33318
33319 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
33320 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
33321 IX86_BUILTIN_SCATTERDIV4DF);
33322
33323 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
33324 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
33325 IX86_BUILTIN_SCATTERDIV2DF);
33326
33327 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
33328 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
33329 IX86_BUILTIN_SCATTERSIV8SI);
33330
33331 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
33332 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
33333 IX86_BUILTIN_SCATTERSIV4SI);
33334
33335 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
33336 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
33337 IX86_BUILTIN_SCATTERSIV4DI);
33338
33339 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
33340 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
33341 IX86_BUILTIN_SCATTERSIV2DI);
33342
33343 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
33344 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
33345 IX86_BUILTIN_SCATTERDIV8SI);
33346
33347 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
33348 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
33349 IX86_BUILTIN_SCATTERDIV4SI);
33350
33351 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
33352 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
33353 IX86_BUILTIN_SCATTERDIV4DI);
33354
33355 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
33356 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
33357 IX86_BUILTIN_SCATTERDIV2DI);
33358
33359 /* AVX512PF */
33360 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
33361 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33362 IX86_BUILTIN_GATHERPFDPD);
33363 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
33364 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33365 IX86_BUILTIN_GATHERPFDPS);
33366 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
33367 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33368 IX86_BUILTIN_GATHERPFQPD);
33369 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
33370 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33371 IX86_BUILTIN_GATHERPFQPS);
33372 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
33373 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33374 IX86_BUILTIN_SCATTERPFDPD);
33375 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
33376 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33377 IX86_BUILTIN_SCATTERPFDPS);
33378 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
33379 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33380 IX86_BUILTIN_SCATTERPFQPD);
33381 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
33382 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33383 IX86_BUILTIN_SCATTERPFQPS);
33384
33385 /* SHA */
33386 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
33387 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
33388 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
33389 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
33390 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
33391 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
33392 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
33393 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
33394 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
33395 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
33396 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
33397 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
33398 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
33399 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
33400
33401 /* RTM. */
33402 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
33403 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
33404
33405 /* MMX access to the vec_init patterns. */
33406 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
33407 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
33408
33409 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
33410 V4HI_FTYPE_HI_HI_HI_HI,
33411 IX86_BUILTIN_VEC_INIT_V4HI);
33412
33413 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
33414 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
33415 IX86_BUILTIN_VEC_INIT_V8QI);
33416
33417 /* Access to the vec_extract patterns. */
33418 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
33419 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
33420 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
33421 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
33422 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
33423 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
33424 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
33425 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
33426 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
33427 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
33428
33429 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33430 "__builtin_ia32_vec_ext_v4hi",
33431 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
33432
33433 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
33434 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
33435
33436 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
33437 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
33438
33439 /* Access to the vec_set patterns. */
33440 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
33441 "__builtin_ia32_vec_set_v2di",
33442 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
33443
33444 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
33445 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
33446
33447 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
33448 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
33449
33450 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
33451 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
33452
33453 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33454 "__builtin_ia32_vec_set_v4hi",
33455 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
33456
33457 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
33458 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
33459
33460 /* RDSEED */
33461 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
33462 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
33463 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
33464 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
33465 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
33466 "__builtin_ia32_rdseed_di_step",
33467 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
33468
33469 /* ADCX */
33470 def_builtin (0, "__builtin_ia32_addcarryx_u32",
33471 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
33472 def_builtin (OPTION_MASK_ISA_64BIT,
33473 "__builtin_ia32_addcarryx_u64",
33474 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
33475 IX86_BUILTIN_ADDCARRYX64);
33476
33477 /* SBB */
33478 def_builtin (0, "__builtin_ia32_sbb_u32",
33479 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
33480 def_builtin (OPTION_MASK_ISA_64BIT,
33481 "__builtin_ia32_sbb_u64",
33482 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
33483 IX86_BUILTIN_SBB64);
33484
33485 /* Read/write FLAGS. */
33486 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
33487 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
33488 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
33489 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
33490 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
33491 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
33492 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
33493 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
33494
33495 /* CLFLUSHOPT. */
33496 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
33497 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
33498
33499 /* Add FMA4 multi-argument builtin instructions. */
33500 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
33501 {
33502 if (d->name == 0)
33503 continue;
33504
33505 ftype = (enum ix86_builtin_func_type) d->flag;
33506 def_builtin_const (d->mask, d->name, ftype, d->code);
33507 }
33508 }
33509
33510 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
33511 to return a pointer to VERSION_DECL if the outcome of the expression
33512 formed by PREDICATE_CHAIN is true. This function will be called during
33513 version dispatch to decide which function version to execute. It returns
33514 the basic block at the end, to which more conditions can be added. */
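/* Sketch (not verbatim GIMPLE) of what gets appended for one version,
   where predicate_1 ... predicate_N are the entries of PREDICATE_CHAIN:

     cond_1 = predicate_1 (arg_1);
     ...
     cond = MIN (cond_1, ..., cond_N);   <- all must be nonzero
     if (cond > 0)
       return (void *) version_decl;

   On the false edge control continues in the block that is returned,
   where further conditions can be chained.  */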
33515
33516 static basic_block
33517 add_condition_to_bb (tree function_decl, tree version_decl,
33518 tree predicate_chain, basic_block new_bb)
33519 {
33520 gimple return_stmt;
33521 tree convert_expr, result_var;
33522 gimple convert_stmt;
33523 gimple call_cond_stmt;
33524 gimple if_else_stmt;
33525
33526 basic_block bb1, bb2, bb3;
33527 edge e12, e23;
33528
33529 tree cond_var, and_expr_var = NULL_TREE;
33530 gimple_seq gseq;
33531
33532 tree predicate_decl, predicate_arg;
33533
33534 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
33535
33536 gcc_assert (new_bb != NULL);
33537 gseq = bb_seq (new_bb);
33538
33539
33540 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
33541 build_fold_addr_expr (version_decl));
33542 result_var = create_tmp_var (ptr_type_node, NULL);
33543 convert_stmt = gimple_build_assign (result_var, convert_expr);
33544 return_stmt = gimple_build_return (result_var);
33545
33546 if (predicate_chain == NULL_TREE)
33547 {
33548 gimple_seq_add_stmt (&gseq, convert_stmt);
33549 gimple_seq_add_stmt (&gseq, return_stmt);
33550 set_bb_seq (new_bb, gseq);
33551 gimple_set_bb (convert_stmt, new_bb);
33552 gimple_set_bb (return_stmt, new_bb);
33553 pop_cfun ();
33554 return new_bb;
33555 }
33556
33557 while (predicate_chain != NULL)
33558 {
33559 cond_var = create_tmp_var (integer_type_node, NULL);
33560 predicate_decl = TREE_PURPOSE (predicate_chain);
33561 predicate_arg = TREE_VALUE (predicate_chain);
33562 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
33563 gimple_call_set_lhs (call_cond_stmt, cond_var);
33564
33565 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
33566 gimple_set_bb (call_cond_stmt, new_bb);
33567 gimple_seq_add_stmt (&gseq, call_cond_stmt);
33568
33569 predicate_chain = TREE_CHAIN (predicate_chain);
33570
33571 if (and_expr_var == NULL)
33572 and_expr_var = cond_var;
33573 else
33574 {
33575 gimple assign_stmt;
33576 /* Use MIN_EXPR to check whether any of the integers is zero:
33577 and_expr_var = min_expr <cond_var, and_expr_var>. */
33578 assign_stmt = gimple_build_assign (and_expr_var,
33579 build2 (MIN_EXPR, integer_type_node,
33580 cond_var, and_expr_var));
33581
33582 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
33583 gimple_set_bb (assign_stmt, new_bb);
33584 gimple_seq_add_stmt (&gseq, assign_stmt);
33585 }
33586 }
33587
33588 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
33589 integer_zero_node,
33590 NULL_TREE, NULL_TREE);
33591 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
33592 gimple_set_bb (if_else_stmt, new_bb);
33593 gimple_seq_add_stmt (&gseq, if_else_stmt);
33594
33595 gimple_seq_add_stmt (&gseq, convert_stmt);
33596 gimple_seq_add_stmt (&gseq, return_stmt);
33597 set_bb_seq (new_bb, gseq);
33598
33599 bb1 = new_bb;
33600 e12 = split_block (bb1, if_else_stmt);
33601 bb2 = e12->dest;
33602 e12->flags &= ~EDGE_FALLTHRU;
33603 e12->flags |= EDGE_TRUE_VALUE;
33604
33605 e23 = split_block (bb2, return_stmt);
33606
33607 gimple_set_bb (convert_stmt, bb2);
33608 gimple_set_bb (return_stmt, bb2);
33609
33610 bb3 = e23->dest;
33611 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
33612
33613 remove_edge (e23);
33614 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
33615
33616 pop_cfun ();
33617
33618 return bb3;
33619 }
33620
33621 /* This parses the attribute arguments to target in DECL and determines
33622 the right builtin to use to match the platform specification.
33623 It returns the priority value for this version decl. If PREDICATE_LIST
33624 is not NULL, it stores the list of cpu features that need to be checked
33625 before dispatching this function. */
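/* Illustrative examples: a version declared with
   __attribute__ ((target ("arch=core2"))) yields priority P_PROC_SSSE3
   and, when PREDICATE_LIST is non-NULL, the single predicate
   __builtin_cpu_is ("core2"); a version declared with
   __attribute__ ((target ("avx"))) yields priority P_AVX and the
   predicate __builtin_cpu_supports ("avx").  */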
33626
33627 static unsigned int
33628 get_builtin_code_for_version (tree decl, tree *predicate_list)
33629 {
33630 tree attrs;
33631 struct cl_target_option cur_target;
33632 tree target_node;
33633 struct cl_target_option *new_target;
33634 const char *arg_str = NULL;
33635 const char *attrs_str = NULL;
33636 char *tok_str = NULL;
33637 char *token;
33638
33639 /* Priority of i386 features, greater value is higher priority. This is
33640 used to decide the order in which function dispatch must happen. For
33641 instance, a version specialized for SSE4.2 should be checked for dispatch
33642 before a version for SSE3, as SSE4.2 implies SSE3. */
33643 enum feature_priority
33644 {
33645 P_ZERO = 0,
33646 P_MMX,
33647 P_SSE,
33648 P_SSE2,
33649 P_SSE3,
33650 P_SSSE3,
33651 P_PROC_SSSE3,
33652 P_SSE4_A,
33653 P_PROC_SSE4_A,
33654 P_SSE4_1,
33655 P_SSE4_2,
33656 P_PROC_SSE4_2,
33657 P_POPCNT,
33658 P_AVX,
33659 P_PROC_AVX,
33660 P_FMA4,
33661 P_XOP,
33662 P_PROC_XOP,
33663 P_FMA,
33664 P_PROC_FMA,
33665 P_AVX2,
33666 P_PROC_AVX2
33667 };
33668
33669 enum feature_priority priority = P_ZERO;
33670
33671 /* These are the target attribute strings for which a dispatcher is
33672 available, from fold_builtin_cpu. */
33673
33674 static struct _feature_list
33675 {
33676 const char *const name;
33677 const enum feature_priority priority;
33678 }
33679 const feature_list[] =
33680 {
33681 {"mmx", P_MMX},
33682 {"sse", P_SSE},
33683 {"sse2", P_SSE2},
33684 {"sse3", P_SSE3},
33685 {"sse4a", P_SSE4_A},
33686 {"ssse3", P_SSSE3},
33687 {"sse4.1", P_SSE4_1},
33688 {"sse4.2", P_SSE4_2},
33689 {"popcnt", P_POPCNT},
33690 {"avx", P_AVX},
33691 {"fma4", P_FMA4},
33692 {"xop", P_XOP},
33693 {"fma", P_FMA},
33694 {"avx2", P_AVX2}
33695 };
33696
33697
33698 static unsigned int NUM_FEATURES
33699 = sizeof (feature_list) / sizeof (struct _feature_list);
33700
33701 unsigned int i;
33702
33703 tree predicate_chain = NULL_TREE;
33704 tree predicate_decl, predicate_arg;
33705
33706 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
33707 gcc_assert (attrs != NULL);
33708
33709 attrs = TREE_VALUE (TREE_VALUE (attrs));
33710
33711 gcc_assert (TREE_CODE (attrs) == STRING_CST);
33712 attrs_str = TREE_STRING_POINTER (attrs);
33713
33714 /* Return priority zero for default function. */
33715 if (strcmp (attrs_str, "default") == 0)
33716 return 0;
33717
33718 /* Handle arch= if specified. For priority, set it to be 1 more than
33719 the best instruction set the processor can handle. For instance, if
33720 there is a version for atom and a version for ssse3 (the highest ISA
33721 priority for atom), the atom version must be checked for dispatch
33722 before the ssse3 version. */
33723 if (strstr (attrs_str, "arch=") != NULL)
33724 {
33725 cl_target_option_save (&cur_target, &global_options);
33726 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
33727 &global_options_set);
33728
33729 gcc_assert (target_node);
33730 new_target = TREE_TARGET_OPTION (target_node);
33731 gcc_assert (new_target);
33732
33733 if (new_target->arch_specified && new_target->arch > 0)
33734 {
33735 switch (new_target->arch)
33736 {
33737 case PROCESSOR_CORE2:
33738 arg_str = "core2";
33739 priority = P_PROC_SSSE3;
33740 break;
33741 case PROCESSOR_NEHALEM:
33742 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
33743 arg_str = "westmere";
33744 else
33745 /* We translate "arch=corei7" and "arch=nehalem" to
33746 "corei7" so that it will be mapped to M_INTEL_COREI7
33747 as cpu type to cover all M_INTEL_COREI7_XXXs. */
33748 arg_str = "corei7";
33749 priority = P_PROC_SSE4_2;
33750 break;
33751 case PROCESSOR_SANDYBRIDGE:
33752 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
33753 arg_str = "ivybridge";
33754 else
33755 arg_str = "sandybridge";
33756 priority = P_PROC_AVX;
33757 break;
33758 case PROCESSOR_HASWELL:
33759 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
33760 arg_str = "broadwell";
33761 else
33762 arg_str = "haswell";
33763 priority = P_PROC_AVX2;
33764 break;
33765 case PROCESSOR_BONNELL:
33766 arg_str = "bonnell";
33767 priority = P_PROC_SSSE3;
33768 break;
33769 case PROCESSOR_SILVERMONT:
33770 arg_str = "silvermont";
33771 priority = P_PROC_SSE4_2;
33772 break;
33773 case PROCESSOR_AMDFAM10:
33774 arg_str = "amdfam10h";
33775 priority = P_PROC_SSE4_A;
33776 break;
33777 case PROCESSOR_BTVER1:
33778 arg_str = "btver1";
33779 priority = P_PROC_SSE4_A;
33780 break;
33781 case PROCESSOR_BTVER2:
33782 arg_str = "btver2";
33783 priority = P_PROC_AVX;
33784 break;
33785 case PROCESSOR_BDVER1:
33786 arg_str = "bdver1";
33787 priority = P_PROC_XOP;
33788 break;
33789 case PROCESSOR_BDVER2:
33790 arg_str = "bdver2";
33791 priority = P_PROC_FMA;
33792 break;
33793 case PROCESSOR_BDVER3:
33794 arg_str = "bdver3";
33795 priority = P_PROC_FMA;
33796 break;
33797 case PROCESSOR_BDVER4:
33798 arg_str = "bdver4";
33799 priority = P_PROC_AVX2;
33800 break;
33801 }
33802 }
33803
33804 cl_target_option_restore (&global_options, &cur_target);
33805
33806 if (predicate_list && arg_str == NULL)
33807 {
33808 error_at (DECL_SOURCE_LOCATION (decl),
33809 "No dispatcher found for the versioning attributes");
33810 return 0;
33811 }
33812
33813 if (predicate_list)
33814 {
33815 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
33816 /* For a C string literal the length includes the trailing NULL. */
33817 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
33818 predicate_chain = tree_cons (predicate_decl, predicate_arg,
33819 predicate_chain);
33820 }
33821 }
33822
33823 /* Process feature name. */
33824 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
33825 strcpy (tok_str, attrs_str);
33826 token = strtok (tok_str, ",");
33827 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
33828
33829 while (token != NULL)
33830 {
33831 /* Do not process "arch=" */
33832 if (strncmp (token, "arch=", 5) == 0)
33833 {
33834 token = strtok (NULL, ",");
33835 continue;
33836 }
33837 for (i = 0; i < NUM_FEATURES; ++i)
33838 {
33839 if (strcmp (token, feature_list[i].name) == 0)
33840 {
33841 if (predicate_list)
33842 {
33843 predicate_arg = build_string_literal (
33844 strlen (feature_list[i].name) + 1,
33845 feature_list[i].name);
33846 predicate_chain = tree_cons (predicate_decl, predicate_arg,
33847 predicate_chain);
33848 }
33849 /* Find the maximum priority feature. */
33850 if (feature_list[i].priority > priority)
33851 priority = feature_list[i].priority;
33852
33853 break;
33854 }
33855 }
33856 if (predicate_list && i == NUM_FEATURES)
33857 {
33858 error_at (DECL_SOURCE_LOCATION (decl),
33859 "No dispatcher found for %s", token);
33860 return 0;
33861 }
33862 token = strtok (NULL, ",");
33863 }
33864 free (tok_str);
33865
33866 if (predicate_list && predicate_chain == NULL_TREE)
33867 {
33868 error_at (DECL_SOURCE_LOCATION (decl),
33869 "No dispatcher found for the versioning attributes : %s",
33870 attrs_str);
33871 return 0;
33872 }
33873 else if (predicate_list)
33874 {
33875 predicate_chain = nreverse (predicate_chain);
33876 *predicate_list = predicate_chain;
33877 }
33878
33879 return priority;
33880 }
33881
33882 /* This compares the priority of target features in function DECL1
33883 and DECL2. It returns positive value if DECL1 is higher priority,
33884 negative value if DECL2 is higher priority and 0 if they are the
33885 same. */
33886
33887 static int
33888 ix86_compare_version_priority (tree decl1, tree decl2)
33889 {
33890 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
33891 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
33892
33893 return (int)priority1 - (int)priority2;
33894 }
33895
33896 /* V1 and V2 point to function versions with different priorities
33897 based on the target ISA. This function compares their priorities. */
33898
33899 static int
33900 feature_compare (const void *v1, const void *v2)
33901 {
33902 typedef struct _function_version_info
33903 {
33904 tree version_decl;
33905 tree predicate_chain;
33906 unsigned int dispatch_priority;
33907 } function_version_info;
33908
33909 const function_version_info c1 = *(const function_version_info *)v1;
33910 const function_version_info c2 = *(const function_version_info *)v2;
33911 return (c2.dispatch_priority - c1.dispatch_priority);
33912 }
33913
33914 /* This function generates the dispatch function for
33915 multi-versioned functions. DISPATCH_DECL is the function which will
33916 contain the dispatch logic. FNDECLS are the function choices for
33917 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
33918 in DISPATCH_DECL in which the dispatch code is generated. */
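/* Sketch of the dispatch logic this builds (illustrative only):

     __builtin_cpu_init ();
     if (<predicates of the highest priority version hold>)
       return &version_1;
     if (<predicates of the next version hold>)
       return &version_2;
     ...
     return &default_version;  */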
33919
33920 static int
33921 dispatch_function_versions (tree dispatch_decl,
33922 void *fndecls_p,
33923 basic_block *empty_bb)
33924 {
33925 tree default_decl;
33926 gimple ifunc_cpu_init_stmt;
33927 gimple_seq gseq;
33928 int ix;
33929 tree ele;
33930 vec<tree> *fndecls;
33931 unsigned int num_versions = 0;
33932 unsigned int actual_versions = 0;
33933 unsigned int i;
33934
33935 struct _function_version_info
33936 {
33937 tree version_decl;
33938 tree predicate_chain;
33939 unsigned int dispatch_priority;
33940 }*function_version_info;
33941
33942 gcc_assert (dispatch_decl != NULL
33943 && fndecls_p != NULL
33944 && empty_bb != NULL);
33945
33946 /* fndecls_p is actually a vector. */
33947 fndecls = static_cast<vec<tree> *> (fndecls_p);
33948
33949 /* At least one more version other than the default. */
33950 num_versions = fndecls->length ();
33951 gcc_assert (num_versions >= 2);
33952
33953 function_version_info = (struct _function_version_info *)
33954 XNEWVEC (struct _function_version_info, (num_versions - 1));
33955
33956 /* The first version in the vector is the default decl. */
33957 default_decl = (*fndecls)[0];
33958
33959 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
33960
33961 gseq = bb_seq (*empty_bb);
33962 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
33963 constructors, so explicitly call __builtin_cpu_init here. */
33964 ifunc_cpu_init_stmt = gimple_build_call_vec (
33965 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
33966 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
33967 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
33968 set_bb_seq (*empty_bb, gseq);
33969
33970 pop_cfun ();
33971
33972
33973 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
33974 {
33975 tree version_decl = ele;
33976 tree predicate_chain = NULL_TREE;
33977 unsigned int priority;
33978 /* Get attribute string, parse it and find the right predicate decl.
33979 The predicate function could be a lengthy combination of many
33980 features, like arch-type and various isa-variants. */
33981 priority = get_builtin_code_for_version (version_decl,
33982 &predicate_chain);
33983
33984 if (predicate_chain == NULL_TREE)
33985 continue;
33986
33987 function_version_info [actual_versions].version_decl = version_decl;
33988 function_version_info [actual_versions].predicate_chain
33989 = predicate_chain;
33990 function_version_info [actual_versions].dispatch_priority = priority;
33991 actual_versions++;
33992 }
33993
33994 /* Sort the versions according to descending order of dispatch priority. The
33995 priority is based on the ISA. This is not a perfect solution. There
33996 could still be ambiguity. If more than one function version is suitable
33997 to execute, which one should be dispatched? In the future, allow the user
33998 to specify a dispatch priority next to the version. */
33999 qsort (function_version_info, actual_versions,
34000 sizeof (struct _function_version_info), feature_compare);
34001
34002 for (i = 0; i < actual_versions; ++i)
34003 *empty_bb = add_condition_to_bb (dispatch_decl,
34004 function_version_info[i].version_decl,
34005 function_version_info[i].predicate_chain,
34006 *empty_bb);
34007
34008 /* Dispatch the default version at the end. */
34009 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34010 NULL, *empty_bb);
34011
34012 free (function_version_info);
34013 return 0;
34014 }
34015
34016 /* Comparator function to be used in the qsort routine to sort the
34017 attribute specification strings of the "target" attribute. */
34018
34019 static int
34020 attr_strcmp (const void *v1, const void *v2)
34021 {
34022 const char *c1 = *(char *const*)v1;
34023 const char *c2 = *(char *const*)v2;
34024 return strcmp (c1, c2);
34025 }
34026
34027 /* ARGLIST is the argument to target attribute. This function tokenizes
34028 the comma separated arguments, sorts them and returns a string which
34029 is a unique identifier for the comma separated arguments. It also
34030 replaces non-identifier characters "=,-" with "_". */
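/* For example (illustrative), the argument list ("avx", "arch=core2")
   becomes the string "arch_core2_avx": '=' and '-' are rewritten to
   '_', then the comma separated tokens are sorted and rejoined with
   '_'.  */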
34031
34032 static char *
34033 sorted_attr_string (tree arglist)
34034 {
34035 tree arg;
34036 size_t str_len_sum = 0;
34037 char **args = NULL;
34038 char *attr_str, *ret_str;
34039 char *attr = NULL;
34040 unsigned int argnum = 1;
34041 unsigned int i;
34042
34043 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34044 {
34045 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34046 size_t len = strlen (str);
34047 str_len_sum += len + 1;
34048 if (arg != arglist)
34049 argnum++;
34050 for (i = 0; i < strlen (str); i++)
34051 if (str[i] == ',')
34052 argnum++;
34053 }
34054
34055 attr_str = XNEWVEC (char, str_len_sum);
34056 str_len_sum = 0;
34057 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34058 {
34059 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34060 size_t len = strlen (str);
34061 memcpy (attr_str + str_len_sum, str, len);
34062 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
34063 str_len_sum += len + 1;
34064 }
34065
34066 /* Replace "=,-" with "_". */
34067 for (i = 0; i < strlen (attr_str); i++)
34068 if (attr_str[i] == '=' || attr_str[i]== '-')
34069 attr_str[i] = '_';
34070
34071 if (argnum == 1)
34072 return attr_str;
34073
34074 args = XNEWVEC (char *, argnum);
34075
34076 i = 0;
34077 attr = strtok (attr_str, ",");
34078 while (attr != NULL)
34079 {
34080 args[i] = attr;
34081 i++;
34082 attr = strtok (NULL, ",");
34083 }
34084
34085 qsort (args, argnum, sizeof (char *), attr_strcmp);
34086
34087 ret_str = XNEWVEC (char, str_len_sum);
34088 str_len_sum = 0;
34089 for (i = 0; i < argnum; i++)
34090 {
34091 size_t len = strlen (args[i]);
34092 memcpy (ret_str + str_len_sum, args[i], len);
34093 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
34094 str_len_sum += len + 1;
34095 }
34096
34097 XDELETEVEC (args);
34098 XDELETEVEC (attr_str);
34099 return ret_str;
34100 }
34101
34102 /* This function changes the assembler name for functions that are
34103 versions. If DECL is a function version and has a "target"
34104 attribute, it appends the attribute string to its assembler name. */
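/* For example (illustrative), a version of "foo" declared with
   __attribute__ ((target ("arch=core2"))) gets the assembler name
   "foo.arch_core2", while the "default" version keeps its original
   name.  */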
34105
34106 static tree
34107 ix86_mangle_function_version_assembler_name (tree decl, tree id)
34108 {
34109 tree version_attr;
34110 const char *orig_name, *version_string;
34111 char *attr_str, *assembler_name;
34112
34113 if (DECL_DECLARED_INLINE_P (decl)
34114 && lookup_attribute ("gnu_inline",
34115 DECL_ATTRIBUTES (decl)))
34116 error_at (DECL_SOURCE_LOCATION (decl),
34117 "Function versions cannot be marked as gnu_inline,"
34118 " bodies have to be generated");
34119
34120 if (DECL_VIRTUAL_P (decl)
34121 || DECL_VINDEX (decl))
34122 sorry ("Virtual function multiversioning not supported");
34123
34124 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34125
34126 /* target attribute string cannot be NULL. */
34127 gcc_assert (version_attr != NULL_TREE);
34128
34129 orig_name = IDENTIFIER_POINTER (id);
34130 version_string
34131 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
34132
34133 if (strcmp (version_string, "default") == 0)
34134 return id;
34135
34136 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
34137 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
34138
34139 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
34140
34141 /* Allow assembler name to be modified if already set. */
34142 if (DECL_ASSEMBLER_NAME_SET_P (decl))
34143 SET_DECL_RTL (decl, NULL);
34144
34145 tree ret = get_identifier (assembler_name);
34146 XDELETEVEC (attr_str);
34147 XDELETEVEC (assembler_name);
34148 return ret;
34149 }
34150
34151 /* This function returns true if FN1 and FN2 are versions of the same function,
34152 that is, the target strings of the function decls are different. This assumes
34153 that FN1 and FN2 have the same signature. */
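/* For example (illustrative), the declarations

     int foo (void) __attribute__ ((target ("default")));
     int foo (void) __attribute__ ((target ("avx")));

   are treated as versions of the same function because their sorted
   target strings differ.  */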
34154
34155 static bool
34156 ix86_function_versions (tree fn1, tree fn2)
34157 {
34158 tree attr1, attr2;
34159 char *target1, *target2;
34160 bool result;
34161
34162 if (TREE_CODE (fn1) != FUNCTION_DECL
34163 || TREE_CODE (fn2) != FUNCTION_DECL)
34164 return false;
34165
34166 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
34167 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
34168
34169 /* At least one function decl should have the target attribute specified. */
34170 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
34171 return false;
34172
34173 /* Diagnose missing target attribute if one of the decls is already
34174 multi-versioned. */
34175 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
34176 {
34177 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
34178 {
34179 if (attr2 != NULL_TREE)
34180 {
34181 tree tem = fn1;
34182 fn1 = fn2;
34183 fn2 = tem;
34184 attr1 = attr2;
34185 }
34186 error_at (DECL_SOURCE_LOCATION (fn2),
34187 "missing %<target%> attribute for multi-versioned %D",
34188 fn2);
34189 inform (DECL_SOURCE_LOCATION (fn1),
34190 "previous declaration of %D", fn1);
34191 /* Prevent diagnosing of the same error multiple times. */
34192 DECL_ATTRIBUTES (fn2)
34193 = tree_cons (get_identifier ("target"),
34194 copy_node (TREE_VALUE (attr1)),
34195 DECL_ATTRIBUTES (fn2));
34196 }
34197 return false;
34198 }
34199
34200 target1 = sorted_attr_string (TREE_VALUE (attr1));
34201 target2 = sorted_attr_string (TREE_VALUE (attr2));
34202
34203 /* The sorted target strings must be different for fn1 and fn2
34204 to be versions. */
34205 if (strcmp (target1, target2) == 0)
34206 result = false;
34207 else
34208 result = true;
34209
34210 XDELETEVEC (target1);
34211 XDELETEVEC (target2);
34212
34213 return result;
34214 }
34215
34216 static tree
34217 ix86_mangle_decl_assembler_name (tree decl, tree id)
34218 {
34219 /* For function version, add the target suffix to the assembler name. */
34220 if (TREE_CODE (decl) == FUNCTION_DECL
34221 && DECL_FUNCTION_VERSIONED (decl))
34222 id = ix86_mangle_function_version_assembler_name (decl, id);
34223 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
34224 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
34225 #endif
34226
34227 return id;
34228 }
34229
34230 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
34231 is true, append the full path name of the source file. */
34232
34233 static char *
34234 make_name (tree decl, const char *suffix, bool make_unique)
34235 {
34236 char *global_var_name;
34237 int name_len;
34238 const char *name;
34239 const char *unique_name = NULL;
34240
34241 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
34242
34243 /* Get a unique name that can be used globally without any chances
34244 of collision at link time. */
34245 if (make_unique)
34246 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
34247
34248 name_len = strlen (name) + strlen (suffix) + 2;
34249
34250 if (make_unique)
34251 name_len += strlen (unique_name) + 1;
34252 global_var_name = XNEWVEC (char, name_len);
34253
34254 /* Use '.' to concatenate names as it is demangler friendly. */
34255 if (make_unique)
34256 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
34257 suffix);
34258 else
34259 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
34260
34261 return global_var_name;
34262 }
34263
34264 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
34265
34266 /* Make a dispatcher declaration for the multi-versioned function DECL.
34267 Calls to DECL function will be replaced with calls to the dispatcher
34268 by the front-end. Return the decl created. */
34269
34270 static tree
34271 make_dispatcher_decl (const tree decl)
34272 {
34273 tree func_decl;
34274 char *func_name;
34275 tree fn_type, func_type;
34276 bool is_uniq = false;
34277
34278 if (TREE_PUBLIC (decl) == 0)
34279 is_uniq = true;
34280
34281 func_name = make_name (decl, "ifunc", is_uniq);
34282
34283 fn_type = TREE_TYPE (decl);
34284 func_type = build_function_type (TREE_TYPE (fn_type),
34285 TYPE_ARG_TYPES (fn_type));
34286
34287 func_decl = build_fn_decl (func_name, func_type);
34288 XDELETEVEC (func_name);
34289 TREE_USED (func_decl) = 1;
34290 DECL_CONTEXT (func_decl) = NULL_TREE;
34291 DECL_INITIAL (func_decl) = error_mark_node;
34292 DECL_ARTIFICIAL (func_decl) = 1;
34293 /* Mark this func as external; the resolver will flip it again if
34294 it gets generated. */
34295 DECL_EXTERNAL (func_decl) = 1;
34296 /* This will be of type IFUNC, and IFUNCs have to be externally visible. */
34297 TREE_PUBLIC (func_decl) = 1;
34298
34299 return func_decl;
34300 }
34301
34302 #endif
34303
34304 /* Returns true if DECL is multi-versioned and is the default function,
34305 that is, it is not tagged with a target-specific optimization. */
34306
34307 static bool
34308 is_function_default_version (const tree decl)
34309 {
34310 if (TREE_CODE (decl) != FUNCTION_DECL
34311 || !DECL_FUNCTION_VERSIONED (decl))
34312 return false;
34313 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34314 gcc_assert (attr);
34315 attr = TREE_VALUE (TREE_VALUE (attr));
34316 return (TREE_CODE (attr) == STRING_CST
34317 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
34318 }
34319
34320 /* Make a dispatcher declaration for the multi-versioned function DECL.
34321 Calls to DECL function will be replaced with calls to the dispatcher
34322 by the front-end. Returns the decl of the dispatcher function. */
34323
34324 static tree
34325 ix86_get_function_versions_dispatcher (void *decl)
34326 {
34327 tree fn = (tree) decl;
34328 struct cgraph_node *node = NULL;
34329 struct cgraph_node *default_node = NULL;
34330 struct cgraph_function_version_info *node_v = NULL;
34331 struct cgraph_function_version_info *first_v = NULL;
34332
34333 tree dispatch_decl = NULL;
34334
34335 struct cgraph_function_version_info *default_version_info = NULL;
34336
34337 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
34338
34339 node = cgraph_node::get (fn);
34340 gcc_assert (node != NULL);
34341
34342 node_v = node->function_version ();
34343 gcc_assert (node_v != NULL);
34344
34345 if (node_v->dispatcher_resolver != NULL)
34346 return node_v->dispatcher_resolver;
34347
34348 /* Find the default version and make it the first node. */
34349 first_v = node_v;
34350 /* Go to the beginning of the chain. */
34351 while (first_v->prev != NULL)
34352 first_v = first_v->prev;
34353 default_version_info = first_v;
34354 while (default_version_info != NULL)
34355 {
34356 if (is_function_default_version
34357 (default_version_info->this_node->decl))
34358 break;
34359 default_version_info = default_version_info->next;
34360 }
34361
34362 /* If there is no default node, just return NULL. */
34363 if (default_version_info == NULL)
34364 return NULL;
34365
34366 /* Make default info the first node. */
34367 if (first_v != default_version_info)
34368 {
34369 default_version_info->prev->next = default_version_info->next;
34370 if (default_version_info->next)
34371 default_version_info->next->prev = default_version_info->prev;
34372 first_v->prev = default_version_info;
34373 default_version_info->next = first_v;
34374 default_version_info->prev = NULL;
34375 }
34376
34377 default_node = default_version_info->this_node;
34378
34379 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
34380 if (targetm.has_ifunc_p ())
34381 {
34382 struct cgraph_function_version_info *it_v = NULL;
34383 struct cgraph_node *dispatcher_node = NULL;
34384 struct cgraph_function_version_info *dispatcher_version_info = NULL;
34385
34386 /* Right now, the dispatching is done via ifunc. */
34387 dispatch_decl = make_dispatcher_decl (default_node->decl);
34388
34389 dispatcher_node = cgraph_node::get_create (dispatch_decl);
34390 gcc_assert (dispatcher_node != NULL);
34391 dispatcher_node->dispatcher_function = 1;
34392 dispatcher_version_info
34393 = dispatcher_node->insert_new_function_version ();
34394 dispatcher_version_info->next = default_version_info;
34395 dispatcher_node->definition = 1;
34396
34397 /* Set the dispatcher for all the versions. */
34398 it_v = default_version_info;
34399 while (it_v != NULL)
34400 {
34401 it_v->dispatcher_resolver = dispatch_decl;
34402 it_v = it_v->next;
34403 }
34404 }
34405 else
34406 #endif
34407 {
34408 error_at (DECL_SOURCE_LOCATION (default_node->decl),
34409 "multiversioning needs ifunc which is not supported "
34410 "on this target");
34411 }
34412
34413 return dispatch_decl;
34414 }
34415
34416 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
34417 it to CHAIN. */
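/* For example (illustrative), make_attribute ("ifunc", "foo.resolver",
   NULL_TREE) builds the tree form of __attribute__ ((ifunc
   ("foo.resolver"))), which is how the dispatcher is tied to its
   resolver below.  */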
34418
34419 static tree
34420 make_attribute (const char *name, const char *arg_name, tree chain)
34421 {
34422 tree attr_name;
34423 tree attr_arg_name;
34424 tree attr_args;
34425 tree attr;
34426
34427 attr_name = get_identifier (name);
34428 attr_arg_name = build_string (strlen (arg_name), arg_name);
34429 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
34430 attr = tree_cons (attr_name, attr_args, chain);
34431 return attr;
34432 }
34433
34434 /* Make the resolver function decl to dispatch the versions of
34435 a multi-versioned function, DEFAULT_DECL. Create an
34436 empty basic block in the resolver and store the pointer in
34437 EMPTY_BB. Return the decl of the resolver function. */
34438
34439 static tree
34440 make_resolver_func (const tree default_decl,
34441 const tree dispatch_decl,
34442 basic_block *empty_bb)
34443 {
34444 char *resolver_name;
34445 tree decl, type, decl_name, t;
34446 bool is_uniq = false;
34447
34448 /* IFUNCs have to be globally visible. So, if the default_decl is
34449 not, then the name of the IFUNC should be made unique. */
34450 if (TREE_PUBLIC (default_decl) == 0)
34451 is_uniq = true;
34452
34453 /* Append the filename to the resolver function if the versions are
34454 not externally visible. This is because the resolver function has
34455 to be externally visible for the loader to find it. So, appending
34456 the filename will prevent conflicts with a resolver function from
34457 another module which is based on the same version name. */
34458 resolver_name = make_name (default_decl, "resolver", is_uniq);
34459
34460 /* The resolver function should return a (void *). */
34461 type = build_function_type_list (ptr_type_node, NULL_TREE);
34462
34463 decl = build_fn_decl (resolver_name, type);
34464 decl_name = get_identifier (resolver_name);
34465 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
34466
34467 DECL_NAME (decl) = decl_name;
34468 TREE_USED (decl) = 1;
34469 DECL_ARTIFICIAL (decl) = 1;
34470 DECL_IGNORED_P (decl) = 0;
34471 /* IFUNC resolvers have to be externally visible. */
34472 TREE_PUBLIC (decl) = 1;
34473 DECL_UNINLINABLE (decl) = 1;
34474
34475 /* Resolver is not external, body is generated. */
34476 DECL_EXTERNAL (decl) = 0;
34477 DECL_EXTERNAL (dispatch_decl) = 0;
34478
34479 DECL_CONTEXT (decl) = NULL_TREE;
34480 DECL_INITIAL (decl) = make_node (BLOCK);
34481 DECL_STATIC_CONSTRUCTOR (decl) = 0;
34482
34483 if (DECL_COMDAT_GROUP (default_decl)
34484 || TREE_PUBLIC (default_decl))
34485 {
34486 /* In this case, each translation unit with a call to this
34487 versioned function will put out a resolver. Ensure it
34488 is comdat to keep just one copy. */
34489 DECL_COMDAT (decl) = 1;
34490 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
34491 }
34492 /* Build result decl and add to function_decl. */
34493 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
34494 DECL_ARTIFICIAL (t) = 1;
34495 DECL_IGNORED_P (t) = 1;
34496 DECL_RESULT (decl) = t;
34497
34498 gimplify_function_tree (decl);
34499 push_cfun (DECL_STRUCT_FUNCTION (decl));
34500 *empty_bb = init_lowered_empty_function (decl, false);
34501
34502 cgraph_node::add_new_function (decl, true);
34503 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
34504
34505 pop_cfun ();
34506
34507 gcc_assert (dispatch_decl != NULL);
34508 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
34509 DECL_ATTRIBUTES (dispatch_decl)
34510 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
34511
34512 /* Create the alias for dispatch to resolver here. */
34513 /*cgraph_create_function_alias (dispatch_decl, decl);*/
34514 cgraph_node::create_same_body_alias (dispatch_decl, decl);
34515 XDELETEVEC (resolver_name);
34516 return decl;
34517 }
34518
34519 /* Generate the dispatching code body to dispatch multi-versioned function
34520 DECL. The target hook is called to process the "target" attributes and
34521 provide the code to dispatch the right function at run-time. NODE points
34522 to the dispatcher decl whose body will be created. */
34523
34524 static tree
34525 ix86_generate_version_dispatcher_body (void *node_p)
34526 {
34527 tree resolver_decl;
34528 basic_block empty_bb;
34529 tree default_ver_decl;
34530 struct cgraph_node *versn;
34531 struct cgraph_node *node;
34532
34533 struct cgraph_function_version_info *node_version_info = NULL;
34534 struct cgraph_function_version_info *versn_info = NULL;
34535
34536 node = (cgraph_node *)node_p;
34537
34538 node_version_info = node->function_version ();
34539 gcc_assert (node->dispatcher_function
34540 && node_version_info != NULL);
34541
34542 if (node_version_info->dispatcher_resolver)
34543 return node_version_info->dispatcher_resolver;
34544
34545 /* The first version in the chain corresponds to the default version. */
34546 default_ver_decl = node_version_info->next->this_node->decl;
34547
34548 /* node is going to be an alias, so remove the finalized bit. */
34549 node->definition = false;
34550
34551 resolver_decl = make_resolver_func (default_ver_decl,
34552 node->decl, &empty_bb);
34553
34554 node_version_info->dispatcher_resolver = resolver_decl;
34555
34556 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
34557
34558 auto_vec<tree, 2> fn_ver_vec;
34559
34560 for (versn_info = node_version_info->next; versn_info;
34561 versn_info = versn_info->next)
34562 {
34563 versn = versn_info->this_node;
34564 /* Check for virtual functions here again, as by this time it should
34565 have been determined if this function needs a vtable index or
34566 not. This happens for methods in derived classes that override
34567 virtual methods in base classes but are not explicitly marked as
34568 virtual. */
34569 if (DECL_VINDEX (versn->decl))
34570 sorry ("Virtual function multiversioning not supported");
34571
34572 fn_ver_vec.safe_push (versn->decl);
34573 }
34574
34575 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
34576 cgraph_edge::rebuild_edges ();
34577 pop_cfun ();
34578 return resolver_decl;
34579 }
34580 /* This builds the processor_model struct type defined in
34581 libgcc/config/i386/cpuinfo.c. */
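/* The type built here is meant to mirror (sketch; cpuinfo.c has the
   authoritative definition):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */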
34582
34583 static tree
34584 build_processor_model_struct (void)
34585 {
34586 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
34587 "__cpu_features"};
34588 tree field = NULL_TREE, field_chain = NULL_TREE;
34589 int i;
34590 tree type = make_node (RECORD_TYPE);
34591
34592 /* The first 3 fields are unsigned int. */
34593 for (i = 0; i < 3; ++i)
34594 {
34595 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
34596 get_identifier (field_name[i]), unsigned_type_node);
34597 if (field_chain != NULL_TREE)
34598 DECL_CHAIN (field) = field_chain;
34599 field_chain = field;
34600 }
34601
34602 /* The last field is an array of unsigned integers of size one. */
34603 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
34604 get_identifier (field_name[3]),
34605 build_array_type (unsigned_type_node,
34606 build_index_type (size_one_node)));
34607 if (field_chain != NULL_TREE)
34608 DECL_CHAIN (field) = field_chain;
34609 field_chain = field;
34610
34611 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
34612 return type;
34613 }
34614
34615 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
34616
34617 static tree
34618 make_var_decl (tree type, const char *name)
34619 {
34620 tree new_decl;
34621
34622 new_decl = build_decl (UNKNOWN_LOCATION,
34623 VAR_DECL,
34624 get_identifier(name),
34625 type);
34626
34627 DECL_EXTERNAL (new_decl) = 1;
34628 TREE_STATIC (new_decl) = 1;
34629 TREE_PUBLIC (new_decl) = 1;
34630 DECL_INITIAL (new_decl) = 0;
34631 DECL_ARTIFICIAL (new_decl) = 0;
34632 DECL_PRESERVE_P (new_decl) = 1;
34633
34634 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
34635 assemble_variable (new_decl, 0, 0, 0);
34636
34637 return new_decl;
34638 }
34639
34640 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
34641 into an integer defined in libgcc/config/i386/cpuinfo.c */
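/* For example (illustrative), __builtin_cpu_supports ("avx2") is folded
   into a test of the form

     (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))

   and __builtin_cpu_is ("intel") into a comparison of
   __cpu_model.__cpu_vendor against M_INTEL.  */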
34642
34643 static tree
34644 fold_builtin_cpu (tree fndecl, tree *args)
34645 {
34646 unsigned int i;
34647 enum ix86_builtins fn_code = (enum ix86_builtins)
34648 DECL_FUNCTION_CODE (fndecl);
34649 tree param_string_cst = NULL;
34650
34651 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
34652 enum processor_features
34653 {
34654 F_CMOV = 0,
34655 F_MMX,
34656 F_POPCNT,
34657 F_SSE,
34658 F_SSE2,
34659 F_SSE3,
34660 F_SSSE3,
34661 F_SSE4_1,
34662 F_SSE4_2,
34663 F_AVX,
34664 F_AVX2,
34665 F_SSE4_A,
34666 F_FMA4,
34667 F_XOP,
34668 F_FMA,
34669 F_MAX
34670 };
34671
34672 /* These are the values for vendor types and cpu types and subtypes
34673 in cpuinfo.c. Cpu type and subtype values must have the
34674 corresponding start value subtracted from them. */
34675 enum processor_model
34676 {
34677 M_INTEL = 1,
34678 M_AMD,
34679 M_CPU_TYPE_START,
34680 M_INTEL_BONNELL,
34681 M_INTEL_CORE2,
34682 M_INTEL_COREI7,
34683 M_AMDFAM10H,
34684 M_AMDFAM15H,
34685 M_INTEL_SILVERMONT,
34686 M_AMD_BTVER1,
34687 M_AMD_BTVER2,
34688 M_CPU_SUBTYPE_START,
34689 M_INTEL_COREI7_NEHALEM,
34690 M_INTEL_COREI7_WESTMERE,
34691 M_INTEL_COREI7_SANDYBRIDGE,
34692 M_AMDFAM10H_BARCELONA,
34693 M_AMDFAM10H_SHANGHAI,
34694 M_AMDFAM10H_ISTANBUL,
34695 M_AMDFAM15H_BDVER1,
34696 M_AMDFAM15H_BDVER2,
34697 M_AMDFAM15H_BDVER3,
34698 M_AMDFAM15H_BDVER4,
34699 M_INTEL_COREI7_IVYBRIDGE,
34700 M_INTEL_COREI7_HASWELL
34701 };
34702
34703 static struct _arch_names_table
34704 {
34705 const char *const name;
34706 const enum processor_model model;
34707 }
34708 const arch_names_table[] =
34709 {
34710 {"amd", M_AMD},
34711 {"intel", M_INTEL},
34712 {"atom", M_INTEL_BONNELL},
34713 {"slm", M_INTEL_SILVERMONT},
34714 {"core2", M_INTEL_CORE2},
34715 {"corei7", M_INTEL_COREI7},
34716 {"nehalem", M_INTEL_COREI7_NEHALEM},
34717 {"westmere", M_INTEL_COREI7_WESTMERE},
34718 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
34719 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
34720 {"haswell", M_INTEL_COREI7_HASWELL},
34721 {"bonnell", M_INTEL_BONNELL},
34722 {"silvermont", M_INTEL_SILVERMONT},
34723 {"amdfam10h", M_AMDFAM10H},
34724 {"barcelona", M_AMDFAM10H_BARCELONA},
34725 {"shanghai", M_AMDFAM10H_SHANGHAI},
34726 {"istanbul", M_AMDFAM10H_ISTANBUL},
34727 {"btver1", M_AMD_BTVER1},
34728 {"amdfam15h", M_AMDFAM15H},
34729 {"bdver1", M_AMDFAM15H_BDVER1},
34730 {"bdver2", M_AMDFAM15H_BDVER2},
34731 {"bdver3", M_AMDFAM15H_BDVER3},
34732 {"bdver4", M_AMDFAM15H_BDVER4},
34733 {"btver2", M_AMD_BTVER2},
34734 };
34735
34736 static struct _isa_names_table
34737 {
34738 const char *const name;
34739 const enum processor_features feature;
34740 }
34741 const isa_names_table[] =
34742 {
34743 {"cmov", F_CMOV},
34744 {"mmx", F_MMX},
34745 {"popcnt", F_POPCNT},
34746 {"sse", F_SSE},
34747 {"sse2", F_SSE2},
34748 {"sse3", F_SSE3},
34749 {"ssse3", F_SSSE3},
34750 {"sse4a", F_SSE4_A},
34751 {"sse4.1", F_SSE4_1},
34752 {"sse4.2", F_SSE4_2},
34753 {"avx", F_AVX},
34754 {"fma4", F_FMA4},
34755 {"xop", F_XOP},
34756 {"fma", F_FMA},
34757 {"avx2", F_AVX2}
34758 };
34759
34760 tree __processor_model_type = build_processor_model_struct ();
34761 tree __cpu_model_var = make_var_decl (__processor_model_type,
34762 "__cpu_model");
34763
34764
34765 varpool_node::add (__cpu_model_var);
34766
34767 gcc_assert ((args != NULL) && (*args != NULL));
34768
34769 param_string_cst = *args;
34770 while (param_string_cst
34771 && TREE_CODE (param_string_cst) != STRING_CST)
34772 {
34773 /* *args must be an expr that can contain other EXPRs leading to a
34774 STRING_CST. */
34775 if (!EXPR_P (param_string_cst))
34776 {
34777 error ("Parameter to builtin must be a string constant or literal");
34778 return integer_zero_node;
34779 }
34780 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
34781 }
34782
34783 gcc_assert (param_string_cst);
34784
34785 if (fn_code == IX86_BUILTIN_CPU_IS)
34786 {
34787 tree ref;
34788 tree field;
34789 tree final;
34790
34791 unsigned int field_val = 0;
34792 unsigned int NUM_ARCH_NAMES
34793 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
34794
34795 for (i = 0; i < NUM_ARCH_NAMES; i++)
34796 if (strcmp (arch_names_table[i].name,
34797 TREE_STRING_POINTER (param_string_cst)) == 0)
34798 break;
34799
34800 if (i == NUM_ARCH_NAMES)
34801 {
34802 error ("Parameter to builtin not valid: %s",
34803 TREE_STRING_POINTER (param_string_cst));
34804 return integer_zero_node;
34805 }
34806
34807 field = TYPE_FIELDS (__processor_model_type);
34808 field_val = arch_names_table[i].model;
34809
34810 /* CPU types are stored in the next field. */
34811 if (field_val > M_CPU_TYPE_START
34812 && field_val < M_CPU_SUBTYPE_START)
34813 {
34814 field = DECL_CHAIN (field);
34815 field_val -= M_CPU_TYPE_START;
34816 }
34817
34818 /* CPU subtypes are stored two fields down, after the CPU type field.  */
34819 if (field_val > M_CPU_SUBTYPE_START)
34820 {
34821 field = DECL_CHAIN (DECL_CHAIN (field));
34822 field_val -= M_CPU_SUBTYPE_START;
34823 }
34824
34825 /* Get the appropriate field in __cpu_model. */
34826 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
34827 field, NULL_TREE);
34828
34829 /* Check the value. */
34830 final = build2 (EQ_EXPR, unsigned_type_node, ref,
34831 build_int_cstu (unsigned_type_node, field_val));
34832 return build1 (CONVERT_EXPR, integer_type_node, final);
34833 }
34834 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
34835 {
34836 tree ref;
34837 tree array_elt;
34838 tree field;
34839 tree final;
34840
34841 unsigned int field_val = 0;
34842 unsigned int NUM_ISA_NAMES
34843 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
34844
34845 for (i = 0; i < NUM_ISA_NAMES; i++)
34846 if (strcmp (isa_names_table[i].name,
34847 TREE_STRING_POINTER (param_string_cst)) == 0)
34848 break;
34849
34850 if (i == NUM_ISA_NAMES)
34851 {
34852 error ("Parameter to builtin not valid: %s",
34853 TREE_STRING_POINTER (param_string_cst));
34854 return integer_zero_node;
34855 }
34856
34857 field = TYPE_FIELDS (__processor_model_type);
34858 /* Get the last field, which is __cpu_features. */
34859 while (DECL_CHAIN (field))
34860 field = DECL_CHAIN (field);
34861
34862 /* Get the appropriate field: __cpu_model.__cpu_features */
34863 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
34864 field, NULL_TREE);
34865
34866 /* Access the 0th element of __cpu_features array. */
34867 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
34868 integer_zero_node, NULL_TREE, NULL_TREE);
34869
34870 field_val = (1 << isa_names_table[i].feature);
34871 /* Return __cpu_model.__cpu_features[0] & field_val */
34872 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
34873 build_int_cstu (unsigned_type_node, field_val));
34874 return build1 (CONVERT_EXPR, integer_type_node, final);
34875 }
34876 gcc_unreachable ();
34877 }
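/* As an illustration of the folding above (field and constant names as
   used by libgcc's cpuinfo.c; a sketch only, not the exact trees built):

     __builtin_cpu_is ("amd")        -> __cpu_model.__cpu_vendor == M_AMD
     __builtin_cpu_is ("haswell")    -> __cpu_model.__cpu_subtype
                                        == (M_INTEL_COREI7_HASWELL
                                            - M_CPU_SUBTYPE_START)
     __builtin_cpu_supports ("avx2") -> __cpu_model.__cpu_features[0]
                                        & (1 << F_AVX2)  */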
34878
34879 static tree
34880 ix86_fold_builtin (tree fndecl, int n_args,
34881 tree *args, bool ignore ATTRIBUTE_UNUSED)
34882 {
34883 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
34884 {
34885 enum ix86_builtins fn_code = (enum ix86_builtins)
34886 DECL_FUNCTION_CODE (fndecl);
34887 if (fn_code == IX86_BUILTIN_CPU_IS
34888 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
34889 {
34890 gcc_assert (n_args == 1);
34891 return fold_builtin_cpu (fndecl, args);
34892 }
34893 }
34894
34895 #ifdef SUBTARGET_FOLD_BUILTIN
34896 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
34897 #endif
34898
34899 return NULL_TREE;
34900 }
34901
34902 /* Make builtins to detect cpu type and features supported.  NAME is
34903 the builtin name, CODE is the builtin code, FTYPE is the function
34904 type of the builtin, and IS_CONST says whether it is marked TREE_READONLY.  */
34905
34906 static void
34907 make_cpu_type_builtin (const char* name, int code,
34908 enum ix86_builtin_func_type ftype, bool is_const)
34909 {
34910 tree decl;
34911 tree type;
34912
34913 type = ix86_get_builtin_func_type (ftype);
34914 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
34915 NULL, NULL_TREE);
34916 gcc_assert (decl != NULL_TREE);
34917 ix86_builtins[(int) code] = decl;
34918 TREE_READONLY (decl) = is_const;
34919 }
34920
34921 /* Make builtins to get CPU type and features supported. The created
34922 builtins are:
34923
34924 __builtin_cpu_init (), to detect cpu type and features,
34925 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
34926 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
34927 */
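/* Illustrative use from user code (a sketch only; use_haswell_path and
   use_avx2_path are hypothetical functions, not part of GCC):

     __builtin_cpu_init ();
     if (__builtin_cpu_is ("haswell"))
       use_haswell_path ();
     else if (__builtin_cpu_supports ("avx2"))
       use_avx2_path ();
*/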
34928
34929 static void
34930 ix86_init_platform_type_builtins (void)
34931 {
34932 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
34933 INT_FTYPE_VOID, false);
34934 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
34935 INT_FTYPE_PCCHAR, true);
34936 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
34937 INT_FTYPE_PCCHAR, true);
34938 }
34939
34940 /* Internal method for ix86_init_builtins. */
34941
34942 static void
34943 ix86_init_builtins_va_builtins_abi (void)
34944 {
34945 tree ms_va_ref, sysv_va_ref;
34946 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
34947 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
34948 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
34949 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
34950
34951 if (!TARGET_64BIT)
34952 return;
34953 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
34954 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
34955 ms_va_ref = build_reference_type (ms_va_list_type_node);
34956 sysv_va_ref =
34957 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
34958
34959 fnvoid_va_end_ms =
34960 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
34961 fnvoid_va_start_ms =
34962 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
34963 fnvoid_va_end_sysv =
34964 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
34965 fnvoid_va_start_sysv =
34966 build_varargs_function_type_list (void_type_node, sysv_va_ref,
34967 NULL_TREE);
34968 fnvoid_va_copy_ms =
34969 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
34970 NULL_TREE);
34971 fnvoid_va_copy_sysv =
34972 build_function_type_list (void_type_node, sysv_va_ref,
34973 sysv_va_ref, NULL_TREE);
34974
34975 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
34976 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
34977 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
34978 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
34979 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
34980 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
34981 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
34982 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
34983 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
34984 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
34985 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
34986 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
34987 }
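/* A sketch of how the builtins registered above appear in user code
   (illustrative only; "sum" is a hypothetical function, not part of GCC):

     int __attribute__ ((ms_abi))
     sum (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;

       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }
*/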
34988
34989 static void
34990 ix86_init_builtin_types (void)
34991 {
34992 tree float128_type_node, float80_type_node;
34993
34994 /* The __float80 type. */
34995 float80_type_node = long_double_type_node;
34996 if (TYPE_MODE (float80_type_node) != XFmode)
34997 {
34998 /* The __float80 type. */
34999 float80_type_node = make_node (REAL_TYPE);
35000
35001 TYPE_PRECISION (float80_type_node) = 80;
35002 layout_type (float80_type_node);
35003 }
35004 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35005
35006 /* The __float128 type. */
35007 float128_type_node = make_node (REAL_TYPE);
35008 TYPE_PRECISION (float128_type_node) = 128;
35009 layout_type (float128_type_node);
35010 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35011
35012 /* This macro is built by i386-builtin-types.awk. */
35013 DEFINE_BUILTIN_PRIMITIVE_TYPES;
35014 }
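/* Once registered, user code can spell these types directly, e.g.
   (illustrative only; the w/q literal suffixes are GNU extensions):

     __float80  x = 1.5w;
     __float128 y = 2.5q;
*/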
35015
35016 static void
35017 ix86_init_builtins (void)
35018 {
35019 tree t;
35020
35021 ix86_init_builtin_types ();
35022
35023 /* Builtins to get CPU type and features. */
35024 ix86_init_platform_type_builtins ();
35025
35026 /* TFmode support builtins. */
35027 def_builtin_const (0, "__builtin_infq",
35028 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35029 def_builtin_const (0, "__builtin_huge_valq",
35030 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
35031
35032 /* We will expand them to a normal call if SSE isn't available, since
35033 they are used by libgcc.  */
35034 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
35035 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
35036 BUILT_IN_MD, "__fabstf2", NULL_TREE);
35037 TREE_READONLY (t) = 1;
35038 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
35039
35040 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
35041 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
35042 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
35043 TREE_READONLY (t) = 1;
35044 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
35045
35046 ix86_init_tm_builtins ();
35047 ix86_init_mmx_sse_builtins ();
35048
35049 if (TARGET_LP64)
35050 ix86_init_builtins_va_builtins_abi ();
35051
35052 #ifdef SUBTARGET_INIT_BUILTINS
35053 SUBTARGET_INIT_BUILTINS;
35054 #endif
35055 }
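/* Sketch of the TFmode builtins registered above as seen from user code
   (illustrative only; "x" is a hypothetical __float128 value):

     __float128 inf = __builtin_infq ();
     __float128 mag = __builtin_fabsq (x);
     __float128 neg = __builtin_copysignq (mag, -1.0);

   Without SSE these expand to calls to __fabstf2 and __copysigntf3
   in libgcc.  */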
35056
35057 /* Return the ix86 builtin for CODE. */
35058
35059 static tree
35060 ix86_builtin_decl (unsigned code, bool)
35061 {
35062 if (code >= IX86_BUILTIN_MAX)
35063 return error_mark_node;
35064
35065 return ix86_builtins[code];
35066 }
35067
35068 /* Errors in the source file can cause expand_expr to return const0_rtx
35069 where we expect a vector. To avoid crashing, use one of the vector
35070 clear instructions. */
35071 static rtx
35072 safe_vector_operand (rtx x, enum machine_mode mode)
35073 {
35074 if (x == const0_rtx)
35075 x = CONST0_RTX (mode);
35076 return x;
35077 }
35078
35079 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
35080
35081 static rtx
35082 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
35083 {
35084 rtx pat;
35085 tree arg0 = CALL_EXPR_ARG (exp, 0);
35086 tree arg1 = CALL_EXPR_ARG (exp, 1);
35087 rtx op0 = expand_normal (arg0);
35088 rtx op1 = expand_normal (arg1);
35089 enum machine_mode tmode = insn_data[icode].operand[0].mode;
35090 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
35091 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
35092
35093 if (VECTOR_MODE_P (mode0))
35094 op0 = safe_vector_operand (op0, mode0);
35095 if (VECTOR_MODE_P (mode1))
35096 op1 = safe_vector_operand (op1, mode1);
35097
35098 if (optimize || !target
35099 || GET_MODE (target) != tmode
35100 || !insn_data[icode].operand[0].predicate (target, tmode))
35101 target = gen_reg_rtx (tmode);
35102
35103 if (GET_MODE (op1) == SImode && mode1 == TImode)
35104 {
35105 rtx x = gen_reg_rtx (V4SImode);
35106 emit_insn (gen_sse2_loadd (x, op1));
35107 op1 = gen_lowpart (TImode, x);
35108 }
35109
35110 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35111 op0 = copy_to_mode_reg (mode0, op0);
35112 if (!insn_data[icode].operand[2].predicate (op1, mode1))
35113 op1 = copy_to_mode_reg (mode1, op1);
35114
35115 pat = GEN_FCN (icode) (target, op0, op1);
35116 if (! pat)
35117 return 0;
35118
35119 emit_insn (pat);
35120
35121 return target;
35122 }
35123
35124 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
35125
35126 static rtx
35127 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
35128 enum ix86_builtin_func_type m_type,
35129 enum rtx_code sub_code)
35130 {
35131 rtx pat;
35132 int i;
35133 int nargs;
35134 bool comparison_p = false;
35135 bool tf_p = false;
35136 bool last_arg_constant = false;
35137 int num_memory = 0;
35138 struct {
35139 rtx op;
35140 enum machine_mode mode;
35141 } args[4];
35142
35143 enum machine_mode tmode = insn_data[icode].operand[0].mode;
35144
35145 switch (m_type)
35146 {
35147 case MULTI_ARG_4_DF2_DI_I:
35148 case MULTI_ARG_4_DF2_DI_I1:
35149 case MULTI_ARG_4_SF2_SI_I:
35150 case MULTI_ARG_4_SF2_SI_I1:
35151 nargs = 4;
35152 last_arg_constant = true;
35153 break;
35154
35155 case MULTI_ARG_3_SF:
35156 case MULTI_ARG_3_DF:
35157 case MULTI_ARG_3_SF2:
35158 case MULTI_ARG_3_DF2:
35159 case MULTI_ARG_3_DI:
35160 case MULTI_ARG_3_SI:
35161 case MULTI_ARG_3_SI_DI:
35162 case MULTI_ARG_3_HI:
35163 case MULTI_ARG_3_HI_SI:
35164 case MULTI_ARG_3_QI:
35165 case MULTI_ARG_3_DI2:
35166 case MULTI_ARG_3_SI2:
35167 case MULTI_ARG_3_HI2:
35168 case MULTI_ARG_3_QI2:
35169 nargs = 3;
35170 break;
35171
35172 case MULTI_ARG_2_SF:
35173 case MULTI_ARG_2_DF:
35174 case MULTI_ARG_2_DI:
35175 case MULTI_ARG_2_SI:
35176 case MULTI_ARG_2_HI:
35177 case MULTI_ARG_2_QI:
35178 nargs = 2;
35179 break;
35180
35181 case MULTI_ARG_2_DI_IMM:
35182 case MULTI_ARG_2_SI_IMM:
35183 case MULTI_ARG_2_HI_IMM:
35184 case MULTI_ARG_2_QI_IMM:
35185 nargs = 2;
35186 last_arg_constant = true;
35187 break;
35188
35189 case MULTI_ARG_1_SF:
35190 case MULTI_ARG_1_DF:
35191 case MULTI_ARG_1_SF2:
35192 case MULTI_ARG_1_DF2:
35193 case MULTI_ARG_1_DI:
35194 case MULTI_ARG_1_SI:
35195 case MULTI_ARG_1_HI:
35196 case MULTI_ARG_1_QI:
35197 case MULTI_ARG_1_SI_DI:
35198 case MULTI_ARG_1_HI_DI:
35199 case MULTI_ARG_1_HI_SI:
35200 case MULTI_ARG_1_QI_DI:
35201 case MULTI_ARG_1_QI_SI:
35202 case MULTI_ARG_1_QI_HI:
35203 nargs = 1;
35204 break;
35205
35206 case MULTI_ARG_2_DI_CMP:
35207 case MULTI_ARG_2_SI_CMP:
35208 case MULTI_ARG_2_HI_CMP:
35209 case MULTI_ARG_2_QI_CMP:
35210 nargs = 2;
35211 comparison_p = true;
35212 break;
35213
35214 case MULTI_ARG_2_SF_TF:
35215 case MULTI_ARG_2_DF_TF:
35216 case MULTI_ARG_2_DI_TF:
35217 case MULTI_ARG_2_SI_TF:
35218 case MULTI_ARG_2_HI_TF:
35219 case MULTI_ARG_2_QI_TF:
35220 nargs = 2;
35221 tf_p = true;
35222 break;
35223
35224 default:
35225 gcc_unreachable ();
35226 }
35227
35228 if (optimize || !target
35229 || GET_MODE (target) != tmode
35230 || !insn_data[icode].operand[0].predicate (target, tmode))
35231 target = gen_reg_rtx (tmode);
35232
35233 gcc_assert (nargs <= 4);
35234
35235 for (i = 0; i < nargs; i++)
35236 {
35237 tree arg = CALL_EXPR_ARG (exp, i);
35238 rtx op = expand_normal (arg);
35239 int adjust = (comparison_p) ? 1 : 0;
35240 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
35241
35242 if (last_arg_constant && i == nargs - 1)
35243 {
35244 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
35245 {
35246 enum insn_code new_icode = icode;
35247 switch (icode)
35248 {
35249 case CODE_FOR_xop_vpermil2v2df3:
35250 case CODE_FOR_xop_vpermil2v4sf3:
35251 case CODE_FOR_xop_vpermil2v4df3:
35252 case CODE_FOR_xop_vpermil2v8sf3:
35253 error ("the last argument must be a 2-bit immediate");
35254 return gen_reg_rtx (tmode);
35255 case CODE_FOR_xop_rotlv2di3:
35256 new_icode = CODE_FOR_rotlv2di3;
35257 goto xop_rotl;
35258 case CODE_FOR_xop_rotlv4si3:
35259 new_icode = CODE_FOR_rotlv4si3;
35260 goto xop_rotl;
35261 case CODE_FOR_xop_rotlv8hi3:
35262 new_icode = CODE_FOR_rotlv8hi3;
35263 goto xop_rotl;
35264 case CODE_FOR_xop_rotlv16qi3:
35265 new_icode = CODE_FOR_rotlv16qi3;
35266 xop_rotl:
35267 if (CONST_INT_P (op))
35268 {
35269 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
35270 op = GEN_INT (INTVAL (op) & mask);
35271 gcc_checking_assert
35272 (insn_data[icode].operand[i + 1].predicate (op, mode));
35273 }
35274 else
35275 {
35276 gcc_checking_assert
35277 (nargs == 2
35278 && insn_data[new_icode].operand[0].mode == tmode
35279 && insn_data[new_icode].operand[1].mode == tmode
35280 && insn_data[new_icode].operand[2].mode == mode
35281 && insn_data[new_icode].operand[0].predicate
35282 == insn_data[icode].operand[0].predicate
35283 && insn_data[new_icode].operand[1].predicate
35284 == insn_data[icode].operand[1].predicate);
35285 icode = new_icode;
35286 goto non_constant;
35287 }
35288 break;
35289 default:
35290 gcc_unreachable ();
35291 }
35292 }
35293 }
35294 else
35295 {
35296 non_constant:
35297 if (VECTOR_MODE_P (mode))
35298 op = safe_vector_operand (op, mode);
35299
35300 /* If we aren't optimizing, only allow one memory operand to be
35301 generated. */
35302 if (memory_operand (op, mode))
35303 num_memory++;
35304
35305 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
35306
35307 if (optimize
35308 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
35309 || num_memory > 1)
35310 op = force_reg (mode, op);
35311 }
35312
35313 args[i].op = op;
35314 args[i].mode = mode;
35315 }
35316
35317 switch (nargs)
35318 {
35319 case 1:
35320 pat = GEN_FCN (icode) (target, args[0].op);
35321 break;
35322
35323 case 2:
35324 if (tf_p)
35325 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35326 GEN_INT ((int)sub_code));
35327 else if (! comparison_p)
35328 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
35329 else
35330 {
35331 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
35332 args[0].op,
35333 args[1].op);
35334
35335 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
35336 }
35337 break;
35338
35339 case 3:
35340 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
35341 break;
35342
35343 case 4:
35344 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
35345 break;
35346
35347 default:
35348 gcc_unreachable ();
35349 }
35350
35351 if (! pat)
35352 return 0;
35353
35354 emit_insn (pat);
35355 return target;
35356 }
35357
35358 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
35359 insns with vec_merge. */
35360
35361 static rtx
35362 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
35363 rtx target)
35364 {
35365 rtx pat;
35366 tree arg0 = CALL_EXPR_ARG (exp, 0);
35367 rtx op1, op0 = expand_normal (arg0);
35368 enum machine_mode tmode = insn_data[icode].operand[0].mode;
35369 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
35370
35371 if (optimize || !target
35372 || GET_MODE (target) != tmode
35373 || !insn_data[icode].operand[0].predicate (target, tmode))
35374 target = gen_reg_rtx (tmode);
35375
35376 if (VECTOR_MODE_P (mode0))
35377 op0 = safe_vector_operand (op0, mode0);
35378
35379 if ((optimize && !register_operand (op0, mode0))
35380 || !insn_data[icode].operand[1].predicate (op0, mode0))
35381 op0 = copy_to_mode_reg (mode0, op0);
35382
35383 op1 = op0;
35384 if (!insn_data[icode].operand[2].predicate (op1, mode0))
35385 op1 = copy_to_mode_reg (mode0, op1);
35386
35387 pat = GEN_FCN (icode) (target, op0, op1);
35388 if (! pat)
35389 return 0;
35390 emit_insn (pat);
35391 return target;
35392 }
35393
35394 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
35395
35396 static rtx
35397 ix86_expand_sse_compare (const struct builtin_description *d,
35398 tree exp, rtx target, bool swap)
35399 {
35400 rtx pat;
35401 tree arg0 = CALL_EXPR_ARG (exp, 0);
35402 tree arg1 = CALL_EXPR_ARG (exp, 1);
35403 rtx op0 = expand_normal (arg0);
35404 rtx op1 = expand_normal (arg1);
35405 rtx op2;
35406 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
35407 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
35408 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
35409 enum rtx_code comparison = d->comparison;
35410
35411 if (VECTOR_MODE_P (mode0))
35412 op0 = safe_vector_operand (op0, mode0);
35413 if (VECTOR_MODE_P (mode1))
35414 op1 = safe_vector_operand (op1, mode1);
35415
35416 /* Swap operands if we have a comparison that isn't available in
35417 hardware. */
35418 if (swap)
35419 {
35420 rtx tmp = gen_reg_rtx (mode1);
35421 emit_move_insn (tmp, op1);
35422 op1 = op0;
35423 op0 = tmp;
35424 }
35425
35426 if (optimize || !target
35427 || GET_MODE (target) != tmode
35428 || !insn_data[d->icode].operand[0].predicate (target, tmode))
35429 target = gen_reg_rtx (tmode);
35430
35431 if ((optimize && !register_operand (op0, mode0))
35432 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
35433 op0 = copy_to_mode_reg (mode0, op0);
35434 if ((optimize && !register_operand (op1, mode1))
35435 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
35436 op1 = copy_to_mode_reg (mode1, op1);
35437
35438 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
35439 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
35440 if (! pat)
35441 return 0;
35442 emit_insn (pat);
35443 return target;
35444 }
35445
35446 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
35447
35448 static rtx
35449 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
35450 rtx target)
35451 {
35452 rtx pat;
35453 tree arg0 = CALL_EXPR_ARG (exp, 0);
35454 tree arg1 = CALL_EXPR_ARG (exp, 1);
35455 rtx op0 = expand_normal (arg0);
35456 rtx op1 = expand_normal (arg1);
35457 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
35458 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
35459 enum rtx_code comparison = d->comparison;
35460
35461 if (VECTOR_MODE_P (mode0))
35462 op0 = safe_vector_operand (op0, mode0);
35463 if (VECTOR_MODE_P (mode1))
35464 op1 = safe_vector_operand (op1, mode1);
35465
35466 /* Swap operands if we have a comparison that isn't available in
35467 hardware. */
35468 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
35469 {
35470 rtx tmp = op1;
35471 op1 = op0;
35472 op0 = tmp;
35473 }
35474
35475 target = gen_reg_rtx (SImode);
35476 emit_move_insn (target, const0_rtx);
35477 target = gen_rtx_SUBREG (QImode, target, 0);
35478
35479 if ((optimize && !register_operand (op0, mode0))
35480 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
35481 op0 = copy_to_mode_reg (mode0, op0);
35482 if ((optimize && !register_operand (op1, mode1))
35483 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
35484 op1 = copy_to_mode_reg (mode1, op1);
35485
35486 pat = GEN_FCN (d->icode) (op0, op1);
35487 if (! pat)
35488 return 0;
35489 emit_insn (pat);
35490 emit_insn (gen_rtx_SET (VOIDmode,
35491 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
35492 gen_rtx_fmt_ee (comparison, QImode,
35493 SET_DEST (pat),
35494 const0_rtx)));
35495
35496 return SUBREG_REG (target);
35497 }
35498
35499 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
35500
35501 static rtx
35502 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
35503 rtx target)
35504 {
35505 rtx pat;
35506 tree arg0 = CALL_EXPR_ARG (exp, 0);
35507 rtx op1, op0 = expand_normal (arg0);
35508 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
35509 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
35510
35511 if (optimize || target == 0
35512 || GET_MODE (target) != tmode
35513 || !insn_data[d->icode].operand[0].predicate (target, tmode))
35514 target = gen_reg_rtx (tmode);
35515
35516 if (VECTOR_MODE_P (mode0))
35517 op0 = safe_vector_operand (op0, mode0);
35518
35519 if ((optimize && !register_operand (op0, mode0))
35520 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
35521 op0 = copy_to_mode_reg (mode0, op0);
35522
35523 op1 = GEN_INT (d->comparison);
35524
35525 pat = GEN_FCN (d->icode) (target, op0, op1);
35526 if (! pat)
35527 return 0;
35528 emit_insn (pat);
35529 return target;
35530 }
35531
35532 static rtx
35533 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
35534 tree exp, rtx target)
35535 {
35536 rtx pat;
35537 tree arg0 = CALL_EXPR_ARG (exp, 0);
35538 tree arg1 = CALL_EXPR_ARG (exp, 1);
35539 rtx op0 = expand_normal (arg0);
35540 rtx op1 = expand_normal (arg1);
35541 rtx op2;
35542 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
35543 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
35544 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
35545
35546 if (optimize || target == 0
35547 || GET_MODE (target) != tmode
35548 || !insn_data[d->icode].operand[0].predicate (target, tmode))
35549 target = gen_reg_rtx (tmode);
35550
35551 op0 = safe_vector_operand (op0, mode0);
35552 op1 = safe_vector_operand (op1, mode1);
35553
35554 if ((optimize && !register_operand (op0, mode0))
35555 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
35556 op0 = copy_to_mode_reg (mode0, op0);
35557 if ((optimize && !register_operand (op1, mode1))
35558 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
35559 op1 = copy_to_mode_reg (mode1, op1);
35560
35561 op2 = GEN_INT (d->comparison);
35562
35563 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
35564 if (! pat)
35565 return 0;
35566 emit_insn (pat);
35567 return target;
35568 }
35569
35570 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
35571
35572 static rtx
35573 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
35574 rtx target)
35575 {
35576 rtx pat;
35577 tree arg0 = CALL_EXPR_ARG (exp, 0);
35578 tree arg1 = CALL_EXPR_ARG (exp, 1);
35579 rtx op0 = expand_normal (arg0);
35580 rtx op1 = expand_normal (arg1);
35581 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
35582 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
35583 enum rtx_code comparison = d->comparison;
35584
35585 if (VECTOR_MODE_P (mode0))
35586 op0 = safe_vector_operand (op0, mode0);
35587 if (VECTOR_MODE_P (mode1))
35588 op1 = safe_vector_operand (op1, mode1);
35589
35590 target = gen_reg_rtx (SImode);
35591 emit_move_insn (target, const0_rtx);
35592 target = gen_rtx_SUBREG (QImode, target, 0);
35593
35594 if ((optimize && !register_operand (op0, mode0))
35595 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
35596 op0 = copy_to_mode_reg (mode0, op0);
35597 if ((optimize && !register_operand (op1, mode1))
35598 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
35599 op1 = copy_to_mode_reg (mode1, op1);
35600
35601 pat = GEN_FCN (d->icode) (op0, op1);
35602 if (! pat)
35603 return 0;
35604 emit_insn (pat);
35605 emit_insn (gen_rtx_SET (VOIDmode,
35606 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
35607 gen_rtx_fmt_ee (comparison, QImode,
35608 SET_DEST (pat),
35609 const0_rtx)));
35610
35611 return SUBREG_REG (target);
35612 }
35613
35614 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
35615
35616 static rtx
35617 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
35618 tree exp, rtx target)
35619 {
35620 rtx pat;
35621 tree arg0 = CALL_EXPR_ARG (exp, 0);
35622 tree arg1 = CALL_EXPR_ARG (exp, 1);
35623 tree arg2 = CALL_EXPR_ARG (exp, 2);
35624 tree arg3 = CALL_EXPR_ARG (exp, 3);
35625 tree arg4 = CALL_EXPR_ARG (exp, 4);
35626 rtx scratch0, scratch1;
35627 rtx op0 = expand_normal (arg0);
35628 rtx op1 = expand_normal (arg1);
35629 rtx op2 = expand_normal (arg2);
35630 rtx op3 = expand_normal (arg3);
35631 rtx op4 = expand_normal (arg4);
35632 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
35633
35634 tmode0 = insn_data[d->icode].operand[0].mode;
35635 tmode1 = insn_data[d->icode].operand[1].mode;
35636 modev2 = insn_data[d->icode].operand[2].mode;
35637 modei3 = insn_data[d->icode].operand[3].mode;
35638 modev4 = insn_data[d->icode].operand[4].mode;
35639 modei5 = insn_data[d->icode].operand[5].mode;
35640 modeimm = insn_data[d->icode].operand[6].mode;
35641
35642 if (VECTOR_MODE_P (modev2))
35643 op0 = safe_vector_operand (op0, modev2);
35644 if (VECTOR_MODE_P (modev4))
35645 op2 = safe_vector_operand (op2, modev4);
35646
35647 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
35648 op0 = copy_to_mode_reg (modev2, op0);
35649 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
35650 op1 = copy_to_mode_reg (modei3, op1);
35651 if ((optimize && !register_operand (op2, modev4))
35652 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
35653 op2 = copy_to_mode_reg (modev4, op2);
35654 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
35655 op3 = copy_to_mode_reg (modei5, op3);
35656
35657 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
35658 {
35659 error ("the fifth argument must be an 8-bit immediate");
35660 return const0_rtx;
35661 }
35662
35663 if (d->code == IX86_BUILTIN_PCMPESTRI128)
35664 {
35665 if (optimize || !target
35666 || GET_MODE (target) != tmode0
35667 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
35668 target = gen_reg_rtx (tmode0);
35669
35670 scratch1 = gen_reg_rtx (tmode1);
35671
35672 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
35673 }
35674 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
35675 {
35676 if (optimize || !target
35677 || GET_MODE (target) != tmode1
35678 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
35679 target = gen_reg_rtx (tmode1);
35680
35681 scratch0 = gen_reg_rtx (tmode0);
35682
35683 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
35684 }
35685 else
35686 {
35687 gcc_assert (d->flag);
35688
35689 scratch0 = gen_reg_rtx (tmode0);
35690 scratch1 = gen_reg_rtx (tmode1);
35691
35692 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
35693 }
35694
35695 if (! pat)
35696 return 0;
35697
35698 emit_insn (pat);
35699
35700 if (d->flag)
35701 {
35702 target = gen_reg_rtx (SImode);
35703 emit_move_insn (target, const0_rtx);
35704 target = gen_rtx_SUBREG (QImode, target, 0);
35705
35706 emit_insn
35707 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
35708 gen_rtx_fmt_ee (EQ, QImode,
35709 gen_rtx_REG ((enum machine_mode) d->flag,
35710 FLAGS_REG),
35711 const0_rtx)));
35712 return SUBREG_REG (target);
35713 }
35714 else
35715 return target;
35716 }
35717
35718
35719 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
35720
35721 static rtx
35722 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
35723 tree exp, rtx target)
35724 {
35725 rtx pat;
35726 tree arg0 = CALL_EXPR_ARG (exp, 0);
35727 tree arg1 = CALL_EXPR_ARG (exp, 1);
35728 tree arg2 = CALL_EXPR_ARG (exp, 2);
35729 rtx scratch0, scratch1;
35730 rtx op0 = expand_normal (arg0);
35731 rtx op1 = expand_normal (arg1);
35732 rtx op2 = expand_normal (arg2);
35733 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
35734
35735 tmode0 = insn_data[d->icode].operand[0].mode;
35736 tmode1 = insn_data[d->icode].operand[1].mode;
35737 modev2 = insn_data[d->icode].operand[2].mode;
35738 modev3 = insn_data[d->icode].operand[3].mode;
35739 modeimm = insn_data[d->icode].operand[4].mode;
35740
35741 if (VECTOR_MODE_P (modev2))
35742 op0 = safe_vector_operand (op0, modev2);
35743 if (VECTOR_MODE_P (modev3))
35744 op1 = safe_vector_operand (op1, modev3);
35745
35746 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
35747 op0 = copy_to_mode_reg (modev2, op0);
35748 if ((optimize && !register_operand (op1, modev3))
35749 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
35750 op1 = copy_to_mode_reg (modev3, op1);
35751
35752 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
35753 {
35754 error ("the third argument must be an 8-bit immediate");
35755 return const0_rtx;
35756 }
35757
35758 if (d->code == IX86_BUILTIN_PCMPISTRI128)
35759 {
35760 if (optimize || !target
35761 || GET_MODE (target) != tmode0
35762 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
35763 target = gen_reg_rtx (tmode0);
35764
35765 scratch1 = gen_reg_rtx (tmode1);
35766
35767 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
35768 }
35769 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
35770 {
35771 if (optimize || !target
35772 || GET_MODE (target) != tmode1
35773 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
35774 target = gen_reg_rtx (tmode1);
35775
35776 scratch0 = gen_reg_rtx (tmode0);
35777
35778 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
35779 }
35780 else
35781 {
35782 gcc_assert (d->flag);
35783
35784 scratch0 = gen_reg_rtx (tmode0);
35785 scratch1 = gen_reg_rtx (tmode1);
35786
35787 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
35788 }
35789
35790 if (! pat)
35791 return 0;
35792
35793 emit_insn (pat);
35794
35795 if (d->flag)
35796 {
35797 target = gen_reg_rtx (SImode);
35798 emit_move_insn (target, const0_rtx);
35799 target = gen_rtx_SUBREG (QImode, target, 0);
35800
35801 emit_insn
35802 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
35803 gen_rtx_fmt_ee (EQ, QImode,
35804 gen_rtx_REG ((enum machine_mode) d->flag,
35805 FLAGS_REG),
35806 const0_rtx)));
35807 return SUBREG_REG (target);
35808 }
35809 else
35810 return target;
35811 }
35812
35813 /* Subroutine of ix86_expand_builtin to take care of insns with
35814 variable number of operands. */
35815
35816 static rtx
35817 ix86_expand_args_builtin (const struct builtin_description *d,
35818 tree exp, rtx target)
35819 {
35820 rtx pat, real_target;
35821 unsigned int i, nargs;
35822 unsigned int nargs_constant = 0;
35823 unsigned int mask_pos = 0;
35824 int num_memory = 0;
35825 struct
35826 {
35827 rtx op;
35828 enum machine_mode mode;
35829 } args[6];
35830 bool last_arg_count = false;
35831 enum insn_code icode = d->icode;
35832 const struct insn_data_d *insn_p = &insn_data[icode];
35833 enum machine_mode tmode = insn_p->operand[0].mode;
35834 enum machine_mode rmode = VOIDmode;
35835 bool swap = false;
35836 enum rtx_code comparison = d->comparison;
35837
35838 switch ((enum ix86_builtin_func_type) d->flag)
35839 {
35840 case V2DF_FTYPE_V2DF_ROUND:
35841 case V4DF_FTYPE_V4DF_ROUND:
35842 case V4SF_FTYPE_V4SF_ROUND:
35843 case V8SF_FTYPE_V8SF_ROUND:
35844 case V4SI_FTYPE_V4SF_ROUND:
35845 case V8SI_FTYPE_V8SF_ROUND:
35846 return ix86_expand_sse_round (d, exp, target);
35847 case V4SI_FTYPE_V2DF_V2DF_ROUND:
35848 case V8SI_FTYPE_V4DF_V4DF_ROUND:
35849 case V16SI_FTYPE_V8DF_V8DF_ROUND:
35850 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
35851 case INT_FTYPE_V8SF_V8SF_PTEST:
35852 case INT_FTYPE_V4DI_V4DI_PTEST:
35853 case INT_FTYPE_V4DF_V4DF_PTEST:
35854 case INT_FTYPE_V4SF_V4SF_PTEST:
35855 case INT_FTYPE_V2DI_V2DI_PTEST:
35856 case INT_FTYPE_V2DF_V2DF_PTEST:
35857 return ix86_expand_sse_ptest (d, exp, target);
35858 case FLOAT128_FTYPE_FLOAT128:
35859 case FLOAT_FTYPE_FLOAT:
35860 case INT_FTYPE_INT:
35861 case UINT64_FTYPE_INT:
35862 case UINT16_FTYPE_UINT16:
35863 case INT64_FTYPE_INT64:
35864 case INT64_FTYPE_V4SF:
35865 case INT64_FTYPE_V2DF:
35866 case INT_FTYPE_V16QI:
35867 case INT_FTYPE_V8QI:
35868 case INT_FTYPE_V8SF:
35869 case INT_FTYPE_V4DF:
35870 case INT_FTYPE_V4SF:
35871 case INT_FTYPE_V2DF:
35872 case INT_FTYPE_V32QI:
35873 case V16QI_FTYPE_V16QI:
35874 case V8SI_FTYPE_V8SF:
35875 case V8SI_FTYPE_V4SI:
35876 case V8HI_FTYPE_V8HI:
35877 case V8HI_FTYPE_V16QI:
35878 case V8QI_FTYPE_V8QI:
35879 case V8SF_FTYPE_V8SF:
35880 case V8SF_FTYPE_V8SI:
35881 case V8SF_FTYPE_V4SF:
35882 case V8SF_FTYPE_V8HI:
35883 case V4SI_FTYPE_V4SI:
35884 case V4SI_FTYPE_V16QI:
35885 case V4SI_FTYPE_V4SF:
35886 case V4SI_FTYPE_V8SI:
35887 case V4SI_FTYPE_V8HI:
35888 case V4SI_FTYPE_V4DF:
35889 case V4SI_FTYPE_V2DF:
35890 case V4HI_FTYPE_V4HI:
35891 case V4DF_FTYPE_V4DF:
35892 case V4DF_FTYPE_V4SI:
35893 case V4DF_FTYPE_V4SF:
35894 case V4DF_FTYPE_V2DF:
35895 case V4SF_FTYPE_V4SF:
35896 case V4SF_FTYPE_V4SI:
35897 case V4SF_FTYPE_V8SF:
35898 case V4SF_FTYPE_V4DF:
35899 case V4SF_FTYPE_V8HI:
35900 case V4SF_FTYPE_V2DF:
35901 case V2DI_FTYPE_V2DI:
35902 case V2DI_FTYPE_V16QI:
35903 case V2DI_FTYPE_V8HI:
35904 case V2DI_FTYPE_V4SI:
35905 case V2DF_FTYPE_V2DF:
35906 case V2DF_FTYPE_V4SI:
35907 case V2DF_FTYPE_V4DF:
35908 case V2DF_FTYPE_V4SF:
35909 case V2DF_FTYPE_V2SI:
35910 case V2SI_FTYPE_V2SI:
35911 case V2SI_FTYPE_V4SF:
35912 case V2SI_FTYPE_V2SF:
35913 case V2SI_FTYPE_V2DF:
35914 case V2SF_FTYPE_V2SF:
35915 case V2SF_FTYPE_V2SI:
35916 case V32QI_FTYPE_V32QI:
35917 case V32QI_FTYPE_V16QI:
35918 case V16HI_FTYPE_V16HI:
35919 case V16HI_FTYPE_V8HI:
35920 case V8SI_FTYPE_V8SI:
35921 case V16HI_FTYPE_V16QI:
35922 case V8SI_FTYPE_V16QI:
35923 case V4DI_FTYPE_V16QI:
35924 case V8SI_FTYPE_V8HI:
35925 case V4DI_FTYPE_V8HI:
35926 case V4DI_FTYPE_V4SI:
35927 case V4DI_FTYPE_V2DI:
35928 case HI_FTYPE_HI:
35929 case HI_FTYPE_V16QI:
35930 case SI_FTYPE_V32QI:
35931 case DI_FTYPE_V64QI:
35932 case V16QI_FTYPE_HI:
35933 case V32QI_FTYPE_SI:
35934 case V64QI_FTYPE_DI:
35935 case V8HI_FTYPE_QI:
35936 case V16HI_FTYPE_HI:
35937 case V32HI_FTYPE_SI:
35938 case V4SI_FTYPE_QI:
35939 case V8SI_FTYPE_QI:
35940 case V4SI_FTYPE_HI:
35941 case V8SI_FTYPE_HI:
35942 case QI_FTYPE_V8HI:
35943 case HI_FTYPE_V16HI:
35944 case SI_FTYPE_V32HI:
35945 case QI_FTYPE_V4SI:
35946 case QI_FTYPE_V8SI:
35947 case HI_FTYPE_V16SI:
35948 case QI_FTYPE_V2DI:
35949 case QI_FTYPE_V4DI:
35950 case QI_FTYPE_V8DI:
35951 case UINT_FTYPE_V2DF:
35952 case UINT_FTYPE_V4SF:
35953 case UINT64_FTYPE_V2DF:
35954 case UINT64_FTYPE_V4SF:
35955 case V16QI_FTYPE_V8DI:
35956 case V16HI_FTYPE_V16SI:
35957 case V16SI_FTYPE_HI:
35958 case V2DI_FTYPE_QI:
35959 case V4DI_FTYPE_QI:
35960 case V16SI_FTYPE_V16SI:
35961 case V16SI_FTYPE_INT:
35962 case V16SF_FTYPE_FLOAT:
35963 case V16SF_FTYPE_V8SF:
35964 case V16SI_FTYPE_V8SI:
35965 case V16SF_FTYPE_V4SF:
35966 case V16SI_FTYPE_V4SI:
35967 case V16SF_FTYPE_V16SF:
35968 case V8HI_FTYPE_V8DI:
35969 case V8UHI_FTYPE_V8UHI:
35970 case V8SI_FTYPE_V8DI:
35971 case V8SF_FTYPE_V8DF:
35972 case V8DI_FTYPE_QI:
35973 case V8DI_FTYPE_INT64:
35974 case V8DI_FTYPE_V4DI:
35975 case V8DI_FTYPE_V8DI:
35976 case V8DF_FTYPE_DOUBLE:
35977 case V8DF_FTYPE_V4DF:
35978 case V8DF_FTYPE_V2DF:
35979 case V8DF_FTYPE_V8DF:
35980 case V8DF_FTYPE_V8SI:
35981 nargs = 1;
35982 break;
35983 case V4SF_FTYPE_V4SF_VEC_MERGE:
35984 case V2DF_FTYPE_V2DF_VEC_MERGE:
35985 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
35986 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
35987 case V16QI_FTYPE_V16QI_V16QI:
35988 case V16QI_FTYPE_V8HI_V8HI:
35989 case V16SI_FTYPE_V16SI_V16SI:
35990 case V16SF_FTYPE_V16SF_V16SF:
35991 case V16SF_FTYPE_V16SF_V16SI:
35992 case V8QI_FTYPE_V8QI_V8QI:
35993 case V8QI_FTYPE_V4HI_V4HI:
35994 case V8HI_FTYPE_V8HI_V8HI:
35995 case V8HI_FTYPE_V16QI_V16QI:
35996 case V8HI_FTYPE_V4SI_V4SI:
35997 case V8SF_FTYPE_V8SF_V8SF:
35998 case V8SF_FTYPE_V8SF_V8SI:
35999 case V8DI_FTYPE_V8DI_V8DI:
36000 case V8DF_FTYPE_V8DF_V8DF:
36001 case V8DF_FTYPE_V8DF_V8DI:
36002 case V4SI_FTYPE_V4SI_V4SI:
36003 case V4SI_FTYPE_V8HI_V8HI:
36004 case V4SI_FTYPE_V4SF_V4SF:
36005 case V4SI_FTYPE_V2DF_V2DF:
36006 case V4HI_FTYPE_V4HI_V4HI:
36007 case V4HI_FTYPE_V8QI_V8QI:
36008 case V4HI_FTYPE_V2SI_V2SI:
36009 case V4DF_FTYPE_V4DF_V4DF:
36010 case V4DF_FTYPE_V4DF_V4DI:
36011 case V4SF_FTYPE_V4SF_V4SF:
36012 case V4SF_FTYPE_V4SF_V4SI:
36013 case V4SF_FTYPE_V4SF_V2SI:
36014 case V4SF_FTYPE_V4SF_V2DF:
36015 case V4SF_FTYPE_V4SF_UINT:
36016 case V4SF_FTYPE_V4SF_UINT64:
36017 case V4SF_FTYPE_V4SF_DI:
36018 case V4SF_FTYPE_V4SF_SI:
36019 case V2DI_FTYPE_V2DI_V2DI:
36020 case V2DI_FTYPE_V16QI_V16QI:
36021 case V2DI_FTYPE_V4SI_V4SI:
36022 case V2UDI_FTYPE_V4USI_V4USI:
36023 case V2DI_FTYPE_V2DI_V16QI:
36024 case V2DI_FTYPE_V2DF_V2DF:
36025 case V2SI_FTYPE_V2SI_V2SI:
36026 case V2SI_FTYPE_V4HI_V4HI:
36027 case V2SI_FTYPE_V2SF_V2SF:
36028 case V2DF_FTYPE_V2DF_V2DF:
36029 case V2DF_FTYPE_V2DF_V4SF:
36030 case V2DF_FTYPE_V2DF_V2DI:
36031 case V2DF_FTYPE_V2DF_DI:
36032 case V2DF_FTYPE_V2DF_SI:
36033 case V2DF_FTYPE_V2DF_UINT:
36034 case V2DF_FTYPE_V2DF_UINT64:
36035 case V2SF_FTYPE_V2SF_V2SF:
36036 case V1DI_FTYPE_V1DI_V1DI:
36037 case V1DI_FTYPE_V8QI_V8QI:
36038 case V1DI_FTYPE_V2SI_V2SI:
36039 case V32QI_FTYPE_V16HI_V16HI:
36040 case V16HI_FTYPE_V8SI_V8SI:
36041 case V32QI_FTYPE_V32QI_V32QI:
36042 case V16HI_FTYPE_V32QI_V32QI:
36043 case V16HI_FTYPE_V16HI_V16HI:
36044 case V8SI_FTYPE_V4DF_V4DF:
36045 case V8SI_FTYPE_V8SI_V8SI:
36046 case V8SI_FTYPE_V16HI_V16HI:
36047 case V4DI_FTYPE_V4DI_V4DI:
36048 case V4DI_FTYPE_V8SI_V8SI:
36049 case V4UDI_FTYPE_V8USI_V8USI:
36050 case QI_FTYPE_V8DI_V8DI:
36051 case V8DI_FTYPE_V64QI_V64QI:
36052 case HI_FTYPE_V16SI_V16SI:
36053 if (comparison == UNKNOWN)
36054 return ix86_expand_binop_builtin (icode, exp, target);
36055 nargs = 2;
36056 break;
36057 case V4SF_FTYPE_V4SF_V4SF_SWAP:
36058 case V2DF_FTYPE_V2DF_V2DF_SWAP:
36059 gcc_assert (comparison != UNKNOWN);
36060 nargs = 2;
36061 swap = true;
36062 break;
36063 case V16HI_FTYPE_V16HI_V8HI_COUNT:
36064 case V16HI_FTYPE_V16HI_SI_COUNT:
36065 case V8SI_FTYPE_V8SI_V4SI_COUNT:
36066 case V8SI_FTYPE_V8SI_SI_COUNT:
36067 case V4DI_FTYPE_V4DI_V2DI_COUNT:
36068 case V4DI_FTYPE_V4DI_INT_COUNT:
36069 case V8HI_FTYPE_V8HI_V8HI_COUNT:
36070 case V8HI_FTYPE_V8HI_SI_COUNT:
36071 case V4SI_FTYPE_V4SI_V4SI_COUNT:
36072 case V4SI_FTYPE_V4SI_SI_COUNT:
36073 case V4HI_FTYPE_V4HI_V4HI_COUNT:
36074 case V4HI_FTYPE_V4HI_SI_COUNT:
36075 case V2DI_FTYPE_V2DI_V2DI_COUNT:
36076 case V2DI_FTYPE_V2DI_SI_COUNT:
36077 case V2SI_FTYPE_V2SI_V2SI_COUNT:
36078 case V2SI_FTYPE_V2SI_SI_COUNT:
36079 case V1DI_FTYPE_V1DI_V1DI_COUNT:
36080 case V1DI_FTYPE_V1DI_SI_COUNT:
36081 nargs = 2;
36082 last_arg_count = true;
36083 break;
36084 case UINT64_FTYPE_UINT64_UINT64:
36085 case UINT_FTYPE_UINT_UINT:
36086 case UINT_FTYPE_UINT_USHORT:
36087 case UINT_FTYPE_UINT_UCHAR:
36088 case UINT16_FTYPE_UINT16_INT:
36089 case UINT8_FTYPE_UINT8_INT:
36090 case HI_FTYPE_HI_HI:
36091 case SI_FTYPE_SI_SI:
36092 case DI_FTYPE_DI_DI:
36093 case V16SI_FTYPE_V8DF_V8DF:
36094 nargs = 2;
36095 break;
36096 case V2DI_FTYPE_V2DI_INT_CONVERT:
36097 nargs = 2;
36098 rmode = V1TImode;
36099 nargs_constant = 1;
36100 break;
36101 case V4DI_FTYPE_V4DI_INT_CONVERT:
36102 nargs = 2;
36103 rmode = V2TImode;
36104 nargs_constant = 1;
36105 break;
36106 case V8DI_FTYPE_V8DI_INT_CONVERT:
36107 nargs = 2;
36108 rmode = V4TImode;
36109 nargs_constant = 1;
36110 break;
36111 case V8HI_FTYPE_V8HI_INT:
36112 case V8HI_FTYPE_V8SF_INT:
36113 case V16HI_FTYPE_V16SF_INT:
36114 case V8HI_FTYPE_V4SF_INT:
36115 case V8SF_FTYPE_V8SF_INT:
36116 case V4SF_FTYPE_V16SF_INT:
36117 case V16SF_FTYPE_V16SF_INT:
36118 case V4SI_FTYPE_V4SI_INT:
36119 case V4SI_FTYPE_V8SI_INT:
36120 case V4HI_FTYPE_V4HI_INT:
36121 case V4DF_FTYPE_V4DF_INT:
36122 case V4DF_FTYPE_V8DF_INT:
36123 case V4SF_FTYPE_V4SF_INT:
36124 case V4SF_FTYPE_V8SF_INT:
36125 case V2DI_FTYPE_V2DI_INT:
36126 case V2DF_FTYPE_V2DF_INT:
36127 case V2DF_FTYPE_V4DF_INT:
36128 case V16HI_FTYPE_V16HI_INT:
36129 case V8SI_FTYPE_V8SI_INT:
36130 case V16SI_FTYPE_V16SI_INT:
36131 case V4SI_FTYPE_V16SI_INT:
36132 case V4DI_FTYPE_V4DI_INT:
36133 case V2DI_FTYPE_V4DI_INT:
36134 case V4DI_FTYPE_V8DI_INT:
36135 case HI_FTYPE_HI_INT:
36136 case QI_FTYPE_V4SF_INT:
36137 case QI_FTYPE_V2DF_INT:
36138 nargs = 2;
36139 nargs_constant = 1;
36140 break;
36141 case V16QI_FTYPE_V16QI_V16QI_V16QI:
36142 case V8SF_FTYPE_V8SF_V8SF_V8SF:
36143 case V4DF_FTYPE_V4DF_V4DF_V4DF:
36144 case V4SF_FTYPE_V4SF_V4SF_V4SF:
36145 case V2DF_FTYPE_V2DF_V2DF_V2DF:
36146 case V32QI_FTYPE_V32QI_V32QI_V32QI:
36147 case HI_FTYPE_V16SI_V16SI_HI:
36148 case QI_FTYPE_V8DI_V8DI_QI:
36149 case V16HI_FTYPE_V16SI_V16HI_HI:
36150 case V16QI_FTYPE_V16SI_V16QI_HI:
36151 case V16QI_FTYPE_V8DI_V16QI_QI:
36152 case V16SF_FTYPE_V16SF_V16SF_HI:
36153 case V16SF_FTYPE_V16SF_V16SF_V16SF:
36154 case V16SF_FTYPE_V16SF_V16SI_V16SF:
36155 case V16SF_FTYPE_V16SI_V16SF_HI:
36156 case V16SF_FTYPE_V16SI_V16SF_V16SF:
36157 case V16SF_FTYPE_V4SF_V16SF_HI:
36158 case V16SI_FTYPE_SI_V16SI_HI:
36159 case V16SI_FTYPE_V16HI_V16SI_HI:
36160 case V16SI_FTYPE_V16QI_V16SI_HI:
36161 case V16SI_FTYPE_V16SF_V16SI_HI:
36162 case V8SF_FTYPE_V4SF_V8SF_QI:
36163 case V4DF_FTYPE_V2DF_V4DF_QI:
36164 case V8SI_FTYPE_V4SI_V8SI_QI:
36165 case V8SI_FTYPE_SI_V8SI_QI:
36166 case V4SI_FTYPE_V4SI_V4SI_QI:
36167 case V4SI_FTYPE_SI_V4SI_QI:
36168 case V4DI_FTYPE_V2DI_V4DI_QI:
36169 case V4DI_FTYPE_DI_V4DI_QI:
36170 case V2DI_FTYPE_V2DI_V2DI_QI:
36171 case V2DI_FTYPE_DI_V2DI_QI:
36172 case V64QI_FTYPE_V64QI_V64QI_DI:
36173 case V64QI_FTYPE_V16QI_V64QI_DI:
36174 case V64QI_FTYPE_QI_V64QI_DI:
36175 case V32QI_FTYPE_V32QI_V32QI_SI:
36176 case V32QI_FTYPE_V16QI_V32QI_SI:
36177 case V32QI_FTYPE_QI_V32QI_SI:
36178 case V16QI_FTYPE_V16QI_V16QI_HI:
36179 case V16QI_FTYPE_QI_V16QI_HI:
36180 case V32HI_FTYPE_V8HI_V32HI_SI:
36181 case V32HI_FTYPE_HI_V32HI_SI:
36182 case V16HI_FTYPE_V8HI_V16HI_HI:
36183 case V16HI_FTYPE_HI_V16HI_HI:
36184 case V8HI_FTYPE_V8HI_V8HI_QI:
36185 case V8HI_FTYPE_HI_V8HI_QI:
36186 case V8SF_FTYPE_V8HI_V8SF_QI:
36187 case V4SF_FTYPE_V8HI_V4SF_QI:
36188 case V8SI_FTYPE_V8SF_V8SI_QI:
36189 case V4SI_FTYPE_V4SF_V4SI_QI:
36190 case V8DI_FTYPE_V8SF_V8DI_QI:
36191 case V4DI_FTYPE_V4SF_V4DI_QI:
36192 case V2DI_FTYPE_V4SF_V2DI_QI:
36193 case V8SF_FTYPE_V8DI_V8SF_QI:
36194 case V4SF_FTYPE_V4DI_V4SF_QI:
36195 case V4SF_FTYPE_V2DI_V4SF_QI:
36196 case V8DF_FTYPE_V8DI_V8DF_QI:
36197 case V4DF_FTYPE_V4DI_V4DF_QI:
36198 case V2DF_FTYPE_V2DI_V2DF_QI:
36199 case V16QI_FTYPE_V8HI_V16QI_QI:
36200 case V16QI_FTYPE_V16HI_V16QI_HI:
36201 case V16QI_FTYPE_V4SI_V16QI_QI:
36202 case V16QI_FTYPE_V8SI_V16QI_QI:
36203 case V8HI_FTYPE_V4SI_V8HI_QI:
36204 case V8HI_FTYPE_V8SI_V8HI_QI:
36205 case V16QI_FTYPE_V2DI_V16QI_QI:
36206 case V16QI_FTYPE_V4DI_V16QI_QI:
36207 case V8HI_FTYPE_V2DI_V8HI_QI:
36208 case V8HI_FTYPE_V4DI_V8HI_QI:
36209 case V4SI_FTYPE_V2DI_V4SI_QI:
36210 case V4SI_FTYPE_V4DI_V4SI_QI:
36211 case V32QI_FTYPE_V32HI_V32QI_SI:
36212 case HI_FTYPE_V16QI_V16QI_HI:
36213 case SI_FTYPE_V32QI_V32QI_SI:
36214 case DI_FTYPE_V64QI_V64QI_DI:
36215 case QI_FTYPE_V8HI_V8HI_QI:
36216 case HI_FTYPE_V16HI_V16HI_HI:
36217 case SI_FTYPE_V32HI_V32HI_SI:
36218 case QI_FTYPE_V4SI_V4SI_QI:
36219 case QI_FTYPE_V8SI_V8SI_QI:
36220 case QI_FTYPE_V2DI_V2DI_QI:
36221 case QI_FTYPE_V4DI_V4DI_QI:
36222 case V4SF_FTYPE_V2DF_V4SF_QI:
36223 case V4SF_FTYPE_V4DF_V4SF_QI:
36225 case V16SI_FTYPE_V16SI_V16SI_HI:
36226 case V16SI_FTYPE_V16SI_V16SI_V16SI:
36227 case V16SI_FTYPE_V4SI_V16SI_HI:
36228 case V2DI_FTYPE_V2DI_V2DI_V2DI:
36229 case V2DI_FTYPE_V4SI_V2DI_QI:
36230 case V2DI_FTYPE_V8HI_V2DI_QI:
36231 case V2DI_FTYPE_V16QI_V2DI_QI:
36232 case V4DI_FTYPE_V4DI_V4DI_QI:
36233 case V4DI_FTYPE_V4SI_V4DI_QI:
36234 case V4DI_FTYPE_V8HI_V4DI_QI:
36235 case V4DI_FTYPE_V16QI_V4DI_QI:
36236 case V8DI_FTYPE_V8DF_V8DI_QI:
36237 case V4DI_FTYPE_V4DF_V4DI_QI:
36238 case V2DI_FTYPE_V2DF_V2DI_QI:
36239 case V4SI_FTYPE_V4DF_V4SI_QI:
36240 case V4SI_FTYPE_V2DF_V4SI_QI:
36241 case V4SI_FTYPE_V8HI_V4SI_QI:
36242 case V4SI_FTYPE_V16QI_V4SI_QI:
36243 case V8SI_FTYPE_V8SI_V8SI_V8SI:
36244 case V4DI_FTYPE_V4DI_V4DI_V4DI:
36245 case V8DF_FTYPE_V2DF_V8DF_QI:
36246 case V8DF_FTYPE_V4DF_V8DF_QI:
36247 case V8DF_FTYPE_V8DF_V8DF_QI:
36248 case V8DF_FTYPE_V8DF_V8DF_V8DF:
36249 case V8SF_FTYPE_V8SF_V8SF_QI:
36250 case V8SF_FTYPE_V8SI_V8SF_QI:
36251 case V4DF_FTYPE_V4DF_V4DF_QI:
36252 case V4SF_FTYPE_V4SF_V4SF_QI:
36253 case V2DF_FTYPE_V2DF_V2DF_QI:
36254 case V2DF_FTYPE_V4SF_V2DF_QI:
36255 case V2DF_FTYPE_V4SI_V2DF_QI:
36256 case V4SF_FTYPE_V4SI_V4SF_QI:
36257 case V4DF_FTYPE_V4SF_V4DF_QI:
36258 case V4DF_FTYPE_V4SI_V4DF_QI:
36259 case V8SI_FTYPE_V8SI_V8SI_QI:
36260 case V8SI_FTYPE_V8HI_V8SI_QI:
36261 case V8SI_FTYPE_V16QI_V8SI_QI:
36262 case V8DF_FTYPE_V8DF_V8DI_V8DF:
36263 case V8DF_FTYPE_V8DI_V8DF_V8DF:
36264 case V8DF_FTYPE_V8SF_V8DF_QI:
36265 case V8DF_FTYPE_V8SI_V8DF_QI:
36266 case V8DI_FTYPE_DI_V8DI_QI:
36267 case V16SF_FTYPE_V8SF_V16SF_HI:
36268 case V16SI_FTYPE_V8SI_V16SI_HI:
36269 case V16HI_FTYPE_V16HI_V16HI_HI:
36270 case V8HI_FTYPE_V16QI_V8HI_QI:
36271 case V16HI_FTYPE_V16QI_V16HI_HI:
36272 case V32HI_FTYPE_V32HI_V32HI_SI:
36273 case V32HI_FTYPE_V32QI_V32HI_SI:
36274 case V8DI_FTYPE_V16QI_V8DI_QI:
36275 case V8DI_FTYPE_V2DI_V8DI_QI:
36276 case V8DI_FTYPE_V4DI_V8DI_QI:
36277 case V8DI_FTYPE_V8DI_V8DI_QI:
36278 case V8DI_FTYPE_V8DI_V8DI_V8DI:
36279 case V8DI_FTYPE_V8HI_V8DI_QI:
36280 case V8DI_FTYPE_V8SI_V8DI_QI:
36281 case V8HI_FTYPE_V8DI_V8HI_QI:
36282 case V8SF_FTYPE_V8DF_V8SF_QI:
36283 case V8SI_FTYPE_V8DF_V8SI_QI:
36284 case V8SI_FTYPE_V8DI_V8SI_QI:
36285 case V4SI_FTYPE_V4SI_V4SI_V4SI:
36286 nargs = 3;
36287 break;
36288 case V32QI_FTYPE_V32QI_V32QI_INT:
36289 case V16HI_FTYPE_V16HI_V16HI_INT:
36290 case V16QI_FTYPE_V16QI_V16QI_INT:
36291 case V4DI_FTYPE_V4DI_V4DI_INT:
36292 case V8HI_FTYPE_V8HI_V8HI_INT:
36293 case V8SI_FTYPE_V8SI_V8SI_INT:
36294 case V8SI_FTYPE_V8SI_V4SI_INT:
36295 case V8SF_FTYPE_V8SF_V8SF_INT:
36296 case V8SF_FTYPE_V8SF_V4SF_INT:
36297 case V4SI_FTYPE_V4SI_V4SI_INT:
36298 case V4DF_FTYPE_V4DF_V4DF_INT:
36299 case V16SF_FTYPE_V16SF_V16SF_INT:
36300 case V16SF_FTYPE_V16SF_V4SF_INT:
36301 case V16SI_FTYPE_V16SI_V4SI_INT:
36302 case V4DF_FTYPE_V4DF_V2DF_INT:
36303 case V4SF_FTYPE_V4SF_V4SF_INT:
36304 case V2DI_FTYPE_V2DI_V2DI_INT:
36305 case V4DI_FTYPE_V4DI_V2DI_INT:
36306 case V2DF_FTYPE_V2DF_V2DF_INT:
36307 case QI_FTYPE_V8DI_V8DI_INT:
36308 case QI_FTYPE_V8DF_V8DF_INT:
36309 case QI_FTYPE_V2DF_V2DF_INT:
36310 case QI_FTYPE_V4SF_V4SF_INT:
36311 case HI_FTYPE_V16SI_V16SI_INT:
36312 case HI_FTYPE_V16SF_V16SF_INT:
36313 nargs = 3;
36314 nargs_constant = 1;
36315 break;
36316 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
36317 nargs = 3;
36318 rmode = V4DImode;
36319 nargs_constant = 1;
36320 break;
36321 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
36322 nargs = 3;
36323 rmode = V2DImode;
36324 nargs_constant = 1;
36325 break;
36326 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
36327 nargs = 3;
36328 rmode = DImode;
36329 nargs_constant = 1;
36330 break;
36331 case V2DI_FTYPE_V2DI_UINT_UINT:
36332 nargs = 3;
36333 nargs_constant = 2;
36334 break;
36335 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
36336 nargs = 3;
36337 rmode = V8DImode;
36338 nargs_constant = 1;
36339 break;
36340 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
36341 nargs = 5;
36342 rmode = V8DImode;
36343 mask_pos = 2;
36344 nargs_constant = 1;
36345 break;
36346 case QI_FTYPE_V8DF_INT_QI:
36347 case QI_FTYPE_V4DF_INT_QI:
36348 case QI_FTYPE_V2DF_INT_QI:
36349 case HI_FTYPE_V16SF_INT_HI:
36350 case QI_FTYPE_V8SF_INT_QI:
36351 case QI_FTYPE_V4SF_INT_QI:
36352 nargs = 3;
36353 mask_pos = 1;
36354 nargs_constant = 1;
36355 break;
36356 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
36357 nargs = 5;
36358 rmode = V4DImode;
36359 mask_pos = 2;
36360 nargs_constant = 1;
36361 break;
36362 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
36363 nargs = 5;
36364 rmode = V2DImode;
36365 mask_pos = 2;
36366 nargs_constant = 1;
36367 break;
36368 case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
36369 case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
36370 case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
36371 case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
36372 case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
36373 case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
36374 case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
36375 case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
36376 case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
36377 case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
36378 case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
36379 case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
36380 case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
36381 case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
36382 case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
36383 case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
36384 case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
36385 case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
36386 case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
36387 case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
36388 case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
36389 case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
36390 case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
36391 case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
36392 case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
36393 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
36394 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
36395 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
36396 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
36397 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
36398 case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
36399 case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
36400 case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
36401 case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
36402 case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
36403 case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
36404 case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
36405 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
36406 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
36407 case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
36408 case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
36409 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
36410 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
36411 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
36412 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
36413 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
36414 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
36415 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
36416 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
36417 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
36418 case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
36419 case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
36420 case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
36421 case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
36422 nargs = 4;
36423 break;
36424 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
36425 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
36426 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
36427 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
36428 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
36429 nargs = 4;
36430 nargs_constant = 1;
36431 break;
36432 case QI_FTYPE_V4DI_V4DI_INT_QI:
36433 case QI_FTYPE_V8SI_V8SI_INT_QI:
36434 case QI_FTYPE_V4DF_V4DF_INT_QI:
36435 case QI_FTYPE_V8SF_V8SF_INT_QI:
36436 case QI_FTYPE_V2DI_V2DI_INT_QI:
36437 case QI_FTYPE_V4SI_V4SI_INT_QI:
36438 case QI_FTYPE_V2DF_V2DF_INT_QI:
36439 case QI_FTYPE_V4SF_V4SF_INT_QI:
36440 case DI_FTYPE_V64QI_V64QI_INT_DI:
36441 case SI_FTYPE_V32QI_V32QI_INT_SI:
36442 case HI_FTYPE_V16QI_V16QI_INT_HI:
36443 case SI_FTYPE_V32HI_V32HI_INT_SI:
36444 case HI_FTYPE_V16HI_V16HI_INT_HI:
36445 case QI_FTYPE_V8HI_V8HI_INT_QI:
36446 nargs = 4;
36447 mask_pos = 1;
36448 nargs_constant = 1;
36449 break;
36450 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
36451 nargs = 4;
36452 nargs_constant = 2;
36453 break;
36454 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
36455 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
36456 nargs = 4;
36457 break;
36458 case QI_FTYPE_V8DI_V8DI_INT_QI:
36459 case HI_FTYPE_V16SI_V16SI_INT_HI:
36460 case QI_FTYPE_V8DF_V8DF_INT_QI:
36461 case HI_FTYPE_V16SF_V16SF_INT_HI:
36462 mask_pos = 1;
36463 nargs = 4;
36464 nargs_constant = 1;
36465 break;
36466 case V8SF_FTYPE_V8SF_INT_V8SF_QI:
36467 case V4SF_FTYPE_V4SF_INT_V4SF_QI:
36468 case V2DF_FTYPE_V4DF_INT_V2DF_QI:
36469 case V2DI_FTYPE_V4DI_INT_V2DI_QI:
36470 case V8SF_FTYPE_V16SF_INT_V8SF_QI:
36471 case V8SI_FTYPE_V16SI_INT_V8SI_QI:
36472 case V2DF_FTYPE_V8DF_INT_V2DF_QI:
36473 case V2DI_FTYPE_V8DI_INT_V2DI_QI:
36474 case V4SF_FTYPE_V8SF_INT_V4SF_QI:
36475 case V4SI_FTYPE_V8SI_INT_V4SI_QI:
36476 case V8HI_FTYPE_V8SF_INT_V8HI_QI:
36477 case V8HI_FTYPE_V4SF_INT_V8HI_QI:
36478 case V32HI_FTYPE_V32HI_INT_V32HI_SI:
36479 case V16HI_FTYPE_V16HI_INT_V16HI_HI:
36480 case V8HI_FTYPE_V8HI_INT_V8HI_QI:
36481 case V4DI_FTYPE_V4DI_INT_V4DI_QI:
36482 case V2DI_FTYPE_V2DI_INT_V2DI_QI:
36483 case V8SI_FTYPE_V8SI_INT_V8SI_QI:
36484 case V4SI_FTYPE_V4SI_INT_V4SI_QI:
36485 case V4DF_FTYPE_V4DF_INT_V4DF_QI:
36486 case V2DF_FTYPE_V2DF_INT_V2DF_QI:
36487 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
36488 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
36489 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
36490 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
36491 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
36492 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
36493 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
36494 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
36495 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
36496 nargs = 4;
36497 mask_pos = 2;
36498 nargs_constant = 1;
36499 break;
36500 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
36501 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
36502 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
36503 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
36504 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
36505 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
36506 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
36507 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
36508 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
36509 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
36510 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
36511 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
36512 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
36513 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
36514 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
36515 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
36516 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
36517 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
36518 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
36519 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
36520 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
36521 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
36522 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
36523 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
36524 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
36525 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
36526 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
36527 nargs = 5;
36528 mask_pos = 2;
36529 nargs_constant = 1;
36530 break;
36531 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
36532 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
36533 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
36534 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
36535 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
36536 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
36537 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
36538 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
36539 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
36540 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
36541 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
36542 nargs = 5;
36544 mask_pos = 1;
36545 nargs_constant = 1;
36546 break;
36547
36548 default:
36549 gcc_unreachable ();
36550 }
36551
36552 gcc_assert (nargs <= ARRAY_SIZE (args));
36553
36554 if (comparison != UNKNOWN)
36555 {
36556 gcc_assert (nargs == 2);
36557 return ix86_expand_sse_compare (d, exp, target, swap);
36558 }
36559
36560 if (rmode == VOIDmode || rmode == tmode)
36561 {
36562 if (optimize
36563 || target == 0
36564 || GET_MODE (target) != tmode
36565 || !insn_p->operand[0].predicate (target, tmode))
36566 target = gen_reg_rtx (tmode);
36567 real_target = target;
36568 }
36569 else
36570 {
36571 real_target = gen_reg_rtx (tmode);
36572 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
36573 }
36574
36575 for (i = 0; i < nargs; i++)
36576 {
36577 tree arg = CALL_EXPR_ARG (exp, i);
36578 rtx op = expand_normal (arg);
36579 enum machine_mode mode = insn_p->operand[i + 1].mode;
36580 bool match = insn_p->operand[i + 1].predicate (op, mode);
36581
36582 if (last_arg_count && (i + 1) == nargs)
36583 {
36584 /* SIMD shift insns take either an 8-bit immediate or
36585 register as count. But builtin functions take int as
36586 count. If count doesn't match, we put it in register. */
36587 if (!match)
36588 {
36589 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
36590 if (!insn_p->operand[i + 1].predicate (op, mode))
36591 op = copy_to_reg (op);
36592 }
36593 }
36594 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
36595 || (!mask_pos && (nargs - i) <= nargs_constant))
36596 {
36597 if (!match)
36598 switch (icode)
36599 {
36600 case CODE_FOR_avx_vinsertf128v4di:
36601 case CODE_FOR_avx_vextractf128v4di:
36602 error ("the last argument must be an 1-bit immediate");
36603 return const0_rtx;
36604
36605 case CODE_FOR_avx512f_cmpv8di3_mask:
36606 case CODE_FOR_avx512f_cmpv16si3_mask:
36607 case CODE_FOR_avx512f_ucmpv8di3_mask:
36608 case CODE_FOR_avx512f_ucmpv16si3_mask:
36609 case CODE_FOR_avx512vl_cmpv4di3_mask:
36610 case CODE_FOR_avx512vl_cmpv8si3_mask:
36611 case CODE_FOR_avx512vl_ucmpv4di3_mask:
36612 case CODE_FOR_avx512vl_ucmpv8si3_mask:
36613 case CODE_FOR_avx512vl_cmpv2di3_mask:
36614 case CODE_FOR_avx512vl_cmpv4si3_mask:
36615 case CODE_FOR_avx512vl_ucmpv2di3_mask:
36616 case CODE_FOR_avx512vl_ucmpv4si3_mask:
36617 error ("the last argument must be a 3-bit immediate");
36618 return const0_rtx;
36619
36620 case CODE_FOR_sse4_1_roundsd:
36621 case CODE_FOR_sse4_1_roundss:
36622
36623 case CODE_FOR_sse4_1_roundpd:
36624 case CODE_FOR_sse4_1_roundps:
36625 case CODE_FOR_avx_roundpd256:
36626 case CODE_FOR_avx_roundps256:
36627
36628 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
36629 case CODE_FOR_sse4_1_roundps_sfix:
36630 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
36631 case CODE_FOR_avx_roundps_sfix256:
36632
36633 case CODE_FOR_sse4_1_blendps:
36634 case CODE_FOR_avx_blendpd256:
36635 case CODE_FOR_avx_vpermilv4df:
36636 case CODE_FOR_avx_vpermilv4df_mask:
36637 case CODE_FOR_avx512f_getmantv8df_mask:
36638 case CODE_FOR_avx512f_getmantv16sf_mask:
36639 case CODE_FOR_avx512vl_getmantv8sf_mask:
36640 case CODE_FOR_avx512vl_getmantv4df_mask:
36641 case CODE_FOR_avx512vl_getmantv4sf_mask:
36642 case CODE_FOR_avx512vl_getmantv2df_mask:
36643 case CODE_FOR_avx512dq_rangepv8df_mask_round:
36644 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
36645 case CODE_FOR_avx512dq_rangepv4df_mask:
36646 case CODE_FOR_avx512dq_rangepv8sf_mask:
36647 case CODE_FOR_avx512dq_rangepv2df_mask:
36648 case CODE_FOR_avx512dq_rangepv4sf_mask:
36649 case CODE_FOR_avx_shufpd256_mask:
36650 error ("the last argument must be a 4-bit immediate");
36651 return const0_rtx;
36652
36653 case CODE_FOR_sha1rnds4:
36654 case CODE_FOR_sse4_1_blendpd:
36655 case CODE_FOR_avx_vpermilv2df:
36656 case CODE_FOR_avx_vpermilv2df_mask:
36657 case CODE_FOR_xop_vpermil2v2df3:
36658 case CODE_FOR_xop_vpermil2v4sf3:
36659 case CODE_FOR_xop_vpermil2v4df3:
36660 case CODE_FOR_xop_vpermil2v8sf3:
36661 case CODE_FOR_avx512f_vinsertf32x4_mask:
36662 case CODE_FOR_avx512f_vinserti32x4_mask:
36663 case CODE_FOR_avx512f_vextractf32x4_mask:
36664 case CODE_FOR_avx512f_vextracti32x4_mask:
36665 case CODE_FOR_sse2_shufpd:
36666 case CODE_FOR_sse2_shufpd_mask:
36667 case CODE_FOR_avx512dq_shuf_f64x2_mask:
36668 case CODE_FOR_avx512dq_shuf_i64x2_mask:
36669 case CODE_FOR_avx512vl_shuf_i32x4_mask:
36670 case CODE_FOR_avx512vl_shuf_f32x4_mask:
36671 error ("the last argument must be a 2-bit immediate");
36672 return const0_rtx;
36673
36674 case CODE_FOR_avx_vextractf128v4df:
36675 case CODE_FOR_avx_vextractf128v8sf:
36676 case CODE_FOR_avx_vextractf128v8si:
36677 case CODE_FOR_avx_vinsertf128v4df:
36678 case CODE_FOR_avx_vinsertf128v8sf:
36679 case CODE_FOR_avx_vinsertf128v8si:
36680 case CODE_FOR_avx512f_vinsertf64x4_mask:
36681 case CODE_FOR_avx512f_vinserti64x4_mask:
36682 case CODE_FOR_avx512f_vextractf64x4_mask:
36683 case CODE_FOR_avx512f_vextracti64x4_mask:
36684 case CODE_FOR_avx512dq_vinsertf32x8_mask:
36685 case CODE_FOR_avx512dq_vinserti32x8_mask:
36686 case CODE_FOR_avx512vl_vinsertv4df:
36687 case CODE_FOR_avx512vl_vinsertv4di:
36688 case CODE_FOR_avx512vl_vinsertv8sf:
36689 case CODE_FOR_avx512vl_vinsertv8si:
36690 error ("the last argument must be a 1-bit immediate");
36691 return const0_rtx;
36692
36693 case CODE_FOR_avx_vmcmpv2df3:
36694 case CODE_FOR_avx_vmcmpv4sf3:
36695 case CODE_FOR_avx_cmpv2df3:
36696 case CODE_FOR_avx_cmpv4sf3:
36697 case CODE_FOR_avx_cmpv4df3:
36698 case CODE_FOR_avx_cmpv8sf3:
36699 case CODE_FOR_avx512f_cmpv8df3_mask:
36700 case CODE_FOR_avx512f_cmpv16sf3_mask:
36701 case CODE_FOR_avx512f_vmcmpv2df3_mask:
36702 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
36703 error ("the last argument must be a 5-bit immediate");
36704 return const0_rtx;
36705
36706 default:
36707 switch (nargs_constant)
36708 {
36709 case 2:
36710 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
36711 || (!mask_pos && (nargs - i) == nargs_constant))
36712 {
36713 error ("the next to last argument must be an 8-bit immediate");
36714 break;
36715 }
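/* FALLTHRU */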
36716 case 1:
36717 error ("the last argument must be an 8-bit immediate");
36718 break;
36719 default:
36720 gcc_unreachable ();
36721 }
36722 return const0_rtx;
36723 }
36724 }
36725 else
36726 {
36727 if (VECTOR_MODE_P (mode))
36728 op = safe_vector_operand (op, mode);
36729
36730 /* If we aren't optimizing, only allow one memory operand to
36731 be generated. */
36732 if (memory_operand (op, mode))
36733 num_memory++;
36734
36735 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
36736 {
36737 if (optimize || !match || num_memory > 1)
36738 op = copy_to_mode_reg (mode, op);
36739 }
36740 else
36741 {
36742 op = copy_to_reg (op);
36743 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
36744 }
36745 }
36746
36747 args[i].op = op;
36748 args[i].mode = mode;
36749 }
36750
36751 switch (nargs)
36752 {
36753 case 1:
36754 pat = GEN_FCN (icode) (real_target, args[0].op);
36755 break;
36756 case 2:
36757 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
36758 break;
36759 case 3:
36760 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
36761 args[2].op);
36762 break;
36763 case 4:
36764 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
36765 args[2].op, args[3].op);
36766 break;
36767 case 5:
36768 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
36769 args[2].op, args[3].op, args[4].op);
break;
36770 case 6:
36771 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
36772 args[2].op, args[3].op, args[4].op,
36773 args[5].op);
36774 break;
36775 default:
36776 gcc_unreachable ();
36777 }
36778
36779 if (! pat)
36780 return 0;
36781
36782 emit_insn (pat);
36783 return target;
36784 }
36785
36786 /* Transform a pattern of the following layout:
36787 (parallel [
36788 (set A B)
36789 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
36790 ])
36791 into:
36792 (set A B)
36793
36794 Or:
36795 (parallel [ A B
36796 ...
36797 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
36798 ...
36799 ])
36800 into:
36801 (parallel [ A B ... ]) */
36802
36803 static rtx
36804 ix86_erase_embedded_rounding (rtx pat)
36805 {
36806 if (GET_CODE (pat) == INSN)
36807 pat = PATTERN (pat);
36808
36809 gcc_assert (GET_CODE (pat) == PARALLEL);
36810
36811 if (XVECLEN (pat, 0) == 2)
36812 {
36813 rtx p0 = XVECEXP (pat, 0, 0);
36814 rtx p1 = XVECEXP (pat, 0, 1);
36815
36816 gcc_assert (GET_CODE (p0) == SET
36817 && GET_CODE (p1) == UNSPEC
36818 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
36819
36820 return p0;
36821 }
36822 else
36823 {
36824 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
36825 int i = 0;
36826 int j = 0;
36827
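/* Copy every element except the embedded rounding UNSPEC.  */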
36828 for (; i < XVECLEN (pat, 0); ++i)
36829 {
36830 rtx elem = XVECEXP (pat, 0, i);
36831 if (GET_CODE (elem) != UNSPEC
36832 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
36833 res [j++] = elem;
36834 }
36835
36836 /* No more than one occurrence was removed. */
36837 gcc_assert (j >= XVECLEN (pat, 0) - 1);
36838
36839 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
36840 }
36841 }
36842
36843 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
36844 with rounding. */
36845 static rtx
36846 ix86_expand_sse_comi_round (const struct builtin_description *d,
36847 tree exp, rtx target)
36848 {
36849 rtx pat, set_dst;
36850 tree arg0 = CALL_EXPR_ARG (exp, 0);
36851 tree arg1 = CALL_EXPR_ARG (exp, 1);
36852 tree arg2 = CALL_EXPR_ARG (exp, 2);
36853 tree arg3 = CALL_EXPR_ARG (exp, 3);
36854 rtx op0 = expand_normal (arg0);
36855 rtx op1 = expand_normal (arg1);
36856 rtx op2 = expand_normal (arg2);
36857 rtx op3 = expand_normal (arg3);
36858 enum insn_code icode = d->icode;
36859 const struct insn_data_d *insn_p = &insn_data[icode];
36860 enum machine_mode mode0 = insn_p->operand[0].mode;
36861 enum machine_mode mode1 = insn_p->operand[1].mode;
36862 enum rtx_code comparison = UNEQ;
36863 bool need_ucomi = false;
36864
36865 /* See avxintrin.h for values. */
36866 enum rtx_code comi_comparisons[32] =
36867 {
36868 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
36869 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
36870 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
36871 };
36872 bool need_ucomi_values[32] =
36873 {
36874 true, false, false, true, true, false, false, true,
36875 true, false, false, true, true, false, false, true,
36876 false, true, true, false, false, true, true, false,
36877 false, true, true, false, false, true, true, false
36878 };
36879
36880 if (!CONST_INT_P (op2))
36881 {
36882 error ("the third argument must be comparison constant");
36883 return const0_rtx;
36884 }
36885 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
36886 {
36887 error ("incorect comparison mode");
36888 return const0_rtx;
36889 }
36890
36891 if (!insn_p->operand[2].predicate (op3, SImode))
36892 {
36893 error ("incorrect rounding operand");
36894 return const0_rtx;
36895 }
36896
36897 comparison = comi_comparisons[INTVAL (op2)];
36898 need_ucomi = need_ucomi_values[INTVAL (op2)];
36899
36900 if (VECTOR_MODE_P (mode0))
36901 op0 = safe_vector_operand (op0, mode0);
36902 if (VECTOR_MODE_P (mode1))
36903 op1 = safe_vector_operand (op1, mode1);
36904
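/* Build the result in the low byte of a zeroed SImode pseudo so the
value returned below is already zero extended.  */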
36905 target = gen_reg_rtx (SImode);
36906 emit_move_insn (target, const0_rtx);
36907 target = gen_rtx_SUBREG (QImode, target, 0);
36908
36909 if ((optimize && !register_operand (op0, mode0))
36910 || !insn_p->operand[0].predicate (op0, mode0))
36911 op0 = copy_to_mode_reg (mode0, op0);
36912 if ((optimize && !register_operand (op1, mode1))
36913 || !insn_p->operand[1].predicate (op1, mode1))
36914 op1 = copy_to_mode_reg (mode1, op1);
36915
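/* The unordered comparisons need the ucomi variants of the pattern.  */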
36916 if (need_ucomi)
36917 icode = icode == CODE_FOR_sse_comi_round
36918 ? CODE_FOR_sse_ucomi_round
36919 : CODE_FOR_sse2_ucomi_round;
36920
36921 pat = GEN_FCN (icode) (op0, op1, op3);
36922 if (! pat)
36923 return 0;
36924
36925 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
36926 if (INTVAL (op3) == NO_ROUND)
36927 {
36928 pat = ix86_erase_embedded_rounding (pat);
36929 if (! pat)
36930 return 0;
36931
36932 set_dst = SET_DEST (pat);
36933 }
36934 else
36935 {
36936 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
36937 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
36938 }
36939
36940 emit_insn (pat);
36941 emit_insn (gen_rtx_SET (VOIDmode,
36942 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36943 gen_rtx_fmt_ee (comparison, QImode,
36944 set_dst,
36945 const0_rtx)));
36946
36947 return SUBREG_REG (target);
36948 }
36949
36950 static rtx
36951 ix86_expand_round_builtin (const struct builtin_description *d,
36952 tree exp, rtx target)
36953 {
36954 rtx pat;
36955 unsigned int i, nargs;
36956 struct
36957 {
36958 rtx op;
36959 enum machine_mode mode;
36960 } args[6];
36961 enum insn_code icode = d->icode;
36962 const struct insn_data_d *insn_p = &insn_data[icode];
36963 enum machine_mode tmode = insn_p->operand[0].mode;
36964 unsigned int nargs_constant = 0;
36965 unsigned int redundant_embed_rnd = 0;
36966
36967 switch ((enum ix86_builtin_func_type) d->flag)
36968 {
36969 case UINT64_FTYPE_V2DF_INT:
36970 case UINT64_FTYPE_V4SF_INT:
36971 case UINT_FTYPE_V2DF_INT:
36972 case UINT_FTYPE_V4SF_INT:
36973 case INT64_FTYPE_V2DF_INT:
36974 case INT64_FTYPE_V4SF_INT:
36975 case INT_FTYPE_V2DF_INT:
36976 case INT_FTYPE_V4SF_INT:
36977 nargs = 2;
36978 break;
36979 case V4SF_FTYPE_V4SF_UINT_INT:
36980 case V4SF_FTYPE_V4SF_UINT64_INT:
36981 case V2DF_FTYPE_V2DF_UINT64_INT:
36982 case V4SF_FTYPE_V4SF_INT_INT:
36983 case V4SF_FTYPE_V4SF_INT64_INT:
36984 case V2DF_FTYPE_V2DF_INT64_INT:
36985 case V4SF_FTYPE_V4SF_V4SF_INT:
36986 case V2DF_FTYPE_V2DF_V2DF_INT:
36987 case V4SF_FTYPE_V4SF_V2DF_INT:
36988 case V2DF_FTYPE_V2DF_V4SF_INT:
36989 nargs = 3;
36990 break;
36991 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
36992 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
36993 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
36994 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
36995 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
36996 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
36997 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
36998 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
36999 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37000 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37001 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37002 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37003 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37004 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37005 nargs = 4;
37006 break;
37007 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37008 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37009 nargs_constant = 2;
37010 nargs = 4;
37011 break;
37012 case INT_FTYPE_V4SF_V4SF_INT_INT:
37013 case INT_FTYPE_V2DF_V2DF_INT_INT:
37014 return ix86_expand_sse_comi_round (d, exp, target);
37015 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37016 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37017 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37018 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37019 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37020 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37021 nargs = 5;
37022 break;
37023 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37024 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37025 nargs_constant = 4;
37026 nargs = 5;
37027 break;
37028 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
37029 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
37030 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
37031 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
37032 nargs_constant = 3;
37033 nargs = 5;
37034 break;
37035 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37036 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37037 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37038 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37039 nargs = 6;
37040 nargs_constant = 4;
37041 break;
37042 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37043 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37044 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37045 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37046 nargs = 6;
37047 nargs_constant = 3;
37048 break;
37049 default:
37050 gcc_unreachable ();
37051 }
37052 gcc_assert (nargs <= ARRAY_SIZE (args));
37053
37054 if (optimize
37055 || target == 0
37056 || GET_MODE (target) != tmode
37057 || !insn_p->operand[0].predicate (target, tmode))
37058 target = gen_reg_rtx (tmode);
37059
37060 for (i = 0; i < nargs; i++)
37061 {
37062 tree arg = CALL_EXPR_ARG (exp, i);
37063 rtx op = expand_normal (arg);
37064 enum machine_mode mode = insn_p->operand[i + 1].mode;
37065 bool match = insn_p->operand[i + 1].predicate (op, mode);
37066
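/* This operand must be an immediate of the width the insn expects;
diagnose anything else.  */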
37067 if (i == nargs - nargs_constant)
37068 {
37069 if (!match)
37070 {
37071 switch (icode)
37072 {
37073 case CODE_FOR_avx512f_getmantv8df_mask_round:
37074 case CODE_FOR_avx512f_getmantv16sf_mask_round:
37075 case CODE_FOR_avx512f_vgetmantv2df_round:
37076 case CODE_FOR_avx512f_vgetmantv4sf_round:
37077 error ("the immediate argument must be a 4-bit immediate");
37078 return const0_rtx;
37079 case CODE_FOR_avx512f_cmpv8df3_mask_round:
37080 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
37081 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
37082 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
37083 error ("the immediate argument must be a 5-bit immediate");
37084 return const0_rtx;
37085 default:
37086 error ("the immediate argument must be an 8-bit immediate");
37087 return const0_rtx;
37088 }
37089 }
37090 }
37091 else if (i == nargs - 1)
37092 {
37093 if (!insn_p->operand[nargs].predicate (op, SImode))
37094 {
37095 error ("incorrect rounding operand");
37096 return const0_rtx;
37097 }
37098
37099 /* If there is no rounding, use the normal version of the pattern. */
37100 if (INTVAL (op) == NO_ROUND)
37101 redundant_embed_rnd = 1;
37102 }
37103 else
37104 {
37105 if (VECTOR_MODE_P (mode))
37106 op = safe_vector_operand (op, mode);
37107
37108 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37109 {
37110 if (optimize || !match)
37111 op = copy_to_mode_reg (mode, op);
37112 }
37113 else
37114 {
37115 op = copy_to_reg (op);
37116 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37117 }
37118 }
37119
37120 args[i].op = op;
37121 args[i].mode = mode;
37122 }
37123
37124 switch (nargs)
37125 {
37126 case 1:
37127 pat = GEN_FCN (icode) (target, args[0].op);
37128 break;
37129 case 2:
37130 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37131 break;
37132 case 3:
37133 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37134 args[2].op);
37135 break;
37136 case 4:
37137 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37138 args[2].op, args[3].op);
37139 break;
37140 case 5:
37141 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37142 args[2].op, args[3].op, args[4].op);
break;
37143 case 6:
37144 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37145 args[2].op, args[3].op, args[4].op,
37146 args[5].op);
37147 break;
37148 default:
37149 gcc_unreachable ();
37150 }
37151
37152 if (!pat)
37153 return 0;
37154
37155 if (redundant_embed_rnd)
37156 pat = ix86_erase_embedded_rounding (pat);
37157
37158 emit_insn (pat);
37159 return target;
37160 }
37161
37162 /* Subroutine of ix86_expand_builtin to take care of special insns
37163 with variable number of operands. */
37164
37165 static rtx
37166 ix86_expand_special_args_builtin (const struct builtin_description *d,
37167 tree exp, rtx target)
37168 {
37169 tree arg;
37170 rtx pat, op;
37171 unsigned int i, nargs, arg_adjust, memory;
37172 bool aligned_mem = false;
37173 struct
37174 {
37175 rtx op;
37176 enum machine_mode mode;
37177 } args[3];
37178 enum insn_code icode = d->icode;
37179 bool last_arg_constant = false;
37180 const struct insn_data_d *insn_p = &insn_data[icode];
37181 enum machine_mode tmode = insn_p->operand[0].mode;
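/* A load produces its result in TARGET; a store builds the insn's
destination operand from the call's first argument and returns 0.  */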
37182 enum { load, store } klass;
37183
37184 switch ((enum ix86_builtin_func_type) d->flag)
37185 {
37186 case VOID_FTYPE_VOID:
37187 emit_insn (GEN_FCN (icode) (target));
37188 return 0;
37189 case VOID_FTYPE_UINT64:
37190 case VOID_FTYPE_UNSIGNED:
37191 nargs = 0;
37192 klass = store;
37193 memory = 0;
37194 break;
37195
37196 case INT_FTYPE_VOID:
37197 case USHORT_FTYPE_VOID:
37198 case UINT64_FTYPE_VOID:
37199 case UNSIGNED_FTYPE_VOID:
37200 nargs = 0;
37201 klass = load;
37202 memory = 0;
37203 break;
37204 case UINT64_FTYPE_PUNSIGNED:
37205 case V2DI_FTYPE_PV2DI:
37206 case V4DI_FTYPE_PV4DI:
37207 case V32QI_FTYPE_PCCHAR:
37208 case V16QI_FTYPE_PCCHAR:
37209 case V8SF_FTYPE_PCV4SF:
37210 case V8SF_FTYPE_PCFLOAT:
37211 case V4SF_FTYPE_PCFLOAT:
37212 case V4DF_FTYPE_PCV2DF:
37213 case V4DF_FTYPE_PCDOUBLE:
37214 case V2DF_FTYPE_PCDOUBLE:
37215 case VOID_FTYPE_PVOID:
37216 case V16SI_FTYPE_PV4SI:
37217 case V16SF_FTYPE_PV4SF:
37218 case V8DI_FTYPE_PV4DI:
37219 case V8DI_FTYPE_PV8DI:
37220 case V8DF_FTYPE_PV4DF:
37221 nargs = 1;
37222 klass = load;
37223 memory = 0;
37224 switch (icode)
37225 {
37226 case CODE_FOR_sse4_1_movntdqa:
37227 case CODE_FOR_avx2_movntdqa:
37228 case CODE_FOR_avx512f_movntdqa:
37229 aligned_mem = true;
37230 break;
37231 default:
37232 break;
37233 }
37234 break;
37235 case VOID_FTYPE_PV2SF_V4SF:
37236 case VOID_FTYPE_PV8DI_V8DI:
37237 case VOID_FTYPE_PV4DI_V4DI:
37238 case VOID_FTYPE_PV2DI_V2DI:
37239 case VOID_FTYPE_PCHAR_V32QI:
37240 case VOID_FTYPE_PCHAR_V16QI:
37241 case VOID_FTYPE_PFLOAT_V16SF:
37242 case VOID_FTYPE_PFLOAT_V8SF:
37243 case VOID_FTYPE_PFLOAT_V4SF:
37244 case VOID_FTYPE_PDOUBLE_V8DF:
37245 case VOID_FTYPE_PDOUBLE_V4DF:
37246 case VOID_FTYPE_PDOUBLE_V2DF:
37247 case VOID_FTYPE_PLONGLONG_LONGLONG:
37248 case VOID_FTYPE_PULONGLONG_ULONGLONG:
37249 case VOID_FTYPE_PINT_INT:
37250 nargs = 1;
37251 klass = store;
37252 /* Reserve memory operand for target. */
37253 memory = ARRAY_SIZE (args);
37254 switch (icode)
37255 {
37256 /* These builtins and instructions require the memory
37257 to be properly aligned. */
37258 case CODE_FOR_avx_movntv4di:
37259 case CODE_FOR_sse2_movntv2di:
37260 case CODE_FOR_avx_movntv8sf:
37261 case CODE_FOR_sse_movntv4sf:
37262 case CODE_FOR_sse4a_vmmovntv4sf:
37263 case CODE_FOR_avx_movntv4df:
37264 case CODE_FOR_sse2_movntv2df:
37265 case CODE_FOR_sse4a_vmmovntv2df:
37266 case CODE_FOR_sse2_movntidi:
37267 case CODE_FOR_sse_movntq:
37268 case CODE_FOR_sse2_movntisi:
37269 case CODE_FOR_avx512f_movntv16sf:
37270 case CODE_FOR_avx512f_movntv8df:
37271 case CODE_FOR_avx512f_movntv8di:
37272 aligned_mem = true;
37273 break;
37274 default:
37275 break;
37276 }
37277 break;
37278 case V4SF_FTYPE_V4SF_PCV2SF:
37279 case V2DF_FTYPE_V2DF_PCDOUBLE:
37280 nargs = 2;
37281 klass = load;
37282 memory = 1;
37283 break;
37284 case V8SF_FTYPE_PCV8SF_V8SI:
37285 case V4DF_FTYPE_PCV4DF_V4DI:
37286 case V4SF_FTYPE_PCV4SF_V4SI:
37287 case V2DF_FTYPE_PCV2DF_V2DI:
37288 case V8SI_FTYPE_PCV8SI_V8SI:
37289 case V4DI_FTYPE_PCV4DI_V4DI:
37290 case V4SI_FTYPE_PCV4SI_V4SI:
37291 case V2DI_FTYPE_PCV2DI_V2DI:
37292 nargs = 2;
37293 klass = load;
37294 memory = 0;
37295 break;
37296 case VOID_FTYPE_PV8DF_V8DF_QI:
37297 case VOID_FTYPE_PV16SF_V16SF_HI:
37298 case VOID_FTYPE_PV8DI_V8DI_QI:
37299 case VOID_FTYPE_PV4DI_V4DI_QI:
37300 case VOID_FTYPE_PV2DI_V2DI_QI:
37301 case VOID_FTYPE_PV16SI_V16SI_HI:
37302 case VOID_FTYPE_PV8SI_V8SI_QI:
37303 case VOID_FTYPE_PV4SI_V4SI_QI:
37304 switch (icode)
37305 {
37306 /* These builtins and instructions require the memory
37307 to be properly aligned. */
37308 case CODE_FOR_avx512f_storev16sf_mask:
37309 case CODE_FOR_avx512f_storev16si_mask:
37310 case CODE_FOR_avx512f_storev8df_mask:
37311 case CODE_FOR_avx512f_storev8di_mask:
37312 case CODE_FOR_avx512vl_storev8sf_mask:
37313 case CODE_FOR_avx512vl_storev8si_mask:
37314 case CODE_FOR_avx512vl_storev4df_mask:
37315 case CODE_FOR_avx512vl_storev4di_mask:
37316 case CODE_FOR_avx512vl_storev4sf_mask:
37317 case CODE_FOR_avx512vl_storev4si_mask:
37318 case CODE_FOR_avx512vl_storev2df_mask:
37319 case CODE_FOR_avx512vl_storev2di_mask:
37320 aligned_mem = true;
37321 break;
37322 default:
37323 break;
37324 }
37325 /* FALLTHRU */
37326 case VOID_FTYPE_PV8SF_V8SI_V8SF:
37327 case VOID_FTYPE_PV4DF_V4DI_V4DF:
37328 case VOID_FTYPE_PV4SF_V4SI_V4SF:
37329 case VOID_FTYPE_PV2DF_V2DI_V2DF:
37330 case VOID_FTYPE_PV8SI_V8SI_V8SI:
37331 case VOID_FTYPE_PV4DI_V4DI_V4DI:
37332 case VOID_FTYPE_PV4SI_V4SI_V4SI:
37333 case VOID_FTYPE_PV2DI_V2DI_V2DI:
37334 case VOID_FTYPE_PDOUBLE_V2DF_QI:
37335 case VOID_FTYPE_PFLOAT_V4SF_QI:
37336 case VOID_FTYPE_PV8SI_V8DI_QI:
37337 case VOID_FTYPE_PV8HI_V8DI_QI:
37338 case VOID_FTYPE_PV16HI_V16SI_HI:
37339 case VOID_FTYPE_PV16QI_V8DI_QI:
37340 case VOID_FTYPE_PV16QI_V16SI_HI:
37341 case VOID_FTYPE_PV4SI_V4DI_QI:
37342 case VOID_FTYPE_PV4SI_V2DI_QI:
37343 case VOID_FTYPE_PV8HI_V4DI_QI:
37344 case VOID_FTYPE_PV8HI_V2DI_QI:
37345 case VOID_FTYPE_PV8HI_V8SI_QI:
37346 case VOID_FTYPE_PV8HI_V4SI_QI:
37347 case VOID_FTYPE_PV16QI_V4DI_QI:
37348 case VOID_FTYPE_PV16QI_V2DI_QI:
37349 case VOID_FTYPE_PV16QI_V8SI_QI:
37350 case VOID_FTYPE_PV16QI_V4SI_QI:
37351 case VOID_FTYPE_PV8HI_V8HI_QI:
37352 case VOID_FTYPE_PV16HI_V16HI_HI:
37353 case VOID_FTYPE_PV32HI_V32HI_SI:
37354 case VOID_FTYPE_PV16QI_V16QI_HI:
37355 case VOID_FTYPE_PV32QI_V32QI_SI:
37356 case VOID_FTYPE_PV64QI_V64QI_DI:
37357 case VOID_FTYPE_PV4DF_V4DF_QI:
37358 case VOID_FTYPE_PV2DF_V2DF_QI:
37359 case VOID_FTYPE_PV8SF_V8SF_QI:
37360 case VOID_FTYPE_PV4SF_V4SF_QI:
37361 nargs = 2;
37362 klass = store;
37363 /* Reserve memory operand for target. */
37364 memory = ARRAY_SIZE (args);
37365 break;
37366 case V4SF_FTYPE_PCV4SF_V4SF_QI:
37367 case V8SF_FTYPE_PCV8SF_V8SF_QI:
37368 case V16SF_FTYPE_PCV16SF_V16SF_HI:
37369 case V4SI_FTYPE_PCV4SI_V4SI_QI:
37370 case V8SI_FTYPE_PCV8SI_V8SI_QI:
37371 case V16SI_FTYPE_PCV16SI_V16SI_HI:
37372 case V2DF_FTYPE_PCV2DF_V2DF_QI:
37373 case V4DF_FTYPE_PCV4DF_V4DF_QI:
37374 case V8DF_FTYPE_PCV8DF_V8DF_QI:
37375 case V2DI_FTYPE_PCV2DI_V2DI_QI:
37376 case V4DI_FTYPE_PCV4DI_V4DI_QI:
37377 case V8DI_FTYPE_PCV8DI_V8DI_QI:
37378 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
37379 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
37380 case V8HI_FTYPE_PCV8HI_V8HI_QI:
37381 case V16HI_FTYPE_PCV16HI_V16HI_HI:
37382 case V32HI_FTYPE_PCV32HI_V32HI_SI:
37383 case V16QI_FTYPE_PCV16QI_V16QI_HI:
37384 case V32QI_FTYPE_PCV32QI_V32QI_SI:
37385 case V64QI_FTYPE_PCV64QI_V64QI_DI:
37386 nargs = 3;
37387 klass = load;
37388 memory = 0;
37389 switch (icode)
37390 {
37391 /* These builtins and instructions require the memory
37392 to be properly aligned. */
37393 case CODE_FOR_avx512f_loadv16sf_mask:
37394 case CODE_FOR_avx512f_loadv16si_mask:
37395 case CODE_FOR_avx512f_loadv8df_mask:
37396 case CODE_FOR_avx512f_loadv8di_mask:
37397 case CODE_FOR_avx512vl_loadv8sf_mask:
37398 case CODE_FOR_avx512vl_loadv8si_mask:
37399 case CODE_FOR_avx512vl_loadv4df_mask:
37400 case CODE_FOR_avx512vl_loadv4di_mask:
37401 case CODE_FOR_avx512vl_loadv4sf_mask:
37402 case CODE_FOR_avx512vl_loadv4si_mask:
37403 case CODE_FOR_avx512vl_loadv2df_mask:
37404 case CODE_FOR_avx512vl_loadv2di_mask:
37405 case CODE_FOR_avx512bw_loadv64qi_mask:
37406 case CODE_FOR_avx512vl_loadv32qi_mask:
37407 case CODE_FOR_avx512vl_loadv16qi_mask:
37408 case CODE_FOR_avx512bw_loadv32hi_mask:
37409 case CODE_FOR_avx512vl_loadv16hi_mask:
37410 case CODE_FOR_avx512vl_loadv8hi_mask:
37411 aligned_mem = true;
37412 break;
37413 default:
37414 break;
37415 }
37416 break;
37417 case VOID_FTYPE_UINT_UINT_UINT:
37418 case VOID_FTYPE_UINT64_UINT_UINT:
37419 case UCHAR_FTYPE_UINT_UINT_UINT:
37420 case UCHAR_FTYPE_UINT64_UINT_UINT:
37421 nargs = 3;
37422 klass = load;
37423 memory = ARRAY_SIZE (args);
37424 last_arg_constant = true;
37425 break;
37426 default:
37427 gcc_unreachable ();
37428 }
37429
37430 gcc_assert (nargs <= ARRAY_SIZE (args));
37431
37432 if (klass == store)
37433 {
37434 arg = CALL_EXPR_ARG (exp, 0);
37435 op = expand_normal (arg);
37436 gcc_assert (target == 0);
37437 if (memory)
37438 {
37439 op = ix86_zero_extend_to_Pmode (op);
37440 target = gen_rtx_MEM (tmode, op);
37441 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
37442 on it. Try to improve it using get_pointer_alignment,
37443 and if the special builtin is one that requires strict
37444 mode alignment, also from its GET_MODE_ALIGNMENT.
37445 Failure to do so could lead to ix86_legitimate_combined_insn
37446 rejecting all changes to such insns. */
37447 unsigned int align = get_pointer_alignment (arg);
37448 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
37449 align = GET_MODE_ALIGNMENT (tmode);
37450 if (MEM_ALIGN (target) < align)
37451 set_mem_align (target, align);
37452 }
37453 else
37454 target = force_reg (tmode, op);
37455 arg_adjust = 1;
37456 }
37457 else
37458 {
37459 arg_adjust = 0;
37460 if (optimize
37461 || target == 0
37462 || !register_operand (target, tmode)
37463 || GET_MODE (target) != tmode)
37464 target = gen_reg_rtx (tmode);
37465 }
37466
37467 for (i = 0; i < nargs; i++)
37468 {
37469 enum machine_mode mode = insn_p->operand[i + 1].mode;
37470 bool match;
37471
37472 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
37473 op = expand_normal (arg);
37474 match = insn_p->operand[i + 1].predicate (op, mode);
37475
37476 if (last_arg_constant && (i + 1) == nargs)
37477 {
37478 if (!match)
37479 {
37480 if (icode == CODE_FOR_lwp_lwpvalsi3
37481 || icode == CODE_FOR_lwp_lwpinssi3
37482 || icode == CODE_FOR_lwp_lwpvaldi3
37483 || icode == CODE_FOR_lwp_lwpinsdi3)
37484 error ("the last argument must be a 32-bit immediate");
37485 else
37486 error ("the last argument must be an 8-bit immediate");
37487 return const0_rtx;
37488 }
37489 }
37490 else
37491 {
37492 if (i == memory)
37493 {
37494 /* This must be the memory operand. */
37495 op = ix86_zero_extend_to_Pmode (op);
37496 op = gen_rtx_MEM (mode, op);
37497 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
37498 on it. Try to improve it using get_pointer_alignment,
37499 and if the special builtin is one that requires strict
37500 mode alignment, also from its GET_MODE_ALIGNMENT.
37501 Failure to do so could lead to ix86_legitimate_combined_insn
37502 rejecting all changes to such insns. */
37503 unsigned int align = get_pointer_alignment (arg);
37504 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
37505 align = GET_MODE_ALIGNMENT (mode);
37506 if (MEM_ALIGN (op) < align)
37507 set_mem_align (op, align);
37508 }
37509 else
37510 {
37511 /* This must be a register. */
37512 if (VECTOR_MODE_P (mode))
37513 op = safe_vector_operand (op, mode);
37514
37515 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37516 op = copy_to_mode_reg (mode, op);
37517 else
37518 {
37519 op = copy_to_reg (op);
37520 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37521 }
37522 }
37523 }
37524
37525 args[i].op = op;
37526 args[i].mode = mode;
37527 }
37528
37529 switch (nargs)
37530 {
37531 case 0:
37532 pat = GEN_FCN (icode) (target);
37533 break;
37534 case 1:
37535 pat = GEN_FCN (icode) (target, args[0].op);
37536 break;
37537 case 2:
37538 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37539 break;
37540 case 3:
37541 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
37542 break;
37543 default:
37544 gcc_unreachable ();
37545 }
37546
37547 if (! pat)
37548 return 0;
37549 emit_insn (pat);
37550 return klass == store ? 0 : target;
37551 }
37552
37553 /* Return the integer constant in ARG. Constrain it to be in the range
37554 of the subparts of VEC_TYPE; issue an error if not. */
37555
37556 static int
37557 get_element_number (tree vec_type, tree arg)
37558 {
37559 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
37560
37561 if (!tree_fits_uhwi_p (arg)
37562 || (elt = tree_to_uhwi (arg), elt > max))
37563 {
37564 error ("selector must be an integer constant in the range 0..%wi", max);
37565 return 0;
37566 }
37567
37568 return elt;
37569 }
37570
37571 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
37572 ix86_expand_vector_init. We DO have language-level syntax for this, in
37573 the form of (type){ init-list }. Except that since we can't place emms
37574 instructions from inside the compiler, we can't allow the use of MMX
37575 registers unless the user explicitly asks for it. So we do *not* define
37576 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
37577 we have builtins invoked by mmintrin.h that give us license to emit
37578 these sorts of instructions. */
37579
37580 static rtx
37581 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
37582 {
37583 enum machine_mode tmode = TYPE_MODE (type);
37584 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
37585 int i, n_elt = GET_MODE_NUNITS (tmode);
37586 rtvec v = rtvec_alloc (n_elt);
37587
37588 gcc_assert (VECTOR_MODE_P (tmode));
37589 gcc_assert (call_expr_nargs (exp) == n_elt);
37590
37591 for (i = 0; i < n_elt; ++i)
37592 {
37593 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
37594 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
37595 }
37596
37597 if (!target || !register_operand (target, tmode))
37598 target = gen_reg_rtx (tmode);
37599
37600 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
37601 return target;
37602 }
37603
37604 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
37605 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
37606 had a language-level syntax for referencing vector elements. */
37607
37608 static rtx
37609 ix86_expand_vec_ext_builtin (tree exp, rtx target)
37610 {
37611 enum machine_mode tmode, mode0;
37612 tree arg0, arg1;
37613 int elt;
37614 rtx op0;
37615
37616 arg0 = CALL_EXPR_ARG (exp, 0);
37617 arg1 = CALL_EXPR_ARG (exp, 1);
37618
37619 op0 = expand_normal (arg0);
37620 elt = get_element_number (TREE_TYPE (arg0), arg1);
37621
37622 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
37623 mode0 = TYPE_MODE (TREE_TYPE (arg0));
37624 gcc_assert (VECTOR_MODE_P (mode0));
37625
37626 op0 = force_reg (mode0, op0);
37627
37628 if (optimize || !target || !register_operand (target, tmode))
37629 target = gen_reg_rtx (tmode);
37630
37631 ix86_expand_vector_extract (true, target, op0, elt);
37632
37633 return target;
37634 }
37635
37636 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
37637 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
37638 a language-level syntax for referencing vector elements. */
37639
37640 static rtx
37641 ix86_expand_vec_set_builtin (tree exp)
37642 {
37643 enum machine_mode tmode, mode1;
37644 tree arg0, arg1, arg2;
37645 int elt;
37646 rtx op0, op1, target;
37647
37648 arg0 = CALL_EXPR_ARG (exp, 0);
37649 arg1 = CALL_EXPR_ARG (exp, 1);
37650 arg2 = CALL_EXPR_ARG (exp, 2);
37651
37652 tmode = TYPE_MODE (TREE_TYPE (arg0));
37653 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
37654 gcc_assert (VECTOR_MODE_P (tmode));
37655
37656 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
37657 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
37658 elt = get_element_number (TREE_TYPE (arg0), arg2);
37659
37660 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
37661 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
37662
37663 op0 = force_reg (tmode, op0);
37664 op1 = force_reg (mode1, op1);
37665
37666 /* OP0 is the source of these builtin functions and shouldn't be
37667 modified. Create a copy, use it and return it as target. */
37668 target = gen_reg_rtx (tmode);
37669 emit_move_insn (target, op0);
37670 ix86_expand_vector_set (true, target, op1, elt);
37671
37672 return target;
37673 }
37674
37675 /* Expand an expression EXP that calls a built-in function,
37676 with result going to TARGET if that's convenient
37677 (and in mode MODE if that's convenient).
37678 SUBTARGET may be used as the target for computing one of EXP's operands.
37679 IGNORE is nonzero if the value is to be ignored. */
37680
37681 static rtx
37682 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
37683 enum machine_mode mode, int ignore)
37684 {
37685 const struct builtin_description *d;
37686 size_t i;
37687 enum insn_code icode;
37688 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
37689 tree arg0, arg1, arg2, arg3, arg4;
37690 rtx op0, op1, op2, op3, op4, pat, insn;
37691 enum machine_mode mode0, mode1, mode2, mode3, mode4;
37692 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
37693
37694 /* For CPU builtins that can be folded, fold first and expand the fold. */
37695 switch (fcode)
37696 {
37697 case IX86_BUILTIN_CPU_INIT:
37698 {
37699 /* Make it call __cpu_indicator_init in libgcc. */
37700 tree call_expr, fndecl, type;
37701 type = build_function_type_list (integer_type_node, NULL_TREE);
37702 fndecl = build_fn_decl ("__cpu_indicator_init", type);
37703 call_expr = build_call_expr (fndecl, 0);
37704 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
37705 }
37706 case IX86_BUILTIN_CPU_IS:
37707 case IX86_BUILTIN_CPU_SUPPORTS:
37708 {
37709 tree arg0 = CALL_EXPR_ARG (exp, 0);
37710 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
37711 gcc_assert (fold_expr != NULL_TREE);
37712 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
37713 }
37714 }
37715
37716 /* Determine whether the builtin function is available under the current ISA.
37717 Originally the builtin was not created if it wasn't applicable to the
37718 current ISA based on the command line switches. With function specific
37719 options, we need to check in the context of the function making the call
37720 whether it is supported. */
37721 if (ix86_builtins_isa[fcode].isa
37722 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
37723 {
37724 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
37725 NULL, (enum fpmath_unit) 0, false);
37726
37727 if (!opts)
37728 error ("%qE needs unknown isa option", fndecl);
37729 else
37730 {
37731 gcc_assert (opts != NULL);
37732 error ("%qE needs isa option %s", fndecl, opts);
37733 free (opts);
37734 }
37735 return const0_rtx;
37736 }
37737
37738 switch (fcode)
37739 {
37740 case IX86_BUILTIN_MASKMOVQ:
37741 case IX86_BUILTIN_MASKMOVDQU:
37742 icode = (fcode == IX86_BUILTIN_MASKMOVQ
37743 ? CODE_FOR_mmx_maskmovq
37744 : CODE_FOR_sse2_maskmovdqu);
37745 /* Note the arg order is different from the operand order. */
37746 arg1 = CALL_EXPR_ARG (exp, 0);
37747 arg2 = CALL_EXPR_ARG (exp, 1);
37748 arg0 = CALL_EXPR_ARG (exp, 2);
37749 op0 = expand_normal (arg0);
37750 op1 = expand_normal (arg1);
37751 op2 = expand_normal (arg2);
37752 mode0 = insn_data[icode].operand[0].mode;
37753 mode1 = insn_data[icode].operand[1].mode;
37754 mode2 = insn_data[icode].operand[2].mode;
37755
37756 op0 = ix86_zero_extend_to_Pmode (op0);
37757 op0 = gen_rtx_MEM (mode1, op0);
37758
37759 if (!insn_data[icode].operand[0].predicate (op0, mode0))
37760 op0 = copy_to_mode_reg (mode0, op0);
37761 if (!insn_data[icode].operand[1].predicate (op1, mode1))
37762 op1 = copy_to_mode_reg (mode1, op1);
37763 if (!insn_data[icode].operand[2].predicate (op2, mode2))
37764 op2 = copy_to_mode_reg (mode2, op2);
37765 pat = GEN_FCN (icode) (op0, op1, op2);
37766 if (! pat)
37767 return 0;
37768 emit_insn (pat);
37769 return 0;
37770
37771 case IX86_BUILTIN_LDMXCSR:
37772 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
37773 target = assign_386_stack_local (SImode, SLOT_TEMP);
37774 emit_move_insn (target, op0);
37775 emit_insn (gen_sse_ldmxcsr (target));
37776 return 0;
37777
37778 case IX86_BUILTIN_STMXCSR:
37779 target = assign_386_stack_local (SImode, SLOT_TEMP);
37780 emit_insn (gen_sse_stmxcsr (target));
37781 return copy_to_mode_reg (SImode, target);
37782
37783 case IX86_BUILTIN_CLFLUSH:
37784 arg0 = CALL_EXPR_ARG (exp, 0);
37785 op0 = expand_normal (arg0);
37786 icode = CODE_FOR_sse2_clflush;
37787 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
37788 op0 = ix86_zero_extend_to_Pmode (op0);
37789
37790 emit_insn (gen_sse2_clflush (op0));
37791 return 0;
37792
37793 case IX86_BUILTIN_CLFLUSHOPT:
37794 arg0 = CALL_EXPR_ARG (exp, 0);
37795 op0 = expand_normal (arg0);
37796 icode = CODE_FOR_clflushopt;
37797 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
37798 op0 = ix86_zero_extend_to_Pmode (op0);
37799
37800 emit_insn (gen_clflushopt (op0));
37801 return 0;
37802
37803 case IX86_BUILTIN_MONITOR:
37804 arg0 = CALL_EXPR_ARG (exp, 0);
37805 arg1 = CALL_EXPR_ARG (exp, 1);
37806 arg2 = CALL_EXPR_ARG (exp, 2);
37807 op0 = expand_normal (arg0);
37808 op1 = expand_normal (arg1);
37809 op2 = expand_normal (arg2);
37810 if (!REG_P (op0))
37811 op0 = ix86_zero_extend_to_Pmode (op0);
37812 if (!REG_P (op1))
37813 op1 = copy_to_mode_reg (SImode, op1);
37814 if (!REG_P (op2))
37815 op2 = copy_to_mode_reg (SImode, op2);
37816 emit_insn (ix86_gen_monitor (op0, op1, op2));
37817 return 0;
37818
37819 case IX86_BUILTIN_MWAIT:
37820 arg0 = CALL_EXPR_ARG (exp, 0);
37821 arg1 = CALL_EXPR_ARG (exp, 1);
37822 op0 = expand_normal (arg0);
37823 op1 = expand_normal (arg1);
37824 if (!REG_P (op0))
37825 op0 = copy_to_mode_reg (SImode, op0);
37826 if (!REG_P (op1))
37827 op1 = copy_to_mode_reg (SImode, op1);
37828 emit_insn (gen_sse3_mwait (op0, op1));
37829 return 0;
37830
37831 case IX86_BUILTIN_VEC_INIT_V2SI:
37832 case IX86_BUILTIN_VEC_INIT_V4HI:
37833 case IX86_BUILTIN_VEC_INIT_V8QI:
37834 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
37835
37836 case IX86_BUILTIN_VEC_EXT_V2DF:
37837 case IX86_BUILTIN_VEC_EXT_V2DI:
37838 case IX86_BUILTIN_VEC_EXT_V4SF:
37839 case IX86_BUILTIN_VEC_EXT_V4SI:
37840 case IX86_BUILTIN_VEC_EXT_V8HI:
37841 case IX86_BUILTIN_VEC_EXT_V2SI:
37842 case IX86_BUILTIN_VEC_EXT_V4HI:
37843 case IX86_BUILTIN_VEC_EXT_V16QI:
37844 return ix86_expand_vec_ext_builtin (exp, target);
37845
37846 case IX86_BUILTIN_VEC_SET_V2DI:
37847 case IX86_BUILTIN_VEC_SET_V4SF:
37848 case IX86_BUILTIN_VEC_SET_V4SI:
37849 case IX86_BUILTIN_VEC_SET_V8HI:
37850 case IX86_BUILTIN_VEC_SET_V4HI:
37851 case IX86_BUILTIN_VEC_SET_V16QI:
37852 return ix86_expand_vec_set_builtin (exp);
37853
37854 case IX86_BUILTIN_INFQ:
37855 case IX86_BUILTIN_HUGE_VALQ:
37856 {
37857 REAL_VALUE_TYPE inf;
37858 rtx tmp;
37859
37860 real_inf (&inf);
37861 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
37862
37863 tmp = validize_mem (force_const_mem (mode, tmp));
37864
37865 if (target == 0)
37866 target = gen_reg_rtx (mode);
37867
37868 emit_move_insn (target, tmp);
37869 return target;
37870 }
37871
37872 case IX86_BUILTIN_RDPMC:
37873 case IX86_BUILTIN_RDTSC:
37874 case IX86_BUILTIN_RDTSCP:
37875
37876 op0 = gen_reg_rtx (DImode);
37877 op1 = gen_reg_rtx (DImode);
37878
37879 if (fcode == IX86_BUILTIN_RDPMC)
37880 {
37881 arg0 = CALL_EXPR_ARG (exp, 0);
37882 op2 = expand_normal (arg0);
37883 if (!register_operand (op2, SImode))
37884 op2 = copy_to_mode_reg (SImode, op2);
37885
37886 insn = (TARGET_64BIT
37887 ? gen_rdpmc_rex64 (op0, op1, op2)
37888 : gen_rdpmc (op0, op2));
37889 emit_insn (insn);
37890 }
37891 else if (fcode == IX86_BUILTIN_RDTSC)
37892 {
37893 insn = (TARGET_64BIT
37894 ? gen_rdtsc_rex64 (op0, op1)
37895 : gen_rdtsc (op0));
37896 emit_insn (insn);
37897 }
37898 else
37899 {
37900 op2 = gen_reg_rtx (SImode);
37901
37902 insn = (TARGET_64BIT
37903 ? gen_rdtscp_rex64 (op0, op1, op2)
37904 : gen_rdtscp (op0, op2));
37905 emit_insn (insn);
37906
37907 arg0 = CALL_EXPR_ARG (exp, 0);
37908 op4 = expand_normal (arg0);
37909 if (!address_operand (op4, VOIDmode))
37910 {
37911 op4 = convert_memory_address (Pmode, op4);
37912 op4 = copy_addr_to_reg (op4);
37913 }
37914 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
37915 }
37916
37917 if (target == 0)
37918 {
37919 /* mode is VOIDmode if __builtin_rd* has been called
37920 without an lhs. */
37921 if (mode == VOIDmode)
37922 return target;
37923 target = gen_reg_rtx (mode);
37924 }
37925
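/* On 64-bit targets the value comes back as two 32-bit halves in
op0 (low) and op1 (high); merge them into a single DImode value.  */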
37926 if (TARGET_64BIT)
37927 {
37928 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
37929 op1, 1, OPTAB_DIRECT);
37930 op0 = expand_simple_binop (DImode, IOR, op0, op1,
37931 op0, 1, OPTAB_DIRECT);
37932 }
37933
37934 emit_move_insn (target, op0);
37935 return target;
37936
37937 case IX86_BUILTIN_FXSAVE:
37938 case IX86_BUILTIN_FXRSTOR:
37939 case IX86_BUILTIN_FXSAVE64:
37940 case IX86_BUILTIN_FXRSTOR64:
37941 case IX86_BUILTIN_FNSTENV:
37942 case IX86_BUILTIN_FLDENV:
37943 mode0 = BLKmode;
37944 switch (fcode)
37945 {
37946 case IX86_BUILTIN_FXSAVE:
37947 icode = CODE_FOR_fxsave;
37948 break;
37949 case IX86_BUILTIN_FXRSTOR:
37950 icode = CODE_FOR_fxrstor;
37951 break;
37952 case IX86_BUILTIN_FXSAVE64:
37953 icode = CODE_FOR_fxsave64;
37954 break;
37955 case IX86_BUILTIN_FXRSTOR64:
37956 icode = CODE_FOR_fxrstor64;
37957 break;
37958 case IX86_BUILTIN_FNSTENV:
37959 icode = CODE_FOR_fnstenv;
37960 break;
37961 case IX86_BUILTIN_FLDENV:
37962 icode = CODE_FOR_fldenv;
37963 break;
37964 default:
37965 gcc_unreachable ();
37966 }
37967
37968 arg0 = CALL_EXPR_ARG (exp, 0);
37969 op0 = expand_normal (arg0);
37970
37971 if (!address_operand (op0, VOIDmode))
37972 {
37973 op0 = convert_memory_address (Pmode, op0);
37974 op0 = copy_addr_to_reg (op0);
37975 }
37976 op0 = gen_rtx_MEM (mode0, op0);
37977
37978 pat = GEN_FCN (icode) (op0);
37979 if (pat)
37980 emit_insn (pat);
37981 return 0;
37982
37983 case IX86_BUILTIN_XSAVE:
37984 case IX86_BUILTIN_XRSTOR:
37985 case IX86_BUILTIN_XSAVE64:
37986 case IX86_BUILTIN_XRSTOR64:
37987 case IX86_BUILTIN_XSAVEOPT:
37988 case IX86_BUILTIN_XSAVEOPT64:
37989 case IX86_BUILTIN_XSAVES:
37990 case IX86_BUILTIN_XRSTORS:
37991 case IX86_BUILTIN_XSAVES64:
37992 case IX86_BUILTIN_XRSTORS64:
37993 case IX86_BUILTIN_XSAVEC:
37994 case IX86_BUILTIN_XSAVEC64:
37995 arg0 = CALL_EXPR_ARG (exp, 0);
37996 arg1 = CALL_EXPR_ARG (exp, 1);
37997 op0 = expand_normal (arg0);
37998 op1 = expand_normal (arg1);
37999
38000 if (!address_operand (op0, VOIDmode))
38001 {
38002 op0 = convert_memory_address (Pmode, op0);
38003 op0 = copy_addr_to_reg (op0);
38004 }
38005 op0 = gen_rtx_MEM (BLKmode, op0);
38006
38007 op1 = force_reg (DImode, op1);
38008
38009 if (TARGET_64BIT)
38010 {
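/* The insn wants the 64-bit feature mask split into two 32-bit
halves (EDX:EAX).  */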
38011 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
38012 NULL, 1, OPTAB_DIRECT);
38013 switch (fcode)
38014 {
38015 case IX86_BUILTIN_XSAVE:
38016 icode = CODE_FOR_xsave_rex64;
38017 break;
38018 case IX86_BUILTIN_XRSTOR:
38019 icode = CODE_FOR_xrstor_rex64;
38020 break;
38021 case IX86_BUILTIN_XSAVE64:
38022 icode = CODE_FOR_xsave64;
38023 break;
38024 case IX86_BUILTIN_XRSTOR64:
38025 icode = CODE_FOR_xrstor64;
38026 break;
38027 case IX86_BUILTIN_XSAVEOPT:
38028 icode = CODE_FOR_xsaveopt_rex64;
38029 break;
38030 case IX86_BUILTIN_XSAVEOPT64:
38031 icode = CODE_FOR_xsaveopt64;
38032 break;
38033 case IX86_BUILTIN_XSAVES:
38034 icode = CODE_FOR_xsaves_rex64;
38035 break;
38036 case IX86_BUILTIN_XRSTORS:
38037 icode = CODE_FOR_xrstors_rex64;
38038 break;
38039 case IX86_BUILTIN_XSAVES64:
38040 icode = CODE_FOR_xsaves64;
38041 break;
38042 case IX86_BUILTIN_XRSTORS64:
38043 icode = CODE_FOR_xrstors64;
38044 break;
38045 case IX86_BUILTIN_XSAVEC:
38046 icode = CODE_FOR_xsavec_rex64;
38047 break;
38048 case IX86_BUILTIN_XSAVEC64:
38049 icode = CODE_FOR_xsavec64;
38050 break;
38051 default:
38052 gcc_unreachable ();
38053 }
38054
38055 op2 = gen_lowpart (SImode, op2);
38056 op1 = gen_lowpart (SImode, op1);
38057 pat = GEN_FCN (icode) (op0, op1, op2);
38058 }
38059 else
38060 {
38061 switch (fcode)
38062 {
38063 case IX86_BUILTIN_XSAVE:
38064 icode = CODE_FOR_xsave;
38065 break;
38066 case IX86_BUILTIN_XRSTOR:
38067 icode = CODE_FOR_xrstor;
38068 break;
38069 case IX86_BUILTIN_XSAVEOPT:
38070 icode = CODE_FOR_xsaveopt;
38071 break;
38072 case IX86_BUILTIN_XSAVES:
38073 icode = CODE_FOR_xsaves;
38074 break;
38075 case IX86_BUILTIN_XRSTORS:
38076 icode = CODE_FOR_xrstors;
38077 break;
38078 case IX86_BUILTIN_XSAVEC:
38079 icode = CODE_FOR_xsavec;
38080 break;
38081 default:
38082 gcc_unreachable ();
38083 }
38084 pat = GEN_FCN (icode) (op0, op1);
38085 }
38086
38087 if (pat)
38088 emit_insn (pat);
38089 return 0;
38090
38091 case IX86_BUILTIN_LLWPCB:
38092 arg0 = CALL_EXPR_ARG (exp, 0);
38093 op0 = expand_normal (arg0);
38094 icode = CODE_FOR_lwp_llwpcb;
38095 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38096 op0 = ix86_zero_extend_to_Pmode (op0);
38097 emit_insn (gen_lwp_llwpcb (op0));
38098 return 0;
38099
38100 case IX86_BUILTIN_SLWPCB:
38101 icode = CODE_FOR_lwp_slwpcb;
38102 if (!target
38103 || !insn_data[icode].operand[0].predicate (target, Pmode))
38104 target = gen_reg_rtx (Pmode);
38105 emit_insn (gen_lwp_slwpcb (target));
38106 return target;
38107
38108 case IX86_BUILTIN_BEXTRI32:
38109 case IX86_BUILTIN_BEXTRI64:
38110 arg0 = CALL_EXPR_ARG (exp, 0);
38111 arg1 = CALL_EXPR_ARG (exp, 1);
38112 op0 = expand_normal (arg0);
38113 op1 = expand_normal (arg1);
38114 icode = (fcode == IX86_BUILTIN_BEXTRI32
38115 ? CODE_FOR_tbm_bextri_si
38116 : CODE_FOR_tbm_bextri_di);
38117 if (!CONST_INT_P (op1))
38118 {
38119 error ("last argument must be an immediate");
38120 return const0_rtx;
38121 }
38122 else
38123 {
38124 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
38125 unsigned char lsb_index = INTVAL (op1) & 0xFF;
38126 op1 = GEN_INT (length);
38127 op2 = GEN_INT (lsb_index);
38128 pat = GEN_FCN (icode) (target, op0, op1, op2);
38129 if (pat)
38130 emit_insn (pat);
38131 return target;
38132 }
38133
38134 case IX86_BUILTIN_RDRAND16_STEP:
38135 icode = CODE_FOR_rdrandhi_1;
38136 mode0 = HImode;
38137 goto rdrand_step;
38138
38139 case IX86_BUILTIN_RDRAND32_STEP:
38140 icode = CODE_FOR_rdrandsi_1;
38141 mode0 = SImode;
38142 goto rdrand_step;
38143
38144 case IX86_BUILTIN_RDRAND64_STEP:
38145 icode = CODE_FOR_rdranddi_1;
38146 mode0 = DImode;
38147
38148 rdrand_step:
38149 op0 = gen_reg_rtx (mode0);
38150 emit_insn (GEN_FCN (icode) (op0));
38151
38152 arg0 = CALL_EXPR_ARG (exp, 0);
38153 op1 = expand_normal (arg0);
38154 if (!address_operand (op1, VOIDmode))
38155 {
38156 op1 = convert_memory_address (Pmode, op1);
38157 op1 = copy_addr_to_reg (op1);
38158 }
38159 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
38160
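/* The return value is the carry flag.  On failure the destination is
cleared and CF is 0, so selecting the (zero) value when CF is clear
and 1 when it is set reproduces CF without a setcc.  */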
38161 op1 = gen_reg_rtx (SImode);
38162 emit_move_insn (op1, CONST1_RTX (SImode));
38163
38164 /* Emit SImode conditional move. */
38165 if (mode0 == HImode)
38166 {
38167 op2 = gen_reg_rtx (SImode);
38168 emit_insn (gen_zero_extendhisi2 (op2, op0));
38169 }
38170 else if (mode0 == SImode)
38171 op2 = op0;
38172 else
38173 op2 = gen_rtx_SUBREG (SImode, op0, 0);
38174
38175 if (target == 0
38176 || !register_operand (target, SImode))
38177 target = gen_reg_rtx (SImode);
38178
38179 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
38180 const0_rtx);
38181 emit_insn (gen_rtx_SET (VOIDmode, target,
38182 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
38183 return target;
38184
38185 case IX86_BUILTIN_RDSEED16_STEP:
38186 icode = CODE_FOR_rdseedhi_1;
38187 mode0 = HImode;
38188 goto rdseed_step;
38189
38190 case IX86_BUILTIN_RDSEED32_STEP:
38191 icode = CODE_FOR_rdseedsi_1;
38192 mode0 = SImode;
38193 goto rdseed_step;
38194
38195 case IX86_BUILTIN_RDSEED64_STEP:
38196 icode = CODE_FOR_rdseeddi_1;
38197 mode0 = DImode;
38198
38199 rdseed_step:
38200 op0 = gen_reg_rtx (mode0);
38201 emit_insn (GEN_FCN (icode) (op0));
38202
38203 arg0 = CALL_EXPR_ARG (exp, 0);
38204 op1 = expand_normal (arg0);
38205 if (!address_operand (op1, VOIDmode))
38206 {
38207 op1 = convert_memory_address (Pmode, op1);
38208 op1 = copy_addr_to_reg (op1);
38209 }
38210 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
38211
38212 op2 = gen_reg_rtx (QImode);
38213
38214 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
38215 const0_rtx);
38216 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
38217
38218 if (target == 0
38219 || !register_operand (target, SImode))
38220 target = gen_reg_rtx (SImode);
38221
38222 emit_insn (gen_zero_extendqisi2 (target, op2));
38223 return target;
38224
38225 case IX86_BUILTIN_SBB32:
38226 icode = CODE_FOR_subsi3_carry;
38227 mode0 = SImode;
38228 goto addcarryx;
38229
38230 case IX86_BUILTIN_SBB64:
38231 icode = CODE_FOR_subdi3_carry;
38232 mode0 = DImode;
38233 goto addcarryx;
38234
38235 case IX86_BUILTIN_ADDCARRYX32:
38236 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
38237 mode0 = SImode;
38238 goto addcarryx;
38239
38240 case IX86_BUILTIN_ADDCARRYX64:
38241 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
38242 mode0 = DImode;
38243
38244 addcarryx:
38245 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
38246 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
38247 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
38248 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
38249
38250 op0 = gen_reg_rtx (QImode);
38251
38252 /* Generate CF from input operand. */
38253 op1 = expand_normal (arg0);
38254 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
38255 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
38256
38257 /* Generate the add- or subtract-with-carry insn that combines X, Y and CF. */
38258 op2 = expand_normal (arg1);
38259 op3 = expand_normal (arg2);
38260
38261 if (!REG_P (op2))
38262 op2 = copy_to_mode_reg (mode0, op2);
38263 if (!REG_P (op3))
38264 op3 = copy_to_mode_reg (mode0, op3);
38265
38266 op0 = gen_reg_rtx (mode0);
38267
38268 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
38269 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
38270 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
38271
38272 /* Store the result. */
38273 op4 = expand_normal (arg3);
38274 if (!address_operand (op4, VOIDmode))
38275 {
38276 op4 = convert_memory_address (Pmode, op4);
38277 op4 = copy_addr_to_reg (op4);
38278 }
38279 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
38280
38281 /* Return current CF value. */
38282 if (target == 0)
38283 target = gen_reg_rtx (QImode);
38284
38285 PUT_MODE (pat, QImode);
38286 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
38287 return target;
38288
38289 case IX86_BUILTIN_READ_FLAGS:
38290 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
38291
38292 if (optimize
38293 || target == NULL_RTX
38294 || !nonimmediate_operand (target, word_mode)
38295 || GET_MODE (target) != word_mode)
38296 target = gen_reg_rtx (word_mode);
38297
38298 emit_insn (gen_pop (target));
38299 return target;
38300
38301 case IX86_BUILTIN_WRITE_FLAGS:
38302
38303 arg0 = CALL_EXPR_ARG (exp, 0);
38304 op0 = expand_normal (arg0);
38305 if (!general_no_elim_operand (op0, word_mode))
38306 op0 = copy_to_mode_reg (word_mode, op0);
38307
38308 emit_insn (gen_push (op0));
38309 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
38310 return 0;
38311
38312 case IX86_BUILTIN_KORTESTC16:
38313 icode = CODE_FOR_kortestchi;
38314 mode0 = HImode;
38315 mode1 = CCCmode;
38316 goto kortest;
38317
38318 case IX86_BUILTIN_KORTESTZ16:
38319 icode = CODE_FOR_kortestzhi;
38320 mode0 = HImode;
38321 mode1 = CCZmode;
38322
38323 kortest:
38324 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
38325 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
38326 op0 = expand_normal (arg0);
38327 op1 = expand_normal (arg1);
38328
38329 op0 = copy_to_reg (op0);
38330 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
38331 op1 = copy_to_reg (op1);
38332 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
38333
38334 target = gen_reg_rtx (QImode);
38335 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
38336
38337 /* Emit kortest. */
38338 emit_insn (GEN_FCN (icode) (op0, op1));
38339 /* And use setcc to return result from flags. */
38340 ix86_expand_setcc (target, EQ,
38341 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
38342 return target;
38343
38344 case IX86_BUILTIN_GATHERSIV2DF:
38345 icode = CODE_FOR_avx2_gathersiv2df;
38346 goto gather_gen;
38347 case IX86_BUILTIN_GATHERSIV4DF:
38348 icode = CODE_FOR_avx2_gathersiv4df;
38349 goto gather_gen;
38350 case IX86_BUILTIN_GATHERDIV2DF:
38351 icode = CODE_FOR_avx2_gatherdiv2df;
38352 goto gather_gen;
38353 case IX86_BUILTIN_GATHERDIV4DF:
38354 icode = CODE_FOR_avx2_gatherdiv4df;
38355 goto gather_gen;
38356 case IX86_BUILTIN_GATHERSIV4SF:
38357 icode = CODE_FOR_avx2_gathersiv4sf;
38358 goto gather_gen;
38359 case IX86_BUILTIN_GATHERSIV8SF:
38360 icode = CODE_FOR_avx2_gathersiv8sf;
38361 goto gather_gen;
38362 case IX86_BUILTIN_GATHERDIV4SF:
38363 icode = CODE_FOR_avx2_gatherdiv4sf;
38364 goto gather_gen;
38365 case IX86_BUILTIN_GATHERDIV8SF:
38366 icode = CODE_FOR_avx2_gatherdiv8sf;
38367 goto gather_gen;
38368 case IX86_BUILTIN_GATHERSIV2DI:
38369 icode = CODE_FOR_avx2_gathersiv2di;
38370 goto gather_gen;
38371 case IX86_BUILTIN_GATHERSIV4DI:
38372 icode = CODE_FOR_avx2_gathersiv4di;
38373 goto gather_gen;
38374 case IX86_BUILTIN_GATHERDIV2DI:
38375 icode = CODE_FOR_avx2_gatherdiv2di;
38376 goto gather_gen;
38377 case IX86_BUILTIN_GATHERDIV4DI:
38378 icode = CODE_FOR_avx2_gatherdiv4di;
38379 goto gather_gen;
38380 case IX86_BUILTIN_GATHERSIV4SI:
38381 icode = CODE_FOR_avx2_gathersiv4si;
38382 goto gather_gen;
38383 case IX86_BUILTIN_GATHERSIV8SI:
38384 icode = CODE_FOR_avx2_gathersiv8si;
38385 goto gather_gen;
38386 case IX86_BUILTIN_GATHERDIV4SI:
38387 icode = CODE_FOR_avx2_gatherdiv4si;
38388 goto gather_gen;
38389 case IX86_BUILTIN_GATHERDIV8SI:
38390 icode = CODE_FOR_avx2_gatherdiv8si;
38391 goto gather_gen;
38392 case IX86_BUILTIN_GATHERALTSIV4DF:
38393 icode = CODE_FOR_avx2_gathersiv4df;
38394 goto gather_gen;
38395 case IX86_BUILTIN_GATHERALTDIV8SF:
38396 icode = CODE_FOR_avx2_gatherdiv8sf;
38397 goto gather_gen;
38398 case IX86_BUILTIN_GATHERALTSIV4DI:
38399 icode = CODE_FOR_avx2_gathersiv4di;
38400 goto gather_gen;
38401 case IX86_BUILTIN_GATHERALTDIV8SI:
38402 icode = CODE_FOR_avx2_gatherdiv8si;
38403 goto gather_gen;
38404 case IX86_BUILTIN_GATHER3SIV16SF:
38405 icode = CODE_FOR_avx512f_gathersiv16sf;
38406 goto gather_gen;
38407 case IX86_BUILTIN_GATHER3SIV8DF:
38408 icode = CODE_FOR_avx512f_gathersiv8df;
38409 goto gather_gen;
38410 case IX86_BUILTIN_GATHER3DIV16SF:
38411 icode = CODE_FOR_avx512f_gatherdiv16sf;
38412 goto gather_gen;
38413 case IX86_BUILTIN_GATHER3DIV8DF:
38414 icode = CODE_FOR_avx512f_gatherdiv8df;
38415 goto gather_gen;
38416 case IX86_BUILTIN_GATHER3SIV16SI:
38417 icode = CODE_FOR_avx512f_gathersiv16si;
38418 goto gather_gen;
38419 case IX86_BUILTIN_GATHER3SIV8DI:
38420 icode = CODE_FOR_avx512f_gathersiv8di;
38421 goto gather_gen;
38422 case IX86_BUILTIN_GATHER3DIV16SI:
38423 icode = CODE_FOR_avx512f_gatherdiv16si;
38424 goto gather_gen;
38425 case IX86_BUILTIN_GATHER3DIV8DI:
38426 icode = CODE_FOR_avx512f_gatherdiv8di;
38427 goto gather_gen;
38428 case IX86_BUILTIN_GATHER3ALTSIV8DF:
38429 icode = CODE_FOR_avx512f_gathersiv8df;
38430 goto gather_gen;
38431 case IX86_BUILTIN_GATHER3ALTDIV16SF:
38432 icode = CODE_FOR_avx512f_gatherdiv16sf;
38433 goto gather_gen;
38434 case IX86_BUILTIN_GATHER3ALTSIV8DI:
38435 icode = CODE_FOR_avx512f_gathersiv8di;
38436 goto gather_gen;
38437 case IX86_BUILTIN_GATHER3ALTDIV16SI:
38438 icode = CODE_FOR_avx512f_gatherdiv16si;
38439 goto gather_gen;
38440 case IX86_BUILTIN_GATHER3SIV2DF:
38441 icode = CODE_FOR_avx512vl_gathersiv2df;
38442 goto gather_gen;
38443 case IX86_BUILTIN_GATHER3SIV4DF:
38444 icode = CODE_FOR_avx512vl_gathersiv4df;
38445 goto gather_gen;
38446 case IX86_BUILTIN_GATHER3DIV2DF:
38447 icode = CODE_FOR_avx512vl_gatherdiv2df;
38448 goto gather_gen;
38449 case IX86_BUILTIN_GATHER3DIV4DF:
38450 icode = CODE_FOR_avx512vl_gatherdiv4df;
38451 goto gather_gen;
38452 case IX86_BUILTIN_GATHER3SIV4SF:
38453 icode = CODE_FOR_avx512vl_gathersiv4sf;
38454 goto gather_gen;
38455 case IX86_BUILTIN_GATHER3SIV8SF:
38456 icode = CODE_FOR_avx512vl_gathersiv8sf;
38457 goto gather_gen;
38458 case IX86_BUILTIN_GATHER3DIV4SF:
38459 icode = CODE_FOR_avx512vl_gatherdiv4sf;
38460 goto gather_gen;
38461 case IX86_BUILTIN_GATHER3DIV8SF:
38462 icode = CODE_FOR_avx512vl_gatherdiv8sf;
38463 goto gather_gen;
38464 case IX86_BUILTIN_GATHER3SIV2DI:
38465 icode = CODE_FOR_avx512vl_gathersiv2di;
38466 goto gather_gen;
38467 case IX86_BUILTIN_GATHER3SIV4DI:
38468 icode = CODE_FOR_avx512vl_gathersiv4di;
38469 goto gather_gen;
38470 case IX86_BUILTIN_GATHER3DIV2DI:
38471 icode = CODE_FOR_avx512vl_gatherdiv2di;
38472 goto gather_gen;
38473 case IX86_BUILTIN_GATHER3DIV4DI:
38474 icode = CODE_FOR_avx512vl_gatherdiv4di;
38475 goto gather_gen;
38476 case IX86_BUILTIN_GATHER3SIV4SI:
38477 icode = CODE_FOR_avx512vl_gathersiv4si;
38478 goto gather_gen;
38479 case IX86_BUILTIN_GATHER3SIV8SI:
38480 icode = CODE_FOR_avx512vl_gathersiv8si;
38481 goto gather_gen;
38482 case IX86_BUILTIN_GATHER3DIV4SI:
38483 icode = CODE_FOR_avx512vl_gatherdiv4si;
38484 goto gather_gen;
38485 case IX86_BUILTIN_GATHER3DIV8SI:
38486 icode = CODE_FOR_avx512vl_gatherdiv8si;
38487 goto gather_gen;
38488 case IX86_BUILTIN_GATHER3ALTSIV4DF:
38489 icode = CODE_FOR_avx512vl_gathersiv4df;
38490 goto gather_gen;
38491 case IX86_BUILTIN_GATHER3ALTDIV8SF:
38492 icode = CODE_FOR_avx512vl_gatherdiv8sf;
38493 goto gather_gen;
38494 case IX86_BUILTIN_GATHER3ALTSIV4DI:
38495 icode = CODE_FOR_avx512vl_gathersiv4di;
38496 goto gather_gen;
38497 case IX86_BUILTIN_GATHER3ALTDIV8SI:
38498 icode = CODE_FOR_avx512vl_gatherdiv8si;
38499 goto gather_gen;
38500 case IX86_BUILTIN_SCATTERSIV16SF:
38501 icode = CODE_FOR_avx512f_scattersiv16sf;
38502 goto scatter_gen;
38503 case IX86_BUILTIN_SCATTERSIV8DF:
38504 icode = CODE_FOR_avx512f_scattersiv8df;
38505 goto scatter_gen;
38506 case IX86_BUILTIN_SCATTERDIV16SF:
38507 icode = CODE_FOR_avx512f_scatterdiv16sf;
38508 goto scatter_gen;
38509 case IX86_BUILTIN_SCATTERDIV8DF:
38510 icode = CODE_FOR_avx512f_scatterdiv8df;
38511 goto scatter_gen;
38512 case IX86_BUILTIN_SCATTERSIV16SI:
38513 icode = CODE_FOR_avx512f_scattersiv16si;
38514 goto scatter_gen;
38515 case IX86_BUILTIN_SCATTERSIV8DI:
38516 icode = CODE_FOR_avx512f_scattersiv8di;
38517 goto scatter_gen;
38518 case IX86_BUILTIN_SCATTERDIV16SI:
38519 icode = CODE_FOR_avx512f_scatterdiv16si;
38520 goto scatter_gen;
38521 case IX86_BUILTIN_SCATTERDIV8DI:
38522 icode = CODE_FOR_avx512f_scatterdiv8di;
38523 goto scatter_gen;
38524 case IX86_BUILTIN_SCATTERSIV8SF:
38525 icode = CODE_FOR_avx512vl_scattersiv8sf;
38526 goto scatter_gen;
38527 case IX86_BUILTIN_SCATTERSIV4SF:
38528 icode = CODE_FOR_avx512vl_scattersiv4sf;
38529 goto scatter_gen;
38530 case IX86_BUILTIN_SCATTERSIV4DF:
38531 icode = CODE_FOR_avx512vl_scattersiv4df;
38532 goto scatter_gen;
38533 case IX86_BUILTIN_SCATTERSIV2DF:
38534 icode = CODE_FOR_avx512vl_scattersiv2df;
38535 goto scatter_gen;
38536 case IX86_BUILTIN_SCATTERDIV8SF:
38537 icode = CODE_FOR_avx512vl_scatterdiv8sf;
38538 goto scatter_gen;
38539 case IX86_BUILTIN_SCATTERDIV4SF:
38540 icode = CODE_FOR_avx512vl_scatterdiv4sf;
38541 goto scatter_gen;
38542 case IX86_BUILTIN_SCATTERDIV4DF:
38543 icode = CODE_FOR_avx512vl_scatterdiv4df;
38544 goto scatter_gen;
38545 case IX86_BUILTIN_SCATTERDIV2DF:
38546 icode = CODE_FOR_avx512vl_scatterdiv2df;
38547 goto scatter_gen;
38548 case IX86_BUILTIN_SCATTERSIV8SI:
38549 icode = CODE_FOR_avx512vl_scattersiv8si;
38550 goto scatter_gen;
38551 case IX86_BUILTIN_SCATTERSIV4SI:
38552 icode = CODE_FOR_avx512vl_scattersiv4si;
38553 goto scatter_gen;
38554 case IX86_BUILTIN_SCATTERSIV4DI:
38555 icode = CODE_FOR_avx512vl_scattersiv4di;
38556 goto scatter_gen;
38557 case IX86_BUILTIN_SCATTERSIV2DI:
38558 icode = CODE_FOR_avx512vl_scattersiv2di;
38559 goto scatter_gen;
38560 case IX86_BUILTIN_SCATTERDIV8SI:
38561 icode = CODE_FOR_avx512vl_scatterdiv8si;
38562 goto scatter_gen;
38563 case IX86_BUILTIN_SCATTERDIV4SI:
38564 icode = CODE_FOR_avx512vl_scatterdiv4si;
38565 goto scatter_gen;
38566 case IX86_BUILTIN_SCATTERDIV4DI:
38567 icode = CODE_FOR_avx512vl_scatterdiv4di;
38568 goto scatter_gen;
38569 case IX86_BUILTIN_SCATTERDIV2DI:
38570 icode = CODE_FOR_avx512vl_scatterdiv2di;
38571 goto scatter_gen;
38572 case IX86_BUILTIN_GATHERPFDPD:
38573 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
38574 goto vec_prefetch_gen;
38575 case IX86_BUILTIN_GATHERPFDPS:
38576 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
38577 goto vec_prefetch_gen;
38578 case IX86_BUILTIN_GATHERPFQPD:
38579 icode = CODE_FOR_avx512pf_gatherpfv8didf;
38580 goto vec_prefetch_gen;
38581 case IX86_BUILTIN_GATHERPFQPS:
38582 icode = CODE_FOR_avx512pf_gatherpfv8disf;
38583 goto vec_prefetch_gen;
38584 case IX86_BUILTIN_SCATTERPFDPD:
38585 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
38586 goto vec_prefetch_gen;
38587 case IX86_BUILTIN_SCATTERPFDPS:
38588 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
38589 goto vec_prefetch_gen;
38590 case IX86_BUILTIN_SCATTERPFQPD:
38591 icode = CODE_FOR_avx512pf_scatterpfv8didf;
38592 goto vec_prefetch_gen;
38593 case IX86_BUILTIN_SCATTERPFQPS:
38594 icode = CODE_FOR_avx512pf_scatterpfv8disf;
38595 goto vec_prefetch_gen;
38596
38597 gather_gen:
38598 rtx half;
38599 rtx (*gen) (rtx, rtx);
38600
38601 arg0 = CALL_EXPR_ARG (exp, 0);
38602 arg1 = CALL_EXPR_ARG (exp, 1);
38603 arg2 = CALL_EXPR_ARG (exp, 2);
38604 arg3 = CALL_EXPR_ARG (exp, 3);
38605 arg4 = CALL_EXPR_ARG (exp, 4);
38606 op0 = expand_normal (arg0);
38607 op1 = expand_normal (arg1);
38608 op2 = expand_normal (arg2);
38609 op3 = expand_normal (arg3);
38610 op4 = expand_normal (arg4);
38611 /* Note the arg order is different from the operand order. */
38612 mode0 = insn_data[icode].operand[1].mode;
38613 mode2 = insn_data[icode].operand[3].mode;
38614 mode3 = insn_data[icode].operand[4].mode;
38615 mode4 = insn_data[icode].operand[5].mode;
38616
38617 if (target == NULL_RTX
38618 || GET_MODE (target) != insn_data[icode].operand[0].mode
38619 || !insn_data[icode].operand[0].predicate (target,
38620 GET_MODE (target)))
38621 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
38622 else
38623 subtarget = target;
38624
38625 switch (fcode)
38626 {
38627 case IX86_BUILTIN_GATHER3ALTSIV8DF:
38628 case IX86_BUILTIN_GATHER3ALTSIV8DI:
38629 half = gen_reg_rtx (V8SImode);
38630 if (!nonimmediate_operand (op2, V16SImode))
38631 op2 = copy_to_mode_reg (V16SImode, op2);
38632 emit_insn (gen_vec_extract_lo_v16si (half, op2));
38633 op2 = half;
38634 break;
38635 case IX86_BUILTIN_GATHER3ALTSIV4DF:
38636 case IX86_BUILTIN_GATHER3ALTSIV4DI:
38637 case IX86_BUILTIN_GATHERALTSIV4DF:
38638 case IX86_BUILTIN_GATHERALTSIV4DI:
38639 half = gen_reg_rtx (V4SImode);
38640 if (!nonimmediate_operand (op2, V8SImode))
38641 op2 = copy_to_mode_reg (V8SImode, op2);
38642 emit_insn (gen_vec_extract_lo_v8si (half, op2));
38643 op2 = half;
38644 break;
38645 case IX86_BUILTIN_GATHER3ALTDIV16SF:
38646 case IX86_BUILTIN_GATHER3ALTDIV16SI:
38647 half = gen_reg_rtx (mode0);
38648 if (mode0 == V8SFmode)
38649 gen = gen_vec_extract_lo_v16sf;
38650 else
38651 gen = gen_vec_extract_lo_v16si;
38652 if (!nonimmediate_operand (op0, GET_MODE (op0)))
38653 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
38654 emit_insn (gen (half, op0));
38655 op0 = half;
38656 if (GET_MODE (op3) != VOIDmode)
38657 {
38658 if (!nonimmediate_operand (op3, GET_MODE (op3)))
38659 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
38660 emit_insn (gen (half, op3));
38661 op3 = half;
38662 }
38663 break;
38664 case IX86_BUILTIN_GATHER3ALTDIV8SF:
38665 case IX86_BUILTIN_GATHER3ALTDIV8SI:
38666 case IX86_BUILTIN_GATHERALTDIV8SF:
38667 case IX86_BUILTIN_GATHERALTDIV8SI:
38668 half = gen_reg_rtx (mode0);
38669 if (mode0 == V4SFmode)
38670 gen = gen_vec_extract_lo_v8sf;
38671 else
38672 gen = gen_vec_extract_lo_v8si;
38673 if (!nonimmediate_operand (op0, GET_MODE (op0)))
38674 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
38675 emit_insn (gen (half, op0));
38676 op0 = half;
38677 if (GET_MODE (op3) != VOIDmode)
38678 {
38679 if (!nonimmediate_operand (op3, GET_MODE (op3)))
38680 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
38681 emit_insn (gen (half, op3));
38682 op3 = half;
38683 }
38684 break;
38685 default:
38686 break;
38687 }
38688
38689 /* Force the memory operand to be addressed through a base register
38690 only here; we don't want to do this for the memory operands of
38691 other builtin functions. */
38692 op1 = ix86_zero_extend_to_Pmode (op1);
38693
38694 if (!insn_data[icode].operand[1].predicate (op0, mode0))
38695 op0 = copy_to_mode_reg (mode0, op0);
38696 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
38697 op1 = copy_to_mode_reg (Pmode, op1);
38698 if (!insn_data[icode].operand[3].predicate (op2, mode2))
38699 op2 = copy_to_mode_reg (mode2, op2);
38700 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
38701 {
38702 if (!insn_data[icode].operand[4].predicate (op3, mode3))
38703 op3 = copy_to_mode_reg (mode3, op3);
38704 }
38705 else
38706 {
38707 op3 = copy_to_reg (op3);
38708 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
38709 }
38710 if (!insn_data[icode].operand[5].predicate (op4, mode4))
38711 {
38712 error ("the last argument must be scale 1, 2, 4, 8");
38713 return const0_rtx;
38714 }
38715
38716 /* Optimize. If mask is known to have all high bits set,
38717 replace op0 with pc_rtx to signal that the instruction
38718 overwrites the whole destination and doesn't use its
38719 previous contents. */
38720 if (optimize)
38721 {
38722 if (TREE_CODE (arg3) == INTEGER_CST)
38723 {
38724 if (integer_all_onesp (arg3))
38725 op0 = pc_rtx;
38726 }
38727 else if (TREE_CODE (arg3) == VECTOR_CST)
38728 {
38729 unsigned int negative = 0;
38730 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
38731 {
38732 tree cst = VECTOR_CST_ELT (arg3, i);
38733 if (TREE_CODE (cst) == INTEGER_CST
38734 && tree_int_cst_sign_bit (cst))
38735 negative++;
38736 else if (TREE_CODE (cst) == REAL_CST
38737 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
38738 negative++;
38739 }
38740 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
38741 op0 = pc_rtx;
38742 }
38743 else if (TREE_CODE (arg3) == SSA_NAME
38744 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
38745 {
38746 /* Also recognize when the mask is like:
38747 __v2df src = _mm_setzero_pd ();
38748 __v2df mask = _mm_cmpeq_pd (src, src);
38749 or
38750 __v8sf src = _mm256_setzero_ps ();
38751 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
38752 as that is a cheaper way to load all ones into
38753 a register than having to load a constant from
38754 memory. */
38755 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
38756 if (is_gimple_call (def_stmt))
38757 {
38758 tree fndecl = gimple_call_fndecl (def_stmt);
38759 if (fndecl
38760 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
38761 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
38762 {
38763 case IX86_BUILTIN_CMPPD:
38764 case IX86_BUILTIN_CMPPS:
38765 case IX86_BUILTIN_CMPPD256:
38766 case IX86_BUILTIN_CMPPS256:
38767 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
38768 break;
38769 /* FALLTHRU */
38770 case IX86_BUILTIN_CMPEQPD:
38771 case IX86_BUILTIN_CMPEQPS:
38772 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
38773 && initializer_zerop (gimple_call_arg (def_stmt,
38774 1)))
38775 op0 = pc_rtx;
38776 break;
38777 default:
38778 break;
38779 }
38780 }
38781 }
38782 }
38783
38784 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
38785 if (! pat)
38786 return const0_rtx;
38787 emit_insn (pat);
38788
38789 switch (fcode)
38790 {
38791 case IX86_BUILTIN_GATHER3DIV16SF:
38792 if (target == NULL_RTX)
38793 target = gen_reg_rtx (V8SFmode);
38794 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
38795 break;
38796 case IX86_BUILTIN_GATHER3DIV16SI:
38797 if (target == NULL_RTX)
38798 target = gen_reg_rtx (V8SImode);
38799 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
38800 break;
38801 case IX86_BUILTIN_GATHER3DIV8SF:
38802 case IX86_BUILTIN_GATHERDIV8SF:
38803 if (target == NULL_RTX)
38804 target = gen_reg_rtx (V4SFmode);
38805 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
38806 break;
38807 case IX86_BUILTIN_GATHER3DIV8SI:
38808 case IX86_BUILTIN_GATHERDIV8SI:
38809 if (target == NULL_RTX)
38810 target = gen_reg_rtx (V4SImode);
38811 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
38812 break;
38813 default:
38814 target = subtarget;
38815 break;
38816 }
38817 return target;
38818
38819 scatter_gen:
38820 arg0 = CALL_EXPR_ARG (exp, 0);
38821 arg1 = CALL_EXPR_ARG (exp, 1);
38822 arg2 = CALL_EXPR_ARG (exp, 2);
38823 arg3 = CALL_EXPR_ARG (exp, 3);
38824 arg4 = CALL_EXPR_ARG (exp, 4);
38825 op0 = expand_normal (arg0);
38826 op1 = expand_normal (arg1);
38827 op2 = expand_normal (arg2);
38828 op3 = expand_normal (arg3);
38829 op4 = expand_normal (arg4);
38830 mode1 = insn_data[icode].operand[1].mode;
38831 mode2 = insn_data[icode].operand[2].mode;
38832 mode3 = insn_data[icode].operand[3].mode;
38833 mode4 = insn_data[icode].operand[4].mode;
38834
38835 /* Force the memory operand to be addressed through a base register
38836 only here; we don't want to do this for the memory operands of
38837 other builtin functions. */
38838 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
38839
38840 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38841 op0 = copy_to_mode_reg (Pmode, op0);
38842
38843 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
38844 {
38845 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38846 op1 = copy_to_mode_reg (mode1, op1);
38847 }
38848 else
38849 {
38850 op1 = copy_to_reg (op1);
38851 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
38852 }
38853
38854 if (!insn_data[icode].operand[2].predicate (op2, mode2))
38855 op2 = copy_to_mode_reg (mode2, op2);
38856
38857 if (!insn_data[icode].operand[3].predicate (op3, mode3))
38858 op3 = copy_to_mode_reg (mode3, op3);
38859
38860 if (!insn_data[icode].operand[4].predicate (op4, mode4))
38861 {
38862 error ("the last argument must be scale 1, 2, 4, 8");
38863 return const0_rtx;
38864 }
38865
38866 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
38867 if (! pat)
38868 return const0_rtx;
38869
38870 emit_insn (pat);
38871 return 0;
38872
38873 vec_prefetch_gen:
38874 arg0 = CALL_EXPR_ARG (exp, 0);
38875 arg1 = CALL_EXPR_ARG (exp, 1);
38876 arg2 = CALL_EXPR_ARG (exp, 2);
38877 arg3 = CALL_EXPR_ARG (exp, 3);
38878 arg4 = CALL_EXPR_ARG (exp, 4);
38879 op0 = expand_normal (arg0);
38880 op1 = expand_normal (arg1);
38881 op2 = expand_normal (arg2);
38882 op3 = expand_normal (arg3);
38883 op4 = expand_normal (arg4);
38884 mode0 = insn_data[icode].operand[0].mode;
38885 mode1 = insn_data[icode].operand[1].mode;
38886 mode3 = insn_data[icode].operand[3].mode;
38887 mode4 = insn_data[icode].operand[4].mode;
38888
38889 if (GET_MODE (op0) == mode0
38890 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
38891 {
38892 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38893 op0 = copy_to_mode_reg (mode0, op0);
38894 }
38895 else if (op0 != constm1_rtx)
38896 {
38897 op0 = copy_to_reg (op0);
38898 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
38899 }
38900
38901 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38902 op1 = copy_to_mode_reg (mode1, op1);
38903
38904 /* Force the memory operand to be addressed through a base register
38905 only here; we don't want to do this for the memory operands of
38906 other builtin functions. */
38907 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
38908
38909 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
38910 op2 = copy_to_mode_reg (Pmode, op2);
38911
38912 if (!insn_data[icode].operand[3].predicate (op3, mode3))
38913 {
38914 error ("the fourth argument must be scale 1, 2, 4, 8");
38915 return const0_rtx;
38916 }
38917
38918 if (!insn_data[icode].operand[4].predicate (op4, mode4))
38919 {
38920 error ("incorrect hint operand");
38921 return const0_rtx;
38922 }
38923
38924 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
38925 if (! pat)
38926 return const0_rtx;
38927
38928 emit_insn (pat);
38929
38930 return 0;
38931
38932 case IX86_BUILTIN_XABORT:
38933 icode = CODE_FOR_xabort;
38934 arg0 = CALL_EXPR_ARG (exp, 0);
38935 op0 = expand_normal (arg0);
38936 mode0 = insn_data[icode].operand[0].mode;
38937 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38938 {
38939 error ("the xabort's argument must be an 8-bit immediate");
38940 return const0_rtx;
38941 }
38942 emit_insn (gen_xabort (op0));
38943 return 0;
38944
38945 default:
38946 break;
38947 }
38948
38949 for (i = 0, d = bdesc_special_args;
38950 i < ARRAY_SIZE (bdesc_special_args);
38951 i++, d++)
38952 if (d->code == fcode)
38953 return ix86_expand_special_args_builtin (d, exp, target);
38954
38955 for (i = 0, d = bdesc_args;
38956 i < ARRAY_SIZE (bdesc_args);
38957 i++, d++)
38958 if (d->code == fcode)
38959 switch (fcode)
38960 {
38961 case IX86_BUILTIN_FABSQ:
38962 case IX86_BUILTIN_COPYSIGNQ:
38963 if (!TARGET_SSE)
38964 /* Emit a normal call if SSE isn't available. */
38965 return expand_call (exp, target, ignore);
38966 default:
38967 return ix86_expand_args_builtin (d, exp, target);
38968 }
38969
38970 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
38971 if (d->code == fcode)
38972 return ix86_expand_sse_comi (d, exp, target);
38973
38974 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
38975 if (d->code == fcode)
38976 return ix86_expand_round_builtin (d, exp, target);
38977
38978 for (i = 0, d = bdesc_pcmpestr;
38979 i < ARRAY_SIZE (bdesc_pcmpestr);
38980 i++, d++)
38981 if (d->code == fcode)
38982 return ix86_expand_sse_pcmpestr (d, exp, target);
38983
38984 for (i = 0, d = bdesc_pcmpistr;
38985 i < ARRAY_SIZE (bdesc_pcmpistr);
38986 i++, d++)
38987 if (d->code == fcode)
38988 return ix86_expand_sse_pcmpistr (d, exp, target);
38989
38990 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
38991 if (d->code == fcode)
38992 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
38993 (enum ix86_builtin_func_type)
38994 d->flag, d->comparison);
38995
38996 gcc_unreachable ();
38997 }
38998
38999 /* This returns the target-specific builtin with code CODE if
39000 current_function_decl has visibility on this builtin, which is checked
39001 using isa flags. Returns NULL_TREE otherwise. */
39002
39003 static tree ix86_get_builtin (enum ix86_builtins code)
39004 {
39005 struct cl_target_option *opts;
39006 tree target_tree = NULL_TREE;
39007
39008 /* Determine the isa flags of current_function_decl. */
39009
39010 if (current_function_decl)
39011 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
39012
39013 if (target_tree == NULL)
39014 target_tree = target_option_default_node;
39015
39016 opts = TREE_TARGET_OPTION (target_tree);
39017
39018 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
39019 return ix86_builtin_decl (code, true);
39020 else
39021 return NULL_TREE;
39022 }
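/* Illustrative note (not in the original source): this isa check is what
   makes per-function target attributes work for the vectorizer hooks
   below.  Inside a function whose target attribute does not enable the
   ISA recorded for a builtin in ix86_builtins_isa, ix86_get_builtin
   returns NULL_TREE and the callers simply do not offer that builtin.  */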
39023
39024 /* Returns a function decl for a vectorized version of the builtin function
39025 with builtin function code FN and the result vector type TYPE, or NULL_TREE
39026 if it is not available. */
39027
39028 static tree
39029 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
39030 tree type_in)
39031 {
39032 enum machine_mode in_mode, out_mode;
39033 int in_n, out_n;
39034 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
39035
39036 if (TREE_CODE (type_out) != VECTOR_TYPE
39037 || TREE_CODE (type_in) != VECTOR_TYPE
39038 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
39039 return NULL_TREE;
39040
39041 out_mode = TYPE_MODE (TREE_TYPE (type_out));
39042 out_n = TYPE_VECTOR_SUBPARTS (type_out);
39043 in_mode = TYPE_MODE (TREE_TYPE (type_in));
39044 in_n = TYPE_VECTOR_SUBPARTS (type_in);
39045
39046 switch (fn)
39047 {
39048 case BUILT_IN_SQRT:
39049 if (out_mode == DFmode && in_mode == DFmode)
39050 {
39051 if (out_n == 2 && in_n == 2)
39052 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
39053 else if (out_n == 4 && in_n == 4)
39054 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
39055 else if (out_n == 8 && in_n == 8)
39056 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
39057 }
39058 break;
39059
39060 case BUILT_IN_EXP2F:
39061 if (out_mode == SFmode && in_mode == SFmode)
39062 {
39063 if (out_n == 16 && in_n == 16)
39064 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
39065 }
39066 break;
39067
39068 case BUILT_IN_SQRTF:
39069 if (out_mode == SFmode && in_mode == SFmode)
39070 {
39071 if (out_n == 4 && in_n == 4)
39072 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
39073 else if (out_n == 8 && in_n == 8)
39074 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
39075 else if (out_n == 16 && in_n == 16)
39076 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
39077 }
39078 break;
39079
39080 case BUILT_IN_IFLOOR:
39081 case BUILT_IN_LFLOOR:
39082 case BUILT_IN_LLFLOOR:
39083 /* The round insn does not trap on denormals. */
39084 if (flag_trapping_math || !TARGET_ROUND)
39085 break;
39086
39087 if (out_mode == SImode && in_mode == DFmode)
39088 {
39089 if (out_n == 4 && in_n == 2)
39090 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
39091 else if (out_n == 8 && in_n == 4)
39092 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
39093 else if (out_n == 16 && in_n == 8)
39094 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
39095 }
39096 break;
39097
39098 case BUILT_IN_IFLOORF:
39099 case BUILT_IN_LFLOORF:
39100 case BUILT_IN_LLFLOORF:
39101 /* The round insn does not trap on denormals. */
39102 if (flag_trapping_math || !TARGET_ROUND)
39103 break;
39104
39105 if (out_mode == SImode && in_mode == SFmode)
39106 {
39107 if (out_n == 4 && in_n == 4)
39108 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
39109 else if (out_n == 8 && in_n == 8)
39110 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
39111 }
39112 break;
39113
39114 case BUILT_IN_ICEIL:
39115 case BUILT_IN_LCEIL:
39116 case BUILT_IN_LLCEIL:
39117 /* The round insn does not trap on denormals. */
39118 if (flag_trapping_math || !TARGET_ROUND)
39119 break;
39120
39121 if (out_mode == SImode && in_mode == DFmode)
39122 {
39123 if (out_n == 4 && in_n == 2)
39124 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
39125 else if (out_n == 8 && in_n == 4)
39126 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
39127 else if (out_n == 16 && in_n == 8)
39128 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
39129 }
39130 break;
39131
39132 case BUILT_IN_ICEILF:
39133 case BUILT_IN_LCEILF:
39134 case BUILT_IN_LLCEILF:
39135 /* The round insn does not trap on denormals. */
39136 if (flag_trapping_math || !TARGET_ROUND)
39137 break;
39138
39139 if (out_mode == SImode && in_mode == SFmode)
39140 {
39141 if (out_n == 4 && in_n == 4)
39142 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
39143 else if (out_n == 8 && in_n == 8)
39144 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
39145 }
39146 break;
39147
39148 case BUILT_IN_IRINT:
39149 case BUILT_IN_LRINT:
39150 case BUILT_IN_LLRINT:
39151 if (out_mode == SImode && in_mode == DFmode)
39152 {
39153 if (out_n == 4 && in_n == 2)
39154 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
39155 else if (out_n == 8 && in_n == 4)
39156 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
39157 }
39158 break;
39159
39160 case BUILT_IN_IRINTF:
39161 case BUILT_IN_LRINTF:
39162 case BUILT_IN_LLRINTF:
39163 if (out_mode == SImode && in_mode == SFmode)
39164 {
39165 if (out_n == 4 && in_n == 4)
39166 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
39167 else if (out_n == 8 && in_n == 8)
39168 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
39169 }
39170 break;
39171
39172 case BUILT_IN_IROUND:
39173 case BUILT_IN_LROUND:
39174 case BUILT_IN_LLROUND:
39175 /* The round insn does not trap on denormals. */
39176 if (flag_trapping_math || !TARGET_ROUND)
39177 break;
39178
39179 if (out_mode == SImode && in_mode == DFmode)
39180 {
39181 if (out_n == 4 && in_n == 2)
39182 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
39183 else if (out_n == 8 && in_n == 4)
39184 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
39185 else if (out_n == 16 && in_n == 8)
39186 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
39187 }
39188 break;
39189
39190 case BUILT_IN_IROUNDF:
39191 case BUILT_IN_LROUNDF:
39192 case BUILT_IN_LLROUNDF:
39193 /* The round insn does not trap on denormals. */
39194 if (flag_trapping_math || !TARGET_ROUND)
39195 break;
39196
39197 if (out_mode == SImode && in_mode == SFmode)
39198 {
39199 if (out_n == 4 && in_n == 4)
39200 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
39201 else if (out_n == 8 && in_n == 8)
39202 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
39203 }
39204 break;
39205
39206 case BUILT_IN_COPYSIGN:
39207 if (out_mode == DFmode && in_mode == DFmode)
39208 {
39209 if (out_n == 2 && in_n == 2)
39210 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
39211 else if (out_n == 4 && in_n == 4)
39212 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
39213 else if (out_n == 8 && in_n == 8)
39214 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
39215 }
39216 break;
39217
39218 case BUILT_IN_COPYSIGNF:
39219 if (out_mode == SFmode && in_mode == SFmode)
39220 {
39221 if (out_n == 4 && in_n == 4)
39222 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
39223 else if (out_n == 8 && in_n == 8)
39224 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
39225 else if (out_n == 16 && in_n == 16)
39226 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
39227 }
39228 break;
39229
39230 case BUILT_IN_FLOOR:
39231 /* The round insn does not trap on denormals. */
39232 if (flag_trapping_math || !TARGET_ROUND)
39233 break;
39234
39235 if (out_mode == DFmode && in_mode == DFmode)
39236 {
39237 if (out_n == 2 && in_n == 2)
39238 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
39239 else if (out_n == 4 && in_n == 4)
39240 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
39241 }
39242 break;
39243
39244 case BUILT_IN_FLOORF:
39245 /* The round insn does not trap on denormals. */
39246 if (flag_trapping_math || !TARGET_ROUND)
39247 break;
39248
39249 if (out_mode == SFmode && in_mode == SFmode)
39250 {
39251 if (out_n == 4 && in_n == 4)
39252 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
39253 else if (out_n == 8 && in_n == 8)
39254 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
39255 }
39256 break;
39257
39258 case BUILT_IN_CEIL:
39259 /* The round insn does not trap on denormals. */
39260 if (flag_trapping_math || !TARGET_ROUND)
39261 break;
39262
39263 if (out_mode == DFmode && in_mode == DFmode)
39264 {
39265 if (out_n == 2 && in_n == 2)
39266 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
39267 else if (out_n == 4 && in_n == 4)
39268 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
39269 }
39270 break;
39271
39272 case BUILT_IN_CEILF:
39273 /* The round insn does not trap on denormals. */
39274 if (flag_trapping_math || !TARGET_ROUND)
39275 break;
39276
39277 if (out_mode == SFmode && in_mode == SFmode)
39278 {
39279 if (out_n == 4 && in_n == 4)
39280 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
39281 else if (out_n == 8 && in_n == 8)
39282 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
39283 }
39284 break;
39285
39286 case BUILT_IN_TRUNC:
39287 /* The round insn does not trap on denormals. */
39288 if (flag_trapping_math || !TARGET_ROUND)
39289 break;
39290
39291 if (out_mode == DFmode && in_mode == DFmode)
39292 {
39293 if (out_n == 2 && in_n == 2)
39294 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
39295 else if (out_n == 4 && in_n == 4)
39296 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
39297 }
39298 break;
39299
39300 case BUILT_IN_TRUNCF:
39301 /* The round insn does not trap on denormals. */
39302 if (flag_trapping_math || !TARGET_ROUND)
39303 break;
39304
39305 if (out_mode == SFmode && in_mode == SFmode)
39306 {
39307 if (out_n == 4 && in_n == 4)
39308 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
39309 else if (out_n == 8 && in_n == 8)
39310 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
39311 }
39312 break;
39313
39314 case BUILT_IN_RINT:
39315 /* The round insn does not trap on denormals. */
39316 if (flag_trapping_math || !TARGET_ROUND)
39317 break;
39318
39319 if (out_mode == DFmode && in_mode == DFmode)
39320 {
39321 if (out_n == 2 && in_n == 2)
39322 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
39323 else if (out_n == 4 && in_n == 4)
39324 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
39325 }
39326 break;
39327
39328 case BUILT_IN_RINTF:
39329 /* The round insn does not trap on denormals. */
39330 if (flag_trapping_math || !TARGET_ROUND)
39331 break;
39332
39333 if (out_mode == SFmode && in_mode == SFmode)
39334 {
39335 if (out_n == 4 && in_n == 4)
39336 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
39337 else if (out_n == 8 && in_n == 8)
39338 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
39339 }
39340 break;
39341
39342 case BUILT_IN_ROUND:
39343 /* The round insn does not trap on denormals. */
39344 if (flag_trapping_math || !TARGET_ROUND)
39345 break;
39346
39347 if (out_mode == DFmode && in_mode == DFmode)
39348 {
39349 if (out_n == 2 && in_n == 2)
39350 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
39351 else if (out_n == 4 && in_n == 4)
39352 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
39353 }
39354 break;
39355
39356 case BUILT_IN_ROUNDF:
39357 /* The round insn does not trap on denormals. */
39358 if (flag_trapping_math || !TARGET_ROUND)
39359 break;
39360
39361 if (out_mode == SFmode && in_mode == SFmode)
39362 {
39363 if (out_n == 4 && in_n == 4)
39364 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
39365 else if (out_n == 8 && in_n == 8)
39366 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
39367 }
39368 break;
39369
39370 case BUILT_IN_FMA:
39371 if (out_mode == DFmode && in_mode == DFmode)
39372 {
39373 if (out_n == 2 && in_n == 2)
39374 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
39375 if (out_n == 4 && in_n == 4)
39376 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
39377 }
39378 break;
39379
39380 case BUILT_IN_FMAF:
39381 if (out_mode == SFmode && in_mode == SFmode)
39382 {
39383 if (out_n == 4 && in_n == 4)
39384 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
39385 if (out_n == 8 && in_n == 8)
39386 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
39387 }
39388 break;
39389
39390 default:
39391 break;
39392 }
39393
39394 /* Dispatch to a handler for a vectorization library. */
39395 if (ix86_veclib_handler)
39396 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
39397 type_in);
39398
39399 return NULL_TREE;
39400 }
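/* Example of the mapping above (illustrative only): a request to
   vectorize sqrt with V2DFmode input and output (out_n == 2, in_n == 2)
   yields IX86_BUILTIN_SQRTPD, while a V8SFmode request for sqrtf
   (out_n == 8, in_n == 8) yields IX86_BUILTIN_SQRTPS_NR256; any
   combination not listed in the switch falls through to the veclib
   handler or returns NULL_TREE.  */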
39401
39402 /* Handler for an SVML-style interface to
39403 a library with vectorized intrinsics. */
39404
39405 static tree
39406 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
39407 {
39408 char name[20];
39409 tree fntype, new_fndecl, args;
39410 unsigned arity;
39411 const char *bname;
39412 enum machine_mode el_mode, in_mode;
39413 int n, in_n;
39414
39415 /* The SVML is suitable for unsafe math only. */
39416 if (!flag_unsafe_math_optimizations)
39417 return NULL_TREE;
39418
39419 el_mode = TYPE_MODE (TREE_TYPE (type_out));
39420 n = TYPE_VECTOR_SUBPARTS (type_out);
39421 in_mode = TYPE_MODE (TREE_TYPE (type_in));
39422 in_n = TYPE_VECTOR_SUBPARTS (type_in);
39423 if (el_mode != in_mode
39424 || n != in_n)
39425 return NULL_TREE;
39426
39427 switch (fn)
39428 {
39429 case BUILT_IN_EXP:
39430 case BUILT_IN_LOG:
39431 case BUILT_IN_LOG10:
39432 case BUILT_IN_POW:
39433 case BUILT_IN_TANH:
39434 case BUILT_IN_TAN:
39435 case BUILT_IN_ATAN:
39436 case BUILT_IN_ATAN2:
39437 case BUILT_IN_ATANH:
39438 case BUILT_IN_CBRT:
39439 case BUILT_IN_SINH:
39440 case BUILT_IN_SIN:
39441 case BUILT_IN_ASINH:
39442 case BUILT_IN_ASIN:
39443 case BUILT_IN_COSH:
39444 case BUILT_IN_COS:
39445 case BUILT_IN_ACOSH:
39446 case BUILT_IN_ACOS:
39447 if (el_mode != DFmode || n != 2)
39448 return NULL_TREE;
39449 break;
39450
39451 case BUILT_IN_EXPF:
39452 case BUILT_IN_LOGF:
39453 case BUILT_IN_LOG10F:
39454 case BUILT_IN_POWF:
39455 case BUILT_IN_TANHF:
39456 case BUILT_IN_TANF:
39457 case BUILT_IN_ATANF:
39458 case BUILT_IN_ATAN2F:
39459 case BUILT_IN_ATANHF:
39460 case BUILT_IN_CBRTF:
39461 case BUILT_IN_SINHF:
39462 case BUILT_IN_SINF:
39463 case BUILT_IN_ASINHF:
39464 case BUILT_IN_ASINF:
39465 case BUILT_IN_COSHF:
39466 case BUILT_IN_COSF:
39467 case BUILT_IN_ACOSHF:
39468 case BUILT_IN_ACOSF:
39469 if (el_mode != SFmode || n != 4)
39470 return NULL_TREE;
39471 break;
39472
39473 default:
39474 return NULL_TREE;
39475 }
39476
39477 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
39478
39479 if (fn == BUILT_IN_LOGF)
39480 strcpy (name, "vmlsLn4");
39481 else if (fn == BUILT_IN_LOG)
39482 strcpy (name, "vmldLn2");
39483 else if (n == 4)
39484 {
39485 sprintf (name, "vmls%s", bname+10);
39486 name[strlen (name)-1] = '4';
39487 }
39488 else
39489 sprintf (name, "vmld%s2", bname+10);
39490
39491 /* Convert to uppercase. */
39492 name[4] &= ~0x20;
39493
39494 arity = 0;
39495 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
39496 args;
39497 args = TREE_CHAIN (args))
39498 arity++;
39499
39500 if (arity == 1)
39501 fntype = build_function_type_list (type_out, type_in, NULL);
39502 else
39503 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
39504
39505 /* Build a function declaration for the vectorized function. */
39506 new_fndecl = build_decl (BUILTINS_LOCATION,
39507 FUNCTION_DECL, get_identifier (name), fntype);
39508 TREE_PUBLIC (new_fndecl) = 1;
39509 DECL_EXTERNAL (new_fndecl) = 1;
39510 DECL_IS_NOVOPS (new_fndecl) = 1;
39511 TREE_READONLY (new_fndecl) = 1;
39512
39513 return new_fndecl;
39514 }
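/* Worked example of the SVML name mangling above (illustrative only):
   for BUILT_IN_SINF with four SFmode elements, "__builtin_sinf" gives
   sprintf ("vmls%s", "sinf") -> "vmlssinf", the trailing character is
   replaced by '4' and name[4] is uppercased, producing "vmlsSin4".
   The DFmode BUILT_IN_SIN case produces "vmldSin2" the same way, and
   log/logf are special-cased because SVML spells them "Ln".  */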
39515
39516 /* Handler for an ACML-style interface to
39517 a library with vectorized intrinsics. */
39518
39519 static tree
39520 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
39521 {
39522 char name[20] = "__vr.._";
39523 tree fntype, new_fndecl, args;
39524 unsigned arity;
39525 const char *bname;
39526 enum machine_mode el_mode, in_mode;
39527 int n, in_n;
39528
39529 /* The ACML is 64-bit only and suitable for unsafe math only, as
39530 it does not correctly support parts of IEEE with the required
39531 precision, such as denormals. */
39532 if (!TARGET_64BIT
39533 || !flag_unsafe_math_optimizations)
39534 return NULL_TREE;
39535
39536 el_mode = TYPE_MODE (TREE_TYPE (type_out));
39537 n = TYPE_VECTOR_SUBPARTS (type_out);
39538 in_mode = TYPE_MODE (TREE_TYPE (type_in));
39539 in_n = TYPE_VECTOR_SUBPARTS (type_in);
39540 if (el_mode != in_mode
39541 || n != in_n)
39542 return NULL_TREE;
39543
39544 switch (fn)
39545 {
39546 case BUILT_IN_SIN:
39547 case BUILT_IN_COS:
39548 case BUILT_IN_EXP:
39549 case BUILT_IN_LOG:
39550 case BUILT_IN_LOG2:
39551 case BUILT_IN_LOG10:
39552 name[4] = 'd';
39553 name[5] = '2';
39554 if (el_mode != DFmode
39555 || n != 2)
39556 return NULL_TREE;
39557 break;
39558
39559 case BUILT_IN_SINF:
39560 case BUILT_IN_COSF:
39561 case BUILT_IN_EXPF:
39562 case BUILT_IN_POWF:
39563 case BUILT_IN_LOGF:
39564 case BUILT_IN_LOG2F:
39565 case BUILT_IN_LOG10F:
39566 name[4] = 's';
39567 name[5] = '4';
39568 if (el_mode != SFmode
39569 || n != 4)
39570 return NULL_TREE;
39571 break;
39572
39573 default:
39574 return NULL_TREE;
39575 }
39576
39577 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
39578 sprintf (name + 7, "%s", bname+10);
39579
39580 arity = 0;
39581 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
39582 args;
39583 args = TREE_CHAIN (args))
39584 arity++;
39585
39586 if (arity == 1)
39587 fntype = build_function_type_list (type_out, type_in, NULL);
39588 else
39589 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
39590
39591 /* Build a function declaration for the vectorized function. */
39592 new_fndecl = build_decl (BUILTINS_LOCATION,
39593 FUNCTION_DECL, get_identifier (name), fntype);
39594 TREE_PUBLIC (new_fndecl) = 1;
39595 DECL_EXTERNAL (new_fndecl) = 1;
39596 DECL_IS_NOVOPS (new_fndecl) = 1;
39597 TREE_READONLY (new_fndecl) = 1;
39598
39599 return new_fndecl;
39600 }
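/* Worked example (illustrative only): for BUILT_IN_SIN with two DFmode
   elements the template "__vr.._" becomes "__vrd2_" and the tail of
   "__builtin_sin" is appended, giving the ACML routine name
   "__vrd2_sin"; the four-element SFmode BUILT_IN_SINF case yields
   "__vrs4_sinf".  */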
39601
39602 /* Returns a decl of a function that implements gather load with
39603 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
39604 Return NULL_TREE if it is not available. */
39605
39606 static tree
39607 ix86_vectorize_builtin_gather (const_tree mem_vectype,
39608 const_tree index_type, int scale)
39609 {
39610 bool si;
39611 enum ix86_builtins code;
39612
39613 if (! TARGET_AVX2)
39614 return NULL_TREE;
39615
39616 if ((TREE_CODE (index_type) != INTEGER_TYPE
39617 && !POINTER_TYPE_P (index_type))
39618 || (TYPE_MODE (index_type) != SImode
39619 && TYPE_MODE (index_type) != DImode))
39620 return NULL_TREE;
39621
39622 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
39623 return NULL_TREE;
39624
39625 /* v*gather* insn sign extends index to pointer mode. */
39626 if (TYPE_PRECISION (index_type) < POINTER_SIZE
39627 && TYPE_UNSIGNED (index_type))
39628 return NULL_TREE;
39629
39630 if (scale <= 0
39631 || scale > 8
39632 || (scale & (scale - 1)) != 0)
39633 return NULL_TREE;
39634
39635 si = TYPE_MODE (index_type) == SImode;
39636 switch (TYPE_MODE (mem_vectype))
39637 {
39638 case V2DFmode:
39639 if (TARGET_AVX512VL)
39640 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
39641 else
39642 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
39643 break;
39644 case V4DFmode:
39645 if (TARGET_AVX512VL)
39646 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
39647 else
39648 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
39649 break;
39650 case V2DImode:
39651 if (TARGET_AVX512VL)
39652 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
39653 else
39654 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
39655 break;
39656 case V4DImode:
39657 if (TARGET_AVX512VL)
39658 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
39659 else
39660 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
39661 break;
39662 case V4SFmode:
39663 if (TARGET_AVX512VL)
39664 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
39665 else
39666 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
39667 break;
39668 case V8SFmode:
39669 if (TARGET_AVX512VL)
39670 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
39671 else
39672 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
39673 break;
39674 case V4SImode:
39675 if (TARGET_AVX512VL)
39676 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
39677 else
39678 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
39679 break;
39680 case V8SImode:
39681 if (TARGET_AVX512VL)
39682 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
39683 else
39684 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
39685 break;
39686 case V8DFmode:
39687 if (TARGET_AVX512F)
39688 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
39689 else
39690 return NULL_TREE;
39691 break;
39692 case V8DImode:
39693 if (TARGET_AVX512F)
39694 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
39695 else
39696 return NULL_TREE;
39697 break;
39698 case V16SFmode:
39699 if (TARGET_AVX512F)
39700 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
39701 else
39702 return NULL_TREE;
39703 break;
39704 case V16SImode:
39705 if (TARGET_AVX512F)
39706 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
39707 else
39708 return NULL_TREE;
39709 break;
39710 default:
39711 return NULL_TREE;
39712 }
39713
39714 return ix86_get_builtin (code);
39715 }
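/* Example of the selection above (illustrative only): a V2DFmode gather
   with SImode indices and scale 8 maps to IX86_BUILTIN_GATHER3SIV2DF
   when AVX512VL is enabled and to IX86_BUILTIN_GATHERSIV2DF on plain
   AVX2.  A scale that is not a power of two <= 8, an unsigned index
   narrower than a pointer, or a vector mode without an entry in the
   switch all result in NULL_TREE, i.e. no gather builtin is used.  */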
39716
39717 /* Returns a decl of a target-specific builtin that implements the
39718 reciprocal of the function, or NULL_TREE if not available. */
39719
39720 static tree
39721 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
39722 {
39723 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
39724 && flag_finite_math_only && !flag_trapping_math
39725 && flag_unsafe_math_optimizations))
39726 return NULL_TREE;
39727
39728 if (md_fn)
39729 /* Machine dependent builtins. */
39730 switch (fn)
39731 {
39732 /* Vectorized version of sqrt to rsqrt conversion. */
39733 case IX86_BUILTIN_SQRTPS_NR:
39734 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
39735
39736 case IX86_BUILTIN_SQRTPS_NR256:
39737 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
39738
39739 default:
39740 return NULL_TREE;
39741 }
39742 else
39743 /* Normal builtins. */
39744 switch (fn)
39745 {
39746 /* Sqrt to rsqrt conversion. */
39747 case BUILT_IN_SQRTF:
39748 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
39749
39750 default:
39751 return NULL_TREE;
39752 }
39753 }
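/* For instance, under -ffast-math (which sets the finite-math,
   no-trapping and unsafe-math flags tested above) the vectorized
   IX86_BUILTIN_SQRTPS_NR forms are mapped to their RSQRTPS_NR
   counterparts and scalar __builtin_sqrtf to IX86_BUILTIN_RSQRTF, so
   callers can use a reciprocal-square-root approximation (_NR here
   denoting the Newton-Raphson refined variants) instead of a full
   sqrt and divide.  */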
39754 \f
39755 /* Helper for avx_vpermilps256_operand et al. This is also used by
39756 the expansion functions to turn the parallel back into a mask.
39757 The return value is 0 for no match and the imm8+1 for a match. */
39758
39759 int
39760 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
39761 {
39762 unsigned i, nelt = GET_MODE_NUNITS (mode);
39763 unsigned mask = 0;
39764 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
39765
39766 if (XVECLEN (par, 0) != (int) nelt)
39767 return 0;
39768
39769 /* Validate that all of the elements are constants, and not totally
39770 out of range. Copy the data into an integral array to make the
39771 subsequent checks easier. */
39772 for (i = 0; i < nelt; ++i)
39773 {
39774 rtx er = XVECEXP (par, 0, i);
39775 unsigned HOST_WIDE_INT ei;
39776
39777 if (!CONST_INT_P (er))
39778 return 0;
39779 ei = INTVAL (er);
39780 if (ei >= nelt)
39781 return 0;
39782 ipar[i] = ei;
39783 }
39784
39785 switch (mode)
39786 {
39787 case V8DFmode:
39788 /* In the 512-bit DFmode case, we can only move elements within
39789 a 128-bit lane. First fill the second part of the mask,
39790 then fallthru. */
39791 for (i = 4; i < 6; ++i)
39792 {
39793 if (ipar[i] < 4 || ipar[i] >= 6)
39794 return 0;
39795 mask |= (ipar[i] - 4) << i;
39796 }
39797 for (i = 6; i < 8; ++i)
39798 {
39799 if (ipar[i] < 6)
39800 return 0;
39801 mask |= (ipar[i] - 6) << i;
39802 }
39803 /* FALLTHRU */
39804
39805 case V4DFmode:
39806 /* In the 256-bit DFmode case, we can only move elements within
39807 a 128-bit lane. */
39808 for (i = 0; i < 2; ++i)
39809 {
39810 if (ipar[i] >= 2)
39811 return 0;
39812 mask |= ipar[i] << i;
39813 }
39814 for (i = 2; i < 4; ++i)
39815 {
39816 if (ipar[i] < 2)
39817 return 0;
39818 mask |= (ipar[i] - 2) << i;
39819 }
39820 break;
39821
39822 case V16SFmode:
39823 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
39824 must mirror the permutation in the lower 256 bits. */
39825 for (i = 0; i < 8; ++i)
39826 if (ipar[i] + 8 != ipar[i + 8])
39827 return 0;
39828 /* FALLTHRU */
39829
39830 case V8SFmode:
39831 /* In the 256-bit SFmode case, we have full freedom of
39832 movement within the low 128-bit lane, but the high 128-bit
39833 lane must mirror the exact same pattern. */
39834 for (i = 0; i < 4; ++i)
39835 if (ipar[i] + 4 != ipar[i + 4])
39836 return 0;
39837 nelt = 4;
39838 /* FALLTHRU */
39839
39840 case V2DFmode:
39841 case V4SFmode:
39842 /* In the 128-bit case, we have full freedom in the placement of
39843 the elements from the source operand. */
39844 for (i = 0; i < nelt; ++i)
39845 mask |= ipar[i] << (i * (nelt / 2));
39846 break;
39847
39848 default:
39849 gcc_unreachable ();
39850 }
39851
39852 /* Make sure success has a non-zero value by adding one. */
39853 return mask + 1;
39854 }
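/* Worked example (illustrative only): for V4SFmode a parallel of
   (const_int 0) (const_int 3) (const_int 2) (const_int 1) gives
   mask = 0<<0 | 3<<2 | 2<<4 | 1<<6 = 0x6c, so the function returns
   0x6d and the caller subtracts one to recover the vpermilps imm8.  */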
39855
39856 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
39857 the expansion functions to turn the parallel back into a mask.
39858 The return value is 0 for no match and the imm8+1 for a match. */
39859
39860 int
39861 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
39862 {
39863 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
39864 unsigned mask = 0;
39865 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
39866
39867 if (XVECLEN (par, 0) != (int) nelt)
39868 return 0;
39869
39870 /* Validate that all of the elements are constants, and not totally
39871 out of range. Copy the data into an integral array to make the
39872 subsequent checks easier. */
39873 for (i = 0; i < nelt; ++i)
39874 {
39875 rtx er = XVECEXP (par, 0, i);
39876 unsigned HOST_WIDE_INT ei;
39877
39878 if (!CONST_INT_P (er))
39879 return 0;
39880 ei = INTVAL (er);
39881 if (ei >= 2 * nelt)
39882 return 0;
39883 ipar[i] = ei;
39884 }
39885
39886 /* Validate that the halves of the permute are halves. */
39887 for (i = 0; i < nelt2 - 1; ++i)
39888 if (ipar[i] + 1 != ipar[i + 1])
39889 return 0;
39890 for (i = nelt2; i < nelt - 1; ++i)
39891 if (ipar[i] + 1 != ipar[i + 1])
39892 return 0;
39893
39894 /* Reconstruct the mask. */
39895 for (i = 0; i < 2; ++i)
39896 {
39897 unsigned e = ipar[i * nelt2];
39898 if (e % nelt2)
39899 return 0;
39900 e /= nelt2;
39901 mask |= e << (i * 4);
39902 }
39903
39904 /* Make sure success has a non-zero value by adding one. */
39905 return mask + 1;
39906 }
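/* Worked example (illustrative only): for V4DFmode a parallel of
   (const_int 2) (const_int 3) (const_int 4) (const_int 5) takes the
   high 128-bit lane of the first operand and the low lane of the
   second; the reconstruction gives mask = 1 | 2 << 4 = 0x21 and the
   function returns 0x22 (imm8 + 1).  */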
39907 \f
39908 /* Return a register priority for hard reg REGNO. */
39909 static int
39910 ix86_register_priority (int hard_regno)
39911 {
39912 /* ebp and r13 as the base always want a displacement, r12 as the
39913 base always wants an index. So discourage their usage in an
39914 address. */
39915 if (hard_regno == R12_REG || hard_regno == R13_REG)
39916 return 0;
39917 if (hard_regno == BP_REG)
39918 return 1;
39919 /* New x86-64 int registers result in bigger code size. Discourage
39920 them. */
39921 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
39922 return 2;
39923 /* New x86-64 SSE registers result in bigger code size. Discourage
39924 them. */
39925 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
39926 return 2;
39927 /* Usage of AX register results in smaller code. Prefer it. */
39928 if (hard_regno == 0)
39929 return 4;
39930 return 3;
39931 }
39932
39933 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
39934
39935 Put float CONST_DOUBLE in the constant pool instead of fp regs.
39936 QImode must go into class Q_REGS.
39937 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
39938 movdf to do mem-to-mem moves through integer regs. */
39939
39940 static reg_class_t
39941 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
39942 {
39943 enum machine_mode mode = GET_MODE (x);
39944
39945 /* We're only allowed to return a subclass of CLASS. Many of the
39946 following checks fail for NO_REGS, so eliminate that early. */
39947 if (regclass == NO_REGS)
39948 return NO_REGS;
39949
39950 /* All classes can load zeros. */
39951 if (x == CONST0_RTX (mode))
39952 return regclass;
39953
39954 /* Force constants into memory if we are loading a (nonzero) constant into
39955 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
39956 instructions to load from a constant. */
39957 if (CONSTANT_P (x)
39958 && (MAYBE_MMX_CLASS_P (regclass)
39959 || MAYBE_SSE_CLASS_P (regclass)
39960 || MAYBE_MASK_CLASS_P (regclass)))
39961 return NO_REGS;
39962
39963 /* Prefer SSE regs only, if we can use them for math. */
39964 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
39965 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
39966
39967 /* Floating-point constants need more complex checks. */
39968 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
39969 {
39970 /* General regs can load everything. */
39971 if (reg_class_subset_p (regclass, GENERAL_REGS))
39972 return regclass;
39973
39974 /* Floats can load 0 and 1 plus some others. Note that we eliminated
39975 zero above. We only want to wind up preferring 80387 registers if
39976 we plan on doing computation with them. */
39977 if (TARGET_80387
39978 && standard_80387_constant_p (x) > 0)
39979 {
39980 /* Limit class to non-sse. */
39981 if (regclass == FLOAT_SSE_REGS)
39982 return FLOAT_REGS;
39983 if (regclass == FP_TOP_SSE_REGS)
39984 return FP_TOP_REG;
39985 if (regclass == FP_SECOND_SSE_REGS)
39986 return FP_SECOND_REG;
39987 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
39988 return regclass;
39989 }
39990
39991 return NO_REGS;
39992 }
39993
39994 /* Generally when we see PLUS here, it's the function invariant
39995 (plus soft-fp const_int), which can only be computed into general
39996 regs. */
39997 if (GET_CODE (x) == PLUS)
39998 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
39999
40000 /* QImode constants are easy to load, but non-constant QImode data
40001 must go into Q_REGS. */
40002 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
40003 {
40004 if (reg_class_subset_p (regclass, Q_REGS))
40005 return regclass;
40006 if (reg_class_subset_p (Q_REGS, regclass))
40007 return Q_REGS;
40008 return NO_REGS;
40009 }
40010
40011 return regclass;
40012 }
40013
40014 /* Discourage putting floating-point values in SSE registers unless
40015 SSE math is being used, and likewise for the 387 registers. */
40016 static reg_class_t
40017 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
40018 {
40019 enum machine_mode mode = GET_MODE (x);
40020
40021 /* Restrict the output reload class to the register bank that we are doing
40022 math on. If we would like not to return a subset of CLASS, reject this
40023 alternative: if reload cannot do this, it will still use its choice. */
40024 mode = GET_MODE (x);
40025 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
40026 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
40027
40028 if (X87_FLOAT_MODE_P (mode))
40029 {
40030 if (regclass == FP_TOP_SSE_REGS)
40031 return FP_TOP_REG;
40032 else if (regclass == FP_SECOND_SSE_REGS)
40033 return FP_SECOND_REG;
40034 else
40035 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
40036 }
40037
40038 return regclass;
40039 }
40040
40041 static reg_class_t
40042 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
40043 enum machine_mode mode, secondary_reload_info *sri)
40044 {
40045 /* Double-word spills from general registers to non-offsettable memory
40046 references (zero-extended addresses) require special handling. */
40047 if (TARGET_64BIT
40048 && MEM_P (x)
40049 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
40050 && INTEGER_CLASS_P (rclass)
40051 && !offsettable_memref_p (x))
40052 {
40053 sri->icode = (in_p
40054 ? CODE_FOR_reload_noff_load
40055 : CODE_FOR_reload_noff_store);
40056 /* Add the cost of moving address to a temporary. */
40057 sri->extra_cost = 1;
40058
40059 return NO_REGS;
40060 }
40061
40062 /* QImode spills from non-QI registers require an
40063 intermediate register on 32-bit targets. */
40064 if (mode == QImode
40065 && (MAYBE_MASK_CLASS_P (rclass)
40066 || (!TARGET_64BIT && !in_p
40067 && INTEGER_CLASS_P (rclass)
40068 && MAYBE_NON_Q_CLASS_P (rclass))))
40069 {
40070 int regno;
40071
40072 if (REG_P (x))
40073 regno = REGNO (x);
40074 else
40075 regno = -1;
40076
40077 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
40078 regno = true_regnum (x);
40079
40080 /* Return Q_REGS if the operand is in memory. */
40081 if (regno == -1)
40082 return Q_REGS;
40083 }
40084
40085 /* This condition handles corner case where an expression involving
40086 pointers gets vectorized. We're trying to use the address of a
40087 stack slot as a vector initializer.
40088
40089 (set (reg:V2DI 74 [ vect_cst_.2 ])
40090 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
40091
40092 Eventually frame gets turned into sp+offset like this:
40093
40094 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
40095 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
40096 (const_int 392 [0x188]))))
40097
40098 That later gets turned into:
40099
40100 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
40101 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
40102 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
40103
40104 We'll have the following reload recorded:
40105
40106 Reload 0: reload_in (DI) =
40107 (plus:DI (reg/f:DI 7 sp)
40108 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
40109 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
40110 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
40111 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
40112 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
40113 reload_reg_rtx: (reg:V2DI 22 xmm1)
40114
40115 Which isn't going to work since SSE instructions can't handle scalar
40116 additions. Returning GENERAL_REGS forces the addition into integer
40117 register and reload can handle subsequent reloads without problems. */
40118
40119 if (in_p && GET_CODE (x) == PLUS
40120 && SSE_CLASS_P (rclass)
40121 && SCALAR_INT_MODE_P (mode))
40122 return GENERAL_REGS;
40123
40124 return NO_REGS;
40125 }
40126
40127 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
40128
40129 static bool
40130 ix86_class_likely_spilled_p (reg_class_t rclass)
40131 {
40132 switch (rclass)
40133 {
40134 case AREG:
40135 case DREG:
40136 case CREG:
40137 case BREG:
40138 case AD_REGS:
40139 case SIREG:
40140 case DIREG:
40141 case SSE_FIRST_REG:
40142 case FP_TOP_REG:
40143 case FP_SECOND_REG:
40144 return true;
40145
40146 default:
40147 break;
40148 }
40149
40150 return false;
40151 }
40152
40153 /* If we are copying between general and FP registers, we need a memory
40154 location. The same is true for SSE and MMX registers.
40155
40156 To optimize register_move_cost performance, allow inline variant.
40157
40158 The macro can't work reliably when one of the CLASSES is a class containing
40159 registers from multiple units (SSE, MMX, integer). We avoid this by never
40160 combining those units in a single alternative in the machine description.
40161 Ensure that this constraint holds to avoid unexpected surprises.
40162
40163 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
40164 enforce these sanity checks. */
40165
40166 static inline bool
40167 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
40168 enum machine_mode mode, int strict)
40169 {
40170 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
40171 return false;
40172 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
40173 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
40174 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
40175 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
40176 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
40177 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
40178 {
40179 gcc_assert (!strict || lra_in_progress);
40180 return true;
40181 }
40182
40183 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
40184 return true;
40185
40186 /* Between mask and general, we have moves no larger than word size. */
40187 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
40188 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
40189 return true;
40190
40191 /* ??? This is a lie. We do have moves between mmx/general, and between
40192 mmx/sse2. But by saying we need secondary memory we discourage the
40193 register allocator from using the mmx registers unless needed. */
40194 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
40195 return true;
40196
40197 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
40198 {
40199 /* SSE1 doesn't have any direct moves from other classes. */
40200 if (!TARGET_SSE2)
40201 return true;
40202
40203 /* If the target says that inter-unit moves are more expensive
40204 than moving through memory, then don't generate them. */
40205 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
40206 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
40207 return true;
40208
40209 /* Between SSE and general, we have moves no larger than word size. */
40210 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
40211 return true;
40212 }
40213
40214 return false;
40215 }
40216
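/* Out-of-line entry point for inline_secondary_memory_needed; see the
   comment above that function. */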
40217 bool
40218 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
40219 enum machine_mode mode, int strict)
40220 {
40221 return inline_secondary_memory_needed (class1, class2, mode, strict);
40222 }
40223
40224 /* Implement the TARGET_CLASS_MAX_NREGS hook.
40225
40226 On the 80386, this is the size of MODE in words,
40227 except in the FP regs, where a single reg is always enough. */
40228
40229 static unsigned char
40230 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
40231 {
40232 if (MAYBE_INTEGER_CLASS_P (rclass))
40233 {
40234 if (mode == XFmode)
40235 return (TARGET_64BIT ? 2 : 3);
40236 else if (mode == XCmode)
40237 return (TARGET_64BIT ? 4 : 6);
40238 else
40239 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
40240 }
40241 else
40242 {
40243 if (COMPLEX_MODE_P (mode))
40244 return 2;
40245 else
40246 return 1;
40247 }
40248 }
40249
40250 /* Return true if the registers in CLASS cannot represent the change from
40251 modes FROM to TO. */
40252
40253 bool
40254 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
40255 enum reg_class regclass)
40256 {
40257 if (from == to)
40258 return false;
40259
40260 /* x87 registers can't do subreg at all, as all values are reformatted
40261 to extended precision. */
40262 if (MAYBE_FLOAT_CLASS_P (regclass))
40263 return true;
40264
40265 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
40266 {
40267 /* Vector registers do not support QI or HImode loads. If we don't
40268 disallow a change to these modes, reload will assume it's ok to
40269 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
40270 the vec_dupv4hi pattern. */
40271 if (GET_MODE_SIZE (from) < 4)
40272 return true;
40273 }
40274
40275 return false;
40276 }
40277
40278 /* Return the cost of moving data of mode M between a
40279 register and memory. A value of 2 is the default; this cost is
40280 relative to those in `REGISTER_MOVE_COST'.
40281
40282 This function is used extensively by register_move_cost, which is used to
40283 build tables at startup, so make it inline.
40284 When IN is 2, return the maximum of the in and out move costs.
40285
40286 If moving between registers and memory is more expensive than
40287 between two registers, you should define this macro to express the
40288 relative cost.
40289
40290 Also model the increased cost of moving QImode registers in non-Q_REGS
40291 classes.
40292 */
40293 static inline int
40294 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
40295 int in)
40296 {
40297 int cost;
40298 if (FLOAT_CLASS_P (regclass))
40299 {
40300 int index;
40301 switch (mode)
40302 {
40303 case SFmode:
40304 index = 0;
40305 break;
40306 case DFmode:
40307 index = 1;
40308 break;
40309 case XFmode:
40310 index = 2;
40311 break;
40312 default:
40313 return 100;
40314 }
40315 if (in == 2)
40316 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
40317 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
40318 }
40319 if (SSE_CLASS_P (regclass))
40320 {
40321 int index;
40322 switch (GET_MODE_SIZE (mode))
40323 {
40324 case 4:
40325 index = 0;
40326 break;
40327 case 8:
40328 index = 1;
40329 break;
40330 case 16:
40331 index = 2;
40332 break;
40333 default:
40334 return 100;
40335 }
40336 if (in == 2)
40337 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
40338 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
40339 }
40340 if (MMX_CLASS_P (regclass))
40341 {
40342 int index;
40343 switch (GET_MODE_SIZE (mode))
40344 {
40345 case 4:
40346 index = 0;
40347 break;
40348 case 8:
40349 index = 1;
40350 break;
40351 default:
40352 return 100;
40353 }
40354 if (in == 2)
40355 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
40356 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
40357 }
40358 switch (GET_MODE_SIZE (mode))
40359 {
40360 case 1:
40361 if (Q_CLASS_P (regclass) || TARGET_64BIT)
40362 {
40363 if (!in)
40364 return ix86_cost->int_store[0];
40365 if (TARGET_PARTIAL_REG_DEPENDENCY
40366 && optimize_function_for_speed_p (cfun))
40367 cost = ix86_cost->movzbl_load;
40368 else
40369 cost = ix86_cost->int_load[0];
40370 if (in == 2)
40371 return MAX (cost, ix86_cost->int_store[0]);
40372 return cost;
40373 }
40374 else
40375 {
40376 if (in == 2)
40377 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
40378 if (in)
40379 return ix86_cost->movzbl_load;
40380 else
40381 return ix86_cost->int_store[0] + 4;
40382 }
40383 break;
40384 case 2:
40385 if (in == 2)
40386 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
40387 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
40388 default:
40389 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
40390 if (mode == TFmode)
40391 mode = XFmode;
40392 if (in == 2)
40393 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
40394 else if (in)
40395 cost = ix86_cost->int_load[2];
40396 else
40397 cost = ix86_cost->int_store[2];
40398 return (cost * (((int) GET_MODE_SIZE (mode)
40399 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
40400 }
40401 }
40402
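/* Implement TARGET_MEMORY_MOVE_COST.  Wrap inline_memory_move_cost,
   mapping the boolean IN onto the 0/1 encoding it expects. */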
40403 static int
40404 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
40405 bool in)
40406 {
40407 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
40408 }
40409
40410
40411 /* Return the cost of moving data from a register in class CLASS1 to
40412 one in class CLASS2.
40413
40414 It is not required that the cost always equal 2 when FROM is the same as TO;
40415 on some machines it is expensive to move between registers if they are not
40416 general registers. */
40417
40418 static int
40419 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
40420 reg_class_t class2_i)
40421 {
40422 enum reg_class class1 = (enum reg_class) class1_i;
40423 enum reg_class class2 = (enum reg_class) class2_i;
40424
40425 /* In case we require secondary memory, compute the cost of the store followed
40426 by the load. In order to avoid bad register allocation choices, we need
40427 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
40428
40429 if (inline_secondary_memory_needed (class1, class2, mode, 0))
40430 {
40431 int cost = 1;
40432
40433 cost += inline_memory_move_cost (mode, class1, 2);
40434 cost += inline_memory_move_cost (mode, class2, 2);
40435
40436 /* When copying from a general purpose register we may emit multiple
40437 stores followed by a single load, causing a memory size mismatch stall.
40438 Count this as an arbitrarily high cost of 20. */
40439 if (targetm.class_max_nregs (class1, mode)
40440 > targetm.class_max_nregs (class2, mode))
40441 cost += 20;
40442
40443 /* In the case of FP/MMX moves, the registers actually overlap, and we
40444 have to switch modes in order to treat them differently. */
40445 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
40446 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
40447 cost += 20;
40448
40449 return cost;
40450 }
40451
40452 /* Moves between SSE/MMX and integer unit are expensive. */
40453 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
40454 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
40455
40456 /* ??? By keeping the returned value relatively high, we limit the number
40457 of moves between integer and MMX/SSE registers for all targets.
40458 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
40459 where integer modes in MMX/SSE registers are not tieable
40460 because of missing QImode and HImode moves to, from or between
40461 MMX/SSE registers. */
40462 return MAX (8, ix86_cost->mmxsse_to_integer);
40463
40464 if (MAYBE_FLOAT_CLASS_P (class1))
40465 return ix86_cost->fp_move;
40466 if (MAYBE_SSE_CLASS_P (class1))
40467 return ix86_cost->sse_move;
40468 if (MAYBE_MMX_CLASS_P (class1))
40469 return ix86_cost->mmx_move;
40470 return 2;
40471 }
40472
40473 /* Return TRUE if hard register REGNO can hold a value of machine-mode
40474 MODE. */
40475
40476 bool
40477 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
40478 {
40479 /* The flags register, and only the flags register, can hold CCmode values. */
40480 if (CC_REGNO_P (regno))
40481 return GET_MODE_CLASS (mode) == MODE_CC;
40482 if (GET_MODE_CLASS (mode) == MODE_CC
40483 || GET_MODE_CLASS (mode) == MODE_RANDOM
40484 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
40485 return false;
40486 if (STACK_REGNO_P (regno))
40487 return VALID_FP_MODE_P (mode);
40488 if (MASK_REGNO_P (regno))
40489 return (VALID_MASK_REG_MODE (mode)
40490 || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
40491 if (SSE_REGNO_P (regno))
40492 {
40493 /* We implement the move patterns for all vector modes into and
40494 out of SSE registers, even when no operation instructions
40495 are available. */
40496
40497 /* For AVX-512 we allow, regardless of regno:
40498 - XI mode
40499 - any 512-bit wide vector mode
40500 - any scalar mode. */
40501 if (TARGET_AVX512F
40502 && (mode == XImode
40503 || VALID_AVX512F_REG_MODE (mode)
40504 || VALID_AVX512F_SCALAR_MODE (mode)))
40505 return true;
40506
40507 /* TODO check for QI/HI scalars. */
40508 /* AVX512VL allows SSE registers 16+ for 128/256-bit modes. */
40509 if (TARGET_AVX512VL
40510 && (mode == OImode
40511 || mode == TImode
40512 || VALID_AVX256_REG_MODE (mode)
40513 || VALID_AVX512VL_128_REG_MODE (mode)))
40514 return true;
40515
40516 /* xmm16-xmm31 are only available for AVX-512. */
40517 if (EXT_REX_SSE_REGNO_P (regno))
40518 return false;
40519
40520 /* OImode and AVX modes are available only when AVX is enabled. */
40521 return ((TARGET_AVX
40522 && VALID_AVX256_REG_OR_OI_MODE (mode))
40523 || VALID_SSE_REG_MODE (mode)
40524 || VALID_SSE2_REG_MODE (mode)
40525 || VALID_MMX_REG_MODE (mode)
40526 || VALID_MMX_REG_MODE_3DNOW (mode));
40527 }
40528 if (MMX_REGNO_P (regno))
40529 {
40530 /* We implement the move patterns for 3DNOW modes even in MMX mode,
40531 so if the register is available at all, then we can move data of
40532 the given mode into or out of it. */
40533 return (VALID_MMX_REG_MODE (mode)
40534 || VALID_MMX_REG_MODE_3DNOW (mode));
40535 }
40536
40537 if (mode == QImode)
40538 {
40539 /* Take care with QImode values - they can be in non-QI regs,
40540 but then they do cause partial register stalls. */
40541 if (ANY_QI_REGNO_P (regno))
40542 return true;
40543 if (!TARGET_PARTIAL_REG_STALL)
40544 return true;
40545 /* LRA checks if the hard register is OK for the given mode.
40546 QImode values can live in non-QI regs, so we allow all
40547 registers here. */
40548 if (lra_in_progress)
40549 return true;
40550 return !can_create_pseudo_p ();
40551 }
40552 /* We handle both integers and floats in the general purpose registers. */
40553 else if (VALID_INT_MODE_P (mode))
40554 return true;
40555 else if (VALID_FP_MODE_P (mode))
40556 return true;
40557 else if (VALID_DFP_MODE_P (mode))
40558 return true;
40559 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
40560 on to use that value in smaller contexts, this can easily force a
40561 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
40562 supporting DImode, allow it. */
40563 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
40564 return true;
40565
40566 return false;
40567 }
40568
40569 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
40570 tieable integer mode. */
40571
40572 static bool
40573 ix86_tieable_integer_mode_p (enum machine_mode mode)
40574 {
40575 switch (mode)
40576 {
40577 case HImode:
40578 case SImode:
40579 return true;
40580
40581 case QImode:
40582 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
40583
40584 case DImode:
40585 return TARGET_64BIT;
40586
40587 default:
40588 return false;
40589 }
40590 }
40591
40592 /* Return true if MODE1 is accessible in a register that can hold MODE2
40593 without copying. That is, all register classes that can hold MODE2
40594 can also hold MODE1. */
40595
40596 bool
40597 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
40598 {
40599 if (mode1 == mode2)
40600 return true;
40601
40602 if (ix86_tieable_integer_mode_p (mode1)
40603 && ix86_tieable_integer_mode_p (mode2))
40604 return true;
40605
40606 /* MODE2 being XFmode implies fp stack or general regs, which means we
40607 can tie any smaller floating point modes to it. Note that we do not
40608 tie this with TFmode. */
40609 if (mode2 == XFmode)
40610 return mode1 == SFmode || mode1 == DFmode;
40611
40612 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
40613 that we can tie it with SFmode. */
40614 if (mode2 == DFmode)
40615 return mode1 == SFmode;
40616
40617 /* If MODE2 is only appropriate for an SSE register, then tie with
40618 any other mode acceptable to SSE registers. */
40619 if (GET_MODE_SIZE (mode2) == 32
40620 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
40621 return (GET_MODE_SIZE (mode1) == 32
40622 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
40623 if (GET_MODE_SIZE (mode2) == 16
40624 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
40625 return (GET_MODE_SIZE (mode1) == 16
40626 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
40627
40628 /* If MODE2 is appropriate for an MMX register, then tie
40629 with any other mode acceptable to MMX registers. */
40630 if (GET_MODE_SIZE (mode2) == 8
40631 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
40632 return (GET_MODE_SIZE (mode1) == 8
40633 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
40634
40635 return false;
40636 }
40637
40638 /* Return the cost of moving between two registers of mode MODE. */
40639
40640 static int
40641 ix86_set_reg_reg_cost (enum machine_mode mode)
40642 {
40643 unsigned int units = UNITS_PER_WORD;
40644
40645 switch (GET_MODE_CLASS (mode))
40646 {
40647 default:
40648 break;
40649
40650 case MODE_CC:
40651 units = GET_MODE_SIZE (CCmode);
40652 break;
40653
40654 case MODE_FLOAT:
40655 if ((TARGET_SSE && mode == TFmode)
40656 || (TARGET_80387 && mode == XFmode)
40657 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
40658 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
40659 units = GET_MODE_SIZE (mode);
40660 break;
40661
40662 case MODE_COMPLEX_FLOAT:
40663 if ((TARGET_SSE && mode == TCmode)
40664 || (TARGET_80387 && mode == XCmode)
40665 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
40666 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
40667 units = GET_MODE_SIZE (mode);
40668 break;
40669
40670 case MODE_VECTOR_INT:
40671 case MODE_VECTOR_FLOAT:
40672 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
40673 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
40674 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
40675 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
40676 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
40677 units = GET_MODE_SIZE (mode);
40678 }
40679
40680 /* Return the cost of moving between two registers of mode MODE,
40681 assuming that the move will be in pieces of at most UNITS bytes. */
40682 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
40683 }
40684
40685 /* Compute a (partial) cost for rtx X. Return true if the complete
40686 cost has been computed, and false if subexpressions should be
40687 scanned. In either case, *TOTAL contains the cost result. */
40688
40689 static bool
40690 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
40691 bool speed)
40692 {
40693 rtx mask;
40694 enum rtx_code code = (enum rtx_code) code_i;
40695 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
40696 enum machine_mode mode = GET_MODE (x);
40697 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
40698
40699 switch (code)
40700 {
40701 case SET:
40702 if (register_operand (SET_DEST (x), VOIDmode)
40703 && reg_or_0_operand (SET_SRC (x), VOIDmode))
40704 {
40705 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
40706 return true;
40707 }
40708 return false;
40709
40710 case CONST_INT:
40711 case CONST:
40712 case LABEL_REF:
40713 case SYMBOL_REF:
40714 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
40715 *total = 3;
40716 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
40717 *total = 2;
40718 else if (flag_pic && SYMBOLIC_CONST (x)
40719 && !(TARGET_64BIT
40720 && (GET_CODE (x) == LABEL_REF
40721 || (GET_CODE (x) == SYMBOL_REF
40722 && SYMBOL_REF_LOCAL_P (x)))))
40723 *total = 1;
40724 else
40725 *total = 0;
40726 return true;
40727
40728 case CONST_DOUBLE:
40729 if (mode == VOIDmode)
40730 {
40731 *total = 0;
40732 return true;
40733 }
40734 switch (standard_80387_constant_p (x))
40735 {
40736 case 1: /* 0.0 */
40737 *total = 1;
40738 return true;
40739 default: /* Other constants */
40740 *total = 2;
40741 return true;
40742 case 0:
40743 case -1:
40744 break;
40745 }
40746 if (SSE_FLOAT_MODE_P (mode))
40747 {
40748 case CONST_VECTOR:
40749 switch (standard_sse_constant_p (x))
40750 {
40751 case 0:
40752 break;
40753 case 1: /* 0: xor eliminates false dependency */
40754 *total = 0;
40755 return true;
40756 default: /* -1: cmp contains false dependency */
40757 *total = 1;
40758 return true;
40759 }
40760 }
40761 /* Fall back to (MEM (SYMBOL_REF)), since that's where
40762 it'll probably end up. Add a penalty for size. */
40763 *total = (COSTS_N_INSNS (1)
40764 + (flag_pic != 0 && !TARGET_64BIT)
40765 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
40766 return true;
40767
40768 case ZERO_EXTEND:
40769 /* The zero extension is often completely free on x86_64, so make
40770 it as cheap as possible. */
40771 if (TARGET_64BIT && mode == DImode
40772 && GET_MODE (XEXP (x, 0)) == SImode)
40773 *total = 1;
40774 else if (TARGET_ZERO_EXTEND_WITH_AND)
40775 *total = cost->add;
40776 else
40777 *total = cost->movzx;
40778 return false;
40779
40780 case SIGN_EXTEND:
40781 *total = cost->movsx;
40782 return false;
40783
40784 case ASHIFT:
40785 if (SCALAR_INT_MODE_P (mode)
40786 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
40787 && CONST_INT_P (XEXP (x, 1)))
40788 {
40789 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
40790 if (value == 1)
40791 {
40792 *total = cost->add;
40793 return false;
40794 }
40795 if ((value == 2 || value == 3)
40796 && cost->lea <= cost->shift_const)
40797 {
40798 *total = cost->lea;
40799 return false;
40800 }
40801 }
40802 /* FALLTHRU */
40803
40804 case ROTATE:
40805 case ASHIFTRT:
40806 case LSHIFTRT:
40807 case ROTATERT:
40808 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
40809 {
40810 /* ??? Should be SSE vector operation cost. */
40811 /* At least for published AMD latencies, this really is the same
40812 as the latency for a simple fpu operation like fabs. */
40813 /* V*QImode is emulated with 1-11 insns. */
40814 if (mode == V16QImode || mode == V32QImode)
40815 {
40816 int count = 11;
40817 if (TARGET_XOP && mode == V16QImode)
40818 {
40819 /* For XOP we use vpshab, which requires a broadcast of the
40820 value to the variable shift insn. For constants this
40821 means a V16QImode constant in memory; even when we can perform the
40822 shift with one insn, set the cost to prefer paddb. */
40823 if (CONSTANT_P (XEXP (x, 1)))
40824 {
40825 *total = (cost->fabs
40826 + rtx_cost (XEXP (x, 0), code, 0, speed)
40827 + (speed ? 2 : COSTS_N_BYTES (16)));
40828 return true;
40829 }
40830 count = 3;
40831 }
40832 else if (TARGET_SSSE3)
40833 count = 7;
40834 *total = cost->fabs * count;
40835 }
40836 else
40837 *total = cost->fabs;
40838 }
40839 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
40840 {
40841 if (CONST_INT_P (XEXP (x, 1)))
40842 {
40843 if (INTVAL (XEXP (x, 1)) > 32)
40844 *total = cost->shift_const + COSTS_N_INSNS (2);
40845 else
40846 *total = cost->shift_const * 2;
40847 }
40848 else
40849 {
40850 if (GET_CODE (XEXP (x, 1)) == AND)
40851 *total = cost->shift_var * 2;
40852 else
40853 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
40854 }
40855 }
40856 else
40857 {
40858 if (CONST_INT_P (XEXP (x, 1)))
40859 *total = cost->shift_const;
40860 else if (GET_CODE (XEXP (x, 1)) == SUBREG
40861 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
40862 {
40863 /* Return the cost after shift-and truncation. */
40864 *total = cost->shift_var;
40865 return true;
40866 }
40867 else
40868 *total = cost->shift_var;
40869 }
40870 return false;
40871
40872 case FMA:
40873 {
40874 rtx sub;
40875
40876 gcc_assert (FLOAT_MODE_P (mode));
40877 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
40878
40879 /* ??? SSE scalar/vector cost should be used here. */
40880 /* ??? Bald assumption that fma has the same cost as fmul. */
40881 *total = cost->fmul;
40882 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
40883
40884 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
40885 sub = XEXP (x, 0);
40886 if (GET_CODE (sub) == NEG)
40887 sub = XEXP (sub, 0);
40888 *total += rtx_cost (sub, FMA, 0, speed);
40889
40890 sub = XEXP (x, 2);
40891 if (GET_CODE (sub) == NEG)
40892 sub = XEXP (sub, 0);
40893 *total += rtx_cost (sub, FMA, 2, speed);
40894 return true;
40895 }
40896
40897 case MULT:
40898 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
40899 {
40900 /* ??? SSE scalar cost should be used here. */
40901 *total = cost->fmul;
40902 return false;
40903 }
40904 else if (X87_FLOAT_MODE_P (mode))
40905 {
40906 *total = cost->fmul;
40907 return false;
40908 }
40909 else if (FLOAT_MODE_P (mode))
40910 {
40911 /* ??? SSE vector cost should be used here. */
40912 *total = cost->fmul;
40913 return false;
40914 }
40915 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
40916 {
40917 /* V*QImode is emulated with 7-13 insns. */
40918 if (mode == V16QImode || mode == V32QImode)
40919 {
40920 int extra = 11;
40921 if (TARGET_XOP && mode == V16QImode)
40922 extra = 5;
40923 else if (TARGET_SSSE3)
40924 extra = 6;
40925 *total = cost->fmul * 2 + cost->fabs * extra;
40926 }
40927 /* V*DImode is emulated with 5-8 insns. */
40928 else if (mode == V2DImode || mode == V4DImode)
40929 {
40930 if (TARGET_XOP && mode == V2DImode)
40931 *total = cost->fmul * 2 + cost->fabs * 3;
40932 else
40933 *total = cost->fmul * 3 + cost->fabs * 5;
40934 }
40935 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
40936 insns, including two PMULUDQ. */
40937 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
40938 *total = cost->fmul * 2 + cost->fabs * 5;
40939 else
40940 *total = cost->fmul;
40941 return false;
40942 }
40943 else
40944 {
40945 rtx op0 = XEXP (x, 0);
40946 rtx op1 = XEXP (x, 1);
40947 int nbits;
40948 if (CONST_INT_P (XEXP (x, 1)))
40949 {
40950 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
40951 for (nbits = 0; value != 0; value &= value - 1)
40952 nbits++;
40953 }
40954 else
40955 /* This is arbitrary. */
40956 nbits = 7;
40957
40958 /* Compute costs correctly for widening multiplication. */
40959 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
40960 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
40961 == GET_MODE_SIZE (mode))
40962 {
40963 int is_mulwiden = 0;
40964 enum machine_mode inner_mode = GET_MODE (op0);
40965
40966 if (GET_CODE (op0) == GET_CODE (op1))
40967 is_mulwiden = 1, op1 = XEXP (op1, 0);
40968 else if (CONST_INT_P (op1))
40969 {
40970 if (GET_CODE (op0) == SIGN_EXTEND)
40971 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
40972 == INTVAL (op1);
40973 else
40974 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
40975 }
40976
40977 if (is_mulwiden)
40978 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
40979 }
40980
40981 *total = (cost->mult_init[MODE_INDEX (mode)]
40982 + nbits * cost->mult_bit
40983 + rtx_cost (op0, outer_code, opno, speed)
40984 + rtx_cost (op1, outer_code, opno, speed));
40985
40986 return true;
40987 }
40988
40989 case DIV:
40990 case UDIV:
40991 case MOD:
40992 case UMOD:
40993 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
40994 /* ??? SSE cost should be used here. */
40995 *total = cost->fdiv;
40996 else if (X87_FLOAT_MODE_P (mode))
40997 *total = cost->fdiv;
40998 else if (FLOAT_MODE_P (mode))
40999 /* ??? SSE vector cost should be used here. */
41000 *total = cost->fdiv;
41001 else
41002 *total = cost->divide[MODE_INDEX (mode)];
41003 return false;
41004
41005 case PLUS:
41006 if (GET_MODE_CLASS (mode) == MODE_INT
41007 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
41008 {
41009 if (GET_CODE (XEXP (x, 0)) == PLUS
41010 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
41011 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
41012 && CONSTANT_P (XEXP (x, 1)))
41013 {
41014 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
41015 if (val == 2 || val == 4 || val == 8)
41016 {
41017 *total = cost->lea;
41018 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
41019 outer_code, opno, speed);
41020 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
41021 outer_code, opno, speed);
41022 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
41023 return true;
41024 }
41025 }
41026 else if (GET_CODE (XEXP (x, 0)) == MULT
41027 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
41028 {
41029 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
41030 if (val == 2 || val == 4 || val == 8)
41031 {
41032 *total = cost->lea;
41033 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
41034 outer_code, opno, speed);
41035 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
41036 return true;
41037 }
41038 }
41039 else if (GET_CODE (XEXP (x, 0)) == PLUS)
41040 {
41041 *total = cost->lea;
41042 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
41043 outer_code, opno, speed);
41044 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
41045 outer_code, opno, speed);
41046 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
41047 return true;
41048 }
41049 }
41050 /* FALLTHRU */
41051
41052 case MINUS:
41053 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
41054 {
41055 /* ??? SSE cost should be used here. */
41056 *total = cost->fadd;
41057 return false;
41058 }
41059 else if (X87_FLOAT_MODE_P (mode))
41060 {
41061 *total = cost->fadd;
41062 return false;
41063 }
41064 else if (FLOAT_MODE_P (mode))
41065 {
41066 /* ??? SSE vector cost should be used here. */
41067 *total = cost->fadd;
41068 return false;
41069 }
41070 /* FALLTHRU */
41071
41072 case AND:
41073 case IOR:
41074 case XOR:
41075 if (GET_MODE_CLASS (mode) == MODE_INT
41076 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41077 {
41078 *total = (cost->add * 2
41079 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
41080 << (GET_MODE (XEXP (x, 0)) != DImode))
41081 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
41082 << (GET_MODE (XEXP (x, 1)) != DImode)));
41083 return true;
41084 }
41085 /* FALLTHRU */
41086
41087 case NEG:
41088 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
41089 {
41090 /* ??? SSE cost should be used here. */
41091 *total = cost->fchs;
41092 return false;
41093 }
41094 else if (X87_FLOAT_MODE_P (mode))
41095 {
41096 *total = cost->fchs;
41097 return false;
41098 }
41099 else if (FLOAT_MODE_P (mode))
41100 {
41101 /* ??? SSE vector cost should be used here. */
41102 *total = cost->fchs;
41103 return false;
41104 }
41105 /* FALLTHRU */
41106
41107 case NOT:
41108 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
41109 {
41110 /* ??? Should be SSE vector operation cost. */
41111 /* At least for published AMD latencies, this really is the same
41112 as the latency for a simple fpu operation like fabs. */
41113 *total = cost->fabs;
41114 }
41115 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41116 *total = cost->add * 2;
41117 else
41118 *total = cost->add;
41119 return false;
41120
41121 case COMPARE:
41122 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
41123 && XEXP (XEXP (x, 0), 1) == const1_rtx
41124 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
41125 && XEXP (x, 1) == const0_rtx)
41126 {
41127 /* This kind of construct is implemented using test[bwl].
41128 Treat it as if we had an AND. */
41129 *total = (cost->add
41130 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
41131 + rtx_cost (const1_rtx, outer_code, opno, speed));
41132 return true;
41133 }
41134 return false;
41135
41136 case FLOAT_EXTEND:
41137 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
41138 *total = 0;
41139 return false;
41140
41141 case ABS:
41142 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
41143 /* ??? SSE cost should be used here. */
41144 *total = cost->fabs;
41145 else if (X87_FLOAT_MODE_P (mode))
41146 *total = cost->fabs;
41147 else if (FLOAT_MODE_P (mode))
41148 /* ??? SSE vector cost should be used here. */
41149 *total = cost->fabs;
41150 return false;
41151
41152 case SQRT:
41153 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
41154 /* ??? SSE cost should be used here. */
41155 *total = cost->fsqrt;
41156 else if (X87_FLOAT_MODE_P (mode))
41157 *total = cost->fsqrt;
41158 else if (FLOAT_MODE_P (mode))
41159 /* ??? SSE vector cost should be used here. */
41160 *total = cost->fsqrt;
41161 return false;
41162
41163 case UNSPEC:
41164 if (XINT (x, 1) == UNSPEC_TP)
41165 *total = 0;
41166 return false;
41167
41168 case VEC_SELECT:
41169 case VEC_CONCAT:
41170 case VEC_DUPLICATE:
41171 /* ??? Assume all of these vector manipulation patterns are
41172 recognizable, in which case they all pretty much have the
41173 same cost. */
41174 *total = cost->fabs;
41175 return true;
41176 case VEC_MERGE:
41177 mask = XEXP (x, 2);
41178 /* This is a masked instruction; assume the same cost
41179 as the non-masked variant. */
41180 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
41181 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
41182 else
41183 *total = cost->fabs;
41184 return true;
41185
41186 default:
41187 return false;
41188 }
41189 }
41190
41191 #if TARGET_MACHO
41192
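/* Counter used to generate unique labels for the Mach-O stubs emitted
   by machopic_output_stub below. */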
41193 static int current_machopic_label_num;
41194
41195 /* Given a symbol name and its associated stub, write out the
41196 definition of the stub. */
41197
41198 void
41199 machopic_output_stub (FILE *file, const char *symb, const char *stub)
41200 {
41201 unsigned int length;
41202 char *binder_name, *symbol_name, lazy_ptr_name[32];
41203 int label = ++current_machopic_label_num;
41204
41205 /* For 64-bit we shouldn't get here. */
41206 gcc_assert (!TARGET_64BIT);
41207
41208 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
41209 symb = targetm.strip_name_encoding (symb);
41210
41211 length = strlen (stub);
41212 binder_name = XALLOCAVEC (char, length + 32);
41213 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
41214
41215 length = strlen (symb);
41216 symbol_name = XALLOCAVEC (char, length + 32);
41217 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
41218
41219 sprintf (lazy_ptr_name, "L%d$lz", label);
41220
41221 if (MACHOPIC_ATT_STUB)
41222 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
41223 else if (MACHOPIC_PURE)
41224 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
41225 else
41226 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
41227
41228 fprintf (file, "%s:\n", stub);
41229 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
41230
41231 if (MACHOPIC_ATT_STUB)
41232 {
41233 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
41234 }
41235 else if (MACHOPIC_PURE)
41236 {
41237 /* PIC stub. */
41238 /* 25-byte PIC stub using "CALL get_pc_thunk". */
41239 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
41240 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
41241 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
41242 label, lazy_ptr_name, label);
41243 fprintf (file, "\tjmp\t*%%ecx\n");
41244 }
41245 else
41246 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
41247
41248 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
41249 it needs no stub-binding-helper. */
41250 if (MACHOPIC_ATT_STUB)
41251 return;
41252
41253 fprintf (file, "%s:\n", binder_name);
41254
41255 if (MACHOPIC_PURE)
41256 {
41257 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
41258 fprintf (file, "\tpushl\t%%ecx\n");
41259 }
41260 else
41261 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
41262
41263 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
41264
41265 /* N.B. Keep the correspondence of these
41266 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
41267 old-pic/new-pic/non-pic stubs; altering this will break
41268 compatibility with existing dylibs. */
41269 if (MACHOPIC_PURE)
41270 {
41271 /* 25-byte PIC stub using "CALL get_pc_thunk". */
41272 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
41273 }
41274 else
41275 /* 16-byte -mdynamic-no-pic stub. */
41276 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
41277
41278 fprintf (file, "%s:\n", lazy_ptr_name);
41279 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
41280 fprintf (file, ASM_LONG "%s\n", binder_name);
41281 }
41282 #endif /* TARGET_MACHO */
41283
41284 /* Order the registers for the register allocator. */
41285
41286 void
41287 x86_order_regs_for_local_alloc (void)
41288 {
41289 int pos = 0;
41290 int i;
41291
41292 /* First allocate the local general purpose registers. */
41293 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
41294 if (GENERAL_REGNO_P (i) && call_used_regs[i])
41295 reg_alloc_order [pos++] = i;
41296
41297 /* Global general purpose registers. */
41298 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
41299 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
41300 reg_alloc_order [pos++] = i;
41301
41302 /* x87 registers come first in case we are doing FP math
41303 using them. */
41304 if (!TARGET_SSE_MATH)
41305 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
41306 reg_alloc_order [pos++] = i;
41307
41308 /* SSE registers. */
41309 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
41310 reg_alloc_order [pos++] = i;
41311 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
41312 reg_alloc_order [pos++] = i;
41313
41314 /* Extended REX SSE registers. */
41315 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
41316 reg_alloc_order [pos++] = i;
41317
41318 /* Mask registers. */
41319 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
41320 reg_alloc_order [pos++] = i;
41321
41322 /* x87 registers. */
41323 if (TARGET_SSE_MATH)
41324 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
41325 reg_alloc_order [pos++] = i;
41326
41327 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
41328 reg_alloc_order [pos++] = i;
41329
41330 /* Initialize the rest of the array, as we do not allocate some registers
41331 at all. */
41332 while (pos < FIRST_PSEUDO_REGISTER)
41333 reg_alloc_order [pos++] = 0;
41334 }
41335
41336 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
41337 in struct attribute_spec.handler. */
41338 static tree
41339 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
41340 tree args,
41341 int,
41342 bool *no_add_attrs)
41343 {
41344 if (TREE_CODE (*node) != FUNCTION_TYPE
41345 && TREE_CODE (*node) != METHOD_TYPE
41346 && TREE_CODE (*node) != FIELD_DECL
41347 && TREE_CODE (*node) != TYPE_DECL)
41348 {
41349 warning (OPT_Wattributes, "%qE attribute only applies to functions",
41350 name);
41351 *no_add_attrs = true;
41352 return NULL_TREE;
41353 }
41354 if (TARGET_64BIT)
41355 {
41356 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
41357 name);
41358 *no_add_attrs = true;
41359 return NULL_TREE;
41360 }
41361 if (is_attribute_p ("callee_pop_aggregate_return", name))
41362 {
41363 tree cst;
41364
41365 cst = TREE_VALUE (args);
41366 if (TREE_CODE (cst) != INTEGER_CST)
41367 {
41368 warning (OPT_Wattributes,
41369 "%qE attribute requires an integer constant argument",
41370 name);
41371 *no_add_attrs = true;
41372 }
41373 else if (compare_tree_int (cst, 0) != 0
41374 && compare_tree_int (cst, 1) != 0)
41375 {
41376 warning (OPT_Wattributes,
41377 "argument to %qE attribute is neither zero, nor one",
41378 name);
41379 *no_add_attrs = true;
41380 }
41381
41382 return NULL_TREE;
41383 }
41384
41385 return NULL_TREE;
41386 }
41387
41388 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
41389 struct attribute_spec.handler. */
41390 static tree
41391 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
41392 bool *no_add_attrs)
41393 {
41394 if (TREE_CODE (*node) != FUNCTION_TYPE
41395 && TREE_CODE (*node) != METHOD_TYPE
41396 && TREE_CODE (*node) != FIELD_DECL
41397 && TREE_CODE (*node) != TYPE_DECL)
41398 {
41399 warning (OPT_Wattributes, "%qE attribute only applies to functions",
41400 name);
41401 *no_add_attrs = true;
41402 return NULL_TREE;
41403 }
41404
41405 /* Can combine regparm with all attributes but fastcall. */
41406 if (is_attribute_p ("ms_abi", name))
41407 {
41408 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
41409 {
41410 error ("ms_abi and sysv_abi attributes are not compatible");
41411 }
41412
41413 return NULL_TREE;
41414 }
41415 else if (is_attribute_p ("sysv_abi", name))
41416 {
41417 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
41418 {
41419 error ("ms_abi and sysv_abi attributes are not compatible");
41420 }
41421
41422 return NULL_TREE;
41423 }
41424
41425 return NULL_TREE;
41426 }
41427
41428 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
41429 struct attribute_spec.handler. */
41430 static tree
41431 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
41432 bool *no_add_attrs)
41433 {
41434 tree *type = NULL;
41435 if (DECL_P (*node))
41436 {
41437 if (TREE_CODE (*node) == TYPE_DECL)
41438 type = &TREE_TYPE (*node);
41439 }
41440 else
41441 type = node;
41442
41443 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
41444 {
41445 warning (OPT_Wattributes, "%qE attribute ignored",
41446 name);
41447 *no_add_attrs = true;
41448 }
41449
41450 else if ((is_attribute_p ("ms_struct", name)
41451 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
41452 || ((is_attribute_p ("gcc_struct", name)
41453 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
41454 {
41455 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
41456 name);
41457 *no_add_attrs = true;
41458 }
41459
41460 return NULL_TREE;
41461 }
41462
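/* Handle attributes that are only valid on function declarations;
   arguments as in struct attribute_spec.handler. */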
41463 static tree
41464 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
41465 bool *no_add_attrs)
41466 {
41467 if (TREE_CODE (*node) != FUNCTION_DECL)
41468 {
41469 warning (OPT_Wattributes, "%qE attribute only applies to functions",
41470 name);
41471 *no_add_attrs = true;
41472 }
41473 return NULL_TREE;
41474 }
41475
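/* Implement TARGET_MS_BITFIELD_LAYOUT_P.  Use MS bitfield layout when
   TARGET_MS_BITFIELD_LAYOUT is in effect and the type is not marked
   gcc_struct, or when the type is marked ms_struct. */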
41476 static bool
41477 ix86_ms_bitfield_layout_p (const_tree record_type)
41478 {
41479 return ((TARGET_MS_BITFIELD_LAYOUT
41480 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
41481 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
41482 }
41483
41484 /* Returns an expression indicating where the this parameter is
41485 located on entry to the FUNCTION. */
41486
41487 static rtx
41488 x86_this_parameter (tree function)
41489 {
41490 tree type = TREE_TYPE (function);
41491 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
41492 int nregs;
41493
41494 if (TARGET_64BIT)
41495 {
41496 const int *parm_regs;
41497
41498 if (ix86_function_type_abi (type) == MS_ABI)
41499 parm_regs = x86_64_ms_abi_int_parameter_registers;
41500 else
41501 parm_regs = x86_64_int_parameter_registers;
41502 return gen_rtx_REG (Pmode, parm_regs[aggr]);
41503 }
41504
41505 nregs = ix86_function_regparm (type, function);
41506
41507 if (nregs > 0 && !stdarg_p (type))
41508 {
41509 int regno;
41510 unsigned int ccvt = ix86_get_callcvt (type);
41511
41512 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
41513 regno = aggr ? DX_REG : CX_REG;
41514 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
41515 {
41516 regno = CX_REG;
41517 if (aggr)
41518 return gen_rtx_MEM (SImode,
41519 plus_constant (Pmode, stack_pointer_rtx, 4));
41520 }
41521 else
41522 {
41523 regno = AX_REG;
41524 if (aggr)
41525 {
41526 regno = DX_REG;
41527 if (nregs == 1)
41528 return gen_rtx_MEM (SImode,
41529 plus_constant (Pmode,
41530 stack_pointer_rtx, 4));
41531 }
41532 }
41533 return gen_rtx_REG (SImode, regno);
41534 }
41535
41536 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
41537 aggr ? 8 : 4));
41538 }
41539
41540 /* Determine whether x86_output_mi_thunk can succeed. */
41541
41542 static bool
41543 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
41544 const_tree function)
41545 {
41546 /* 64-bit can handle anything. */
41547 if (TARGET_64BIT)
41548 return true;
41549
41550 /* For 32-bit, everything's fine if we have one free register. */
41551 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
41552 return true;
41553
41554 /* Need a free register for vcall_offset. */
41555 if (vcall_offset)
41556 return false;
41557
41558 /* Need a free register for GOT references. */
41559 if (flag_pic && !targetm.binds_local_p (function))
41560 return false;
41561
41562 /* Otherwise ok. */
41563 return true;
41564 }
41565
41566 /* Output the assembler code for a thunk function. THUNK_DECL is the
41567 declaration for the thunk function itself, FUNCTION is the decl for
41568 the target function. DELTA is an immediate constant offset to be
41569 added to THIS. If VCALL_OFFSET is nonzero, the word at
41570 *(*this + vcall_offset) should be added to THIS. */
41571
41572 static void
41573 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
41574 HOST_WIDE_INT vcall_offset, tree function)
41575 {
41576 rtx this_param = x86_this_parameter (function);
41577 rtx this_reg, tmp, fnaddr;
41578 unsigned int tmp_regno;
41579 rtx_insn *insn;
41580
41581 if (TARGET_64BIT)
41582 tmp_regno = R10_REG;
41583 else
41584 {
41585 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
41586 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
41587 tmp_regno = AX_REG;
41588 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
41589 tmp_regno = DX_REG;
41590 else
41591 tmp_regno = CX_REG;
41592 }
41593
41594 emit_note (NOTE_INSN_PROLOGUE_END);
41595
41596 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
41597 pull it in now and let DELTA benefit. */
41598 if (REG_P (this_param))
41599 this_reg = this_param;
41600 else if (vcall_offset)
41601 {
41602 /* Put the this parameter into %eax. */
41603 this_reg = gen_rtx_REG (Pmode, AX_REG);
41604 emit_move_insn (this_reg, this_param);
41605 }
41606 else
41607 this_reg = NULL_RTX;
41608
41609 /* Adjust the this parameter by a fixed constant. */
41610 if (delta)
41611 {
41612 rtx delta_rtx = GEN_INT (delta);
41613 rtx delta_dst = this_reg ? this_reg : this_param;
41614
41615 if (TARGET_64BIT)
41616 {
41617 if (!x86_64_general_operand (delta_rtx, Pmode))
41618 {
41619 tmp = gen_rtx_REG (Pmode, tmp_regno);
41620 emit_move_insn (tmp, delta_rtx);
41621 delta_rtx = tmp;
41622 }
41623 }
41624
41625 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
41626 }
41627
41628 /* Adjust the this parameter by a value stored in the vtable. */
41629 if (vcall_offset)
41630 {
41631 rtx vcall_addr, vcall_mem, this_mem;
41632
41633 tmp = gen_rtx_REG (Pmode, tmp_regno);
41634
41635 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
41636 if (Pmode != ptr_mode)
41637 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
41638 emit_move_insn (tmp, this_mem);
41639
41640 /* Adjust the this parameter. */
41641 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
41642 if (TARGET_64BIT
41643 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
41644 {
41645 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
41646 emit_move_insn (tmp2, GEN_INT (vcall_offset));
41647 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
41648 }
41649
41650 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
41651 if (Pmode != ptr_mode)
41652 emit_insn (gen_addsi_1_zext (this_reg,
41653 gen_rtx_REG (ptr_mode,
41654 REGNO (this_reg)),
41655 vcall_mem));
41656 else
41657 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
41658 }
41659
41660 /* If necessary, drop THIS back to its stack slot. */
41661 if (this_reg && this_reg != this_param)
41662 emit_move_insn (this_param, this_reg);
41663
41664 fnaddr = XEXP (DECL_RTL (function), 0);
41665 if (TARGET_64BIT)
41666 {
41667 if (!flag_pic || targetm.binds_local_p (function)
41668 || TARGET_PECOFF)
41669 ;
41670 else
41671 {
41672 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
41673 tmp = gen_rtx_CONST (Pmode, tmp);
41674 fnaddr = gen_const_mem (Pmode, tmp);
41675 }
41676 }
41677 else
41678 {
41679 if (!flag_pic || targetm.binds_local_p (function))
41680 ;
41681 #if TARGET_MACHO
41682 else if (TARGET_MACHO)
41683 {
41684 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
41685 fnaddr = XEXP (fnaddr, 0);
41686 }
41687 #endif /* TARGET_MACHO */
41688 else
41689 {
41690 tmp = gen_rtx_REG (Pmode, CX_REG);
41691 output_set_got (tmp, NULL_RTX);
41692
41693 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
41694 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
41695 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
41696 fnaddr = gen_const_mem (Pmode, fnaddr);
41697 }
41698 }
41699
41700 /* Our sibling call patterns do not allow memories, because we have no
41701 predicate that can distinguish between frame and non-frame memory.
41702 For our purposes here, we can get away with (ab)using a jump pattern,
41703 because we're going to do no optimization. */
41704 if (MEM_P (fnaddr))
41705 {
41706 if (sibcall_insn_operand (fnaddr, word_mode))
41707 {
41708 fnaddr = XEXP (DECL_RTL (function), 0);
41709 tmp = gen_rtx_MEM (QImode, fnaddr);
41710 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
41711 tmp = emit_call_insn (tmp);
41712 SIBLING_CALL_P (tmp) = 1;
41713 }
41714 else
41715 emit_jump_insn (gen_indirect_jump (fnaddr));
41716 }
41717 else
41718 {
41719 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
41720 fnaddr = legitimize_pic_address (fnaddr,
41721 gen_rtx_REG (Pmode, tmp_regno));
41722
41723 if (!sibcall_insn_operand (fnaddr, word_mode))
41724 {
41725 tmp = gen_rtx_REG (word_mode, tmp_regno);
41726 if (GET_MODE (fnaddr) != word_mode)
41727 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
41728 emit_move_insn (tmp, fnaddr);
41729 fnaddr = tmp;
41730 }
41731
41732 tmp = gen_rtx_MEM (QImode, fnaddr);
41733 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
41734 tmp = emit_call_insn (tmp);
41735 SIBLING_CALL_P (tmp) = 1;
41736 }
41737 emit_barrier ();
41738
41739 /* Emit just enough of rest_of_compilation to get the insns emitted.
41740 Note that use_thunk calls assemble_start_function et al. */
41741 insn = get_insns ();
41742 shorten_branches (insn);
41743 final_start_function (insn, file, 1);
41744 final (insn, file, 1);
41745 final_end_function ();
41746 }
41747
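/* Implement TARGET_ASM_FILE_START.  Emit the directives that must appear
   at the start of every assembler file. */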
41748 static void
41749 x86_file_start (void)
41750 {
41751 default_file_start ();
41752 if (TARGET_16BIT)
41753 fputs ("\t.code16gcc\n", asm_out_file);
41754 #if TARGET_MACHO
41755 darwin_file_start ();
41756 #endif
41757 if (X86_FILE_START_VERSION_DIRECTIVE)
41758 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
41759 if (X86_FILE_START_FLTUSED)
41760 fputs ("\t.global\t__fltused\n", asm_out_file);
41761 if (ix86_asm_dialect == ASM_INTEL)
41762 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
41763 }
41764
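/* Return the alignment to use for FIELD, given the alignment COMPUTED so
   far.  On 32-bit targets without -malign-double, cap double and (complex)
   integer fields at 32 bits.  Used by the ADJUST_FIELD_ALIGN target macro. */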
41765 int
41766 x86_field_alignment (tree field, int computed)
41767 {
41768 enum machine_mode mode;
41769 tree type = TREE_TYPE (field);
41770
41771 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
41772 return computed;
41773 mode = TYPE_MODE (strip_array_types (type));
41774 if (mode == DFmode || mode == DCmode
41775 || GET_MODE_CLASS (mode) == MODE_INT
41776 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
41777 return MIN (32, computed);
41778 return computed;
41779 }
41780
41781 /* Print a call to TARGET to FILE. */
41782
41783 static void
41784 x86_print_call_or_nop (FILE *file, const char *target)
41785 {
41786 if (flag_nop_mcount)
41787 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
41788 else
41789 fprintf (file, "1:\tcall\t%s\n", target);
41790 }
41791
41792 /* Output assembler code to FILE to increment profiler label # LABELNO
41793 for profiling a function entry. */
41794 void
41795 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
41796 {
41797 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
41798 : MCOUNT_NAME);
41799 if (TARGET_64BIT)
41800 {
41801 #ifndef NO_PROFILE_COUNTERS
41802 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
41803 #endif
41804
41805 if (!TARGET_PECOFF && flag_pic)
41806 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
41807 else
41808 x86_print_call_or_nop (file, mcount_name);
41809 }
41810 else if (flag_pic)
41811 {
41812 #ifndef NO_PROFILE_COUNTERS
41813 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
41814 LPREFIX, labelno);
41815 #endif
41816 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
41817 }
41818 else
41819 {
41820 #ifndef NO_PROFILE_COUNTERS
41821 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
41822 LPREFIX, labelno);
41823 #endif
41824 x86_print_call_or_nop (file, mcount_name);
41825 }
41826
41827 if (flag_record_mcount)
41828 {
41829 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
41830 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
41831 fprintf (file, "\t.previous\n");
41832 }
41833 }
41834
41835 /* We don't have exact information about the insn sizes, but we may assume
41836 quite safely that we are informed about all 1 byte insns and memory
41837 address sizes. This is enough to eliminate unnecessary padding in
41838 99% of cases. */
41839
41840 static int
41841 min_insn_size (rtx_insn *insn)
41842 {
41843 int l = 0, len;
41844
41845 if (!INSN_P (insn) || !active_insn_p (insn))
41846 return 0;
41847
41848 /* Discard alignments we've emitted and jump instructions. */
41849 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
41850 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
41851 return 0;
41852
41853 /* Important case - calls are always 5 bytes.
41854 It is common to have many calls in a row. */
41855 if (CALL_P (insn)
41856 && symbolic_reference_mentioned_p (PATTERN (insn))
41857 && !SIBLING_CALL_P (insn))
41858 return 5;
41859 len = get_attr_length (insn);
41860 if (len <= 1)
41861 return 1;
41862
41863 /* For normal instructions we rely on get_attr_length being exact,
41864 with a few exceptions. */
41865 if (!JUMP_P (insn))
41866 {
41867 enum attr_type type = get_attr_type (insn);
41868
41869 switch (type)
41870 {
41871 case TYPE_MULTI:
41872 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
41873 || asm_noperands (PATTERN (insn)) >= 0)
41874 return 0;
41875 break;
41876 case TYPE_OTHER:
41877 case TYPE_FCMP:
41878 break;
41879 default:
41880 /* Otherwise trust get_attr_length. */
41881 return len;
41882 }
41883
41884 l = get_attr_length_address (insn);
41885 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
41886 l = 4;
41887 }
41888 if (l)
41889 return 1+l;
41890 else
41891 return 2;
41892 }
41893
41894 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
41895
41896 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte
41897 window. */
41898
41899 static void
41900 ix86_avoid_jump_mispredicts (void)
41901 {
41902 rtx_insn *insn, *start = get_insns ();
41903 int nbytes = 0, njumps = 0;
41904 int isjump = 0;
41905
41906 /* Look for all minimal intervals of instructions containing 4 jumps.
41907 The intervals are bounded by START and INSN. NBYTES is the total
41908 size of instructions in the interval including INSN and not including
41909 START. When NBYTES is smaller than 16 bytes, it is possible
41910 that the end of START and INSN end up in the same 16-byte page.
41911
41912 The smallest offset in the page at which INSN can start is the case where
41913 START ends on offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
41914 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
41915
41916 Don't consider asm goto as a jump; while it can contain a jump, it doesn't
41917 have to, since control transfer to its label(s) can be performed through other
41918 means, and we also estimate the minimum length of all asm stmts as 0. */
41919 for (insn = start; insn; insn = NEXT_INSN (insn))
41920 {
41921 int min_size;
41922
41923 if (LABEL_P (insn))
41924 {
41925 int align = label_to_alignment (insn);
41926 int max_skip = label_to_max_skip (insn);
41927
41928 if (max_skip > 15)
41929 max_skip = 15;
41930 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
41931 already in the current 16-byte page, because otherwise
41932 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
41933 bytes to reach a 16-byte boundary. */
41934 if (align <= 0
41935 || (align <= 3 && max_skip != (1 << align) - 1))
41936 max_skip = 0;
41937 if (dump_file)
41938 fprintf (dump_file, "Label %i with max_skip %i\n",
41939 INSN_UID (insn), max_skip);
41940 if (max_skip)
41941 {
41942 while (nbytes + max_skip >= 16)
41943 {
41944 start = NEXT_INSN (start);
41945 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
41946 || CALL_P (start))
41947 njumps--, isjump = 1;
41948 else
41949 isjump = 0;
41950 nbytes -= min_insn_size (start);
41951 }
41952 }
41953 continue;
41954 }
41955
41956 min_size = min_insn_size (insn);
41957 nbytes += min_size;
41958 if (dump_file)
41959 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
41960 INSN_UID (insn), min_size);
41961 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
41962 || CALL_P (insn))
41963 njumps++;
41964 else
41965 continue;
41966
41967 while (njumps > 3)
41968 {
41969 start = NEXT_INSN (start);
41970 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
41971 || CALL_P (start))
41972 njumps--, isjump = 1;
41973 else
41974 isjump = 0;
41975 nbytes -= min_insn_size (start);
41976 }
41977 gcc_assert (njumps >= 0);
41978 if (dump_file)
41979 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
41980 INSN_UID (start), INSN_UID (insn), nbytes);
41981
41982 if (njumps == 3 && isjump && nbytes < 16)
41983 {
41984 int padsize = 15 - nbytes + min_insn_size (insn);
41985
41986 if (dump_file)
41987 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
41988 INSN_UID (insn), padsize);
41989 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
41990 }
41991 }
41992 }
41993 #endif
41994
41995 /* AMD Athlon works faster
41996 when RET is not the destination of a conditional jump or directly preceded
41997 by another jump instruction. We avoid the penalty by inserting a NOP just
41998 before the RET instructions in such cases. */
41999 static void
42000 ix86_pad_returns (void)
42001 {
42002 edge e;
42003 edge_iterator ei;
42004
42005 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
42006 {
42007 basic_block bb = e->src;
42008 rtx_insn *ret = BB_END (bb);
42009 rtx_insn *prev;
42010 bool replace = false;
42011
42012 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
42013 || optimize_bb_for_size_p (bb))
42014 continue;
42015 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
42016 if (active_insn_p (prev) || LABEL_P (prev))
42017 break;
42018 if (prev && LABEL_P (prev))
42019 {
42020 edge e;
42021 edge_iterator ei;
42022
42023 FOR_EACH_EDGE (e, ei, bb->preds)
42024 if (EDGE_FREQUENCY (e) && e->src->index >= 0
42025 && !(e->flags & EDGE_FALLTHRU))
42026 {
42027 replace = true;
42028 break;
42029 }
42030 }
42031 if (!replace)
42032 {
42033 prev = prev_active_insn (ret);
42034 if (prev
42035 && ((JUMP_P (prev) && any_condjump_p (prev))
42036 || CALL_P (prev)))
42037 replace = true;
42038 	  /* Empty functions get a branch mispredict even when
42039 	     the jump destination is not visible to us.  */
42040 if (!prev && !optimize_function_for_size_p (cfun))
42041 replace = true;
42042 }
42043 if (replace)
42044 {
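	  /* Replace the plain return with the longer return emitted by
	     gen_simple_return_internal_long; on these CPUs this typically
	     assembles to the two-byte "rep ret" encoding (the exact form is
	     defined in i386.md).  */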
42045 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
42046 delete_insn (ret);
42047 }
42048 }
42049 }
42050
42051 /* Count the minimum number of instructions in BB. Return 4 if the
42052 number of instructions >= 4. */
42053
42054 static int
42055 ix86_count_insn_bb (basic_block bb)
42056 {
42057 rtx_insn *insn;
42058 int insn_count = 0;
42059
42060 /* Count number of instructions in this block. Return 4 if the number
42061 of instructions >= 4. */
42062 FOR_BB_INSNS (bb, insn)
42063 {
42064       /* This only happens in exit blocks.  */
42065 if (JUMP_P (insn)
42066 && ANY_RETURN_P (PATTERN (insn)))
42067 break;
42068
42069 if (NONDEBUG_INSN_P (insn)
42070 && GET_CODE (PATTERN (insn)) != USE
42071 && GET_CODE (PATTERN (insn)) != CLOBBER)
42072 {
42073 insn_count++;
42074 if (insn_count >= 4)
42075 return insn_count;
42076 }
42077 }
42078
42079 return insn_count;
42080 }
42081
42082
42083 /* Count the minimum number of instructions in a code path ending in BB.
42084    Return 4 if the number of instructions >= 4.  */
42085
42086 static int
42087 ix86_count_insn (basic_block bb)
42088 {
42089 edge e;
42090 edge_iterator ei;
42091 int min_prev_count;
42092
42093 /* Only bother counting instructions along paths with no
42094 more than 2 basic blocks between entry and exit. Given
42095 that BB has an edge to exit, determine if a predecessor
42096 of BB has an edge from entry. If so, compute the number
42097 of instructions in the predecessor block. If there
42098 happen to be multiple such blocks, compute the minimum. */
42099 min_prev_count = 4;
42100 FOR_EACH_EDGE (e, ei, bb->preds)
42101 {
42102 edge prev_e;
42103 edge_iterator prev_ei;
42104
42105 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
42106 {
42107 min_prev_count = 0;
42108 break;
42109 }
42110 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
42111 {
42112 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
42113 {
42114 int count = ix86_count_insn_bb (e->src);
42115 if (count < min_prev_count)
42116 min_prev_count = count;
42117 break;
42118 }
42119 }
42120 }
42121
42122 if (min_prev_count < 4)
42123 min_prev_count += ix86_count_insn_bb (bb);
42124
42125 return min_prev_count;
42126 }
42127
42128 /* Pad short functions to 4 instructions.   */
42129
42130 static void
42131 ix86_pad_short_function (void)
42132 {
42133 edge e;
42134 edge_iterator ei;
42135
42136 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
42137 {
42138 rtx_insn *ret = BB_END (e->src);
42139 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
42140 {
42141 int insn_count = ix86_count_insn (e->src);
42142
42143 /* Pad short function. */
42144 if (insn_count < 4)
42145 {
42146 rtx_insn *insn = ret;
42147
42148 /* Find epilogue. */
42149 while (insn
42150 && (!NOTE_P (insn)
42151 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
42152 insn = PREV_INSN (insn);
42153
42154 if (!insn)
42155 insn = ret;
42156
42157 /* Two NOPs count as one instruction. */
42158 insn_count = 2 * (4 - insn_count);
42159 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
42160 }
42161 }
42162 }
42163 }
42164
42165 /* Fix up a Windows system unwinder issue. If an EH region falls through into
42166 the epilogue, the Windows system unwinder will apply epilogue logic and
42167 produce incorrect offsets. This can be avoided by adding a nop between
42168 the last insn that can throw and the first insn of the epilogue. */
42169
42170 static void
42171 ix86_seh_fixup_eh_fallthru (void)
42172 {
42173 edge e;
42174 edge_iterator ei;
42175
42176 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
42177 {
42178 rtx_insn *insn, *next;
42179
42180 /* Find the beginning of the epilogue. */
42181 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
42182 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
42183 break;
42184 if (insn == NULL)
42185 continue;
42186
42187 /* We only care about preceding insns that can throw. */
42188 insn = prev_active_insn (insn);
42189 if (insn == NULL || !can_throw_internal (insn))
42190 continue;
42191
42192 /* Do not separate calls from their debug information. */
42193 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
42194 if (NOTE_P (next)
42195 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
42196 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
42197 insn = next;
42198 else
42199 break;
42200
42201 emit_insn_after (gen_nops (const1_rtx), insn);
42202 }
42203 }
42204
42205 /* Implement machine-specific optimizations.  We implement padding of returns
42206    for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window.  */
42207 static void
42208 ix86_reorg (void)
42209 {
42210 /* We are freeing block_for_insn in the toplev to keep compatibility
42211 with old MDEP_REORGS that are not CFG based. Recompute it now. */
42212 compute_bb_for_insn ();
42213
42214 if (TARGET_SEH && current_function_has_exception_handlers ())
42215 ix86_seh_fixup_eh_fallthru ();
42216
42217 if (optimize && optimize_function_for_speed_p (cfun))
42218 {
42219 if (TARGET_PAD_SHORT_FUNCTION)
42220 ix86_pad_short_function ();
42221 else if (TARGET_PAD_RETURNS)
42222 ix86_pad_returns ();
42223 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
42224 if (TARGET_FOUR_JUMP_LIMIT)
42225 ix86_avoid_jump_mispredicts ();
42226 #endif
42227 }
42228 }
42229
42230 /* Return nonzero when a QImode register that must be represented via a REX
42231    prefix is used.  */
42232 bool
42233 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
42234 {
42235 int i;
42236 extract_insn_cached (insn);
42237 for (i = 0; i < recog_data.n_operands; i++)
42238 if (GENERAL_REG_P (recog_data.operand[i])
42239 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
42240 return true;
42241 return false;
42242 }
42243
42244 /* Return true when INSN mentions a register that must be encoded using a REX
42245    prefix.  */
42246 bool
42247 x86_extended_reg_mentioned_p (rtx insn)
42248 {
42249 subrtx_iterator::array_type array;
42250 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
42251 {
42252 const_rtx x = *iter;
42253 if (REG_P (x)
42254 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
42255 return true;
42256 }
42257 return false;
42258 }
42259
42260 /* If profitable, negate (without causing overflow) the integer constant
42261    of mode MODE at location LOC.  Return true in this case.  */
42262 bool
42263 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
42264 {
42265 HOST_WIDE_INT val;
42266
42267 if (!CONST_INT_P (*loc))
42268 return false;
42269
42270 switch (mode)
42271 {
42272 case DImode:
42273 /* DImode x86_64 constants must fit in 32 bits. */
42274 gcc_assert (x86_64_immediate_operand (*loc, mode));
42275
42276 mode = SImode;
42277 break;
42278
42279 case SImode:
42280 case HImode:
42281 case QImode:
42282 break;
42283
42284 default:
42285 gcc_unreachable ();
42286 }
42287
42288 /* Avoid overflows. */
42289 if (mode_signbit_p (mode, *loc))
42290 return false;
42291
42292 val = INTVAL (*loc);
42293
42294   /* Make things pretty: emit `subl $4,%eax' rather than `addl $-4,%eax'.
42295      Exception: -128 encodes smaller than 128, so swap the sign and the operation.  */
42296 if ((val < 0 && val != -128)
42297 || val == 128)
42298 {
42299 *loc = GEN_INT (-val);
42300 return true;
42301 }
42302
42303 return false;
42304 }
42305
42306 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
42307 optabs would emit if we didn't have TFmode patterns. */
42308
42309 void
42310 x86_emit_floatuns (rtx operands[2])
42311 {
42312 rtx_code_label *neglab, *donelab;
42313 rtx i0, i1, f0, in, out;
42314 enum machine_mode mode, inmode;
42315
42316 inmode = GET_MODE (operands[1]);
42317 gcc_assert (inmode == SImode || inmode == DImode);
42318
42319 out = operands[0];
42320 in = force_reg (inmode, operands[1]);
42321 mode = GET_MODE (out);
42322 neglab = gen_label_rtx ();
42323 donelab = gen_label_rtx ();
42324 f0 = gen_reg_rtx (mode);
42325
42326 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
42327
42328 expand_float (out, in, 0);
42329
42330 emit_jump_insn (gen_jump (donelab));
42331 emit_barrier ();
42332
42333 emit_label (neglab);
42334
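  /* IN has its sign bit set when viewed as a signed value, so convert
     (IN >> 1) | (IN & 1) instead and then double the result; OR-ing the
     low bit back in keeps the rounding of the final value correct.  */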
42335 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
42336 1, OPTAB_DIRECT);
42337 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
42338 1, OPTAB_DIRECT);
42339 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
42340
42341 expand_float (f0, i0, 0);
42342
42343 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
42344
42345 emit_label (donelab);
42346 }
42347 \f
42348 static bool canonicalize_perm (struct expand_vec_perm_d *d);
42349 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
42350 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
42351 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
42352
42353 /* Get a vector mode of the same size as the original but with elements
42354 twice as wide. This is only guaranteed to apply to integral vectors. */
42355
42356 static inline enum machine_mode
42357 get_mode_wider_vector (enum machine_mode o)
42358 {
42359 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
42360 enum machine_mode n = GET_MODE_WIDER_MODE (o);
42361 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
42362 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
42363 return n;
42364 }
42365
42366 /* A subroutine of ix86_expand_vector_init_duplicate.  Try to
42367    fill TARGET with VAL via vec_duplicate.  */
42368
42369 static bool
42370 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
42371 {
42372 bool ok;
42373 rtx_insn *insn;
42374 rtx dup;
42375
42376 /* First attempt to recognize VAL as-is. */
42377 dup = gen_rtx_VEC_DUPLICATE (mode, val);
42378 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
42379 if (recog_memoized (insn) < 0)
42380 {
42381 rtx_insn *seq;
42382 /* If that fails, force VAL into a register. */
42383
42384 start_sequence ();
42385 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
42386 seq = get_insns ();
42387 end_sequence ();
42388 if (seq)
42389 emit_insn_before (seq, insn);
42390
42391 ok = recog_memoized (insn) >= 0;
42392 gcc_assert (ok);
42393 }
42394 return true;
42395 }
42396
42397 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
42398 with all elements equal to VAR. Return true if successful. */
42399
42400 static bool
42401 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
42402 rtx target, rtx val)
42403 {
42404 bool ok;
42405
42406 switch (mode)
42407 {
42408 case V2SImode:
42409 case V2SFmode:
42410 if (!mmx_ok)
42411 return false;
42412 /* FALLTHRU */
42413
42414 case V4DFmode:
42415 case V4DImode:
42416 case V8SFmode:
42417 case V8SImode:
42418 case V2DFmode:
42419 case V2DImode:
42420 case V4SFmode:
42421 case V4SImode:
42422 case V16SImode:
42423 case V8DImode:
42424 case V16SFmode:
42425 case V8DFmode:
42426 return ix86_vector_duplicate_value (mode, target, val);
42427
42428 case V4HImode:
42429 if (!mmx_ok)
42430 return false;
42431 if (TARGET_SSE || TARGET_3DNOW_A)
42432 {
42433 rtx x;
42434
42435 val = gen_lowpart (SImode, val);
42436 x = gen_rtx_TRUNCATE (HImode, val);
42437 x = gen_rtx_VEC_DUPLICATE (mode, x);
42438 emit_insn (gen_rtx_SET (VOIDmode, target, x));
42439 return true;
42440 }
42441 goto widen;
42442
42443 case V8QImode:
42444 if (!mmx_ok)
42445 return false;
42446 goto widen;
42447
42448 case V8HImode:
42449 if (TARGET_AVX2)
42450 return ix86_vector_duplicate_value (mode, target, val);
42451
42452 if (TARGET_SSE2)
42453 {
42454 struct expand_vec_perm_d dperm;
42455 rtx tmp1, tmp2;
42456
42457 permute:
42458 memset (&dperm, 0, sizeof (dperm));
42459 dperm.target = target;
42460 dperm.vmode = mode;
42461 dperm.nelt = GET_MODE_NUNITS (mode);
42462 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
42463 dperm.one_operand_p = true;
42464
42465 /* Extend to SImode using a paradoxical SUBREG. */
42466 tmp1 = gen_reg_rtx (SImode);
42467 emit_move_insn (tmp1, gen_lowpart (SImode, val));
42468
42469 /* Insert the SImode value as low element of a V4SImode vector. */
42470 tmp2 = gen_reg_rtx (V4SImode);
42471 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
42472 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
42473
42474 ok = (expand_vec_perm_1 (&dperm)
42475 || expand_vec_perm_broadcast_1 (&dperm));
42476 gcc_assert (ok);
42477 return ok;
42478 }
42479 goto widen;
42480
42481 case V16QImode:
42482 if (TARGET_AVX2)
42483 return ix86_vector_duplicate_value (mode, target, val);
42484
42485 if (TARGET_SSE2)
42486 goto permute;
42487 goto widen;
42488
42489 widen:
42490 /* Replicate the value once into the next wider mode and recurse. */
42491 {
42492 enum machine_mode smode, wsmode, wvmode;
42493 rtx x;
42494
42495 smode = GET_MODE_INNER (mode);
42496 wvmode = get_mode_wider_vector (mode);
42497 wsmode = GET_MODE_INNER (wvmode);
42498
42499 val = convert_modes (wsmode, smode, val, true);
42500 x = expand_simple_binop (wsmode, ASHIFT, val,
42501 GEN_INT (GET_MODE_BITSIZE (smode)),
42502 NULL_RTX, 1, OPTAB_LIB_WIDEN);
42503 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
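	/* VAL now holds two copies of the original scalar, e.g. a QImode 0xab
	   becomes the HImode value 0xabab, ready to be broadcast in WVMODE.  */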
42504
42505 x = gen_reg_rtx (wvmode);
42506 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
42507 gcc_assert (ok);
42508 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
42509 return ok;
42510 }
42511
42512 case V16HImode:
42513 case V32QImode:
42514 if (TARGET_AVX2)
42515 return ix86_vector_duplicate_value (mode, target, val);
42516 else
42517 {
42518 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
42519 rtx x = gen_reg_rtx (hvmode);
42520
42521 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
42522 gcc_assert (ok);
42523
42524 x = gen_rtx_VEC_CONCAT (mode, x, x);
42525 emit_insn (gen_rtx_SET (VOIDmode, target, x));
42526 }
42527 return true;
42528
42529 case V64QImode:
42530 case V32HImode:
42531 if (TARGET_AVX512BW)
42532 return ix86_vector_duplicate_value (mode, target, val);
42533 else
42534 {
42535 enum machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
42536 rtx x = gen_reg_rtx (hvmode);
42537
42538 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
42539 gcc_assert (ok);
42540
42541 x = gen_rtx_VEC_CONCAT (mode, x, x);
42542 emit_insn (gen_rtx_SET (VOIDmode, target, x));
42543 }
42544 return true;
42545
42546 default:
42547 return false;
42548 }
42549 }
42550
42551 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
42552 whose ONE_VAR element is VAR, and other elements are zero. Return true
42553 if successful. */
42554
42555 static bool
42556 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
42557 rtx target, rtx var, int one_var)
42558 {
42559 enum machine_mode vsimode;
42560 rtx new_target;
42561 rtx x, tmp;
42562 bool use_vector_set = false;
42563
42564 switch (mode)
42565 {
42566 case V2DImode:
42567 /* For SSE4.1, we normally use vector set. But if the second
42568 element is zero and inter-unit moves are OK, we use movq
42569 instead. */
42570 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
42571 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
42572 && one_var == 0));
42573 break;
42574 case V16QImode:
42575 case V4SImode:
42576 case V4SFmode:
42577 use_vector_set = TARGET_SSE4_1;
42578 break;
42579 case V8HImode:
42580 use_vector_set = TARGET_SSE2;
42581 break;
42582 case V4HImode:
42583 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
42584 break;
42585 case V32QImode:
42586 case V16HImode:
42587 case V8SImode:
42588 case V8SFmode:
42589 case V4DFmode:
42590 use_vector_set = TARGET_AVX;
42591 break;
42592 case V4DImode:
42593 /* Use ix86_expand_vector_set in 64bit mode only. */
42594 use_vector_set = TARGET_AVX && TARGET_64BIT;
42595 break;
42596 default:
42597 break;
42598 }
42599
42600 if (use_vector_set)
42601 {
42602 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
42603 var = force_reg (GET_MODE_INNER (mode), var);
42604 ix86_expand_vector_set (mmx_ok, target, var, one_var);
42605 return true;
42606 }
42607
42608 switch (mode)
42609 {
42610 case V2SFmode:
42611 case V2SImode:
42612 if (!mmx_ok)
42613 return false;
42614 /* FALLTHRU */
42615
42616 case V2DFmode:
42617 case V2DImode:
42618 if (one_var != 0)
42619 return false;
42620 var = force_reg (GET_MODE_INNER (mode), var);
42621 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
42622 emit_insn (gen_rtx_SET (VOIDmode, target, x));
42623 return true;
42624
42625 case V4SFmode:
42626 case V4SImode:
42627 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
42628 new_target = gen_reg_rtx (mode);
42629 else
42630 new_target = target;
42631 var = force_reg (GET_MODE_INNER (mode), var);
42632 x = gen_rtx_VEC_DUPLICATE (mode, var);
42633 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
42634 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
42635 if (one_var != 0)
42636 {
42637 /* We need to shuffle the value to the correct position, so
42638 create a new pseudo to store the intermediate result. */
42639
42640 /* With SSE2, we can use the integer shuffle insns. */
42641 if (mode != V4SFmode && TARGET_SSE2)
42642 {
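	      /* NEW_TARGET currently holds (VAR, 0, 0, 0); the shuffle selects
		 the zero in element 1 for every position except ONE_VAR, which
		 takes element 0 (VAR).  */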
42643 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
42644 const1_rtx,
42645 GEN_INT (one_var == 1 ? 0 : 1),
42646 GEN_INT (one_var == 2 ? 0 : 1),
42647 GEN_INT (one_var == 3 ? 0 : 1)));
42648 if (target != new_target)
42649 emit_move_insn (target, new_target);
42650 return true;
42651 }
42652
42653 /* Otherwise convert the intermediate result to V4SFmode and
42654 use the SSE1 shuffle instructions. */
42655 if (mode != V4SFmode)
42656 {
42657 tmp = gen_reg_rtx (V4SFmode);
42658 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
42659 }
42660 else
42661 tmp = new_target;
42662
42663 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
42664 const1_rtx,
42665 GEN_INT (one_var == 1 ? 0 : 1),
42666 GEN_INT (one_var == 2 ? 0+4 : 1+4),
42667 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
42668
42669 if (mode != V4SFmode)
42670 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
42671 else if (tmp != target)
42672 emit_move_insn (target, tmp);
42673 }
42674 else if (target != new_target)
42675 emit_move_insn (target, new_target);
42676 return true;
42677
42678 case V8HImode:
42679 case V16QImode:
42680 vsimode = V4SImode;
42681 goto widen;
42682 case V4HImode:
42683 case V8QImode:
42684 if (!mmx_ok)
42685 return false;
42686 vsimode = V2SImode;
42687 goto widen;
42688 widen:
42689 if (one_var != 0)
42690 return false;
42691
42692 /* Zero extend the variable element to SImode and recurse. */
42693 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
42694
42695 x = gen_reg_rtx (vsimode);
42696 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
42697 var, one_var))
42698 gcc_unreachable ();
42699
42700 emit_move_insn (target, gen_lowpart (mode, x));
42701 return true;
42702
42703 default:
42704 return false;
42705 }
42706 }
42707
42708 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
42709 consisting of the values in VALS. It is known that all elements
42710 except ONE_VAR are constants. Return true if successful. */
42711
42712 static bool
42713 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
42714 rtx target, rtx vals, int one_var)
42715 {
42716 rtx var = XVECEXP (vals, 0, one_var);
42717 enum machine_mode wmode;
42718 rtx const_vec, x;
42719
42720 const_vec = copy_rtx (vals);
42721 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
42722 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
42723
42724 switch (mode)
42725 {
42726 case V2DFmode:
42727 case V2DImode:
42728 case V2SFmode:
42729 case V2SImode:
42730 /* For the two element vectors, it's just as easy to use
42731 the general case. */
42732 return false;
42733
42734 case V4DImode:
42735 /* Use ix86_expand_vector_set in 64bit mode only. */
42736 if (!TARGET_64BIT)
42737 return false;
42738 case V4DFmode:
42739 case V8SFmode:
42740 case V8SImode:
42741 case V16HImode:
42742 case V32QImode:
42743 case V4SFmode:
42744 case V4SImode:
42745 case V8HImode:
42746 case V4HImode:
42747 break;
42748
42749 case V16QImode:
42750 if (TARGET_SSE4_1)
42751 break;
42752 wmode = V8HImode;
42753 goto widen;
42754 case V8QImode:
42755 wmode = V4HImode;
42756 goto widen;
42757 widen:
42758 /* There's no way to set one QImode entry easily. Combine
42759 the variable value with its adjacent constant value, and
42760 promote to an HImode set. */
42761 x = XVECEXP (vals, 0, one_var ^ 1);
42762 if (one_var & 1)
42763 {
42764 var = convert_modes (HImode, QImode, var, true);
42765 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
42766 NULL_RTX, 1, OPTAB_LIB_WIDEN);
42767 x = GEN_INT (INTVAL (x) & 0xff);
42768 }
42769 else
42770 {
42771 var = convert_modes (HImode, QImode, var, true);
42772 x = gen_int_mode (INTVAL (x) << 8, HImode);
42773 }
42774 if (x != const0_rtx)
42775 var = expand_simple_binop (HImode, IOR, var, x, var,
42776 1, OPTAB_LIB_WIDEN);
42777
42778 x = gen_reg_rtx (wmode);
42779 emit_move_insn (x, gen_lowpart (wmode, const_vec));
42780 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
42781
42782 emit_move_insn (target, gen_lowpart (mode, x));
42783 return true;
42784
42785 default:
42786 return false;
42787 }
42788
42789 emit_move_insn (target, const_vec);
42790 ix86_expand_vector_set (mmx_ok, target, var, one_var);
42791 return true;
42792 }
42793
42794 /* A subroutine of ix86_expand_vector_init_general. Use vector
42795 concatenate to handle the most general case: all values variable,
42796 and none identical. */
42797
42798 static void
42799 ix86_expand_vector_init_concat (enum machine_mode mode,
42800 rtx target, rtx *ops, int n)
42801 {
42802 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
42803 rtx first[16], second[8], third[4];
42804 rtvec v;
42805 int i, j;
42806
42807 switch (n)
42808 {
42809 case 2:
42810 switch (mode)
42811 {
42812 case V16SImode:
42813 cmode = V8SImode;
42814 break;
42815 case V16SFmode:
42816 cmode = V8SFmode;
42817 break;
42818 case V8DImode:
42819 cmode = V4DImode;
42820 break;
42821 case V8DFmode:
42822 cmode = V4DFmode;
42823 break;
42824 case V8SImode:
42825 cmode = V4SImode;
42826 break;
42827 case V8SFmode:
42828 cmode = V4SFmode;
42829 break;
42830 case V4DImode:
42831 cmode = V2DImode;
42832 break;
42833 case V4DFmode:
42834 cmode = V2DFmode;
42835 break;
42836 case V4SImode:
42837 cmode = V2SImode;
42838 break;
42839 case V4SFmode:
42840 cmode = V2SFmode;
42841 break;
42842 case V2DImode:
42843 cmode = DImode;
42844 break;
42845 case V2SImode:
42846 cmode = SImode;
42847 break;
42848 case V2DFmode:
42849 cmode = DFmode;
42850 break;
42851 case V2SFmode:
42852 cmode = SFmode;
42853 break;
42854 default:
42855 gcc_unreachable ();
42856 }
42857
42858 if (!register_operand (ops[1], cmode))
42859 ops[1] = force_reg (cmode, ops[1]);
42860 if (!register_operand (ops[0], cmode))
42861 ops[0] = force_reg (cmode, ops[0]);
42862 emit_insn (gen_rtx_SET (VOIDmode, target,
42863 gen_rtx_VEC_CONCAT (mode, ops[0],
42864 ops[1])));
42865 break;
42866
42867 case 4:
42868 switch (mode)
42869 {
42870 case V4DImode:
42871 cmode = V2DImode;
42872 break;
42873 case V4DFmode:
42874 cmode = V2DFmode;
42875 break;
42876 case V4SImode:
42877 cmode = V2SImode;
42878 break;
42879 case V4SFmode:
42880 cmode = V2SFmode;
42881 break;
42882 default:
42883 gcc_unreachable ();
42884 }
42885 goto half;
42886
42887 case 8:
42888 switch (mode)
42889 {
42890 case V8DImode:
42891 cmode = V2DImode;
42892 hmode = V4DImode;
42893 break;
42894 case V8DFmode:
42895 cmode = V2DFmode;
42896 hmode = V4DFmode;
42897 break;
42898 case V8SImode:
42899 cmode = V2SImode;
42900 hmode = V4SImode;
42901 break;
42902 case V8SFmode:
42903 cmode = V2SFmode;
42904 hmode = V4SFmode;
42905 break;
42906 default:
42907 gcc_unreachable ();
42908 }
42909 goto half;
42910
42911 case 16:
42912 switch (mode)
42913 {
42914 case V16SImode:
42915 cmode = V2SImode;
42916 hmode = V4SImode;
42917 gmode = V8SImode;
42918 break;
42919 case V16SFmode:
42920 cmode = V2SFmode;
42921 hmode = V4SFmode;
42922 gmode = V8SFmode;
42923 break;
42924 default:
42925 gcc_unreachable ();
42926 }
42927 goto half;
42928
42929 half:
42930 /* FIXME: We process inputs backward to help RA. PR 36222. */
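      /* Combine the N inputs pairwise into N/2 vectors of mode CMODE, then
	 keep halving (through HMODE and GMODE for the larger cases) until one
	 final VEC_CONCAT fills TARGET.  */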
42931 i = n - 1;
42932 j = (n >> 1) - 1;
42933 for (; i > 0; i -= 2, j--)
42934 {
42935 first[j] = gen_reg_rtx (cmode);
42936 v = gen_rtvec (2, ops[i - 1], ops[i]);
42937 ix86_expand_vector_init (false, first[j],
42938 gen_rtx_PARALLEL (cmode, v));
42939 }
42940
42941 n >>= 1;
42942 if (n > 4)
42943 {
42944 gcc_assert (hmode != VOIDmode);
42945 gcc_assert (gmode != VOIDmode);
42946 for (i = j = 0; i < n; i += 2, j++)
42947 {
42948 second[j] = gen_reg_rtx (hmode);
42949 ix86_expand_vector_init_concat (hmode, second [j],
42950 &first [i], 2);
42951 }
42952 n >>= 1;
42953 for (i = j = 0; i < n; i += 2, j++)
42954 {
42955 third[j] = gen_reg_rtx (gmode);
42956 ix86_expand_vector_init_concat (gmode, third[j],
42957 &second[i], 2);
42958 }
42959 n >>= 1;
42960 ix86_expand_vector_init_concat (mode, target, third, n);
42961 }
42962 else if (n > 2)
42963 {
42964 gcc_assert (hmode != VOIDmode);
42965 for (i = j = 0; i < n; i += 2, j++)
42966 {
42967 second[j] = gen_reg_rtx (hmode);
42968 ix86_expand_vector_init_concat (hmode, second [j],
42969 &first [i], 2);
42970 }
42971 n >>= 1;
42972 ix86_expand_vector_init_concat (mode, target, second, n);
42973 }
42974 else
42975 ix86_expand_vector_init_concat (mode, target, first, n);
42976 break;
42977
42978 default:
42979 gcc_unreachable ();
42980 }
42981 }
42982
42983 /* A subroutine of ix86_expand_vector_init_general. Use vector
42984 interleave to handle the most general case: all values variable,
42985 and none identical. */
42986
42987 static void
42988 ix86_expand_vector_init_interleave (enum machine_mode mode,
42989 rtx target, rtx *ops, int n)
42990 {
42991 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
42992 int i, j;
42993 rtx op0, op1;
42994 rtx (*gen_load_even) (rtx, rtx, rtx);
42995 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
42996 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
42997
42998 switch (mode)
42999 {
43000 case V8HImode:
43001 gen_load_even = gen_vec_setv8hi;
43002 gen_interleave_first_low = gen_vec_interleave_lowv4si;
43003 gen_interleave_second_low = gen_vec_interleave_lowv2di;
43004 inner_mode = HImode;
43005 first_imode = V4SImode;
43006 second_imode = V2DImode;
43007 third_imode = VOIDmode;
43008 break;
43009 case V16QImode:
43010 gen_load_even = gen_vec_setv16qi;
43011 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
43012 gen_interleave_second_low = gen_vec_interleave_lowv4si;
43013 inner_mode = QImode;
43014 first_imode = V8HImode;
43015 second_imode = V4SImode;
43016 third_imode = V2DImode;
43017 break;
43018 default:
43019 gcc_unreachable ();
43020 }
43021
43022 for (i = 0; i < n; i++)
43023 {
43024       /* Extend the odd element to SImode using a paradoxical SUBREG.  */
43025 op0 = gen_reg_rtx (SImode);
43026 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
43027
43028 /* Insert the SImode value as low element of V4SImode vector. */
43029 op1 = gen_reg_rtx (V4SImode);
43030 op0 = gen_rtx_VEC_MERGE (V4SImode,
43031 gen_rtx_VEC_DUPLICATE (V4SImode,
43032 op0),
43033 CONST0_RTX (V4SImode),
43034 const1_rtx);
43035 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
43036
43037       /* Cast the V4SImode vector back to a vector in the original mode.  */
43038 op0 = gen_reg_rtx (mode);
43039 emit_move_insn (op0, gen_lowpart (mode, op1));
43040
43041 /* Load even elements into the second position. */
43042 emit_insn (gen_load_even (op0,
43043 force_reg (inner_mode,
43044 ops [i + i + 1]),
43045 const1_rtx));
43046
43047 /* Cast vector to FIRST_IMODE vector. */
43048 ops[i] = gen_reg_rtx (first_imode);
43049 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
43050 }
43051
43052 /* Interleave low FIRST_IMODE vectors. */
43053 for (i = j = 0; i < n; i += 2, j++)
43054 {
43055 op0 = gen_reg_rtx (first_imode);
43056 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
43057
43058 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
43059 ops[j] = gen_reg_rtx (second_imode);
43060 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
43061 }
43062
43063 /* Interleave low SECOND_IMODE vectors. */
43064 switch (second_imode)
43065 {
43066 case V4SImode:
43067 for (i = j = 0; i < n / 2; i += 2, j++)
43068 {
43069 op0 = gen_reg_rtx (second_imode);
43070 emit_insn (gen_interleave_second_low (op0, ops[i],
43071 ops[i + 1]));
43072
43073 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
43074 vector. */
43075 ops[j] = gen_reg_rtx (third_imode);
43076 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
43077 }
43078 second_imode = V2DImode;
43079 gen_interleave_second_low = gen_vec_interleave_lowv2di;
43080 /* FALLTHRU */
43081
43082 case V2DImode:
43083 op0 = gen_reg_rtx (second_imode);
43084 emit_insn (gen_interleave_second_low (op0, ops[0],
43085 ops[1]));
43086
43087       /* Cast the SECOND_IMODE vector back to a vector in the original
43088 	 mode.  */
43089 emit_insn (gen_rtx_SET (VOIDmode, target,
43090 gen_lowpart (mode, op0)));
43091 break;
43092
43093 default:
43094 gcc_unreachable ();
43095 }
43096 }
43097
43098 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
43099 all values variable, and none identical. */
43100
43101 static void
43102 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
43103 rtx target, rtx vals)
43104 {
43105 rtx ops[64], op0, op1, op2, op3, op4, op5;
43106 enum machine_mode half_mode = VOIDmode;
43107 enum machine_mode quarter_mode = VOIDmode;
43108 int n, i;
43109
43110 switch (mode)
43111 {
43112 case V2SFmode:
43113 case V2SImode:
43114 if (!mmx_ok && !TARGET_SSE)
43115 break;
43116 /* FALLTHRU */
43117
43118 case V16SImode:
43119 case V16SFmode:
43120 case V8DFmode:
43121 case V8DImode:
43122 case V8SFmode:
43123 case V8SImode:
43124 case V4DFmode:
43125 case V4DImode:
43126 case V4SFmode:
43127 case V4SImode:
43128 case V2DFmode:
43129 case V2DImode:
43130 n = GET_MODE_NUNITS (mode);
43131 for (i = 0; i < n; i++)
43132 ops[i] = XVECEXP (vals, 0, i);
43133 ix86_expand_vector_init_concat (mode, target, ops, n);
43134 return;
43135
43136 case V32QImode:
43137 half_mode = V16QImode;
43138 goto half;
43139
43140 case V16HImode:
43141 half_mode = V8HImode;
43142 goto half;
43143
43144 half:
43145 n = GET_MODE_NUNITS (mode);
43146 for (i = 0; i < n; i++)
43147 ops[i] = XVECEXP (vals, 0, i);
43148 op0 = gen_reg_rtx (half_mode);
43149 op1 = gen_reg_rtx (half_mode);
43150 ix86_expand_vector_init_interleave (half_mode, op0, ops,
43151 n >> 2);
43152 ix86_expand_vector_init_interleave (half_mode, op1,
43153 &ops [n >> 1], n >> 2);
43154 emit_insn (gen_rtx_SET (VOIDmode, target,
43155 gen_rtx_VEC_CONCAT (mode, op0, op1)));
43156 return;
43157
43158 case V64QImode:
43159 quarter_mode = V16QImode;
43160 half_mode = V32QImode;
43161 goto quarter;
43162
43163 case V32HImode:
43164 quarter_mode = V8HImode;
43165 half_mode = V16HImode;
43166 goto quarter;
43167
43168 quarter:
43169 n = GET_MODE_NUNITS (mode);
43170 for (i = 0; i < n; i++)
43171 ops[i] = XVECEXP (vals, 0, i);
43172 op0 = gen_reg_rtx (quarter_mode);
43173 op1 = gen_reg_rtx (quarter_mode);
43174 op2 = gen_reg_rtx (quarter_mode);
43175 op3 = gen_reg_rtx (quarter_mode);
43176 op4 = gen_reg_rtx (half_mode);
43177 op5 = gen_reg_rtx (half_mode);
43178 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
43179 n >> 3);
43180 ix86_expand_vector_init_interleave (quarter_mode, op1,
43181 &ops [n >> 2], n >> 3);
43182 ix86_expand_vector_init_interleave (quarter_mode, op2,
43183 &ops [n >> 1], n >> 3);
43184 ix86_expand_vector_init_interleave (quarter_mode, op3,
43185 &ops [(n >> 1) | (n >> 2)], n >> 3);
43186 emit_insn (gen_rtx_SET (VOIDmode, op4,
43187 gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
43188 emit_insn (gen_rtx_SET (VOIDmode, op5,
43189 gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
43190 emit_insn (gen_rtx_SET (VOIDmode, target,
43191 gen_rtx_VEC_CONCAT (mode, op4, op5)));
43192 return;
43193
43194 case V16QImode:
43195 if (!TARGET_SSE4_1)
43196 break;
43197 /* FALLTHRU */
43198
43199 case V8HImode:
43200 if (!TARGET_SSE2)
43201 break;
43202
43203 /* Don't use ix86_expand_vector_init_interleave if we can't
43204 move from GPR to SSE register directly. */
43205 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
43206 break;
43207
43208 n = GET_MODE_NUNITS (mode);
43209 for (i = 0; i < n; i++)
43210 ops[i] = XVECEXP (vals, 0, i);
43211 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
43212 return;
43213
43214 case V4HImode:
43215 case V8QImode:
43216 break;
43217
43218 default:
43219 gcc_unreachable ();
43220 }
43221
43222 {
43223 int i, j, n_elts, n_words, n_elt_per_word;
43224 enum machine_mode inner_mode;
43225 rtx words[4], shift;
43226
43227 inner_mode = GET_MODE_INNER (mode);
43228 n_elts = GET_MODE_NUNITS (mode);
43229 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
43230 n_elt_per_word = n_elts / n_words;
43231 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
43232
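    /* Generic fallback: pack the elements into word-mode integers, OR-ing
       them together highest-indexed element first so the lowest-indexed
       element ends up in the least significant bits, then assemble the
       vector from the words.  */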
43233 for (i = 0; i < n_words; ++i)
43234 {
43235 rtx word = NULL_RTX;
43236
43237 for (j = 0; j < n_elt_per_word; ++j)
43238 {
43239 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
43240 elt = convert_modes (word_mode, inner_mode, elt, true);
43241
43242 if (j == 0)
43243 word = elt;
43244 else
43245 {
43246 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
43247 word, 1, OPTAB_LIB_WIDEN);
43248 word = expand_simple_binop (word_mode, IOR, word, elt,
43249 word, 1, OPTAB_LIB_WIDEN);
43250 }
43251 }
43252
43253 words[i] = word;
43254 }
43255
43256 if (n_words == 1)
43257 emit_move_insn (target, gen_lowpart (mode, words[0]));
43258 else if (n_words == 2)
43259 {
43260 rtx tmp = gen_reg_rtx (mode);
43261 emit_clobber (tmp);
43262 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
43263 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
43264 emit_move_insn (target, tmp);
43265 }
43266 else if (n_words == 4)
43267 {
43268 rtx tmp = gen_reg_rtx (V4SImode);
43269 gcc_assert (word_mode == SImode);
43270 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
43271 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
43272 emit_move_insn (target, gen_lowpart (mode, tmp));
43273 }
43274 else
43275 gcc_unreachable ();
43276 }
43277 }
43278
43279 /* Initialize vector TARGET via VALS. Suppress the use of MMX
43280 instructions unless MMX_OK is true. */
43281
43282 void
43283 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
43284 {
43285 enum machine_mode mode = GET_MODE (target);
43286 enum machine_mode inner_mode = GET_MODE_INNER (mode);
43287 int n_elts = GET_MODE_NUNITS (mode);
43288 int n_var = 0, one_var = -1;
43289 bool all_same = true, all_const_zero = true;
43290 int i;
43291 rtx x;
43292
43293 for (i = 0; i < n_elts; ++i)
43294 {
43295 x = XVECEXP (vals, 0, i);
43296 if (!(CONST_INT_P (x)
43297 || GET_CODE (x) == CONST_DOUBLE
43298 || GET_CODE (x) == CONST_FIXED))
43299 n_var++, one_var = i;
43300 else if (x != CONST0_RTX (inner_mode))
43301 all_const_zero = false;
43302 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
43303 all_same = false;
43304 }
43305
43306 /* Constants are best loaded from the constant pool. */
43307 if (n_var == 0)
43308 {
43309 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
43310 return;
43311 }
43312
43313 /* If all values are identical, broadcast the value. */
43314 if (all_same
43315 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
43316 XVECEXP (vals, 0, 0)))
43317 return;
43318
43319 /* Values where only one field is non-constant are best loaded from
43320 the pool and overwritten via move later. */
43321 if (n_var == 1)
43322 {
43323 if (all_const_zero
43324 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
43325 XVECEXP (vals, 0, one_var),
43326 one_var))
43327 return;
43328
43329 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
43330 return;
43331 }
43332
43333 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
43334 }
43335
43336 void
43337 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
43338 {
43339 enum machine_mode mode = GET_MODE (target);
43340 enum machine_mode inner_mode = GET_MODE_INNER (mode);
43341 enum machine_mode half_mode;
43342 bool use_vec_merge = false;
43343 rtx tmp;
43344 static rtx (*gen_extract[6][2]) (rtx, rtx)
43345 = {
43346 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
43347 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
43348 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
43349 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
43350 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
43351 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
43352 };
43353 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
43354 = {
43355 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
43356 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
43357 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
43358 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
43359 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
43360 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
43361 };
43362 int i, j, n;
43363
43364 switch (mode)
43365 {
43366 case V2SFmode:
43367 case V2SImode:
43368 if (mmx_ok)
43369 {
43370 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
43371 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
43372 if (elt == 0)
43373 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
43374 else
43375 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
43376 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
43377 return;
43378 }
43379 break;
43380
43381 case V2DImode:
43382 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
43383 if (use_vec_merge)
43384 break;
43385
43386 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
43387 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
43388 if (elt == 0)
43389 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
43390 else
43391 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
43392 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
43393 return;
43394
43395 case V2DFmode:
43396 {
43397 rtx op0, op1;
43398
43399 /* For the two element vectors, we implement a VEC_CONCAT with
43400 the extraction of the other element. */
43401
43402 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
43403 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
43404
43405 if (elt == 0)
43406 op0 = val, op1 = tmp;
43407 else
43408 op0 = tmp, op1 = val;
43409
43410 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
43411 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
43412 }
43413 return;
43414
43415 case V4SFmode:
43416 use_vec_merge = TARGET_SSE4_1;
43417 if (use_vec_merge)
43418 break;
43419
43420 switch (elt)
43421 {
43422 case 0:
43423 use_vec_merge = true;
43424 break;
43425
43426 case 1:
43427 /* tmp = target = A B C D */
43428 tmp = copy_to_reg (target);
43429 /* target = A A B B */
43430 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
43431 /* target = X A B B */
43432 ix86_expand_vector_set (false, target, val, 0);
43433 /* target = A X C D */
43434 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
43435 const1_rtx, const0_rtx,
43436 GEN_INT (2+4), GEN_INT (3+4)));
43437 return;
43438
43439 case 2:
43440 /* tmp = target = A B C D */
43441 tmp = copy_to_reg (target);
43442 /* tmp = X B C D */
43443 ix86_expand_vector_set (false, tmp, val, 0);
43444 /* target = A B X D */
43445 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
43446 const0_rtx, const1_rtx,
43447 GEN_INT (0+4), GEN_INT (3+4)));
43448 return;
43449
43450 case 3:
43451 /* tmp = target = A B C D */
43452 tmp = copy_to_reg (target);
43453 /* tmp = X B C D */
43454 ix86_expand_vector_set (false, tmp, val, 0);
43455 	  /* target = A B C X */
43456 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
43457 const0_rtx, const1_rtx,
43458 GEN_INT (2+4), GEN_INT (0+4)));
43459 return;
43460
43461 default:
43462 gcc_unreachable ();
43463 }
43464 break;
43465
43466 case V4SImode:
43467 use_vec_merge = TARGET_SSE4_1;
43468 if (use_vec_merge)
43469 break;
43470
43471 /* Element 0 handled by vec_merge below. */
43472 if (elt == 0)
43473 {
43474 use_vec_merge = true;
43475 break;
43476 }
43477
43478 if (TARGET_SSE2)
43479 {
43480 /* With SSE2, use integer shuffles to swap element 0 and ELT,
43481 store into element 0, then shuffle them back. */
43482
43483 rtx order[4];
43484
43485 order[0] = GEN_INT (elt);
43486 order[1] = const1_rtx;
43487 order[2] = const2_rtx;
43488 order[3] = GEN_INT (3);
43489 order[elt] = const0_rtx;
43490
43491 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
43492 order[1], order[2], order[3]));
43493
43494 ix86_expand_vector_set (false, target, val, 0);
43495
43496 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
43497 order[1], order[2], order[3]));
43498 }
43499 else
43500 {
43501 /* For SSE1, we have to reuse the V4SF code. */
43502 rtx t = gen_reg_rtx (V4SFmode);
43503 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
43504 emit_move_insn (target, gen_lowpart (mode, t));
43505 }
43506 return;
43507
43508 case V8HImode:
43509 use_vec_merge = TARGET_SSE2;
43510 break;
43511 case V4HImode:
43512 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
43513 break;
43514
43515 case V16QImode:
43516 use_vec_merge = TARGET_SSE4_1;
43517 break;
43518
43519 case V8QImode:
43520 break;
43521
43522 case V32QImode:
43523 half_mode = V16QImode;
43524 j = 0;
43525 n = 16;
43526 goto half;
43527
43528 case V16HImode:
43529 half_mode = V8HImode;
43530 j = 1;
43531 n = 8;
43532 goto half;
43533
43534 case V8SImode:
43535 half_mode = V4SImode;
43536 j = 2;
43537 n = 4;
43538 goto half;
43539
43540 case V4DImode:
43541 half_mode = V2DImode;
43542 j = 3;
43543 n = 2;
43544 goto half;
43545
43546 case V8SFmode:
43547 half_mode = V4SFmode;
43548 j = 4;
43549 n = 4;
43550 goto half;
43551
43552 case V4DFmode:
43553 half_mode = V2DFmode;
43554 j = 5;
43555 n = 2;
43556 goto half;
43557
43558 half:
43559 /* Compute offset. */
43560 i = elt / n;
43561 elt %= n;
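      /* I selects the low (0) or high (1) 128-bit half, ELT is now the
	 position within that half, and J indexes the extract/insert
	 generators above.  */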
43562
43563 gcc_assert (i <= 1);
43564
43565 /* Extract the half. */
43566 tmp = gen_reg_rtx (half_mode);
43567 emit_insn (gen_extract[j][i] (tmp, target));
43568
43569       /* Put VAL into TMP at position ELT.  */
43570 ix86_expand_vector_set (false, tmp, val, elt);
43571
43572 /* Put it back. */
43573 emit_insn (gen_insert[j][i] (target, target, tmp));
43574 return;
43575
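    /* For the 512-bit modes below, broadcast VAL into a scratch register and
       use a mask blend keyed by 1 << ELT so that only element ELT of TARGET
       receives the new value.  */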
43576 case V8DFmode:
43577 if (TARGET_AVX512F)
43578 {
43579 tmp = gen_reg_rtx (mode);
43580 emit_insn (gen_rtx_SET (VOIDmode, tmp,
43581 gen_rtx_VEC_DUPLICATE (mode, val)));
43582 emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
43583 force_reg (QImode, GEN_INT (1 << elt))));
43584 return;
43585 }
43586 else
43587 break;
43588 case V8DImode:
43589 if (TARGET_AVX512F)
43590 {
43591 tmp = gen_reg_rtx (mode);
43592 emit_insn (gen_rtx_SET (VOIDmode, tmp,
43593 gen_rtx_VEC_DUPLICATE (mode, val)));
43594 emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
43595 force_reg (QImode, GEN_INT (1 << elt))));
43596 return;
43597 }
43598 else
43599 break;
43600 case V16SFmode:
43601 if (TARGET_AVX512F)
43602 {
43603 tmp = gen_reg_rtx (mode);
43604 emit_insn (gen_rtx_SET (VOIDmode, tmp,
43605 gen_rtx_VEC_DUPLICATE (mode, val)));
43606 emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
43607 force_reg (HImode, GEN_INT (1 << elt))));
43608 return;
43609 }
43610 else
43611 break;
43612 case V16SImode:
43613 if (TARGET_AVX512F)
43614 {
43615 tmp = gen_reg_rtx (mode);
43616 emit_insn (gen_rtx_SET (VOIDmode, tmp,
43617 gen_rtx_VEC_DUPLICATE (mode, val)));
43618 emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
43619 force_reg (HImode, GEN_INT (1 << elt))));
43620 return;
43621 }
43622 else
43623 break;
43624 case V32HImode:
43625 if (TARGET_AVX512F && TARGET_AVX512BW)
43626 {
43627 tmp = gen_reg_rtx (mode);
43628 emit_insn (gen_rtx_SET (VOIDmode, tmp,
43629 gen_rtx_VEC_DUPLICATE (mode, val)));
43630 emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
43631 force_reg (SImode, GEN_INT (1 << elt))));
43632 return;
43633 }
43634 else
43635 break;
43636 case V64QImode:
43637 if (TARGET_AVX512F && TARGET_AVX512BW)
43638 {
43639 tmp = gen_reg_rtx (mode);
43640 emit_insn (gen_rtx_SET (VOIDmode, tmp,
43641 gen_rtx_VEC_DUPLICATE (mode, val)));
43642 emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
43643 force_reg (DImode, GEN_INT (1 << elt))));
43644 return;
43645 }
43646 else
43647 break;
43648
43649 default:
43650 break;
43651 }
43652
43653 if (use_vec_merge)
43654 {
43655 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
43656 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
43657 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
43658 }
43659 else
43660 {
43661 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
43662
43663 emit_move_insn (mem, target);
43664
43665 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
43666 emit_move_insn (tmp, val);
43667
43668 emit_move_insn (target, mem);
43669 }
43670 }
43671
43672 void
43673 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
43674 {
43675 enum machine_mode mode = GET_MODE (vec);
43676 enum machine_mode inner_mode = GET_MODE_INNER (mode);
43677 bool use_vec_extr = false;
43678 rtx tmp;
43679
43680 switch (mode)
43681 {
43682 case V2SImode:
43683 case V2SFmode:
43684 if (!mmx_ok)
43685 break;
43686 /* FALLTHRU */
43687
43688 case V2DFmode:
43689 case V2DImode:
43690 use_vec_extr = true;
43691 break;
43692
43693 case V4SFmode:
43694 use_vec_extr = TARGET_SSE4_1;
43695 if (use_vec_extr)
43696 break;
43697
43698 switch (elt)
43699 {
43700 case 0:
43701 tmp = vec;
43702 break;
43703
43704 case 1:
43705 case 3:
43706 tmp = gen_reg_rtx (mode);
43707 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
43708 GEN_INT (elt), GEN_INT (elt),
43709 GEN_INT (elt+4), GEN_INT (elt+4)));
43710 break;
43711
43712 case 2:
43713 tmp = gen_reg_rtx (mode);
43714 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
43715 break;
43716
43717 default:
43718 gcc_unreachable ();
43719 }
43720 vec = tmp;
43721 use_vec_extr = true;
43722 elt = 0;
43723 break;
43724
43725 case V4SImode:
43726 use_vec_extr = TARGET_SSE4_1;
43727 if (use_vec_extr)
43728 break;
43729
43730 if (TARGET_SSE2)
43731 {
43732 switch (elt)
43733 {
43734 case 0:
43735 tmp = vec;
43736 break;
43737
43738 case 1:
43739 case 3:
43740 tmp = gen_reg_rtx (mode);
43741 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
43742 GEN_INT (elt), GEN_INT (elt),
43743 GEN_INT (elt), GEN_INT (elt)));
43744 break;
43745
43746 case 2:
43747 tmp = gen_reg_rtx (mode);
43748 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
43749 break;
43750
43751 default:
43752 gcc_unreachable ();
43753 }
43754 vec = tmp;
43755 use_vec_extr = true;
43756 elt = 0;
43757 }
43758 else
43759 {
43760 /* For SSE1, we have to reuse the V4SF code. */
43761 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
43762 gen_lowpart (V4SFmode, vec), elt);
43763 return;
43764 }
43765 break;
43766
43767 case V8HImode:
43768 use_vec_extr = TARGET_SSE2;
43769 break;
43770 case V4HImode:
43771 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
43772 break;
43773
43774 case V16QImode:
43775 use_vec_extr = TARGET_SSE4_1;
43776 break;
43777
43778 case V8SFmode:
43779 if (TARGET_AVX)
43780 {
43781 tmp = gen_reg_rtx (V4SFmode);
43782 if (elt < 4)
43783 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
43784 else
43785 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
43786 ix86_expand_vector_extract (false, target, tmp, elt & 3);
43787 return;
43788 }
43789 break;
43790
43791 case V4DFmode:
43792 if (TARGET_AVX)
43793 {
43794 tmp = gen_reg_rtx (V2DFmode);
43795 if (elt < 2)
43796 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
43797 else
43798 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
43799 ix86_expand_vector_extract (false, target, tmp, elt & 1);
43800 return;
43801 }
43802 break;
43803
43804 case V32QImode:
43805 if (TARGET_AVX)
43806 {
43807 tmp = gen_reg_rtx (V16QImode);
43808 if (elt < 16)
43809 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
43810 else
43811 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
43812 ix86_expand_vector_extract (false, target, tmp, elt & 15);
43813 return;
43814 }
43815 break;
43816
43817 case V16HImode:
43818 if (TARGET_AVX)
43819 {
43820 tmp = gen_reg_rtx (V8HImode);
43821 if (elt < 8)
43822 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
43823 else
43824 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
43825 ix86_expand_vector_extract (false, target, tmp, elt & 7);
43826 return;
43827 }
43828 break;
43829
43830 case V8SImode:
43831 if (TARGET_AVX)
43832 {
43833 tmp = gen_reg_rtx (V4SImode);
43834 if (elt < 4)
43835 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
43836 else
43837 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
43838 ix86_expand_vector_extract (false, target, tmp, elt & 3);
43839 return;
43840 }
43841 break;
43842
43843 case V4DImode:
43844 if (TARGET_AVX)
43845 {
43846 tmp = gen_reg_rtx (V2DImode);
43847 if (elt < 2)
43848 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
43849 else
43850 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
43851 ix86_expand_vector_extract (false, target, tmp, elt & 1);
43852 return;
43853 }
43854 break;
43855
43856 case V32HImode:
43857 if (TARGET_AVX512BW)
43858 {
43859 tmp = gen_reg_rtx (V16HImode);
43860 if (elt < 16)
43861 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
43862 else
43863 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
43864 ix86_expand_vector_extract (false, target, tmp, elt & 15);
43865 return;
43866 }
43867 break;
43868
43869 case V64QImode:
43870 if (TARGET_AVX512BW)
43871 {
43872 tmp = gen_reg_rtx (V32QImode);
43873 if (elt < 32)
43874 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
43875 else
43876 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
43877 ix86_expand_vector_extract (false, target, tmp, elt & 31);
43878 return;
43879 }
43880 break;
43881
43882 case V16SFmode:
43883 tmp = gen_reg_rtx (V8SFmode);
43884 if (elt < 8)
43885 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
43886 else
43887 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
43888 ix86_expand_vector_extract (false, target, tmp, elt & 7);
43889 return;
43890
43891 case V8DFmode:
43892 tmp = gen_reg_rtx (V4DFmode);
43893 if (elt < 4)
43894 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
43895 else
43896 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
43897 ix86_expand_vector_extract (false, target, tmp, elt & 3);
43898 return;
43899
43900 case V16SImode:
43901 tmp = gen_reg_rtx (V8SImode);
43902 if (elt < 8)
43903 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
43904 else
43905 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
43906 ix86_expand_vector_extract (false, target, tmp, elt & 7);
43907 return;
43908
43909 case V8DImode:
43910 tmp = gen_reg_rtx (V4DImode);
43911 if (elt < 4)
43912 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
43913 else
43914 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
43915 ix86_expand_vector_extract (false, target, tmp, elt & 3);
43916 return;
43917
43918 case V8QImode:
43919 /* ??? Could extract the appropriate HImode element and shift. */
43920 default:
43921 break;
43922 }
43923
43924 if (use_vec_extr)
43925 {
43926 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
43927 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
43928
43929 /* Let the rtl optimizers know about the zero extension performed. */
43930 if (inner_mode == QImode || inner_mode == HImode)
43931 {
43932 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
43933 target = gen_lowpart (SImode, target);
43934 }
43935
43936 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
43937 }
43938 else
43939 {
43940 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
43941
43942 emit_move_insn (mem, vec);
43943
43944 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
43945 emit_move_insn (target, tmp);
43946 }
43947 }
43948
43949 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
43950 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
43951 The upper bits of DEST are undefined, though they shouldn't cause
43952 exceptions (some bits from src or all zeros are ok). */
43953
43954 static void
43955 emit_reduc_half (rtx dest, rtx src, int i)
43956 {
43957 rtx tem, d = dest;
43958 switch (GET_MODE (src))
43959 {
43960 case V4SFmode:
43961 if (i == 128)
43962 tem = gen_sse_movhlps (dest, src, src);
43963 else
43964 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
43965 GEN_INT (1 + 4), GEN_INT (1 + 4));
43966 break;
43967 case V2DFmode:
43968 tem = gen_vec_interleave_highv2df (dest, src, src);
43969 break;
43970 case V16QImode:
43971 case V8HImode:
43972 case V4SImode:
43973 case V2DImode:
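      /* Shift the whole 128-bit register right by I/2 bits so the upper half
	 of the I-bit chunk being reduced lands in the low half.  */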
43974 d = gen_reg_rtx (V1TImode);
43975 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
43976 GEN_INT (i / 2));
43977 break;
43978 case V8SFmode:
43979 if (i == 256)
43980 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
43981 else
43982 tem = gen_avx_shufps256 (dest, src, src,
43983 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
43984 break;
43985 case V4DFmode:
43986 if (i == 256)
43987 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
43988 else
43989 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
43990 break;
43991 case V32QImode:
43992 case V16HImode:
43993 case V8SImode:
43994 case V4DImode:
43995 if (i == 256)
43996 {
43997 if (GET_MODE (dest) != V4DImode)
43998 d = gen_reg_rtx (V4DImode);
43999 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
44000 gen_lowpart (V4DImode, src),
44001 const1_rtx);
44002 }
44003 else
44004 {
44005 d = gen_reg_rtx (V2TImode);
44006 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
44007 GEN_INT (i / 2));
44008 }
44009 break;
44010 case V64QImode:
44011 case V32HImode:
44012 case V16SImode:
44013 case V16SFmode:
44014 case V8DImode:
44015 case V8DFmode:
44016 if (i > 128)
44017 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
44018 gen_lowpart (V16SImode, src),
44019 gen_lowpart (V16SImode, src),
44020 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
44021 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
44022 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
44023 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
44024 GEN_INT (0xC), GEN_INT (0xD),
44025 GEN_INT (0xE), GEN_INT (0xF),
44026 GEN_INT (0x10), GEN_INT (0x11),
44027 GEN_INT (0x12), GEN_INT (0x13),
44028 GEN_INT (0x14), GEN_INT (0x15),
44029 GEN_INT (0x16), GEN_INT (0x17));
44030 else
44031 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
44032 gen_lowpart (V16SImode, src),
44033 GEN_INT (i == 128 ? 0x2 : 0x1),
44034 GEN_INT (0x3),
44035 GEN_INT (0x3),
44036 GEN_INT (0x3),
44037 GEN_INT (i == 128 ? 0x6 : 0x5),
44038 GEN_INT (0x7),
44039 GEN_INT (0x7),
44040 GEN_INT (0x7),
44041 GEN_INT (i == 128 ? 0xA : 0x9),
44042 GEN_INT (0xB),
44043 GEN_INT (0xB),
44044 GEN_INT (0xB),
44045 GEN_INT (i == 128 ? 0xE : 0xD),
44046 GEN_INT (0xF),
44047 GEN_INT (0xF),
44048 GEN_INT (0xF));
44049 break;
44050 default:
44051 gcc_unreachable ();
44052 }
44053 emit_insn (tem);
44054 if (d != dest)
44055 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
44056 }
44057
44058 /* Expand a vector reduction. FN is the binary pattern to reduce;
44059 DEST is the destination; IN is the input vector. */
44060
44061 void
44062 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
44063 {
44064 rtx half, dst, vec = in;
44065 enum machine_mode mode = GET_MODE (in);
44066 int i;
44067
44068   /* SSE4.1 has a special instruction for V8HImode UMIN reduction.  */
44069 if (TARGET_SSE4_1
44070 && mode == V8HImode
44071 && fn == gen_uminv8hi3)
44072 {
44073 emit_insn (gen_sse4_1_phminposuw (dest, in));
44074 return;
44075 }
44076
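  /* Halve the width being reduced on each iteration: emit_reduc_half brings
     the upper half of VEC down, and FN combines it with the lower half; the
     final combination writes DEST.  */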
44077 for (i = GET_MODE_BITSIZE (mode);
44078 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
44079 i >>= 1)
44080 {
44081 half = gen_reg_rtx (mode);
44082 emit_reduc_half (half, vec, i);
44083 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
44084 dst = dest;
44085 else
44086 dst = gen_reg_rtx (mode);
44087 emit_insn (fn (dst, half, vec));
44088 vec = dst;
44089 }
44090 }
44091 \f
44092 /* Target hook for scalar_mode_supported_p. */
44093 static bool
44094 ix86_scalar_mode_supported_p (enum machine_mode mode)
44095 {
44096 if (DECIMAL_FLOAT_MODE_P (mode))
44097 return default_decimal_float_supported_p ();
44098 else if (mode == TFmode)
44099 return true;
44100 else
44101 return default_scalar_mode_supported_p (mode);
44102 }
44103
44104 /* Implements target hook vector_mode_supported_p. */
44105 static bool
44106 ix86_vector_mode_supported_p (enum machine_mode mode)
44107 {
44108 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
44109 return true;
44110 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
44111 return true;
44112 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
44113 return true;
44114 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
44115 return true;
44116 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
44117 return true;
44118 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
44119 return true;
44120 return false;
44121 }
44122
44123 /* Implement target hook libgcc_floating_mode_supported_p. */
44124 static bool
44125 ix86_libgcc_floating_mode_supported_p (enum machine_mode mode)
44126 {
44127 switch (mode)
44128 {
44129 case SFmode:
44130 case DFmode:
44131 case XFmode:
44132 return true;
44133
44134 case TFmode:
44135 #ifdef IX86_NO_LIBGCC_TFMODE
44136 return false;
44137 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
44138 return TARGET_LONG_DOUBLE_128;
44139 #else
44140 return true;
44141 #endif
44142
44143 default:
44144 return false;
44145 }
44146 }
44147
44148 /* Target hook for c_mode_for_suffix. */
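/* A 'q' suffix on a floating constant (e.g. 1.0q) selects TFmode (__float128);
   a 'w' suffix (e.g. 1.0w) selects XFmode (__float80).  */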
44149 static enum machine_mode
44150 ix86_c_mode_for_suffix (char suffix)
44151 {
44152 if (suffix == 'q')
44153 return TFmode;
44154 if (suffix == 'w')
44155 return XFmode;
44156
44157 return VOIDmode;
44158 }
44159
44160 /* Worker function for TARGET_MD_ASM_CLOBBERS.
44161
44162 We do this in the new i386 backend to maintain source compatibility
44163 with the old cc0-based compiler. */
44164
44165 static tree
44166 ix86_md_asm_clobbers (tree, tree, tree clobbers)
44167 {
44168 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
44169 clobbers);
44170 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
44171 clobbers);
44172 return clobbers;
44173 }
44174
44175 /* Implements target vector targetm.asm.encode_section_info. */
44176
44177 static void ATTRIBUTE_UNUSED
44178 ix86_encode_section_info (tree decl, rtx rtl, int first)
44179 {
44180 default_encode_section_info (decl, rtl, first);
44181
44182 if (TREE_CODE (decl) == VAR_DECL
44183 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
44184 && ix86_in_large_data_p (decl))
44185 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
44186 }
44187
44188 /* Worker function for REVERSE_CONDITION. */
44189
44190 enum rtx_code
44191 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
44192 {
44193 return (mode != CCFPmode && mode != CCFPUmode
44194 ? reverse_condition (code)
44195 : reverse_condition_maybe_unordered (code));
44196 }
44197
44198 /* Output code to perform an x87 FP register move, from OPERANDS[1]
44199 to OPERANDS[0]. */
44200
44201 const char *
44202 output_387_reg_move (rtx insn, rtx *operands)
44203 {
44204 if (REG_P (operands[0]))
44205 {
44206 if (REG_P (operands[1])
44207 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
44208 {
44209 if (REGNO (operands[0]) == FIRST_STACK_REG)
44210 return output_387_ffreep (operands, 0);
44211 return "fstp\t%y0";
44212 }
44213 if (STACK_TOP_P (operands[0]))
44214 return "fld%Z1\t%y1";
44215 return "fst\t%y0";
44216 }
44217 else if (MEM_P (operands[0]))
44218 {
44219 gcc_assert (REG_P (operands[1]));
44220 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
44221 return "fstp%Z0\t%y0";
44222 else
44223 {
44224 /* There is no non-popping store to memory for XFmode.
44225 So if we need one, follow the store with a load. */
44226 if (GET_MODE (operands[0]) == XFmode)
44227 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
44228 else
44229 return "fst%Z0\t%y0";
44230 }
44231 }
44232 else
44233 gcc_unreachable ();
44234 }
44235
44236 /* Output code to perform a conditional jump to LABEL, if C2 flag in
44237 FP status register is set. */
44238
44239 void
44240 ix86_emit_fp_unordered_jump (rtx label)
44241 {
44242 rtx reg = gen_reg_rtx (HImode);
44243 rtx temp;
44244
44245 emit_insn (gen_x86_fnstsw_1 (reg));
44246
44247 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
44248 {
44249 emit_insn (gen_x86_sahf_1 (reg));
44250
44251 temp = gen_rtx_REG (CCmode, FLAGS_REG);
44252 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
44253 }
44254 else
44255 {
44256 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
44257
44258 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
44259 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
44260 }
44261
44262 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
44263 gen_rtx_LABEL_REF (VOIDmode, label),
44264 pc_rtx);
44265 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
44266
44267 emit_jump_insn (temp);
44268 predict_jump (REG_BR_PROB_BASE * 10 / 100);
44269 }
44270
44271 /* Output code to perform a log1p XFmode calculation. */
44272
44273 void ix86_emit_i387_log1p (rtx op0, rtx op1)
44274 {
44275 rtx_code_label *label1 = gen_label_rtx ();
44276 rtx_code_label *label2 = gen_label_rtx ();
44277
44278 rtx tmp = gen_reg_rtx (XFmode);
44279 rtx tmp2 = gen_reg_rtx (XFmode);
44280 rtx test;
44281
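/* The x87 fyl2xp1 instruction is only specified for |op1| smaller than
   1 - sqrt(2)/2 (about 0.2928932...), so larger magnitudes fall back to
   computing fyl2x on 1.0 + op1 instead.  */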
44282 emit_insn (gen_absxf2 (tmp, op1));
44283 test = gen_rtx_GE (VOIDmode, tmp,
44284 CONST_DOUBLE_FROM_REAL_VALUE (
44285 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
44286 XFmode));
44287 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
44288
44289 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
44290 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
44291 emit_jump (label2);
44292
44293 emit_label (label1);
44294 emit_move_insn (tmp, CONST1_RTX (XFmode));
44295 emit_insn (gen_addxf3 (tmp, op1, tmp));
44296 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
44297 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
44298
44299 emit_label (label2);
44300 }
44301
44302 /* Emit x87 code for a round calculation: round(a) = sgn(a) * floor(fabs(a) + 0.5). */
44303 void ix86_emit_i387_round (rtx op0, rtx op1)
44304 {
44305 enum machine_mode inmode = GET_MODE (op1);
44306 enum machine_mode outmode = GET_MODE (op0);
44307 rtx e1, e2, res, tmp, tmp1, half;
44308 rtx scratch = gen_reg_rtx (HImode);
44309 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
44310 rtx_code_label *jump_label = gen_label_rtx ();
44311 rtx insn;
44312 rtx (*gen_abs) (rtx, rtx);
44313 rtx (*gen_neg) (rtx, rtx);
44314
44315 switch (inmode)
44316 {
44317 case SFmode:
44318 gen_abs = gen_abssf2;
44319 break;
44320 case DFmode:
44321 gen_abs = gen_absdf2;
44322 break;
44323 case XFmode:
44324 gen_abs = gen_absxf2;
44325 break;
44326 default:
44327 gcc_unreachable ();
44328 }
44329
44330 switch (outmode)
44331 {
44332 case SFmode:
44333 gen_neg = gen_negsf2;
44334 break;
44335 case DFmode:
44336 gen_neg = gen_negdf2;
44337 break;
44338 case XFmode:
44339 gen_neg = gen_negxf2;
44340 break;
44341 case HImode:
44342 gen_neg = gen_neghi2;
44343 break;
44344 case SImode:
44345 gen_neg = gen_negsi2;
44346 break;
44347 case DImode:
44348 gen_neg = gen_negdi2;
44349 break;
44350 default:
44351 gcc_unreachable ();
44352 }
44353
44354 e1 = gen_reg_rtx (inmode);
44355 e2 = gen_reg_rtx (inmode);
44356 res = gen_reg_rtx (outmode);
44357
44358 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
44359
44360 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
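/* E.g. round (-2.5): fabs gives 2.5, adding 0.5 gives 3.0, floor gives 3.0,
   and restoring the sign yields -3.0 (halfway cases rounded away from zero).  */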
44361
44362 /* scratch = fxam(op1) */
44363 emit_insn (gen_rtx_SET (VOIDmode, scratch,
44364 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
44365 UNSPEC_FXAM)));
44366 /* e1 = fabs(op1) */
44367 emit_insn (gen_abs (e1, op1));
44368
44369 /* e2 = e1 + 0.5 */
44370 half = force_reg (inmode, half);
44371 emit_insn (gen_rtx_SET (VOIDmode, e2,
44372 gen_rtx_PLUS (inmode, e1, half)));
44373
44374 /* res = floor(e2) */
44375 if (inmode != XFmode)
44376 {
44377 tmp1 = gen_reg_rtx (XFmode);
44378
44379 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
44380 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
44381 }
44382 else
44383 tmp1 = e2;
44384
44385 switch (outmode)
44386 {
44387 case SFmode:
44388 case DFmode:
44389 {
44390 rtx tmp0 = gen_reg_rtx (XFmode);
44391
44392 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
44393
44394 emit_insn (gen_rtx_SET (VOIDmode, res,
44395 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
44396 UNSPEC_TRUNC_NOOP)));
44397 }
44398 break;
44399 case XFmode:
44400 emit_insn (gen_frndintxf2_floor (res, tmp1));
44401 break;
44402 case HImode:
44403 emit_insn (gen_lfloorxfhi2 (res, tmp1));
44404 break;
44405 case SImode:
44406 emit_insn (gen_lfloorxfsi2 (res, tmp1));
44407 break;
44408 case DImode:
44409 emit_insn (gen_lfloorxfdi2 (res, tmp1));
44410 break;
44411 default:
44412 gcc_unreachable ();
44413 }
44414
44415 /* flags = signbit(a) */
44416 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
44417
44418 /* if (flags) then res = -res */
44419 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
44420 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
44421 gen_rtx_LABEL_REF (VOIDmode, jump_label),
44422 pc_rtx);
44423 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
44424 predict_jump (REG_BR_PROB_BASE * 50 / 100);
44425 JUMP_LABEL (insn) = jump_label;
44426
44427 emit_insn (gen_neg (res, res));
44428
44429 emit_label (jump_label);
44430 LABEL_NUSES (jump_label) = 1;
44431
44432 emit_move_insn (op0, res);
44433 }
44434
44435 /* Output code to perform a Newton-Raphson approximation of a single precision
44436 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
44437
44438 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
44439 {
44440 rtx x0, x1, e0, e1;
44441
44442 x0 = gen_reg_rtx (mode);
44443 e0 = gen_reg_rtx (mode);
44444 e1 = gen_reg_rtx (mode);
44445 x1 = gen_reg_rtx (mode);
44446
44447 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
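/* This is the Newton-Raphson step x1 = x0 * (2 - b * x0) for 1/b, with
   x0 = rcp(b), rewritten using only additions, subtractions and
   multiplications.  */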
44448
44449 b = force_reg (mode, b);
44450
44451 /* x0 = rcp(b) estimate */
44452 if (mode == V16SFmode || mode == V8DFmode)
44453 emit_insn (gen_rtx_SET (VOIDmode, x0,
44454 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
44455 UNSPEC_RCP14)));
44456 else
44457 emit_insn (gen_rtx_SET (VOIDmode, x0,
44458 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
44459 UNSPEC_RCP)));
44460
44461 /* e0 = x0 * b */
44462 emit_insn (gen_rtx_SET (VOIDmode, e0,
44463 gen_rtx_MULT (mode, x0, b)));
44464
44465 /* e0 = x0 * e0 */
44466 emit_insn (gen_rtx_SET (VOIDmode, e0,
44467 gen_rtx_MULT (mode, x0, e0)));
44468
44469 /* e1 = x0 + x0 */
44470 emit_insn (gen_rtx_SET (VOIDmode, e1,
44471 gen_rtx_PLUS (mode, x0, x0)));
44472
44473 /* x1 = e1 - e0 */
44474 emit_insn (gen_rtx_SET (VOIDmode, x1,
44475 gen_rtx_MINUS (mode, e1, e0)));
44476
44477 /* res = a * x1 */
44478 emit_insn (gen_rtx_SET (VOIDmode, res,
44479 gen_rtx_MULT (mode, a, x1)));
44480 }
44481
44482 /* Output code to perform a Newton-Raphson approximation of a
44483 single precision floating point [reciprocal] square root. */
44484
44485 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
44486 bool recip)
44487 {
44488 rtx x0, e0, e1, e2, e3, mthree, mhalf;
44489 REAL_VALUE_TYPE r;
44490 int unspec;
44491
44492 x0 = gen_reg_rtx (mode);
44493 e0 = gen_reg_rtx (mode);
44494 e1 = gen_reg_rtx (mode);
44495 e2 = gen_reg_rtx (mode);
44496 e3 = gen_reg_rtx (mode);
44497
44498 real_from_integer (&r, VOIDmode, -3, SIGNED);
44499 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
44500
44501 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
44502 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
44503 unspec = UNSPEC_RSQRT;
44504
44505 if (VECTOR_MODE_P (mode))
44506 {
44507 mthree = ix86_build_const_vector (mode, true, mthree);
44508 mhalf = ix86_build_const_vector (mode, true, mhalf);
44509 /* There is no 512-bit rsqrt. There is however rsqrt14. */
44510 if (GET_MODE_SIZE (mode) == 64)
44511 unspec = UNSPEC_RSQRT14;
44512 }
44513
44514 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
44515 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
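/* Both follow from the Newton-Raphson step x1 = 0.5 * x0 * (3 - a * x0 * x0)
   for 1/sqrt(a), with x0 = rsqrtss(a); multiplying once more by a turns the
   refined reciprocal square root into sqrt(a).  */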
44516
44517 a = force_reg (mode, a);
44518
44519 /* x0 = rsqrt(a) estimate */
44520 emit_insn (gen_rtx_SET (VOIDmode, x0,
44521 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
44522 unspec)));
44523
44524 /* If a == 0.0, zero the infinite rsqrt estimate to prevent NaN for sqrt(0.0). */
44525 if (!recip)
44526 {
44527 rtx zero, mask;
44528
44529 zero = gen_reg_rtx (mode);
44530 mask = gen_reg_rtx (mode);
44531
44532 zero = force_reg (mode, CONST0_RTX(mode));
44533
44534 /* Handle masked compare. */
44535 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
44536 {
44537 mask = gen_reg_rtx (HImode);
44538 /* Imm value 0x4 corresponds to not-equal comparison. */
44539 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
44540 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
44541 }
44542 else
44543 {
44544 emit_insn (gen_rtx_SET (VOIDmode, mask,
44545 gen_rtx_NE (mode, zero, a)));
44546
44547 emit_insn (gen_rtx_SET (VOIDmode, x0,
44548 gen_rtx_AND (mode, x0, mask)));
44549 }
44550 }
44551
44552 /* e0 = x0 * a */
44553 emit_insn (gen_rtx_SET (VOIDmode, e0,
44554 gen_rtx_MULT (mode, x0, a)));
44555 /* e1 = e0 * x0 */
44556 emit_insn (gen_rtx_SET (VOIDmode, e1,
44557 gen_rtx_MULT (mode, e0, x0)));
44558
44559 /* e2 = e1 - 3. */
44560 mthree = force_reg (mode, mthree);
44561 emit_insn (gen_rtx_SET (VOIDmode, e2,
44562 gen_rtx_PLUS (mode, e1, mthree)));
44563
44564 mhalf = force_reg (mode, mhalf);
44565 if (recip)
44566 /* e3 = -.5 * x0 */
44567 emit_insn (gen_rtx_SET (VOIDmode, e3,
44568 gen_rtx_MULT (mode, x0, mhalf)));
44569 else
44570 /* e3 = -.5 * e0 */
44571 emit_insn (gen_rtx_SET (VOIDmode, e3,
44572 gen_rtx_MULT (mode, e0, mhalf)));
44573 /* ret = e2 * e3 */
44574 emit_insn (gen_rtx_SET (VOIDmode, res,
44575 gen_rtx_MULT (mode, e2, e3)));
44576 }
44577
44578 #ifdef TARGET_SOLARIS
44579 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
44580
44581 static void
44582 i386_solaris_elf_named_section (const char *name, unsigned int flags,
44583 tree decl)
44584 {
44585 /* With Binutils 2.15, the "@unwind" marker must be specified on
44586 every occurrence of the ".eh_frame" section, not just the first
44587 one. */
44588 if (TARGET_64BIT
44589 && strcmp (name, ".eh_frame") == 0)
44590 {
44591 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
44592 flags & SECTION_WRITE ? "aw" : "a");
44593 return;
44594 }
44595
44596 #ifndef USE_GAS
44597 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
44598 {
44599 solaris_elf_asm_comdat_section (name, flags, decl);
44600 return;
44601 }
44602 #endif
44603
44604 default_elf_asm_named_section (name, flags, decl);
44605 }
44606 #endif /* TARGET_SOLARIS */
44607
44608 /* Return the mangling of TYPE if it is an extended fundamental type. */
44609
44610 static const char *
44611 ix86_mangle_type (const_tree type)
44612 {
44613 type = TYPE_MAIN_VARIANT (type);
44614
44615 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
44616 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
44617 return NULL;
44618
44619 switch (TYPE_MODE (type))
44620 {
44621 case TFmode:
44622 /* __float128 is "g". */
44623 return "g";
44624 case XFmode:
44625 /* "long double" or __float80 is "e". */
44626 return "e";
44627 default:
44628 return NULL;
44629 }
44630 }
44631
44632 /* For 32-bit code we can save PIC register setup by using
44633 __stack_chk_fail_local hidden function instead of calling
44634 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
44635 register, so it is better to call __stack_chk_fail directly. */
44636
44637 static tree ATTRIBUTE_UNUSED
44638 ix86_stack_protect_fail (void)
44639 {
44640 return TARGET_64BIT
44641 ? default_external_stack_protect_fail ()
44642 : default_hidden_stack_protect_fail ();
44643 }
44644
44645 /* Select a format to encode pointers in exception handling data. CODE
44646 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
44647 true if the symbol may be affected by dynamic relocations.
44648
44649 ??? All x86 object file formats are capable of representing this.
44650 After all, the relocation needed is the same as for the call insn.
44651 Whether or not a particular assembler allows us to enter such, I
44652 guess we'll have to see. */
44653 int
44654 asm_preferred_eh_data_format (int code, int global)
44655 {
44656 if (flag_pic)
44657 {
44658 int type = DW_EH_PE_sdata8;
44659 if (!TARGET_64BIT
44660 || ix86_cmodel == CM_SMALL_PIC
44661 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
44662 type = DW_EH_PE_sdata4;
44663 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
44664 }
44665 if (ix86_cmodel == CM_SMALL
44666 || (ix86_cmodel == CM_MEDIUM && code))
44667 return DW_EH_PE_udata4;
44668 return DW_EH_PE_absptr;
44669 }
44670 \f
44671 /* Expand copysign from SIGN to the positive value ABS_VALUE
44672 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
44673 the sign-bit. */
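/* The operation is done bitwise: SIGN is ANDed with the sign-bit mask to
   extract its sign bit, which is then ORed into ABS_VALUE (whose own sign
   bit is assumed to be clear).  */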
44674 static void
44675 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
44676 {
44677 enum machine_mode mode = GET_MODE (sign);
44678 rtx sgn = gen_reg_rtx (mode);
44679 if (mask == NULL_RTX)
44680 {
44681 enum machine_mode vmode;
44682
44683 if (mode == SFmode)
44684 vmode = V4SFmode;
44685 else if (mode == DFmode)
44686 vmode = V2DFmode;
44687 else
44688 vmode = mode;
44689
44690 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
44691 if (!VECTOR_MODE_P (mode))
44692 {
44693 /* We need to generate a scalar mode mask in this case. */
44694 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
44695 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
44696 mask = gen_reg_rtx (mode);
44697 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
44698 }
44699 }
44700 else
44701 mask = gen_rtx_NOT (mode, mask);
44702 emit_insn (gen_rtx_SET (VOIDmode, sgn,
44703 gen_rtx_AND (mode, mask, sign)));
44704 emit_insn (gen_rtx_SET (VOIDmode, result,
44705 gen_rtx_IOR (mode, abs_value, sgn)));
44706 }
44707
44708 /* Expand fabs (OP0) and return a new rtx that holds the result. The
44709 mask for masking out the sign-bit is stored in *SMASK, if that is
44710 non-null. */
44711 static rtx
44712 ix86_expand_sse_fabs (rtx op0, rtx *smask)
44713 {
44714 enum machine_mode vmode, mode = GET_MODE (op0);
44715 rtx xa, mask;
44716
44717 xa = gen_reg_rtx (mode);
44718 if (mode == SFmode)
44719 vmode = V4SFmode;
44720 else if (mode == DFmode)
44721 vmode = V2DFmode;
44722 else
44723 vmode = mode;
44724 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
44725 if (!VECTOR_MODE_P (mode))
44726 {
44727 /* We need to generate a scalar mode mask in this case. */
44728 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
44729 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
44730 mask = gen_reg_rtx (mode);
44731 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
44732 }
44733 emit_insn (gen_rtx_SET (VOIDmode, xa,
44734 gen_rtx_AND (mode, op0, mask)));
44735
44736 if (smask)
44737 *smask = mask;
44738
44739 return xa;
44740 }
44741
44742 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
44743 swapping the operands if SWAP_OPERANDS is true. The expanded
44744 code is a forward jump to a newly created label in case the
44745 comparison is true. The generated label rtx is returned. */
44746 static rtx_code_label *
44747 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
44748 bool swap_operands)
44749 {
44750 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
44751 rtx_code_label *label;
44752 rtx tmp;
44753
44754 if (swap_operands)
44755 {
44756 tmp = op0;
44757 op0 = op1;
44758 op1 = tmp;
44759 }
44760
44761 label = gen_label_rtx ();
44762 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
44763 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44764 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
44765 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
44766 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
44767 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
44768 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
44769 JUMP_LABEL (tmp) = label;
44770
44771 return label;
44772 }
44773
44774 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
44775 using comparison code CODE. Operands are swapped for the comparison if
44776 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
44777 static rtx
44778 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
44779 bool swap_operands)
44780 {
44781 rtx (*insn)(rtx, rtx, rtx, rtx);
44782 enum machine_mode mode = GET_MODE (op0);
44783 rtx mask = gen_reg_rtx (mode);
44784
44785 if (swap_operands)
44786 {
44787 rtx tmp = op0;
44788 op0 = op1;
44789 op1 = tmp;
44790 }
44791
44792 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
44793
44794 emit_insn (insn (mask, op0, op1,
44795 gen_rtx_fmt_ee (code, mode, op0, op1)));
44796 return mask;
44797 }
44798
44799 /* Generate and return a rtx of mode MODE for 2**n where n is the number
44800 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
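/* For DFmode this is 2**52 and for SFmode 2**23: the smallest magnitude at
   which every representable value is already an integer, which is why the
   rounding expanders below bail out early for inputs at least that large.  */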
44801 static rtx
44802 ix86_gen_TWO52 (enum machine_mode mode)
44803 {
44804 REAL_VALUE_TYPE TWO52r;
44805 rtx TWO52;
44806
44807 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
44808 TWO52 = const_double_from_real_value (TWO52r, mode);
44809 TWO52 = force_reg (mode, TWO52);
44810
44811 return TWO52;
44812 }
44813
44814 /* Expand SSE sequence for computing lround from OP1 storing
44815 into OP0. */
44816 void
44817 ix86_expand_lround (rtx op0, rtx op1)
44818 {
44819 /* C code for the stuff we're doing below:
44820 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
44821 return (long)tmp;
44822 */
44823 enum machine_mode mode = GET_MODE (op1);
44824 const struct real_format *fmt;
44825 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
44826 rtx adj;
44827
44828 /* load nextafter (0.5, 0.0) */
44829 fmt = REAL_MODE_FORMAT (mode);
44830 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
44831 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
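/* Using the largest representable value below 0.5 instead of 0.5 itself keeps
   inputs just under 0.5 (whose sum with 0.5 would round up to 1.0) from being
   rounded away from zero incorrectly.  */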
44832
44833 /* adj = copysign (0.5, op1) */
44834 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
44835 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
44836
44837 /* adj = op1 + adj */
44838 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
44839
44840 /* op0 = (imode)adj */
44841 expand_fix (op0, adj, 0);
44842 }
44843
44844 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
44845 into OP0. */
44846 void
44847 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
44848 {
44849 /* C code for the stuff we're doing below (for do_floor):
44850 xi = (long)op1;
44851 xi -= (double)xi > op1 ? 1 : 0;
44852 return xi;
44853 */
44854 enum machine_mode fmode = GET_MODE (op1);
44855 enum machine_mode imode = GET_MODE (op0);
44856 rtx ireg, freg, tmp;
44857 rtx_code_label *label;
44858
44859 /* reg = (long)op1 */
44860 ireg = gen_reg_rtx (imode);
44861 expand_fix (ireg, op1, 0);
44862
44863 /* freg = (double)reg */
44864 freg = gen_reg_rtx (fmode);
44865 expand_float (freg, ireg, 0);
44866
44867 /* ireg = (freg > op1) ? ireg - 1 : ireg */
44868 label = ix86_expand_sse_compare_and_jump (UNLE,
44869 freg, op1, !do_floor);
44870 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
44871 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
44872 emit_move_insn (ireg, tmp);
44873
44874 emit_label (label);
44875 LABEL_NUSES (label) = 1;
44876
44877 emit_move_insn (op0, ireg);
44878 }
44879
44880 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
44881 result in OPERAND0. */
44882 void
44883 ix86_expand_rint (rtx operand0, rtx operand1)
44884 {
44885 /* C code for the stuff we're doing below:
44886 xa = fabs (operand1);
44887 if (!isless (xa, 2**52))
44888 return operand1;
44889 xa = xa + 2**52 - 2**52;
44890 return copysign (xa, operand1);
44891 */
44892 enum machine_mode mode = GET_MODE (operand0);
44893 rtx res, xa, TWO52, mask;
44894 rtx_code_label *label;
44895
44896 res = gen_reg_rtx (mode);
44897 emit_move_insn (res, operand1);
44898
44899 /* xa = abs (operand1) */
44900 xa = ix86_expand_sse_fabs (res, &mask);
44901
44902 /* if (!isless (xa, TWO52)) goto label; */
44903 TWO52 = ix86_gen_TWO52 (mode);
44904 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
44905
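/* Adding 2**52 pushes the fraction bits out of the significand, so the
   addition itself rounds XA to an integer in the current rounding mode;
   subtracting 2**52 then restores the magnitude.  */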
44906 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
44907 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
44908
44909 ix86_sse_copysign_to_positive (res, xa, res, mask);
44910
44911 emit_label (label);
44912 LABEL_NUSES (label) = 1;
44913
44914 emit_move_insn (operand0, res);
44915 }
44916
44917 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
44918 into OPERAND0. */
44919 void
44920 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
44921 {
44922 /* C code for the stuff we expand below.
44923 double xa = fabs (x), x2;
44924 if (!isless (xa, TWO52))
44925 return x;
44926 xa = xa + TWO52 - TWO52;
44927 x2 = copysign (xa, x);
44928 Compensate. Floor:
44929 if (x2 > x)
44930 x2 -= 1;
44931 Compensate. Ceil:
44932 if (x2 < x)
44933 x2 -= -1;
44934 return x2;
44935 */
44936 enum machine_mode mode = GET_MODE (operand0);
44937 rtx xa, TWO52, tmp, one, res, mask;
44938 rtx_code_label *label;
44939
44940 TWO52 = ix86_gen_TWO52 (mode);
44941
44942 /* Temporary for holding the result, initialized to the input
44943 operand to ease control flow. */
44944 res = gen_reg_rtx (mode);
44945 emit_move_insn (res, operand1);
44946
44947 /* xa = abs (operand1) */
44948 xa = ix86_expand_sse_fabs (res, &mask);
44949
44950 /* if (!isless (xa, TWO52)) goto label; */
44951 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
44952
44953 /* xa = xa + TWO52 - TWO52; */
44954 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
44955 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
44956
44957 /* xa = copysign (xa, operand1) */
44958 ix86_sse_copysign_to_positive (xa, xa, res, mask);
44959
44960 /* generate 1.0 or -1.0 */
44961 one = force_reg (mode,
44962 const_double_from_real_value (do_floor
44963 ? dconst1 : dconstm1, mode));
44964
44965 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
44966 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
44967 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44968 gen_rtx_AND (mode, one, tmp)));
44969 /* We always need to subtract here to preserve signed zero. */
44970 tmp = expand_simple_binop (mode, MINUS,
44971 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
44972 emit_move_insn (res, tmp);
44973
44974 emit_label (label);
44975 LABEL_NUSES (label) = 1;
44976
44977 emit_move_insn (operand0, res);
44978 }
44979
44980 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
44981 into OPERAND0. */
44982 void
44983 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
44984 {
44985 /* C code for the stuff we expand below.
44986 double xa = fabs (x), x2;
44987 if (!isless (xa, TWO52))
44988 return x;
44989 x2 = (double)(long)x;
44990 Compensate. Floor:
44991 if (x2 > x)
44992 x2 -= 1;
44993 Compensate. Ceil:
44994 if (x2 < x)
44995 x2 += 1;
44996 if (HONOR_SIGNED_ZEROS (mode))
44997 return copysign (x2, x);
44998 return x2;
44999 */
45000 enum machine_mode mode = GET_MODE (operand0);
45001 rtx xa, xi, TWO52, tmp, one, res, mask;
45002 rtx_code_label *label;
45003
45004 TWO52 = ix86_gen_TWO52 (mode);
45005
45006 /* Temporary for holding the result, initialized to the input
45007 operand to ease control flow. */
45008 res = gen_reg_rtx (mode);
45009 emit_move_insn (res, operand1);
45010
45011 /* xa = abs (operand1) */
45012 xa = ix86_expand_sse_fabs (res, &mask);
45013
45014 /* if (!isless (xa, TWO52)) goto label; */
45015 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45016
45017 /* xa = (double)(long)x */
45018 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
45019 expand_fix (xi, res, 0);
45020 expand_float (xa, xi, 0);
45021
45022 /* generate 1.0 */
45023 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
45024
45025 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
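/* The compare mask is all-ones where the condition holds, so ANDing it with
   1.0 yields exactly 1.0 or 0.0 to subtract (or add, for ceil).  */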
45026 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
45027 emit_insn (gen_rtx_SET (VOIDmode, tmp,
45028 gen_rtx_AND (mode, one, tmp)));
45029 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
45030 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
45031 emit_move_insn (res, tmp);
45032
45033 if (HONOR_SIGNED_ZEROS (mode))
45034 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
45035
45036 emit_label (label);
45037 LABEL_NUSES (label) = 1;
45038
45039 emit_move_insn (operand0, res);
45040 }
45041
45042 /* Expand SSE sequence for computing round from OPERAND1 storing
45043 into OPERAND0. Sequence that works without relying on DImode truncation
45044 via cvttsd2siq, which is only available on 64-bit targets. */
45045 void
45046 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
45047 {
45048 /* C code for the stuff we expand below.
45049 double xa = fabs (x), xa2, x2;
45050 if (!isless (xa, TWO52))
45051 return x;
45052 Using the absolute value and copying back sign makes
45053 -0.0 -> -0.0 correct.
45054 xa2 = xa + TWO52 - TWO52;
45055 Compensate.
45056 dxa = xa2 - xa;
45057 if (dxa <= -0.5)
45058 xa2 += 1;
45059 else if (dxa > 0.5)
45060 xa2 -= 1;
45061 x2 = copysign (xa2, x);
45062 return x2;
45063 */
45064 enum machine_mode mode = GET_MODE (operand0);
45065 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
45066 rtx_code_label *label;
45067
45068 TWO52 = ix86_gen_TWO52 (mode);
45069
45070 /* Temporary for holding the result, initialized to the input
45071 operand to ease control flow. */
45072 res = gen_reg_rtx (mode);
45073 emit_move_insn (res, operand1);
45074
45075 /* xa = abs (operand1) */
45076 xa = ix86_expand_sse_fabs (res, &mask);
45077
45078 /* if (!isless (xa, TWO52)) goto label; */
45079 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45080
45081 /* xa2 = xa + TWO52 - TWO52; */
45082 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
45083 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
45084
45085 /* dxa = xa2 - xa; */
45086 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
45087
45088 /* generate 0.5, 1.0 and -0.5 */
45089 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
45090 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
45091 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
45092 0, OPTAB_DIRECT);
45093
45094 /* Compensate. */
45095 tmp = gen_reg_rtx (mode);
45096 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
45097 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
45098 emit_insn (gen_rtx_SET (VOIDmode, tmp,
45099 gen_rtx_AND (mode, one, tmp)));
45100 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
45101 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
45102 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
45103 emit_insn (gen_rtx_SET (VOIDmode, tmp,
45104 gen_rtx_AND (mode, one, tmp)));
45105 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
45106
45107 /* res = copysign (xa2, operand1) */
45108 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
45109
45110 emit_label (label);
45111 LABEL_NUSES (label) = 1;
45112
45113 emit_move_insn (operand0, res);
45114 }
45115
45116 /* Expand SSE sequence for computing trunc from OPERAND1 storing
45117 into OPERAND0. */
45118 void
45119 ix86_expand_trunc (rtx operand0, rtx operand1)
45120 {
45121 /* C code for SSE variant we expand below.
45122 double xa = fabs (x), x2;
45123 if (!isless (xa, TWO52))
45124 return x;
45125 x2 = (double)(long)x;
45126 if (HONOR_SIGNED_ZEROS (mode))
45127 return copysign (x2, x);
45128 return x2;
45129 */
45130 enum machine_mode mode = GET_MODE (operand0);
45131 rtx xa, xi, TWO52, res, mask;
45132 rtx_code_label *label;
45133
45134 TWO52 = ix86_gen_TWO52 (mode);
45135
45136 /* Temporary for holding the result, initialized to the input
45137 operand to ease control flow. */
45138 res = gen_reg_rtx (mode);
45139 emit_move_insn (res, operand1);
45140
45141 /* xa = abs (operand1) */
45142 xa = ix86_expand_sse_fabs (res, &mask);
45143
45144 /* if (!isless (xa, TWO52)) goto label; */
45145 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45146
45147 /* x = (double)(long)x */
45148 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
45149 expand_fix (xi, res, 0);
45150 expand_float (res, xi, 0);
45151
45152 if (HONOR_SIGNED_ZEROS (mode))
45153 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
45154
45155 emit_label (label);
45156 LABEL_NUSES (label) = 1;
45157
45158 emit_move_insn (operand0, res);
45159 }
45160
45161 /* Expand SSE sequence for computing trunc from OPERAND1 storing
45162 into OPERAND0. */
45163 void
45164 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
45165 {
45166 enum machine_mode mode = GET_MODE (operand0);
45167 rtx xa, mask, TWO52, one, res, smask, tmp;
45168 rtx_code_label *label;
45169
45170 /* C code for SSE variant we expand below.
45171 double xa = fabs (x), x2;
45172 if (!isless (xa, TWO52))
45173 return x;
45174 xa2 = xa + TWO52 - TWO52;
45175 Compensate:
45176 if (xa2 > xa)
45177 xa2 -= 1.0;
45178 x2 = copysign (xa2, x);
45179 return x2;
45180 */
45181
45182 TWO52 = ix86_gen_TWO52 (mode);
45183
45184 /* Temporary for holding the result, initialized to the input
45185 operand to ease control flow. */
45186 res = gen_reg_rtx (mode);
45187 emit_move_insn (res, operand1);
45188
45189 /* xa = abs (operand1) */
45190 xa = ix86_expand_sse_fabs (res, &smask);
45191
45192 /* if (!isless (xa, TWO52)) goto label; */
45193 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45194
45195 /* res = xa + TWO52 - TWO52; */
45196 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
45197 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
45198 emit_move_insn (res, tmp);
45199
45200 /* generate 1.0 */
45201 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
45202
45203 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
45204 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
45205 emit_insn (gen_rtx_SET (VOIDmode, mask,
45206 gen_rtx_AND (mode, mask, one)));
45207 tmp = expand_simple_binop (mode, MINUS,
45208 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
45209 emit_move_insn (res, tmp);
45210
45211 /* res = copysign (res, operand1) */
45212 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
45213
45214 emit_label (label);
45215 LABEL_NUSES (label) = 1;
45216
45217 emit_move_insn (operand0, res);
45218 }
45219
45220 /* Expand SSE sequence for computing round from OPERAND1 storing
45221 into OPERAND0. */
45222 void
45223 ix86_expand_round (rtx operand0, rtx operand1)
45224 {
45225 /* C code for the stuff we're doing below:
45226 double xa = fabs (x);
45227 if (!isless (xa, TWO52))
45228 return x;
45229 xa = (double)(long)(xa + nextafter (0.5, 0.0));
45230 return copysign (xa, x);
45231 */
45232 enum machine_mode mode = GET_MODE (operand0);
45233 rtx res, TWO52, xa, xi, half, mask;
45234 rtx_code_label *label;
45235 const struct real_format *fmt;
45236 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
45237
45238 /* Temporary for holding the result, initialized to the input
45239 operand to ease control flow. */
45240 res = gen_reg_rtx (mode);
45241 emit_move_insn (res, operand1);
45242
45243 TWO52 = ix86_gen_TWO52 (mode);
45244 xa = ix86_expand_sse_fabs (res, &mask);
45245 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45246
45247 /* load nextafter (0.5, 0.0) */
45248 fmt = REAL_MODE_FORMAT (mode);
45249 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
45250 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
45251
45252 /* xa = xa + 0.5 */
45253 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
45254 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
45255
45256 /* xa = (double)(int64_t)xa */
45257 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
45258 expand_fix (xi, xa, 0);
45259 expand_float (xa, xi, 0);
45260
45261 /* res = copysign (xa, operand1) */
45262 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
45263
45264 emit_label (label);
45265 LABEL_NUSES (label) = 1;
45266
45267 emit_move_insn (operand0, res);
45268 }
45269
45270 /* Expand SSE sequence for computing round
45271 from OP1 storing into OP0 using sse4 round insn. */
45272 void
45273 ix86_expand_round_sse4 (rtx op0, rtx op1)
45274 {
45275 enum machine_mode mode = GET_MODE (op0);
45276 rtx e1, e2, res, half;
45277 const struct real_format *fmt;
45278 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
45279 rtx (*gen_copysign) (rtx, rtx, rtx);
45280 rtx (*gen_round) (rtx, rtx, rtx);
45281
45282 switch (mode)
45283 {
45284 case SFmode:
45285 gen_copysign = gen_copysignsf3;
45286 gen_round = gen_sse4_1_roundsf2;
45287 break;
45288 case DFmode:
45289 gen_copysign = gen_copysigndf3;
45290 gen_round = gen_sse4_1_rounddf2;
45291 break;
45292 default:
45293 gcc_unreachable ();
45294 }
45295
45296 /* round (a) = trunc (a + copysign (0.5, a)) */
45297
45298 /* load nextafter (0.5, 0.0) */
45299 fmt = REAL_MODE_FORMAT (mode);
45300 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
45301 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
45302 half = const_double_from_real_value (pred_half, mode);
45303
45304 /* e1 = copysign (0.5, op1) */
45305 e1 = gen_reg_rtx (mode);
45306 emit_insn (gen_copysign (e1, half, op1));
45307
45308 /* e2 = op1 + e1 */
45309 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
45310
45311 /* res = trunc (e2) */
45312 res = gen_reg_rtx (mode);
45313 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
45314
45315 emit_move_insn (op0, res);
45316 }
45317 \f
45318
45319 /* Table of valid machine attributes. */
45320 static const struct attribute_spec ix86_attribute_table[] =
45321 {
45322 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
45323 affects_type_identity } */
45324 /* Stdcall attribute says callee is responsible for popping arguments
45325 if they are not variable. */
45326 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
45327 true },
45328 /* Fastcall attribute says callee is responsible for popping arguments
45329 if they are not variable. */
45330 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
45331 true },
45332 /* Thiscall attribute says callee is responsible for popping arguments
45333 if they are not variable. */
45334 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
45335 true },
45336 /* Cdecl attribute says the callee is a normal C declaration */
45337 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
45338 true },
45339 /* Regparm attribute specifies how many integer arguments are to be
45340 passed in registers. */
45341 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
45342 true },
45343 /* Sseregparm attribute says we are using x86_64 calling conventions
45344 for FP arguments. */
45345 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
45346 true },
45347 /* The transactional memory builtins are implicitly regparm or fastcall
45348 depending on the ABI. Override the generic do-nothing attribute that
45349 these builtins were declared with. */
45350 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
45351 true },
45352 /* force_align_arg_pointer says this function realigns the stack at entry. */
45353 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
45354 false, true, true, ix86_handle_cconv_attribute, false },
45355 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
45356 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
45357 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
45358 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
45359 false },
45360 #endif
45361 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
45362 false },
45363 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
45364 false },
45365 #ifdef SUBTARGET_ATTRIBUTE_TABLE
45366 SUBTARGET_ATTRIBUTE_TABLE,
45367 #endif
45368 /* ms_abi and sysv_abi calling convention function attributes. */
45369 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
45370 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
45371 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
45372 false },
45373 { "callee_pop_aggregate_return", 1, 1, false, true, true,
45374 ix86_handle_callee_pop_aggregate_return, true },
45375 /* End element. */
45376 { NULL, 0, 0, false, false, false, NULL, false }
45377 };
45378
45379 /* Implement targetm.vectorize.builtin_vectorization_cost. */
45380 static int
45381 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
45382 tree vectype, int)
45383 {
45384 unsigned elements;
45385
45386 switch (type_of_cost)
45387 {
45388 case scalar_stmt:
45389 return ix86_cost->scalar_stmt_cost;
45390
45391 case scalar_load:
45392 return ix86_cost->scalar_load_cost;
45393
45394 case scalar_store:
45395 return ix86_cost->scalar_store_cost;
45396
45397 case vector_stmt:
45398 return ix86_cost->vec_stmt_cost;
45399
45400 case vector_load:
45401 return ix86_cost->vec_align_load_cost;
45402
45403 case vector_store:
45404 return ix86_cost->vec_store_cost;
45405
45406 case vec_to_scalar:
45407 return ix86_cost->vec_to_scalar_cost;
45408
45409 case scalar_to_vec:
45410 return ix86_cost->scalar_to_vec_cost;
45411
45412 case unaligned_load:
45413 case unaligned_store:
45414 return ix86_cost->vec_unalign_load_cost;
45415
45416 case cond_branch_taken:
45417 return ix86_cost->cond_taken_branch_cost;
45418
45419 case cond_branch_not_taken:
45420 return ix86_cost->cond_not_taken_branch_cost;
45421
45422 case vec_perm:
45423 case vec_promote_demote:
45424 return ix86_cost->vec_stmt_cost;
45425
45426 case vec_construct:
45427 elements = TYPE_VECTOR_SUBPARTS (vectype);
45428 return elements / 2 + 1;
45429
45430 default:
45431 gcc_unreachable ();
45432 }
45433 }
45434
45435 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
45436 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
45437 insn every time. */
45438
45439 static GTY(()) rtx_insn *vselect_insn;
45440
45441 /* Initialize vselect_insn. */
45442
45443 static void
45444 init_vselect_insn (void)
45445 {
45446 unsigned i;
45447 rtx x;
45448
45449 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
45450 for (i = 0; i < MAX_VECT_LEN; ++i)
45451 XVECEXP (x, 0, i) = const0_rtx;
45452 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
45453 const0_rtx), x);
45454 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
45455 start_sequence ();
45456 vselect_insn = emit_insn (x);
45457 end_sequence ();
45458 }
45459
45460 /* Construct (set target (vec_select op0 (parallel perm))) and
45461 return true if that's a valid instruction in the active ISA. */
45462
45463 static bool
45464 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
45465 unsigned nelt, bool testing_p)
45466 {
45467 unsigned int i;
45468 rtx x, save_vconcat;
45469 int icode;
45470
45471 if (vselect_insn == NULL_RTX)
45472 init_vselect_insn ();
45473
45474 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
45475 PUT_NUM_ELEM (XVEC (x, 0), nelt);
45476 for (i = 0; i < nelt; ++i)
45477 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
45478 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
45479 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
45480 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
45481 SET_DEST (PATTERN (vselect_insn)) = target;
45482 icode = recog_memoized (vselect_insn);
45483
45484 if (icode >= 0 && !testing_p)
45485 emit_insn (copy_rtx (PATTERN (vselect_insn)));
45486
45487 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
45488 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
45489 INSN_CODE (vselect_insn) = -1;
45490
45491 return icode >= 0;
45492 }
45493
45494 /* Similar, but generate a vec_concat from op0 and op1 as well. */
45495
45496 static bool
45497 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
45498 const unsigned char *perm, unsigned nelt,
45499 bool testing_p)
45500 {
45501 enum machine_mode v2mode;
45502 rtx x;
45503 bool ok;
45504
45505 if (vselect_insn == NULL_RTX)
45506 init_vselect_insn ();
45507
45508 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
45509 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
45510 PUT_MODE (x, v2mode);
45511 XEXP (x, 0) = op0;
45512 XEXP (x, 1) = op1;
45513 ok = expand_vselect (target, x, perm, nelt, testing_p);
45514 XEXP (x, 0) = const0_rtx;
45515 XEXP (x, 1) = const0_rtx;
45516 return ok;
45517 }
45518
45519 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
45520 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
45521
45522 static bool
45523 expand_vec_perm_blend (struct expand_vec_perm_d *d)
45524 {
45525 enum machine_mode vmode = d->vmode;
45526 unsigned i, mask, nelt = d->nelt;
45527 rtx target, op0, op1, x;
45528 rtx rperm[32], vperm;
45529
45530 if (d->one_operand_p)
45531 return false;
45532 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
45533 && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
45534 ;
45535 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
45536 ;
45537 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
45538 ;
45539 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
45540 ;
45541 else
45542 return false;
45543
45544 /* This is a blend, not a permute. Elements must stay in their
45545 respective lanes. */
45546 for (i = 0; i < nelt; ++i)
45547 {
45548 unsigned e = d->perm[i];
45549 if (!(e == i || e == i + nelt))
45550 return false;
45551 }
45552
45553 if (d->testing_p)
45554 return true;
45555
45556 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
45557 decision should be extracted elsewhere, so that we only try that
45558 sequence once all budget==3 options have been tried. */
45559 target = d->target;
45560 op0 = d->op0;
45561 op1 = d->op1;
45562 mask = 0;
45563
45564 switch (vmode)
45565 {
45566 case V8DFmode:
45567 case V16SFmode:
45568 case V4DFmode:
45569 case V8SFmode:
45570 case V2DFmode:
45571 case V4SFmode:
45572 case V8HImode:
45573 case V8SImode:
45574 case V32HImode:
45575 case V64QImode:
45576 case V16SImode:
45577 case V8DImode:
45578 for (i = 0; i < nelt; ++i)
45579 mask |= (d->perm[i] >= nelt) << i;
45580 break;
45581
45582 case V2DImode:
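/* Emulate a 64-bit-element blend with pblendw by selecting all four
   16-bit pieces of each 64-bit element.  */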
45583 for (i = 0; i < 2; ++i)
45584 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
45585 vmode = V8HImode;
45586 goto do_subreg;
45587
45588 case V4SImode:
45589 for (i = 0; i < 4; ++i)
45590 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
45591 vmode = V8HImode;
45592 goto do_subreg;
45593
45594 case V16QImode:
45595 /* See if bytes move in pairs so we can use pblendw with
45596 an immediate argument, rather than pblendvb with a vector
45597 argument. */
45598 for (i = 0; i < 16; i += 2)
45599 if (d->perm[i] + 1 != d->perm[i + 1])
45600 {
45601 use_pblendvb:
45602 for (i = 0; i < nelt; ++i)
45603 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
45604
45605 finish_pblendvb:
45606 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
45607 vperm = force_reg (vmode, vperm);
45608
45609 if (GET_MODE_SIZE (vmode) == 16)
45610 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
45611 else
45612 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
45613 if (target != d->target)
45614 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
45615 return true;
45616 }
45617
45618 for (i = 0; i < 8; ++i)
45619 mask |= (d->perm[i * 2] >= 16) << i;
45620 vmode = V8HImode;
45621 /* FALLTHRU */
45622
45623 do_subreg:
45624 target = gen_reg_rtx (vmode);
45625 op0 = gen_lowpart (vmode, op0);
45626 op1 = gen_lowpart (vmode, op1);
45627 break;
45628
45629 case V32QImode:
45630 /* See if bytes move in pairs. If not, vpblendvb must be used. */
45631 for (i = 0; i < 32; i += 2)
45632 if (d->perm[i] + 1 != d->perm[i + 1])
45633 goto use_pblendvb;
45634 /* See if bytes move in quadruplets. If yes, vpblendd
45635 with immediate can be used. */
45636 for (i = 0; i < 32; i += 4)
45637 if (d->perm[i] + 2 != d->perm[i + 2])
45638 break;
45639 if (i < 32)
45640 {
45641 /* See if bytes move the same in both lanes. If yes,
45642 vpblendw with immediate can be used. */
45643 for (i = 0; i < 16; i += 2)
45644 if (d->perm[i] + 16 != d->perm[i + 16])
45645 goto use_pblendvb;
45646
45647 /* Use vpblendw. */
45648 for (i = 0; i < 16; ++i)
45649 mask |= (d->perm[i * 2] >= 32) << i;
45650 vmode = V16HImode;
45651 goto do_subreg;
45652 }
45653
45654 /* Use vpblendd. */
45655 for (i = 0; i < 8; ++i)
45656 mask |= (d->perm[i * 4] >= 32) << i;
45657 vmode = V8SImode;
45658 goto do_subreg;
45659
45660 case V16HImode:
45661 /* See if words move in pairs. If yes, vpblendd can be used. */
45662 for (i = 0; i < 16; i += 2)
45663 if (d->perm[i] + 1 != d->perm[i + 1])
45664 break;
45665 if (i < 16)
45666 {
45667 /* See if words move the same in both lanes. If not,
45668 vpblendvb must be used. */
45669 for (i = 0; i < 8; i++)
45670 if (d->perm[i] + 8 != d->perm[i + 8])
45671 {
45672 /* Use vpblendvb. */
45673 for (i = 0; i < 32; ++i)
45674 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
45675
45676 vmode = V32QImode;
45677 nelt = 32;
45678 target = gen_reg_rtx (vmode);
45679 op0 = gen_lowpart (vmode, op0);
45680 op1 = gen_lowpart (vmode, op1);
45681 goto finish_pblendvb;
45682 }
45683
45684 /* Use vpblendw. */
45685 for (i = 0; i < 16; ++i)
45686 mask |= (d->perm[i] >= 16) << i;
45687 break;
45688 }
45689
45690 /* Use vpblendd. */
45691 for (i = 0; i < 8; ++i)
45692 mask |= (d->perm[i * 2] >= 16) << i;
45693 vmode = V8SImode;
45694 goto do_subreg;
45695
45696 case V4DImode:
45697 /* Use vpblendd. */
45698 for (i = 0; i < 4; ++i)
45699 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
45700 vmode = V8SImode;
45701 goto do_subreg;
45702
45703 default:
45704 gcc_unreachable ();
45705 }
45706
45707 /* This matches five different patterns with the different modes. */
45708 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
45709 x = gen_rtx_SET (VOIDmode, target, x);
45710 emit_insn (x);
45711 if (target != d->target)
45712 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
45713
45714 return true;
45715 }
45716
45717 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
45718 in terms of the variable form of vpermilps.
45719
45720 Note that we will have already failed the immediate input vpermilps,
45721 which requires that the high and low part shuffle be identical; the
45722 variable form doesn't require that. */
45723
45724 static bool
45725 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
45726 {
45727 rtx rperm[8], vperm;
45728 unsigned i;
45729
45730 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
45731 return false;
45732
45733 /* We can only permute within the 128-bit lane. */
45734 for (i = 0; i < 8; ++i)
45735 {
45736 unsigned e = d->perm[i];
45737 if (i < 4 ? e >= 4 : e < 4)
45738 return false;
45739 }
45740
45741 if (d->testing_p)
45742 return true;
45743
45744 for (i = 0; i < 8; ++i)
45745 {
45746 unsigned e = d->perm[i];
45747
45748 /* Within each 128-bit lane, the elements of op0 are numbered
45749 from 0 and the elements of op1 are numbered from 4. */
45750 if (e >= 8 + 4)
45751 e -= 8;
45752 else if (e >= 4)
45753 e -= 4;
45754
45755 rperm[i] = GEN_INT (e);
45756 }
45757
45758 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
45759 vperm = force_reg (V8SImode, vperm);
45760 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
45761
45762 return true;
45763 }
45764
45765 /* Return true if permutation D can be performed as VMODE permutation
45766 instead. */
45767
45768 static bool
45769 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
45770 {
45771 unsigned int i, j, chunk;
45772
45773 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
45774 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
45775 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
45776 return false;
45777
45778 if (GET_MODE_NUNITS (vmode) >= d->nelt)
45779 return true;
45780
45781 chunk = d->nelt / GET_MODE_NUNITS (vmode);
45782 for (i = 0; i < d->nelt; i += chunk)
45783 if (d->perm[i] & (chunk - 1))
45784 return false;
45785 else
45786 for (j = 1; j < chunk; ++j)
45787 if (d->perm[i] + j != d->perm[i + j])
45788 return false;
45789
45790 return true;
45791 }
45792
45793 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
45794 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
45795
45796 static bool
45797 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
45798 {
45799 unsigned i, nelt, eltsz, mask;
45800 unsigned char perm[64];
45801 enum machine_mode vmode = V16QImode;
45802 rtx rperm[64], vperm, target, op0, op1;
45803
45804 nelt = d->nelt;
45805
45806 if (!d->one_operand_p)
45807 {
45808 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
45809 {
45810 if (TARGET_AVX2
45811 && valid_perm_using_mode_p (V2TImode, d))
45812 {
45813 if (d->testing_p)
45814 return true;
45815
45816 /* Use vperm2i128 insn. The pattern uses
45817 V4DImode instead of V2TImode. */
45818 target = d->target;
45819 if (d->vmode != V4DImode)
45820 target = gen_reg_rtx (V4DImode);
45821 op0 = gen_lowpart (V4DImode, d->op0);
45822 op1 = gen_lowpart (V4DImode, d->op1);
45823 rperm[0]
45824 = GEN_INT ((d->perm[0] / (nelt / 2))
45825 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
45826 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
45827 if (target != d->target)
45828 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
45829 return true;
45830 }
45831 return false;
45832 }
45833 }
45834 else
45835 {
45836 if (GET_MODE_SIZE (d->vmode) == 16)
45837 {
45838 if (!TARGET_SSSE3)
45839 return false;
45840 }
45841 else if (GET_MODE_SIZE (d->vmode) == 32)
45842 {
45843 if (!TARGET_AVX2)
45844 return false;
45845
45846 /* V4DImode should be already handled through
45847 expand_vselect by vpermq instruction. */
45848 gcc_assert (d->vmode != V4DImode);
45849
45850 vmode = V32QImode;
45851 if (d->vmode == V8SImode
45852 || d->vmode == V16HImode
45853 || d->vmode == V32QImode)
45854 {
45855 /* First see if vpermq can be used for
45856 V8SImode/V16HImode/V32QImode. */
45857 if (valid_perm_using_mode_p (V4DImode, d))
45858 {
45859 for (i = 0; i < 4; i++)
45860 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
45861 if (d->testing_p)
45862 return true;
45863 target = gen_reg_rtx (V4DImode);
45864 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
45865 perm, 4, false))
45866 {
45867 emit_move_insn (d->target,
45868 gen_lowpart (d->vmode, target));
45869 return true;
45870 }
45871 return false;
45872 }
45873
45874 /* Next see if vpermd can be used. */
45875 if (valid_perm_using_mode_p (V8SImode, d))
45876 vmode = V8SImode;
45877 }
45878 /* Or if vpermps can be used. */
45879 else if (d->vmode == V8SFmode)
45880 vmode = V8SImode;
45881
45882 if (vmode == V32QImode)
45883 {
45884 /* vpshufb only shuffles within 128-bit lanes; it cannot
45885 move bytes between lanes. */
45886 for (i = 0; i < nelt; ++i)
45887 if ((d->perm[i] ^ i) & (nelt / 2))
45888 return false;
45889 }
45890 }
45891 else if (GET_MODE_SIZE (d->vmode) == 64)
45892 {
45893 if (!TARGET_AVX512BW)
45894 return false;
45895 if (vmode == V64QImode)
45896 {
45897 /* vpshufb only shuffles within 128-bit lanes; it cannot
45898 move bytes between lanes. */
45899 for (i = 0; i < nelt; ++i)
45900 if ((d->perm[i] ^ i) & (nelt / 4))
45901 return false;
45902 }
45903 }
45904 else
45905 return false;
45906 }
45907
45908 if (d->testing_p)
45909 return true;
45910
45911 if (vmode == V8SImode)
45912 for (i = 0; i < 8; ++i)
45913 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
45914 else
45915 {
45916 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
45917 if (!d->one_operand_p)
45918 mask = 2 * nelt - 1;
45919 else if (vmode == V16QImode)
45920 mask = nelt - 1;
45921 else if (vmode == V64QImode)
45922 mask = nelt / 4 - 1;
45923 else
45924 mask = nelt / 2 - 1;
45925
45926 for (i = 0; i < nelt; ++i)
45927 {
45928 unsigned j, e = d->perm[i] & mask;
45929 for (j = 0; j < eltsz; ++j)
45930 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
45931 }
45932 }
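/* In the byte mask just built, each element index E expands to ELTSZ
   consecutive byte selectors E*ELTSZ .. E*ELTSZ + ELTSZ - 1; e.g. for
   V8HImode (eltsz == 2) element 3 becomes the byte pair { 6, 7 }.  */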
45933
45934 vperm = gen_rtx_CONST_VECTOR (vmode,
45935 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
45936 vperm = force_reg (vmode, vperm);
45937
45938 target = d->target;
45939 if (d->vmode != vmode)
45940 target = gen_reg_rtx (vmode);
45941 op0 = gen_lowpart (vmode, d->op0);
45942 if (d->one_operand_p)
45943 {
45944 if (vmode == V16QImode)
45945 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
45946 else if (vmode == V32QImode)
45947 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
45948 else if (vmode == V64QImode)
45949 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
45950 else if (vmode == V8SFmode)
45951 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
45952 else
45953 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
45954 }
45955 else
45956 {
45957 op1 = gen_lowpart (vmode, d->op1);
45958 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
45959 }
45960 if (target != d->target)
45961 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
45962
45963 return true;
45964 }
45965
45966 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
45967 in a single instruction. */
45968
45969 static bool
45970 expand_vec_perm_1 (struct expand_vec_perm_d *d)
45971 {
45972 unsigned i, nelt = d->nelt;
45973 unsigned char perm2[MAX_VECT_LEN];
45974
45975 /* Check plain VEC_SELECT first, because AVX has instructions that could
45976 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
45977 input where SEL+CONCAT may not. */
45978 if (d->one_operand_p)
45979 {
45980 int mask = nelt - 1;
45981 bool identity_perm = true;
45982 bool broadcast_perm = true;
45983
45984 for (i = 0; i < nelt; i++)
45985 {
45986 perm2[i] = d->perm[i] & mask;
45987 if (perm2[i] != i)
45988 identity_perm = false;
45989 if (perm2[i])
45990 broadcast_perm = false;
45991 }
45992
45993 if (identity_perm)
45994 {
45995 if (!d->testing_p)
45996 emit_move_insn (d->target, d->op0);
45997 return true;
45998 }
45999 else if (broadcast_perm && TARGET_AVX2)
46000 {
46001 /* Use vpbroadcast{b,w,d,q} or vbroadcasts{s,d}. */
46002 rtx (*gen) (rtx, rtx) = NULL;
46003 switch (d->vmode)
46004 {
46005 case V64QImode:
46006 if (TARGET_AVX512BW)
46007 gen = gen_avx512bw_vec_dupv64qi;
46008 break;
46009 case V32QImode:
46010 gen = gen_avx2_pbroadcastv32qi_1;
46011 break;
46012 case V32HImode:
46013 if (TARGET_AVX512BW)
46014 gen = gen_avx512bw_vec_dupv32hi;
46015 break;
46016 case V16HImode:
46017 gen = gen_avx2_pbroadcastv16hi_1;
46018 break;
46019 case V16SImode:
46020 if (TARGET_AVX512F)
46021 gen = gen_avx512f_vec_dupv16si;
46022 break;
46023 case V8SImode:
46024 gen = gen_avx2_pbroadcastv8si_1;
46025 break;
46026 case V16QImode:
46027 gen = gen_avx2_pbroadcastv16qi;
46028 break;
46029 case V8HImode:
46030 gen = gen_avx2_pbroadcastv8hi;
46031 break;
46032 case V16SFmode:
46033 if (TARGET_AVX512F)
46034 gen = gen_avx512f_vec_dupv16sf;
46035 break;
46036 case V8SFmode:
46037 gen = gen_avx2_vec_dupv8sf_1;
46038 break;
46039 case V8DFmode:
46040 if (TARGET_AVX512F)
46041 gen = gen_avx512f_vec_dupv8df;
46042 break;
46043 case V8DImode:
46044 if (TARGET_AVX512F)
46045 gen = gen_avx512f_vec_dupv8di;
46046 break;
46047 /* For other modes prefer other shuffles this function creates. */
46048 default: break;
46049 }
46050 if (gen != NULL)
46051 {
46052 if (!d->testing_p)
46053 emit_insn (gen (d->target, d->op0));
46054 return true;
46055 }
46056 }
46057
46058 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
46059 return true;
46060
46061 /* There are plenty of patterns in sse.md that are written for
46062 SEL+CONCAT and are not replicated for a single op. Perhaps
46063 that should be changed, to avoid the nastiness here. */
46064
46065 /* Recognize interleave style patterns, which means incrementing
46066 every other permutation operand. */
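/* E.g. for V4SImode the permutation { 0 0 1 1 } becomes the
   two-operand selector { 0 4 1 5 }, i.e. punpckldq of the operand
   with itself.  */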
46067 for (i = 0; i < nelt; i += 2)
46068 {
46069 perm2[i] = d->perm[i] & mask;
46070 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
46071 }
46072 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
46073 d->testing_p))
46074 return true;
46075
46076 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
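/* E.g. for V4SFmode a selector of the form { a b c+4 d+4 } matches
   shufps, which takes its two low result elements from the first
   operand and its two high result elements from the second.  */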
46077 if (nelt >= 4)
46078 {
46079 for (i = 0; i < nelt; i += 4)
46080 {
46081 perm2[i + 0] = d->perm[i + 0] & mask;
46082 perm2[i + 1] = d->perm[i + 1] & mask;
46083 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
46084 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
46085 }
46086
46087 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
46088 d->testing_p))
46089 return true;
46090 }
46091 }
46092
46093 /* Finally, try the fully general two operand permute. */
46094 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
46095 d->testing_p))
46096 return true;
46097
46098 /* Recognize interleave style patterns with reversed operands. */
46099 if (!d->one_operand_p)
46100 {
46101 for (i = 0; i < nelt; ++i)
46102 {
46103 unsigned e = d->perm[i];
46104 if (e >= nelt)
46105 e -= nelt;
46106 else
46107 e += nelt;
46108 perm2[i] = e;
46109 }
46110
46111 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
46112 d->testing_p))
46113 return true;
46114 }
46115
46116 /* Try the SSE4.1 blend variable merge instructions. */
46117 if (expand_vec_perm_blend (d))
46118 return true;
46119
46120 /* Try one of the AVX vpermil variable permutations. */
46121 if (expand_vec_perm_vpermil (d))
46122 return true;
46123
46124 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
46125 vpshufb, vpermd, vpermps or vpermq variable permutation. */
46126 if (expand_vec_perm_pshufb (d))
46127 return true;
46128
46129 /* Try the AVX2 vpalignr instruction. */
46130 if (expand_vec_perm_palignr (d, true))
46131 return true;
46132
46133 /* Try the AVX512F vpermi2 instructions. */
46134 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
46135 return true;
46136
46137 return false;
46138 }
46139
46140 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46141 in terms of a pair of pshuflw + pshufhw instructions. */
46142
46143 static bool
46144 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
46145 {
46146 unsigned char perm2[MAX_VECT_LEN];
46147 unsigned i;
46148 bool ok;
46149
46150 if (d->vmode != V8HImode || !d->one_operand_p)
46151 return false;
46152
46153 /* The two permutations only operate in 64-bit lanes. */
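/* E.g. { 3 1 2 0 5 4 7 6 } is accepted, while { 4 1 2 0 3 5 7 6 }
   is rejected because it moves words across the 64-bit halves.  */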
46154 for (i = 0; i < 4; ++i)
46155 if (d->perm[i] >= 4)
46156 return false;
46157 for (i = 4; i < 8; ++i)
46158 if (d->perm[i] < 4)
46159 return false;
46160
46161 if (d->testing_p)
46162 return true;
46163
46164 /* Emit the pshuflw. */
46165 memcpy (perm2, d->perm, 4);
46166 for (i = 4; i < 8; ++i)
46167 perm2[i] = i;
46168 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
46169 gcc_assert (ok);
46170
46171 /* Emit the pshufhw. */
46172 memcpy (perm2 + 4, d->perm + 4, 4);
46173 for (i = 0; i < 4; ++i)
46174 perm2[i] = i;
46175 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
46176 gcc_assert (ok);
46177
46178 return true;
46179 }
46180
46181 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
46182 the permutation using the SSSE3 palignr instruction. This succeeds
46183 when all of the elements in PERM fit within one vector and we merely
46184 need to shift them down so that a single vector permutation has a
46185 chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
46186 the vpalignr instruction by itself can perform the requested permutation. */
46187
46188 static bool
46189 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
46190 {
46191 unsigned i, nelt = d->nelt;
46192 unsigned min, max, minswap, maxswap;
46193 bool in_order, ok, swap = false;
46194 rtx shift, target;
46195 struct expand_vec_perm_d dcopy;
46196
46197 /* Even with AVX, palignr only operates on 128-bit vectors;
46198 with AVX2, palignr operates within each 128-bit lane. */
46199 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
46200 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
46201 return false;
46202
46203 min = 2 * nelt;
46204 max = 0;
46205 minswap = 2 * nelt;
46206 maxswap = 0;
46207 for (i = 0; i < nelt; ++i)
46208 {
46209 unsigned e = d->perm[i];
46210 unsigned eswap = d->perm[i] ^ nelt;
46211 if (GET_MODE_SIZE (d->vmode) == 32)
46212 {
46213 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
46214 eswap = e ^ (nelt / 2);
46215 }
46216 if (e < min)
46217 min = e;
46218 if (e > max)
46219 max = e;
46220 if (eswap < minswap)
46221 minswap = eswap;
46222 if (eswap > maxswap)
46223 maxswap = eswap;
46224 }
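/* All referenced elements must fit within a window of nelt
   consecutive positions of the concatenated operands; e.g. for
   V16QImode indexes 5 .. 20 give min == 5, and palignr by 5 bytes
   shifts that window down into a single vector.  */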
46225 if (min == 0
46226 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
46227 {
46228 if (d->one_operand_p
46229 || minswap == 0
46230 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
46231 ? nelt / 2 : nelt))
46232 return false;
46233 swap = true;
46234 min = minswap;
46235 max = maxswap;
46236 }
46237
46238 /* Given that we have SSSE3, we know we'll be able to implement the
46239 single operand permutation after the palignr with pshufb for
46240 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
46241 first. */
46242 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
46243 return true;
46244
46245 dcopy = *d;
46246 if (swap)
46247 {
46248 dcopy.op0 = d->op1;
46249 dcopy.op1 = d->op0;
46250 for (i = 0; i < nelt; ++i)
46251 dcopy.perm[i] ^= nelt;
46252 }
46253
46254 in_order = true;
46255 for (i = 0; i < nelt; ++i)
46256 {
46257 unsigned e = dcopy.perm[i];
46258 if (GET_MODE_SIZE (d->vmode) == 32
46259 && e >= nelt
46260 && (e & (nelt / 2 - 1)) < min)
46261 e = e - min - (nelt / 2);
46262 else
46263 e = e - min;
46264 if (e != i)
46265 in_order = false;
46266 dcopy.perm[i] = e;
46267 }
46268 dcopy.one_operand_p = true;
46269
46270 if (single_insn_only_p && !in_order)
46271 return false;
46272
46273 /* For AVX2, test whether we can permute the result in one instruction. */
46274 if (d->testing_p)
46275 {
46276 if (in_order)
46277 return true;
46278 dcopy.op1 = dcopy.op0;
46279 return expand_vec_perm_1 (&dcopy);
46280 }
46281
46282 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
46283 if (GET_MODE_SIZE (d->vmode) == 16)
46284 {
46285 target = gen_reg_rtx (TImode);
46286 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
46287 gen_lowpart (TImode, dcopy.op0), shift));
46288 }
46289 else
46290 {
46291 target = gen_reg_rtx (V2TImode);
46292 emit_insn (gen_avx2_palignrv2ti (target,
46293 gen_lowpart (V2TImode, dcopy.op1),
46294 gen_lowpart (V2TImode, dcopy.op0),
46295 shift));
46296 }
46297
46298 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
46299
46300 /* Test for the degenerate case where the alignment by itself
46301 produces the desired permutation. */
46302 if (in_order)
46303 {
46304 emit_move_insn (d->target, dcopy.op0);
46305 return true;
46306 }
46307
46308 ok = expand_vec_perm_1 (&dcopy);
46309 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
46310
46311 return ok;
46312 }
46313
46314 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
46315 the permutation using the SSE4_1 pblendv instruction. Potentially
46316 reduces permutation from 2 pshufb and or to 1 pshufb and pblendv. */
46317
46318 static bool
46319 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
46320 {
46321 unsigned i, which, nelt = d->nelt;
46322 struct expand_vec_perm_d dcopy, dcopy1;
46323 enum machine_mode vmode = d->vmode;
46324 bool ok;
46325
46326 /* Use the same checks as in expand_vec_perm_blend. */
46327 if (d->one_operand_p)
46328 return false;
46329 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
46330 ;
46331 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
46332 ;
46333 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
46334 ;
46335 else
46336 return false;
46337
46338 /* Figure out which permutation elements are not in their
46339 original positions, and which operand they come from. */
46340 for (i = 0, which = 0; i < nelt; ++i)
46341 {
46342 unsigned e = d->perm[i];
46343 if (e != i)
46344 which |= (e < nelt ? 1 : 2);
46345 }
46346 /* We can pblend the out-of-place elements only when they all
46347 come from the same operand.
46348 {0 1 8 3 4 5 9 7} is OK: the out-of-place elements 8 and 9
46349 both come from the second operand (both are >= 8).
46350 {0 1 8 3 4 5 2 7} is not OK: 2 and 8 are both out of place,
46351 but 8 comes from the second operand (8 >= 8) while 2 comes
46352 from the first. */
46353 if (which != 1 && which != 2)
46354 return false;
46355 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
46356 return true;
46357
46358 /* First apply a one-operand permutation to the elements that
46359 are not in their final positions. */
46360 dcopy = *d;
46361 if (which == 2)
46362 dcopy.op0 = dcopy.op1 = d->op1;
46363 else
46364 dcopy.op0 = dcopy.op1 = d->op0;
46365 dcopy.one_operand_p = true;
46366
46367 for (i = 0; i < nelt; ++i)
46368 dcopy.perm[i] = d->perm[i] & (nelt - 1);
46369
46370 ok = expand_vec_perm_1 (&dcopy);
46371 if (GET_MODE_SIZE (vmode) != 16 && !ok)
46372 return false;
46373 else
46374 gcc_assert (ok);
46375 if (d->testing_p)
46376 return true;
46377
46378 /* Next we put permuted elements into their positions. */
46379 dcopy1 = *d;
46380 if (which == 2)
46381 dcopy1.op1 = dcopy.target;
46382 else
46383 dcopy1.op0 = dcopy.target;
46384
46385 for (i = 0; i < nelt; ++i)
46386 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
46387
46388 ok = expand_vec_perm_blend (&dcopy1);
46389 gcc_assert (ok);
46390
46391 return true;
46392 }
46393
46394 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
46395
46396 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
46397 a two vector permutation into a single vector permutation by using
46398 an interleave operation to merge the vectors. */
46399
46400 static bool
46401 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
46402 {
46403 struct expand_vec_perm_d dremap, dfinal;
46404 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
46405 unsigned HOST_WIDE_INT contents;
46406 unsigned char remap[2 * MAX_VECT_LEN];
46407 rtx_insn *seq;
46408 bool ok, same_halves = false;
46409
46410 if (GET_MODE_SIZE (d->vmode) == 16)
46411 {
46412 if (d->one_operand_p)
46413 return false;
46414 }
46415 else if (GET_MODE_SIZE (d->vmode) == 32)
46416 {
46417 if (!TARGET_AVX)
46418 return false;
46419 /* For 32-byte modes allow this even for d->one_operand_p:
46420 the lack of cross-lane shuffling in some instructions
46421 might prevent a single-insn shuffle. */
46422 dfinal = *d;
46423 dfinal.testing_p = true;
46424 /* If expand_vec_perm_interleave3 can expand this into
46425 a 3-insn sequence, give up and let it be expanded that
46426 way. While that is one insn longer, it doesn't need a
46427 memory operand, and in the common case where the
46428 interleave-low and interleave-high permutations with the
46429 same operands are adjacent, the pair needs only 4 insns
46430 in total after CSE. */
46431 if (expand_vec_perm_interleave3 (&dfinal))
46432 return false;
46433 }
46434 else
46435 return false;
46436
46437 /* Examine from whence the elements come. */
46438 contents = 0;
46439 for (i = 0; i < nelt; ++i)
46440 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
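/* CONTENTS is a bitmask over the 2 * nelt possible input elements;
   e.g. for V4SImode the permutation { 0 4 1 5 } gives
   contents == 0x33.  */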
46441
46442 memset (remap, 0xff, sizeof (remap));
46443 dremap = *d;
46444
46445 if (GET_MODE_SIZE (d->vmode) == 16)
46446 {
46447 unsigned HOST_WIDE_INT h1, h2, h3, h4;
46448
46449 /* Split the two input vectors into 4 halves. */
46450 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
46451 h2 = h1 << nelt2;
46452 h3 = h2 << nelt2;
46453 h4 = h3 << nelt2;
46454
46455 /* If the elements all come from the low halves, use interleave low;
46456 similarly for interleave high. If the elements are from mis-matched
46457 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
46458 if ((contents & (h1 | h3)) == contents)
46459 {
46460 /* punpckl* */
46461 for (i = 0; i < nelt2; ++i)
46462 {
46463 remap[i] = i * 2;
46464 remap[i + nelt] = i * 2 + 1;
46465 dremap.perm[i * 2] = i;
46466 dremap.perm[i * 2 + 1] = i + nelt;
46467 }
46468 if (!TARGET_SSE2 && d->vmode == V4SImode)
46469 dremap.vmode = V4SFmode;
46470 }
46471 else if ((contents & (h2 | h4)) == contents)
46472 {
46473 /* punpckh* */
46474 for (i = 0; i < nelt2; ++i)
46475 {
46476 remap[i + nelt2] = i * 2;
46477 remap[i + nelt + nelt2] = i * 2 + 1;
46478 dremap.perm[i * 2] = i + nelt2;
46479 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
46480 }
46481 if (!TARGET_SSE2 && d->vmode == V4SImode)
46482 dremap.vmode = V4SFmode;
46483 }
46484 else if ((contents & (h1 | h4)) == contents)
46485 {
46486 /* shufps */
46487 for (i = 0; i < nelt2; ++i)
46488 {
46489 remap[i] = i;
46490 remap[i + nelt + nelt2] = i + nelt2;
46491 dremap.perm[i] = i;
46492 dremap.perm[i + nelt2] = i + nelt + nelt2;
46493 }
46494 if (nelt != 4)
46495 {
46496 /* shufpd */
46497 dremap.vmode = V2DImode;
46498 dremap.nelt = 2;
46499 dremap.perm[0] = 0;
46500 dremap.perm[1] = 3;
46501 }
46502 }
46503 else if ((contents & (h2 | h3)) == contents)
46504 {
46505 /* shufps */
46506 for (i = 0; i < nelt2; ++i)
46507 {
46508 remap[i + nelt2] = i;
46509 remap[i + nelt] = i + nelt2;
46510 dremap.perm[i] = i + nelt2;
46511 dremap.perm[i + nelt2] = i + nelt;
46512 }
46513 if (nelt != 4)
46514 {
46515 /* shufpd */
46516 dremap.vmode = V2DImode;
46517 dremap.nelt = 2;
46518 dremap.perm[0] = 1;
46519 dremap.perm[1] = 2;
46520 }
46521 }
46522 else
46523 return false;
46524 }
46525 else
46526 {
46527 unsigned int nelt4 = nelt / 4, nzcnt = 0;
46528 unsigned HOST_WIDE_INT q[8];
46529 unsigned int nonzero_halves[4];
46530
46531 /* Split the two input vectors into 8 quarters. */
46532 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
46533 for (i = 1; i < 8; ++i)
46534 q[i] = q[0] << (nelt4 * i);
46535 for (i = 0; i < 4; ++i)
46536 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
46537 {
46538 nonzero_halves[nzcnt] = i;
46539 ++nzcnt;
46540 }
46541
46542 if (nzcnt == 1)
46543 {
46544 gcc_assert (d->one_operand_p);
46545 nonzero_halves[1] = nonzero_halves[0];
46546 same_halves = true;
46547 }
46548 else if (d->one_operand_p)
46549 {
46550 gcc_assert (nonzero_halves[0] == 0);
46551 gcc_assert (nonzero_halves[1] == 1);
46552 }
46553
46554 if (nzcnt <= 2)
46555 {
46556 if (d->perm[0] / nelt2 == nonzero_halves[1])
46557 {
46558 /* Attempt to increase the likelihood that dfinal
46559 shuffle will be intra-lane. */
46560 char tmph = nonzero_halves[0];
46561 nonzero_halves[0] = nonzero_halves[1];
46562 nonzero_halves[1] = tmph;
46563 }
46564
46565 /* vperm2f128 or vperm2i128. */
46566 for (i = 0; i < nelt2; ++i)
46567 {
46568 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
46569 remap[i + nonzero_halves[0] * nelt2] = i;
46570 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
46571 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
46572 }
46573
46574 if (d->vmode != V8SFmode
46575 && d->vmode != V4DFmode
46576 && d->vmode != V8SImode)
46577 {
46578 dremap.vmode = V8SImode;
46579 dremap.nelt = 8;
46580 for (i = 0; i < 4; ++i)
46581 {
46582 dremap.perm[i] = i + nonzero_halves[0] * 4;
46583 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
46584 }
46585 }
46586 }
46587 else if (d->one_operand_p)
46588 return false;
46589 else if (TARGET_AVX2
46590 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
46591 {
46592 /* vpunpckl* */
46593 for (i = 0; i < nelt4; ++i)
46594 {
46595 remap[i] = i * 2;
46596 remap[i + nelt] = i * 2 + 1;
46597 remap[i + nelt2] = i * 2 + nelt2;
46598 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
46599 dremap.perm[i * 2] = i;
46600 dremap.perm[i * 2 + 1] = i + nelt;
46601 dremap.perm[i * 2 + nelt2] = i + nelt2;
46602 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
46603 }
46604 }
46605 else if (TARGET_AVX2
46606 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
46607 {
46608 /* vpunpckh* */
46609 for (i = 0; i < nelt4; ++i)
46610 {
46611 remap[i + nelt4] = i * 2;
46612 remap[i + nelt + nelt4] = i * 2 + 1;
46613 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
46614 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
46615 dremap.perm[i * 2] = i + nelt4;
46616 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
46617 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
46618 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
46619 }
46620 }
46621 else
46622 return false;
46623 }
46624
46625 /* Use the remapping array set up above to move the elements from their
46626 swizzled locations into their final destinations. */
46627 dfinal = *d;
46628 for (i = 0; i < nelt; ++i)
46629 {
46630 unsigned e = remap[d->perm[i]];
46631 gcc_assert (e < nelt);
46632 /* If same_halves is true, both halves of the remapped vector are the
46633 same. Avoid cross-lane accesses if possible. */
46634 if (same_halves && i >= nelt2)
46635 {
46636 gcc_assert (e < nelt2);
46637 dfinal.perm[i] = e + nelt2;
46638 }
46639 else
46640 dfinal.perm[i] = e;
46641 }
46642 if (!d->testing_p)
46643 {
46644 dremap.target = gen_reg_rtx (dremap.vmode);
46645 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
46646 }
46647 dfinal.op1 = dfinal.op0;
46648 dfinal.one_operand_p = true;
46649
46650 /* Test if the final remap can be done with a single insn. For V4SFmode or
46651 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
46652 start_sequence ();
46653 ok = expand_vec_perm_1 (&dfinal);
46654 seq = get_insns ();
46655 end_sequence ();
46656
46657 if (!ok)
46658 return false;
46659
46660 if (d->testing_p)
46661 return true;
46662
46663 if (dremap.vmode != dfinal.vmode)
46664 {
46665 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
46666 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
46667 }
46668
46669 ok = expand_vec_perm_1 (&dremap);
46670 gcc_assert (ok);
46671
46672 emit_insn (seq);
46673 return true;
46674 }
46675
46676 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
46677 a single vector cross-lane permutation into vpermq followed
46678 by any of the single insn permutations. */
46679
46680 static bool
46681 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
46682 {
46683 struct expand_vec_perm_d dremap, dfinal;
46684 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
46685 unsigned contents[2];
46686 bool ok;
46687
46688 if (!(TARGET_AVX2
46689 && (d->vmode == V32QImode || d->vmode == V16HImode)
46690 && d->one_operand_p))
46691 return false;
46692
46693 contents[0] = 0;
46694 contents[1] = 0;
46695 for (i = 0; i < nelt2; ++i)
46696 {
46697 contents[0] |= 1u << (d->perm[i] / nelt4);
46698 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
46699 }
46700
46701 for (i = 0; i < 2; ++i)
46702 {
46703 unsigned int cnt = 0;
46704 for (j = 0; j < 4; ++j)
46705 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
46706 return false;
46707 }
46708
46709 if (d->testing_p)
46710 return true;
46711
46712 dremap = *d;
46713 dremap.vmode = V4DImode;
46714 dremap.nelt = 4;
46715 dremap.target = gen_reg_rtx (V4DImode);
46716 dremap.op0 = gen_lowpart (V4DImode, d->op0);
46717 dremap.op1 = dremap.op0;
46718 dremap.one_operand_p = true;
46719 for (i = 0; i < 2; ++i)
46720 {
46721 unsigned int cnt = 0;
46722 for (j = 0; j < 4; ++j)
46723 if ((contents[i] & (1u << j)) != 0)
46724 dremap.perm[2 * i + cnt++] = j;
46725 for (; cnt < 2; ++cnt)
46726 dremap.perm[2 * i + cnt] = 0;
46727 }
46728
46729 dfinal = *d;
46730 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
46731 dfinal.op1 = dfinal.op0;
46732 dfinal.one_operand_p = true;
46733 for (i = 0, j = 0; i < nelt; ++i)
46734 {
46735 if (i == nelt2)
46736 j = 2;
46737 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
46738 if ((d->perm[i] / nelt4) == dremap.perm[j])
46739 ;
46740 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
46741 dfinal.perm[i] |= nelt4;
46742 else
46743 gcc_unreachable ();
46744 }
46745
46746 ok = expand_vec_perm_1 (&dremap);
46747 gcc_assert (ok);
46748
46749 ok = expand_vec_perm_1 (&dfinal);
46750 gcc_assert (ok);
46751
46752 return true;
46753 }
46754
46755 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
46756 a vector permutation using two instructions, vperm2f128 resp.
46757 vperm2i128 followed by any single in-lane permutation. */
46758
46759 static bool
46760 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
46761 {
46762 struct expand_vec_perm_d dfirst, dsecond;
46763 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
46764 bool ok;
46765
46766 if (!TARGET_AVX
46767 || GET_MODE_SIZE (d->vmode) != 32
46768 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
46769 return false;
46770
46771 dsecond = *d;
46772 dsecond.one_operand_p = false;
46773 dsecond.testing_p = true;
46774
46775 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
46776 immediate. For perm < 16 the second permutation uses
46777 d->op0 as first operand, for perm >= 16 it uses d->op1
46778 as first operand. The second operand is the result of
46779 vperm2[fi]128. */
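/* E.g. perm == 6 (binary 0110) gives the immediate
   ((6 << 2) | 6) & 0x33 == 0x12, which selects the low lane of the
   second source operand for the result's low lane and the high lane
   of the first source operand for the result's high lane.  */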
46780 for (perm = 0; perm < 32; perm++)
46781 {
46782 /* Ignore permutations which do not move anything cross-lane. */
46783 if (perm < 16)
46784 {
46785 /* The second shuffle for e.g. V4DFmode has
46786 0123 and ABCD operands.
46787 Ignore AB23, as 23 is already in the second lane
46788 of the first operand. */
46789 if ((perm & 0xc) == (1 << 2)) continue;
46790 /* And 01CD, as 01 is in the first lane of the first
46791 operand. */
46792 if ((perm & 3) == 0) continue;
46793 /* And 4567, as then the vperm2[fi]128 doesn't change
46794 anything on the original 4567 second operand. */
46795 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
46796 }
46797 else
46798 {
46799 /* The second shuffle for e.g. V4DFmode has
46800 4567 and ABCD operands.
46801 Ignore AB67, as 67 is already in the second lane
46802 of the first operand. */
46803 if ((perm & 0xc) == (3 << 2)) continue;
46804 /* And 45CD, as 45 is in the first lane of the first
46805 operand. */
46806 if ((perm & 3) == 2) continue;
46807 /* And 0123, as then the vperm2[fi]128 doesn't change
46808 anything on the original 0123 first operand. */
46809 if ((perm & 0xf) == (1 << 2)) continue;
46810 }
46811
46812 for (i = 0; i < nelt; i++)
46813 {
46814 j = d->perm[i] / nelt2;
46815 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
46816 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
46817 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
46818 dsecond.perm[i] = d->perm[i] & (nelt - 1);
46819 else
46820 break;
46821 }
46822
46823 if (i == nelt)
46824 {
46825 start_sequence ();
46826 ok = expand_vec_perm_1 (&dsecond);
46827 end_sequence ();
46828 }
46829 else
46830 ok = false;
46831
46832 if (ok)
46833 {
46834 if (d->testing_p)
46835 return true;
46836
46837 /* Found a usable second shuffle. dfirst will be
46838 vperm2f128 on d->op0 and d->op1. */
46839 dsecond.testing_p = false;
46840 dfirst = *d;
46841 dfirst.target = gen_reg_rtx (d->vmode);
46842 for (i = 0; i < nelt; i++)
46843 dfirst.perm[i] = (i & (nelt2 - 1))
46844 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
46845
46846 canonicalize_perm (&dfirst);
46847 ok = expand_vec_perm_1 (&dfirst);
46848 gcc_assert (ok);
46849
46850 /* And dsecond is some single insn shuffle, taking
46851 d->op0 and result of vperm2f128 (if perm < 16) or
46852 d->op1 and result of vperm2f128 (otherwise). */
46853 if (perm >= 16)
46854 dsecond.op0 = dsecond.op1;
46855 dsecond.op1 = dfirst.target;
46856
46857 ok = expand_vec_perm_1 (&dsecond);
46858 gcc_assert (ok);
46859
46860 return true;
46861 }
46862
46863 /* For one operand, the only useful vperm2f128 permutation is 0x01
46864 aka lanes swap. */
46865 if (d->one_operand_p)
46866 return false;
46867 }
46868
46869 return false;
46870 }
46871
46872 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
46873 a two vector permutation using 2 intra-lane interleave insns
46874 and cross-lane shuffle for 32-byte vectors. */
46875
46876 static bool
46877 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
46878 {
46879 unsigned i, nelt;
46880 rtx (*gen) (rtx, rtx, rtx);
46881
46882 if (d->one_operand_p)
46883 return false;
46884 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
46885 ;
46886 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
46887 ;
46888 else
46889 return false;
46890
46891 nelt = d->nelt;
46892 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
46893 return false;
46894 for (i = 0; i < nelt; i += 2)
46895 if (d->perm[i] != d->perm[0] + i / 2
46896 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
46897 return false;
46898
46899 if (d->testing_p)
46900 return true;
46901
46902 switch (d->vmode)
46903 {
46904 case V32QImode:
46905 if (d->perm[0])
46906 gen = gen_vec_interleave_highv32qi;
46907 else
46908 gen = gen_vec_interleave_lowv32qi;
46909 break;
46910 case V16HImode:
46911 if (d->perm[0])
46912 gen = gen_vec_interleave_highv16hi;
46913 else
46914 gen = gen_vec_interleave_lowv16hi;
46915 break;
46916 case V8SImode:
46917 if (d->perm[0])
46918 gen = gen_vec_interleave_highv8si;
46919 else
46920 gen = gen_vec_interleave_lowv8si;
46921 break;
46922 case V4DImode:
46923 if (d->perm[0])
46924 gen = gen_vec_interleave_highv4di;
46925 else
46926 gen = gen_vec_interleave_lowv4di;
46927 break;
46928 case V8SFmode:
46929 if (d->perm[0])
46930 gen = gen_vec_interleave_highv8sf;
46931 else
46932 gen = gen_vec_interleave_lowv8sf;
46933 break;
46934 case V4DFmode:
46935 if (d->perm[0])
46936 gen = gen_vec_interleave_highv4df;
46937 else
46938 gen = gen_vec_interleave_lowv4df;
46939 break;
46940 default:
46941 gcc_unreachable ();
46942 }
46943
46944 emit_insn (gen (d->target, d->op0, d->op1));
46945 return true;
46946 }
46947
46948 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
46949 a single vector permutation using a single intra-lane vector
46950 permutation, vperm2f128 swapping the lanes and vblend* insn blending
46951 the non-swapped and swapped vectors together. */
46952
46953 static bool
46954 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
46955 {
46956 struct expand_vec_perm_d dfirst, dsecond;
46957 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
46958 rtx_insn *seq;
46959 bool ok;
46960 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
46961
46962 if (!TARGET_AVX
46963 || TARGET_AVX2
46964 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
46965 || !d->one_operand_p)
46966 return false;
46967
46968 dfirst = *d;
46969 for (i = 0; i < nelt; i++)
46970 dfirst.perm[i] = 0xff;
46971 for (i = 0, msk = 0; i < nelt; i++)
46972 {
46973 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
46974 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
46975 return false;
46976 dfirst.perm[j] = d->perm[i];
46977 if (j != i)
46978 msk |= (1 << i);
46979 }
46980 for (i = 0; i < nelt; i++)
46981 if (dfirst.perm[i] == 0xff)
46982 dfirst.perm[i] = i;
46983
46984 if (!d->testing_p)
46985 dfirst.target = gen_reg_rtx (dfirst.vmode);
46986
46987 start_sequence ();
46988 ok = expand_vec_perm_1 (&dfirst);
46989 seq = get_insns ();
46990 end_sequence ();
46991
46992 if (!ok)
46993 return false;
46994
46995 if (d->testing_p)
46996 return true;
46997
46998 emit_insn (seq);
46999
47000 dsecond = *d;
47001 dsecond.op0 = dfirst.target;
47002 dsecond.op1 = dfirst.target;
47003 dsecond.one_operand_p = true;
47004 dsecond.target = gen_reg_rtx (dsecond.vmode);
47005 for (i = 0; i < nelt; i++)
47006 dsecond.perm[i] = i ^ nelt2;
47007
47008 ok = expand_vec_perm_1 (&dsecond);
47009 gcc_assert (ok);
47010
47011 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
47012 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
47013 return true;
47014 }
47015
47016 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
47017 permutation using two vperm2f128, followed by a vshufpd insn blending
47018 the two vectors together. */
47019
47020 static bool
47021 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
47022 {
47023 struct expand_vec_perm_d dfirst, dsecond, dthird;
47024 bool ok;
47025
47026 if (!TARGET_AVX || (d->vmode != V4DFmode))
47027 return false;
47028
47029 if (d->testing_p)
47030 return true;
47031
47032 dfirst = *d;
47033 dsecond = *d;
47034 dthird = *d;
47035
47036 dfirst.perm[0] = (d->perm[0] & ~1);
47037 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
47038 dfirst.perm[2] = (d->perm[2] & ~1);
47039 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
47040 dsecond.perm[0] = (d->perm[1] & ~1);
47041 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
47042 dsecond.perm[2] = (d->perm[3] & ~1);
47043 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
47044 dthird.perm[0] = (d->perm[0] % 2);
47045 dthird.perm[1] = (d->perm[1] % 2) + 4;
47046 dthird.perm[2] = (d->perm[2] % 2) + 2;
47047 dthird.perm[3] = (d->perm[3] % 2) + 6;
47048
47049 dfirst.target = gen_reg_rtx (dfirst.vmode);
47050 dsecond.target = gen_reg_rtx (dsecond.vmode);
47051 dthird.op0 = dfirst.target;
47052 dthird.op1 = dsecond.target;
47053 dthird.one_operand_p = false;
47054
47055 canonicalize_perm (&dfirst);
47056 canonicalize_perm (&dsecond);
47057
47058 ok = expand_vec_perm_1 (&dfirst)
47059 && expand_vec_perm_1 (&dsecond)
47060 && expand_vec_perm_1 (&dthird);
47061
47062 gcc_assert (ok);
47063
47064 return true;
47065 }
47066
47067 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
47068 permutation with two pshufb insns and an ior. We should have already
47069 failed all two instruction sequences. */
47070
47071 static bool
47072 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
47073 {
47074 rtx rperm[2][16], vperm, l, h, op, m128;
47075 unsigned int i, nelt, eltsz;
47076
47077 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47078 return false;
47079 gcc_assert (!d->one_operand_p);
47080
47081 if (d->testing_p)
47082 return true;
47083
47084 nelt = d->nelt;
47085 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47086
47087 /* Generate two permutation masks. If the required element is within
47088 the given vector it is shuffled into the proper lane. If the required
47089 element is in the other vector, force a zero into the lane by setting
47090 bit 7 in the permutation mask. */
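/* E.g. for V16QImode, if d->perm[0] == 3 (from op0) and
   d->perm[1] == 18 (element 2 of op1), the first mask starts
   { 3, -128, ... } and the second mask starts { -128, 2, ... }.  */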
47091 m128 = GEN_INT (-128);
47092 for (i = 0; i < nelt; ++i)
47093 {
47094 unsigned j, e = d->perm[i];
47095 unsigned which = (e >= nelt);
47096 if (e >= nelt)
47097 e -= nelt;
47098
47099 for (j = 0; j < eltsz; ++j)
47100 {
47101 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
47102 rperm[1-which][i*eltsz + j] = m128;
47103 }
47104 }
47105
47106 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
47107 vperm = force_reg (V16QImode, vperm);
47108
47109 l = gen_reg_rtx (V16QImode);
47110 op = gen_lowpart (V16QImode, d->op0);
47111 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
47112
47113 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
47114 vperm = force_reg (V16QImode, vperm);
47115
47116 h = gen_reg_rtx (V16QImode);
47117 op = gen_lowpart (V16QImode, d->op1);
47118 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
47119
47120 op = d->target;
47121 if (d->vmode != V16QImode)
47122 op = gen_reg_rtx (V16QImode);
47123 emit_insn (gen_iorv16qi3 (op, l, h));
47124 if (op != d->target)
47125 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
47126
47127 return true;
47128 }
47129
47130 /* Implement an arbitrary permutation of one V32QImode or V16HImode operand
47131 with two vpshufb insns, vpermq and vpor. We should have already failed
47132 all two or three instruction sequences. */
47133
47134 static bool
47135 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
47136 {
47137 rtx rperm[2][32], vperm, l, h, hp, op, m128;
47138 unsigned int i, nelt, eltsz;
47139
47140 if (!TARGET_AVX2
47141 || !d->one_operand_p
47142 || (d->vmode != V32QImode && d->vmode != V16HImode))
47143 return false;
47144
47145 if (d->testing_p)
47146 return true;
47147
47148 nelt = d->nelt;
47149 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47150
47151 /* Generate two permutation masks. If the required element is within
47152 the same lane, it is shuffled in. If the required element comes from
47153 the other lane, force a zero by setting bit 7 in the permutation mask.
47154 The second mask has a non-negative entry whenever an element is
47155 requested from the other lane; such entries are also placed in the
47156 other lane, so that swapping the two V2TImode halves of that vpshufb
47157 result moves them into their proper positions. */
47158 m128 = GEN_INT (-128);
47159 for (i = 0; i < nelt; ++i)
47160 {
47161 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
47162 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
47163
47164 for (j = 0; j < eltsz; ++j)
47165 {
47166 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
47167 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
47168 }
47169 }
47170
47171 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
47172 vperm = force_reg (V32QImode, vperm);
47173
47174 h = gen_reg_rtx (V32QImode);
47175 op = gen_lowpart (V32QImode, d->op0);
47176 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
47177
47178 /* Swap the 128-bit lanes of h into hp. */
47179 hp = gen_reg_rtx (V4DImode);
47180 op = gen_lowpart (V4DImode, h);
47181 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
47182 const1_rtx));
47183
47184 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
47185 vperm = force_reg (V32QImode, vperm);
47186
47187 l = gen_reg_rtx (V32QImode);
47188 op = gen_lowpart (V32QImode, d->op0);
47189 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
47190
47191 op = d->target;
47192 if (d->vmode != V32QImode)
47193 op = gen_reg_rtx (V32QImode);
47194 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
47195 if (op != d->target)
47196 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
47197
47198 return true;
47199 }
47200
47201 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
47202 and extract-odd permutations of two V32QImode or V16HImode operands
47203 with two vpshufb insns, vpor and vpermq. We should have already
47204 failed all two or three instruction sequences. */
47205
47206 static bool
47207 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
47208 {
47209 rtx rperm[2][32], vperm, l, h, ior, op, m128;
47210 unsigned int i, nelt, eltsz;
47211
47212 if (!TARGET_AVX2
47213 || d->one_operand_p
47214 || (d->vmode != V32QImode && d->vmode != V16HImode))
47215 return false;
47216
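/* (d->perm[i] ^ (i * 2)) & (3 * nelt / 2) is zero iff d->perm[i]
   lies in the same quarter of the concatenated operands as the
   extract-even index 2 * i; both extract-even (perm[i] == 2 * i)
   and extract-odd (perm[i] == 2 * i + 1) permutations pass this.  */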
47217 for (i = 0; i < d->nelt; ++i)
47218 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
47219 return false;
47220
47221 if (d->testing_p)
47222 return true;
47223
47224 nelt = d->nelt;
47225 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47226
47227 /* Generate two permutation masks. In the first permutation mask
47228 the first quarter will contain indexes for the first half
47229 of the op0, the second quarter will contain bit 7 set, third quarter
47230 will contain indexes for the second half of the op0 and the
47231 last quarter bit 7 set. In the second permutation mask
47232 the first quarter will contain bit 7 set, the second quarter
47233 indexes for the first half of the op1, the third quarter bit 7 set
47234 and last quarter indexes for the second half of the op1.
47235 I.e. the first mask e.g. for V32QImode extract even will be:
47236 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
47237 (all values masked with 0xf except for -128) and second mask
47238 for extract even will be
47239 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
47240 m128 = GEN_INT (-128);
47241 for (i = 0; i < nelt; ++i)
47242 {
47243 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
47244 unsigned which = d->perm[i] >= nelt;
47245 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
47246
47247 for (j = 0; j < eltsz; ++j)
47248 {
47249 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
47250 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
47251 }
47252 }
47253
47254 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
47255 vperm = force_reg (V32QImode, vperm);
47256
47257 l = gen_reg_rtx (V32QImode);
47258 op = gen_lowpart (V32QImode, d->op0);
47259 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
47260
47261 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
47262 vperm = force_reg (V32QImode, vperm);
47263
47264 h = gen_reg_rtx (V32QImode);
47265 op = gen_lowpart (V32QImode, d->op1);
47266 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
47267
47268 ior = gen_reg_rtx (V32QImode);
47269 emit_insn (gen_iorv32qi3 (ior, l, h));
47270
47271 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
47272 op = gen_reg_rtx (V4DImode);
47273 ior = gen_lowpart (V4DImode, ior);
47274 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
47275 const1_rtx, GEN_INT (3)));
47276 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
47277
47278 return true;
47279 }
47280
47281 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
47282 and extract-odd permutations. */
47283
47284 static bool
47285 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
47286 {
47287 rtx t1, t2, t3, t4, t5;
47288
47289 switch (d->vmode)
47290 {
47291 case V4DFmode:
47292 if (d->testing_p)
47293 break;
47294 t1 = gen_reg_rtx (V4DFmode);
47295 t2 = gen_reg_rtx (V4DFmode);
47296
47297 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
47298 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
47299 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
47300
47301 /* Now an unpck[lh]pd will produce the result required. */
47302 if (odd)
47303 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
47304 else
47305 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
47306 emit_insn (t3);
47307 break;
47308
47309 case V8SFmode:
47310 {
47311 int mask = odd ? 0xdd : 0x88;
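/* The shufps selector 0x88 == { 0 2 0 2 } picks the even elements
   of each source lane; 0xdd == { 1 3 1 3 } picks the odd ones.  */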
47312
47313 if (d->testing_p)
47314 break;
47315 t1 = gen_reg_rtx (V8SFmode);
47316 t2 = gen_reg_rtx (V8SFmode);
47317 t3 = gen_reg_rtx (V8SFmode);
47318
47319 /* Shuffle within the 128-bit lanes to produce:
47320 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
47321 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
47322 GEN_INT (mask)));
47323
47324 /* Shuffle the lanes around to produce:
47325 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
47326 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
47327 GEN_INT (0x3)));
47328
47329 /* Shuffle within the 128-bit lanes to produce:
47330 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
47331 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
47332
47333 /* Shuffle within the 128-bit lanes to produce:
47334 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
47335 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
47336
47337 /* Shuffle the lanes around to produce:
47338 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
47339 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
47340 GEN_INT (0x20)));
47341 }
47342 break;
47343
47344 case V2DFmode:
47345 case V4SFmode:
47346 case V2DImode:
47347 case V4SImode:
47348 /* These are always directly implementable by expand_vec_perm_1. */
47349 gcc_unreachable ();
47350
47351 case V8HImode:
47352 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
47353 return expand_vec_perm_pshufb2 (d);
47354 else
47355 {
47356 if (d->testing_p)
47357 break;
47358 /* We need 2*log2(N)-1 operations to achieve odd/even
47359 with interleave. */
47360 t1 = gen_reg_rtx (V8HImode);
47361 t2 = gen_reg_rtx (V8HImode);
47362 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
47363 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
47364 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
47365 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
47366 if (odd)
47367 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
47368 else
47369 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
47370 emit_insn (t3);
47371 }
47372 break;
47373
47374 case V16QImode:
47375 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
47376 return expand_vec_perm_pshufb2 (d);
47377 else
47378 {
47379 if (d->testing_p)
47380 break;
47381 t1 = gen_reg_rtx (V16QImode);
47382 t2 = gen_reg_rtx (V16QImode);
47383 t3 = gen_reg_rtx (V16QImode);
47384 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
47385 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
47386 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
47387 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
47388 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
47389 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
47390 if (odd)
47391 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
47392 else
47393 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
47394 emit_insn (t3);
47395 }
47396 break;
47397
47398 case V16HImode:
47399 case V32QImode:
47400 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
47401
47402 case V4DImode:
47403 if (!TARGET_AVX2)
47404 {
47405 struct expand_vec_perm_d d_copy = *d;
47406 d_copy.vmode = V4DFmode;
47407 if (d->testing_p)
47408 d_copy.target = gen_lowpart (V4DFmode, d->target);
47409 else
47410 d_copy.target = gen_reg_rtx (V4DFmode);
47411 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
47412 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
47413 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
47414 {
47415 if (!d->testing_p)
47416 emit_move_insn (d->target,
47417 gen_lowpart (V4DImode, d_copy.target));
47418 return true;
47419 }
47420 return false;
47421 }
47422
47423 if (d->testing_p)
47424 break;
47425
47426 t1 = gen_reg_rtx (V4DImode);
47427 t2 = gen_reg_rtx (V4DImode);
47428
47429 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
47430 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
47431 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
47432
47433 /* Now an vpunpck[lh]qdq will produce the result required. */
47434 if (odd)
47435 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
47436 else
47437 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
47438 emit_insn (t3);
47439 break;
47440
47441 case V8SImode:
47442 if (!TARGET_AVX2)
47443 {
47444 struct expand_vec_perm_d d_copy = *d;
47445 d_copy.vmode = V8SFmode;
47446 if (d->testing_p)
47447 d_copy.target = gen_lowpart (V8SFmode, d->target);
47448 else
47449 d_copy.target = gen_reg_rtx (V8SFmode);
47450 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
47451 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
47452 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
47453 {
47454 if (!d->testing_p)
47455 emit_move_insn (d->target,
47456 gen_lowpart (V8SImode, d_copy.target));
47457 return true;
47458 }
47459 return false;
47460 }
47461
47462 if (d->testing_p)
47463 break;
47464
47465 t1 = gen_reg_rtx (V8SImode);
47466 t2 = gen_reg_rtx (V8SImode);
47467 t3 = gen_reg_rtx (V4DImode);
47468 t4 = gen_reg_rtx (V4DImode);
47469 t5 = gen_reg_rtx (V4DImode);
47470
47471 /* Shuffle the lanes around into
47472 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
47473 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
47474 gen_lowpart (V4DImode, d->op1),
47475 GEN_INT (0x20)));
47476 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
47477 gen_lowpart (V4DImode, d->op1),
47478 GEN_INT (0x31)));
47479
47480 /* Swap the 2nd and 3rd position in each lane into
47481 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
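/* The pshufd immediate 2*4 + 1*16 + 3*64 == 0xd8 encodes the
   per-lane dword selector { 0, 2, 1, 3 }.  */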
47482 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
47483 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
47484 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
47485 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
47486
47487 /* Now an vpunpck[lh]qdq will produce
47488 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
47489 if (odd)
47490 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
47491 gen_lowpart (V4DImode, t2));
47492 else
47493 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
47494 gen_lowpart (V4DImode, t2));
47495 emit_insn (t3);
47496 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
47497 break;
47498
47499 default:
47500 gcc_unreachable ();
47501 }
47502
47503 return true;
47504 }
47505
47506 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
47507 extract-even and extract-odd permutations. */
47508
47509 static bool
47510 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
47511 {
47512 unsigned i, odd, nelt = d->nelt;
47513
47514 odd = d->perm[0];
47515 if (odd != 0 && odd != 1)
47516 return false;
47517
47518 for (i = 1; i < nelt; ++i)
47519 if (d->perm[i] != 2 * i + odd)
47520 return false;
47521
47522 return expand_vec_perm_even_odd_1 (d, odd);
47523 }
47524
47525 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
47526 permutations. We assume that expand_vec_perm_1 has already failed. */
47527
47528 static bool
47529 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
47530 {
47531 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
47532 enum machine_mode vmode = d->vmode;
47533 unsigned char perm2[4];
47534 rtx op0 = d->op0, dest;
47535 bool ok;
47536
47537 switch (vmode)
47538 {
47539 case V4DFmode:
47540 case V8SFmode:
47541 /* These are special-cased in sse.md so that we can optionally
47542 use the vbroadcast instruction. They expand to two insns
47543 if the input happens to be in a register. */
47544 gcc_unreachable ();
47545
47546 case V2DFmode:
47547 case V2DImode:
47548 case V4SFmode:
47549 case V4SImode:
47550 /* These are always implementable using standard shuffle patterns. */
47551 gcc_unreachable ();
47552
47553 case V8HImode:
47554 case V16QImode:
47555 /* These can be implemented via interleave. We save one insn by
47556 stopping once we have promoted to V4SImode and then using pshufd. */
47557 if (d->testing_p)
47558 return true;
47559 do
47560 {
47561 rtx dest;
47562 rtx (*gen) (rtx, rtx, rtx)
47563 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
47564 : gen_vec_interleave_lowv8hi;
47565
47566 if (elt >= nelt2)
47567 {
47568 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
47569 : gen_vec_interleave_highv8hi;
47570 elt -= nelt2;
47571 }
47572 nelt2 /= 2;
47573
47574 dest = gen_reg_rtx (vmode);
47575 emit_insn (gen (dest, op0, op0));
47576 vmode = get_mode_wider_vector (vmode);
47577 op0 = gen_lowpart (vmode, dest);
47578 }
47579 while (vmode != V4SImode);
47580
47581 memset (perm2, elt, 4);
47582 dest = gen_reg_rtx (V4SImode);
47583 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
47584 gcc_assert (ok);
47585 if (!d->testing_p)
47586 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
47587 return true;
47588
47589 case V32QImode:
47590 case V16HImode:
47591 case V8SImode:
47592 case V4DImode:
47593 /* For AVX2 broadcasts of the first element vpbroadcast* or
47594 vpermq should be used by expand_vec_perm_1. */
47595 gcc_assert (!TARGET_AVX2 || d->perm[0]);
47596 return false;
47597
47598 default:
47599 gcc_unreachable ();
47600 }
47601 }
47602
47603 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
47604 broadcast permutations. */
47605
47606 static bool
47607 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
47608 {
47609 unsigned i, elt, nelt = d->nelt;
47610
47611 if (!d->one_operand_p)
47612 return false;
47613
47614 elt = d->perm[0];
47615 for (i = 1; i < nelt; ++i)
47616 if (d->perm[i] != elt)
47617 return false;
47618
47619 return expand_vec_perm_broadcast_1 (d);
47620 }
47621
47622 /* Implement an arbitrary permutation of two V32QImode or V16HImode operands
47623 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
47624 all the shorter instruction sequences. */
47625
47626 static bool
47627 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
47628 {
47629 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
47630 unsigned int i, nelt, eltsz;
47631 bool used[4];
47632
47633 if (!TARGET_AVX2
47634 || d->one_operand_p
47635 || (d->vmode != V32QImode && d->vmode != V16HImode))
47636 return false;
47637
47638 if (d->testing_p)
47639 return true;
47640
47641 nelt = d->nelt;
47642 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47643
47644 /* Generate 4 permutation masks. If the required element is within
47645 the same lane, it is shuffled in. If the required element comes from
47646 the other lane, force a zero by setting bit 7 in the permutation mask.
47647 The cross-lane masks have non-negative entries whenever an element is
47648 requested from the other lane; such entries are also placed in the
47649 other lane, so that swapping the two V2TImode halves of those vpshufb
47650 results moves them into their proper positions. */
47651 m128 = GEN_INT (-128);
47652 for (i = 0; i < 32; ++i)
47653 {
47654 rperm[0][i] = m128;
47655 rperm[1][i] = m128;
47656 rperm[2][i] = m128;
47657 rperm[3][i] = m128;
47658 }
47659 used[0] = false;
47660 used[1] = false;
47661 used[2] = false;
47662 used[3] = false;
47663 for (i = 0; i < nelt; ++i)
47664 {
47665 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
47666 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
47667 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
47668
47669 for (j = 0; j < eltsz; ++j)
47670 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
47671 used[which] = true;
47672 }
47673
47674 for (i = 0; i < 2; ++i)
47675 {
47676 if (!used[2 * i + 1])
47677 {
47678 h[i] = NULL_RTX;
47679 continue;
47680 }
47681 vperm = gen_rtx_CONST_VECTOR (V32QImode,
47682 gen_rtvec_v (32, rperm[2 * i + 1]));
47683 vperm = force_reg (V32QImode, vperm);
47684 h[i] = gen_reg_rtx (V32QImode);
47685 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
47686 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
47687 }
47688
47689 /* Swap the 128-bit lanes of h[X]. */
47690 for (i = 0; i < 2; ++i)
47691 {
47692 if (h[i] == NULL_RTX)
47693 continue;
47694 op = gen_reg_rtx (V4DImode);
47695 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
47696 const2_rtx, GEN_INT (3), const0_rtx,
47697 const1_rtx));
47698 h[i] = gen_lowpart (V32QImode, op);
47699 }
47700
47701 for (i = 0; i < 2; ++i)
47702 {
47703 if (!used[2 * i])
47704 {
47705 l[i] = NULL_RTX;
47706 continue;
47707 }
47708 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
47709 vperm = force_reg (V32QImode, vperm);
47710 l[i] = gen_reg_rtx (V32QImode);
47711 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
47712 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
47713 }
47714
47715 for (i = 0; i < 2; ++i)
47716 {
47717 if (h[i] && l[i])
47718 {
47719 op = gen_reg_rtx (V32QImode);
47720 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
47721 l[i] = op;
47722 }
47723 else if (h[i])
47724 l[i] = h[i];
47725 }
47726
47727 gcc_assert (l[0] && l[1]);
47728 op = d->target;
47729 if (d->vmode != V32QImode)
47730 op = gen_reg_rtx (V32QImode);
47731 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
47732 if (op != d->target)
47733 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
47734 return true;
47735 }
47736
47737 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
47738 With all of the interface bits taken care of, perform the expansion
47739 in D and return true on success. */
47740
47741 static bool
47742 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
47743 {
47744 /* Try a single instruction expansion. */
47745 if (expand_vec_perm_1 (d))
47746 return true;
47747
47748 /* Try sequences of two instructions. */
47749
47750 if (expand_vec_perm_pshuflw_pshufhw (d))
47751 return true;
47752
47753 if (expand_vec_perm_palignr (d, false))
47754 return true;
47755
47756 if (expand_vec_perm_interleave2 (d))
47757 return true;
47758
47759 if (expand_vec_perm_broadcast (d))
47760 return true;
47761
47762 if (expand_vec_perm_vpermq_perm_1 (d))
47763 return true;
47764
47765 if (expand_vec_perm_vperm2f128 (d))
47766 return true;
47767
47768 if (expand_vec_perm_pblendv (d))
47769 return true;
47770
47771 /* Try sequences of three instructions. */
47772
47773 if (expand_vec_perm_2vperm2f128_vshuf (d))
47774 return true;
47775
47776 if (expand_vec_perm_pshufb2 (d))
47777 return true;
47778
47779 if (expand_vec_perm_interleave3 (d))
47780 return true;
47781
47782 if (expand_vec_perm_vperm2f128_vblend (d))
47783 return true;
47784
47785 /* Try sequences of four instructions. */
47786
47787 if (expand_vec_perm_vpshufb2_vpermq (d))
47788 return true;
47789
47790 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
47791 return true;
47792
47793 /* ??? Look for narrow permutations whose element orderings would
47794 allow the promotion to a wider mode. */
47795
47796 /* ??? Look for sequences of interleave or a wider permute that place
47797 the data into the correct lanes for a half-vector shuffle like
47798 pshuf[lh]w or vpermilps. */
47799
47800 /* ??? Look for sequences of interleave that produce the desired results.
47801 The combinatorics of punpck[lh] get pretty ugly... */
47802
47803 if (expand_vec_perm_even_odd (d))
47804 return true;
47805
47806 /* Even longer sequences. */
47807 if (expand_vec_perm_vpshufb4_vpermq2 (d))
47808 return true;
47809
47810 return false;
47811 }
47812
47813 /* If a permutation only uses one operand, make it clear. Returns true
47814 if the permutation references both operands. */
47815
47816 static bool
47817 canonicalize_perm (struct expand_vec_perm_d *d)
47818 {
47819 int i, which, nelt = d->nelt;
47820
47821 for (i = which = 0; i < nelt; ++i)
47822 which |= (d->perm[i] < nelt ? 1 : 2);
47823
47824 d->one_operand_p = true;
47825 switch (which)
47826 {
47827 default:
47828 gcc_unreachable ();
47829
47830 case 3:
47831 if (!rtx_equal_p (d->op0, d->op1))
47832 {
47833 d->one_operand_p = false;
47834 break;
47835 }
47836 /* The elements of PERM do not suggest that only the first operand
47837 is used, but both operands are identical. Allow easier matching
47838 of the permutation by folding the permutation into the single
47839 input vector. */
47840 /* FALLTHRU */
47841
47842 case 2:
47843 for (i = 0; i < nelt; ++i)
47844 d->perm[i] &= nelt - 1;
47845 d->op0 = d->op1;
47846 break;
47847
47848 case 1:
47849 d->op1 = d->op0;
47850 break;
47851 }
47852
47853 return (which == 3);
47854 }
47855
47856 bool
47857 ix86_expand_vec_perm_const (rtx operands[4])
47858 {
47859 struct expand_vec_perm_d d;
47860 unsigned char perm[MAX_VECT_LEN];
47861 int i, nelt;
47862 bool two_args;
47863 rtx sel;
47864
47865 d.target = operands[0];
47866 d.op0 = operands[1];
47867 d.op1 = operands[2];
47868 sel = operands[3];
47869
47870 d.vmode = GET_MODE (d.target);
47871 gcc_assert (VECTOR_MODE_P (d.vmode));
47872 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
47873 d.testing_p = false;
47874
47875 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
47876 gcc_assert (XVECLEN (sel, 0) == nelt);
47877 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
47878
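  /* Copy the selector into D.PERM, reducing each index modulo 2*NELT, and
     keep a second copy in PERM so the expansion can be retried below after
     canonicalize_perm has rewritten D.PERM.  */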
47879 for (i = 0; i < nelt; ++i)
47880 {
47881 rtx e = XVECEXP (sel, 0, i);
47882 int ei = INTVAL (e) & (2 * nelt - 1);
47883 d.perm[i] = ei;
47884 perm[i] = ei;
47885 }
47886
47887 two_args = canonicalize_perm (&d);
47888
47889 if (ix86_expand_vec_perm_const_1 (&d))
47890 return true;
47891
47892 /* If the selector says both arguments are needed, but the operands are the
47893 same, the above tried to expand with one_operand_p and a flattened selector.
47894 If that didn't work, retry without one_operand_p; we succeeded with that
47895 during testing. */
47896 if (two_args && d.one_operand_p)
47897 {
47898 d.one_operand_p = false;
47899 memcpy (d.perm, perm, sizeof (perm));
47900 return ix86_expand_vec_perm_const_1 (&d);
47901 }
47902
47903 return false;
47904 }
47905
47906 /* Implement targetm.vectorize.vec_perm_const_ok. */
47907
47908 static bool
47909 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
47910 const unsigned char *sel)
47911 {
47912 struct expand_vec_perm_d d;
47913 unsigned int i, nelt, which;
47914 bool ret;
47915
47916 d.vmode = vmode;
47917 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
47918 d.testing_p = true;
47919
47920 /* Given sufficient ISA support we can just return true here
47921 for selected vector modes. */
47922 switch (d.vmode)
47923 {
47924 case V16SFmode:
47925 case V16SImode:
47926 case V8DImode:
47927 case V8DFmode:
47928 if (TARGET_AVX512F)
47929 /* All implementable with a single vpermi2 insn. */
47930 return true;
47931 break;
47932 case V32HImode:
47933 if (TARGET_AVX512BW)
47934 /* All implementable with a single vpermi2 insn. */
47935 return true;
47936 break;
47937 case V8SImode:
47938 case V8SFmode:
47939 case V4DFmode:
47940 case V4DImode:
47941 if (TARGET_AVX512VL)
47942 /* All implementable with a single vpermi2 insn. */
47943 return true;
47944 break;
47945 case V16HImode:
47946 if (TARGET_AVX2)
47947 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
47948 return true;
47949 break;
47950 case V32QImode:
47951 if (TARGET_AVX2)
47952 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
47953 return true;
47954 break;
47955 case V4SImode:
47956 case V4SFmode:
47957 case V8HImode:
47958 case V16QImode:
47959 /* All implementable with a single vpperm insn. */
47960 if (TARGET_XOP)
47961 return true;
47962 /* All implementable with 2 pshufb + 1 ior. */
47963 if (TARGET_SSSE3)
47964 return true;
47965 break;
47966 case V2DImode:
47967 case V2DFmode:
47968 /* All implementable with shufpd or unpck[lh]pd. */
47969 return true;
47970 default:
47971 return false;
47972 }
47973
47974 /* Extract the values from the vector CST into the permutation
47975 array in D. */
47976 memcpy (d.perm, sel, nelt);
47977 for (i = which = 0; i < nelt; ++i)
47978 {
47979 unsigned char e = d.perm[i];
47980 gcc_assert (e < 2 * nelt);
47981 which |= (e < nelt ? 1 : 2);
47982 }
47983
47984 /* For all elements from second vector, fold the elements to first. */
47985 if (which == 2)
47986 for (i = 0; i < nelt; ++i)
47987 d.perm[i] -= nelt;
47988
47989 /* Check whether the mask can be applied to the vector type. */
47990 d.one_operand_p = (which != 3);
47991
47992 /* Implementable with shufps or pshufd. */
47993 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
47994 return true;
47995
47996 /* Otherwise we have to go through the motions and see if we can
47997 figure out how to generate the requested permutation. */
47998 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
47999 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
48000 if (!d.one_operand_p)
48001 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
48002
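  /* Emit into a scratch sequence that is immediately discarded; all we care
     about is whether the expansion succeeds.  */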
48003 start_sequence ();
48004 ret = ix86_expand_vec_perm_const_1 (&d);
48005 end_sequence ();
48006
48007 return ret;
48008 }
48009
48010 void
48011 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
48012 {
48013 struct expand_vec_perm_d d;
48014 unsigned i, nelt;
48015
48016 d.target = targ;
48017 d.op0 = op0;
48018 d.op1 = op1;
48019 d.vmode = GET_MODE (targ);
48020 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
48021 d.one_operand_p = false;
48022 d.testing_p = false;
48023
48024 for (i = 0; i < nelt; ++i)
48025 d.perm[i] = i * 2 + odd;
48026
48027 /* We'll either be able to implement the permutation directly... */
48028 if (expand_vec_perm_1 (&d))
48029 return;
48030
48031 /* ... or we use the special-case patterns. */
48032 expand_vec_perm_even_odd_1 (&d, odd);
48033 }
48034
48035 static void
48036 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
48037 {
48038 struct expand_vec_perm_d d;
48039 unsigned i, nelt, base;
48040 bool ok;
48041
48042 d.target = targ;
48043 d.op0 = op0;
48044 d.op1 = op1;
48045 d.vmode = GET_MODE (targ);
48046 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
48047 d.one_operand_p = false;
48048 d.testing_p = false;
48049
48050 base = high_p ? nelt / 2 : 0;
48051 for (i = 0; i < nelt / 2; ++i)
48052 {
48053 d.perm[i * 2] = i + base;
48054 d.perm[i * 2 + 1] = i + base + nelt;
48055 }
48056
48057 /* Note that for AVX this isn't one instruction. */
48058 ok = ix86_expand_vec_perm_const_1 (&d);
48059 gcc_assert (ok);
48060 }
48061
48062
48063 /* Expand a vector operation CODE for a V*QImode in terms of the
48064 same operation on V*HImode. */
48065
48066 void
48067 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
48068 {
48069 enum machine_mode qimode = GET_MODE (dest);
48070 enum machine_mode himode;
48071 rtx (*gen_il) (rtx, rtx, rtx);
48072 rtx (*gen_ih) (rtx, rtx, rtx);
48073 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
48074 struct expand_vec_perm_d d;
48075 bool ok, full_interleave;
48076 bool uns_p = false;
48077 int i;
48078
48079 switch (qimode)
48080 {
48081 case V16QImode:
48082 himode = V8HImode;
48083 gen_il = gen_vec_interleave_lowv16qi;
48084 gen_ih = gen_vec_interleave_highv16qi;
48085 break;
48086 case V32QImode:
48087 himode = V16HImode;
48088 gen_il = gen_avx2_interleave_lowv32qi;
48089 gen_ih = gen_avx2_interleave_highv32qi;
48090 break;
48091 case V64QImode:
48092 himode = V32HImode;
48093 gen_il = gen_avx512bw_interleave_lowv64qi;
48094 gen_ih = gen_avx512bw_interleave_highv64qi;
48095 break;
48096 default:
48097 gcc_unreachable ();
48098 }
48099
48100 op2_l = op2_h = op2;
48101 switch (code)
48102 {
48103 case MULT:
48104 /* Unpack data such that we've got a source byte in each low byte of
48105 each word. We don't care what goes into the high byte of each word.
48106 Rather than trying to get zero in there, it is most convenient to let
48107 it be a copy of the low byte. */
48108 op2_l = gen_reg_rtx (qimode);
48109 op2_h = gen_reg_rtx (qimode);
48110 emit_insn (gen_il (op2_l, op2, op2));
48111 emit_insn (gen_ih (op2_h, op2, op2));
48112 /* FALLTHRU */
48113
48114 op1_l = gen_reg_rtx (qimode);
48115 op1_h = gen_reg_rtx (qimode);
48116 emit_insn (gen_il (op1_l, op1, op1));
48117 emit_insn (gen_ih (op1_h, op1, op1));
48118 full_interleave = qimode == V16QImode;
48119 break;
48120
48121 case ASHIFT:
48122 case LSHIFTRT:
48123 uns_p = true;
48124 /* FALLTHRU */
48125 case ASHIFTRT:
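      /* Widen each byte to a word: zero extend for ASHIFT and LSHIFTRT,
	 sign extend for ASHIFTRT, then do the shift in HImode.  */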
48126 op1_l = gen_reg_rtx (himode);
48127 op1_h = gen_reg_rtx (himode);
48128 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
48129 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
48130 full_interleave = true;
48131 break;
48132 default:
48133 gcc_unreachable ();
48134 }
48135
48136 /* Perform the operation. */
48137 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
48138 1, OPTAB_DIRECT);
48139 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
48140 1, OPTAB_DIRECT);
48141 gcc_assert (res_l && res_h);
48142
48143 /* Merge the data back into the right place. */
48144 d.target = dest;
48145 d.op0 = gen_lowpart (qimode, res_l);
48146 d.op1 = gen_lowpart (qimode, res_h);
48147 d.vmode = qimode;
48148 d.nelt = GET_MODE_NUNITS (qimode);
48149 d.one_operand_p = false;
48150 d.testing_p = false;
48151
48152 if (full_interleave)
48153 {
48154 /* For SSE2, we used a full interleave, so the desired
48155 results are in the even elements. */
48156 for (i = 0; i < 64; ++i)
48157 d.perm[i] = i * 2;
48158 }
48159 else
48160 {
48161 /* For AVX, the interleave used above was not cross-lane. So we need
48162 an even-element extraction, but with the second and third quarters swapped.
48163 Happily, that is even one insn shorter than even extraction. */
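      /* (i & 24) identifies the quarter within a 32-element half: quarter 1
	 is redirected 16 elements forward and quarter 2 is redirected 16
	 elements back, which swaps the two middle quarters.  */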
48164 for (i = 0; i < 64; ++i)
48165 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
48166 }
48167
48168 ok = ix86_expand_vec_perm_const_1 (&d);
48169 gcc_assert (ok);
48170
48171 set_unique_reg_note (get_last_insn (), REG_EQUAL,
48172 gen_rtx_fmt_ee (code, qimode, op1, op2));
48173 }
48174
48175 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
48176 if op is CONST_VECTOR with all odd elements equal to their
48177 preceding element. */
48178
48179 static bool
48180 const_vector_equal_evenodd_p (rtx op)
48181 {
48182 enum machine_mode mode = GET_MODE (op);
48183 int i, nunits = GET_MODE_NUNITS (mode);
48184 if (GET_CODE (op) != CONST_VECTOR
48185 || nunits != CONST_VECTOR_NUNITS (op))
48186 return false;
48187 for (i = 0; i < nunits; i += 2)
48188 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
48189 return false;
48190 return true;
48191 }
48192
48193 void
48194 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
48195 bool uns_p, bool odd_p)
48196 {
48197 enum machine_mode mode = GET_MODE (op1);
48198 enum machine_mode wmode = GET_MODE (dest);
48199 rtx x;
48200 rtx orig_op1 = op1, orig_op2 = op2;
48201
48202 if (!nonimmediate_operand (op1, mode))
48203 op1 = force_reg (mode, op1);
48204 if (!nonimmediate_operand (op2, mode))
48205 op2 = force_reg (mode, op2);
48206
48207 /* We only play even/odd games with vectors of SImode. */
48208 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
48209
48210 /* If we're looking for the odd results, shift those members down to
48211 the even slots. For some cpus this is faster than a PSHUFD. */
48212 if (odd_p)
48213 {
48214 /* For XOP use vpmacsdqh, but only for smult, as it is only
48215 signed. */
48216 if (TARGET_XOP && mode == V4SImode && !uns_p)
48217 {
48218 x = force_reg (wmode, CONST0_RTX (wmode));
48219 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
48220 return;
48221 }
48222
48223 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
48224 if (!const_vector_equal_evenodd_p (orig_op1))
48225 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
48226 x, NULL, 1, OPTAB_DIRECT);
48227 if (!const_vector_equal_evenodd_p (orig_op2))
48228 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
48229 x, NULL, 1, OPTAB_DIRECT);
48230 op1 = gen_lowpart (mode, op1);
48231 op2 = gen_lowpart (mode, op2);
48232 }
48233
48234 if (mode == V16SImode)
48235 {
48236 if (uns_p)
48237 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
48238 else
48239 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
48240 }
48241 else if (mode == V8SImode)
48242 {
48243 if (uns_p)
48244 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
48245 else
48246 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
48247 }
48248 else if (uns_p)
48249 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
48250 else if (TARGET_SSE4_1)
48251 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
48252 else
48253 {
48254 rtx s1, s2, t0, t1, t2;
48255
48256 /* The easiest way to implement this without PMULDQ is to go through
48257 the motions as if we are performing a full 64-bit multiply, except
48258 that we need to do less shuffling of the elements. */
48259
48260 /* Compute the sign-extension, aka highparts, of the two operands. */
48261 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
48262 op1, pc_rtx, pc_rtx);
48263 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
48264 op2, pc_rtx, pc_rtx);
48265
48266 /* Multiply LO(A) * HI(B), and vice-versa. */
48267 t1 = gen_reg_rtx (wmode);
48268 t2 = gen_reg_rtx (wmode);
48269 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
48270 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
48271
48272 /* Multiply LO(A) * LO(B). */
48273 t0 = gen_reg_rtx (wmode);
48274 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
48275
48276 /* Combine and shift the highparts into place. */
48277 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
48278 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
48279 1, OPTAB_DIRECT);
48280
48281 /* Combine high and low parts. */
48282 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
48283 return;
48284 }
48285 emit_insn (x);
48286 }
48287
48288 void
48289 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
48290 bool uns_p, bool high_p)
48291 {
48292 enum machine_mode wmode = GET_MODE (dest);
48293 enum machine_mode mode = GET_MODE (op1);
48294 rtx t1, t2, t3, t4, mask;
48295
48296 switch (mode)
48297 {
48298 case V4SImode:
48299 t1 = gen_reg_rtx (mode);
48300 t2 = gen_reg_rtx (mode);
48301 if (TARGET_XOP && !uns_p)
48302 {
48303 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
48304 shuffle the elements once so that all elements are in the right
48305 place for immediate use: { A C B D }. */
48306 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
48307 const1_rtx, GEN_INT (3)));
48308 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
48309 const1_rtx, GEN_INT (3)));
48310 }
48311 else
48312 {
48313 /* Put the elements into place for the multiply. */
48314 ix86_expand_vec_interleave (t1, op1, op1, high_p);
48315 ix86_expand_vec_interleave (t2, op2, op2, high_p);
48316 high_p = false;
48317 }
48318 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
48319 break;
48320
48321 case V8SImode:
48322 /* Shuffle the elements between the lanes. After this we
48323 have { A B E F | C D G H } for each operand. */
48324 t1 = gen_reg_rtx (V4DImode);
48325 t2 = gen_reg_rtx (V4DImode);
48326 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
48327 const0_rtx, const2_rtx,
48328 const1_rtx, GEN_INT (3)));
48329 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
48330 const0_rtx, const2_rtx,
48331 const1_rtx, GEN_INT (3)));
48332
48333 /* Shuffle the elements within the lanes. After this we
48334 have { A A B B | C C D D } or { E E F F | G G H H }. */
48335 t3 = gen_reg_rtx (V8SImode);
48336 t4 = gen_reg_rtx (V8SImode);
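      /* The pshufd immediate duplicates a pair of elements within each lane:
	 0xfa selects { 2, 2, 3, 3 } for the high half, 0x50 selects
	 { 0, 0, 1, 1 } for the low half.  */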
48337 mask = GEN_INT (high_p
48338 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
48339 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
48340 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
48341 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
48342
48343 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
48344 break;
48345
48346 case V8HImode:
48347 case V16HImode:
48348 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
48349 uns_p, OPTAB_DIRECT);
48350 t2 = expand_binop (mode,
48351 uns_p ? umul_highpart_optab : smul_highpart_optab,
48352 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
48353 gcc_assert (t1 && t2);
48354
48355 t3 = gen_reg_rtx (mode);
48356 ix86_expand_vec_interleave (t3, t1, t2, high_p);
48357 emit_move_insn (dest, gen_lowpart (wmode, t3));
48358 break;
48359
48360 case V16QImode:
48361 case V32QImode:
48362 case V32HImode:
48363 case V16SImode:
48364 case V64QImode:
48365 t1 = gen_reg_rtx (wmode);
48366 t2 = gen_reg_rtx (wmode);
48367 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
48368 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
48369
48370 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
48371 break;
48372
48373 default:
48374 gcc_unreachable ();
48375 }
48376 }
48377
48378 void
48379 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
48380 {
48381 rtx res_1, res_2, res_3, res_4;
48382
48383 res_1 = gen_reg_rtx (V4SImode);
48384 res_2 = gen_reg_rtx (V4SImode);
48385 res_3 = gen_reg_rtx (V2DImode);
48386 res_4 = gen_reg_rtx (V2DImode);
48387 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
48388 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
48389
48390 /* Move the results in element 2 down to element 1; we don't care
48391 what goes in elements 2 and 3. Then we can merge the parts
48392 back together with an interleave.
48393
48394 Note that two other sequences were tried:
48395 (1) Use interleaves at the start instead of psrldq, which allows
48396 us to use a single shufps to merge things back at the end.
48397 (2) Use shufps here to combine the two vectors, then pshufd to
48398 put the elements in the correct order.
48399 In both cases the cost of the reformatting stall was too high
48400 and the overall sequence slower. */
48401
48402 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
48403 const0_rtx, const2_rtx,
48404 const0_rtx, const0_rtx));
48405 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
48406 const0_rtx, const2_rtx,
48407 const0_rtx, const0_rtx));
48408 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
48409
48410 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
48411 }
48412
48413 void
48414 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
48415 {
48416 enum machine_mode mode = GET_MODE (op0);
48417 rtx t1, t2, t3, t4, t5, t6;
48418
48419 if (TARGET_AVX512DQ && mode == V8DImode)
48420 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
48421 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
48422 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
48423 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
48424 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
48425 else if (TARGET_XOP && mode == V2DImode)
48426 {
48427 /* op1: A,B,C,D, op2: E,F,G,H */
48428 op1 = gen_lowpart (V4SImode, op1);
48429 op2 = gen_lowpart (V4SImode, op2);
48430
48431 t1 = gen_reg_rtx (V4SImode);
48432 t2 = gen_reg_rtx (V4SImode);
48433 t3 = gen_reg_rtx (V2DImode);
48434 t4 = gen_reg_rtx (V2DImode);
48435
48436 /* t1: B,A,D,C */
48437 emit_insn (gen_sse2_pshufd_1 (t1, op1,
48438 GEN_INT (1),
48439 GEN_INT (0),
48440 GEN_INT (3),
48441 GEN_INT (2)));
48442
48443 /* t2: (B*E),(A*F),(D*G),(C*H) */
48444 emit_insn (gen_mulv4si3 (t2, t1, op2));
48445
48446 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
48447 emit_insn (gen_xop_phadddq (t3, t2));
48448
48449 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
48450 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
48451
48452 /* Multiply lower parts and add all. */
48453 t5 = gen_reg_rtx (V2DImode);
48454 emit_insn (gen_vec_widen_umult_even_v4si (t5,
48455 gen_lowpart (V4SImode, op1),
48456 gen_lowpart (V4SImode, op2)));
48457 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
48458
48459 }
48460 else
48461 {
48462 enum machine_mode nmode;
48463 rtx (*umul) (rtx, rtx, rtx);
48464
48465 if (mode == V2DImode)
48466 {
48467 umul = gen_vec_widen_umult_even_v4si;
48468 nmode = V4SImode;
48469 }
48470 else if (mode == V4DImode)
48471 {
48472 umul = gen_vec_widen_umult_even_v8si;
48473 nmode = V8SImode;
48474 }
48475 else if (mode == V8DImode)
48476 {
48477 umul = gen_vec_widen_umult_even_v16si;
48478 nmode = V16SImode;
48479 }
48480 else
48481 gcc_unreachable ();
48482
48483
48484 /* Multiply low parts. */
48485 t1 = gen_reg_rtx (mode);
48486 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
48487
48488 /* Shift input vectors right 32 bits so we can multiply high parts. */
48489 t6 = GEN_INT (32);
48490 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
48491 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
48492
48493 /* Multiply high parts by low parts. */
48494 t4 = gen_reg_rtx (mode);
48495 t5 = gen_reg_rtx (mode);
48496 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
48497 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
48498
48499 /* Combine and shift the highparts back. */
48500 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
48501 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
48502
48503 /* Combine high and low parts. */
48504 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
48505 }
48506
48507 set_unique_reg_note (get_last_insn (), REG_EQUAL,
48508 gen_rtx_MULT (mode, op1, op2));
48509 }
48510
48511 /* Calculate integer abs() using only SSE2 instructions. */
48512
48513 void
48514 ix86_expand_sse2_abs (rtx target, rtx input)
48515 {
48516 enum machine_mode mode = GET_MODE (target);
48517 rtx tmp0, tmp1, x;
48518
48519 switch (mode)
48520 {
48521 /* For 32-bit signed integer X, the best way to calculate the absolute
48522 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
48523 case V4SImode:
48524 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
48525 GEN_INT (GET_MODE_BITSIZE
48526 (GET_MODE_INNER (mode)) - 1),
48527 NULL, 0, OPTAB_DIRECT);
48528 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
48529 NULL, 0, OPTAB_DIRECT);
48530 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
48531 target, 0, OPTAB_DIRECT);
48532 break;
48533
48534 /* For 16-bit signed integer X, the best way to calculate the absolute
48535 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
48536 case V8HImode:
48537 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
48538
48539 x = expand_simple_binop (mode, SMAX, tmp0, input,
48540 target, 0, OPTAB_DIRECT);
48541 break;
48542
48543 /* For 8-bit signed integer X, the best way to calculate the absolute
48544 value of X is min ((unsigned char) X, (unsigned char) (-X)),
48545 as SSE2 provides the PMINUB insn. */
48546 case V16QImode:
48547 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
48548
48549 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
48550 target, 0, OPTAB_DIRECT);
48551 break;
48552
48553 default:
48554 gcc_unreachable ();
48555 }
48556
48557 if (x != target)
48558 emit_move_insn (target, x);
48559 }
48560
48561 /* Expand an insert into a vector register through pinsr insn.
48562 Return true if successful. */
48563
48564 bool
48565 ix86_expand_pinsr (rtx *operands)
48566 {
48567 rtx dst = operands[0];
48568 rtx src = operands[3];
48569
48570 unsigned int size = INTVAL (operands[1]);
48571 unsigned int pos = INTVAL (operands[2]);
48572
48573 if (GET_CODE (dst) == SUBREG)
48574 {
48575 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
48576 dst = SUBREG_REG (dst);
48577 }
48578
48579 if (GET_CODE (src) == SUBREG)
48580 src = SUBREG_REG (src);
48581
48582 switch (GET_MODE (dst))
48583 {
48584 case V16QImode:
48585 case V8HImode:
48586 case V4SImode:
48587 case V2DImode:
48588 {
48589 enum machine_mode srcmode, dstmode;
48590 rtx (*pinsr)(rtx, rtx, rtx, rtx);
48591
48592 srcmode = mode_for_size (size, MODE_INT, 0);
48593
48594 switch (srcmode)
48595 {
48596 case QImode:
48597 if (!TARGET_SSE4_1)
48598 return false;
48599 dstmode = V16QImode;
48600 pinsr = gen_sse4_1_pinsrb;
48601 break;
48602
48603 case HImode:
48604 if (!TARGET_SSE2)
48605 return false;
48606 dstmode = V8HImode;
48607 pinsr = gen_sse2_pinsrw;
48608 break;
48609
48610 case SImode:
48611 if (!TARGET_SSE4_1)
48612 return false;
48613 dstmode = V4SImode;
48614 pinsr = gen_sse4_1_pinsrd;
48615 break;
48616
48617 case DImode:
48618 gcc_assert (TARGET_64BIT);
48619 if (!TARGET_SSE4_1)
48620 return false;
48621 dstmode = V2DImode;
48622 pinsr = gen_sse4_1_pinsrq;
48623 break;
48624
48625 default:
48626 return false;
48627 }
48628
48629 rtx d = dst;
48630 if (GET_MODE (dst) != dstmode)
48631 d = gen_reg_rtx (dstmode);
48632 src = gen_lowpart (srcmode, src);
48633
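	/* POS and SIZE are in bits; the quotient below is the element index,
	   which the pinsr expander encodes as a one-hot immediate.  */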
48634 pos /= size;
48635
48636 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
48637 GEN_INT (1 << pos)));
48638 if (d != dst)
48639 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
48640 return true;
48641 }
48642
48643 default:
48644 return false;
48645 }
48646 }
48647 \f
48648 /* This function returns the calling ABI specific va_list type node
48649 appropriate for FNDECL. */
48650
48651 static tree
48652 ix86_fn_abi_va_list (tree fndecl)
48653 {
48654 if (!TARGET_64BIT)
48655 return va_list_type_node;
48656 gcc_assert (fndecl != NULL_TREE);
48657
48658 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
48659 return ms_va_list_type_node;
48660 else
48661 return sysv_va_list_type_node;
48662 }
48663
48664 /* Returns the canonical va_list type specified by TYPE. If there
48665 is no valid TYPE provided, it returns NULL_TREE. */
48666
48667 static tree
48668 ix86_canonical_va_list_type (tree type)
48669 {
48670 tree wtype, htype;
48671
48672 /* Resolve references and pointers to va_list type. */
48673 if (TREE_CODE (type) == MEM_REF)
48674 type = TREE_TYPE (type);
48675 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
48676 type = TREE_TYPE (type);
48677 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
48678 type = TREE_TYPE (type);
48679
48680 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
48681 {
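      /* Compare TYPE against the generic __builtin_va_list type, then the
	 SYSV and MS variants, returning whichever one matches.  */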
48682 wtype = va_list_type_node;
48683 gcc_assert (wtype != NULL_TREE);
48684 htype = type;
48685 if (TREE_CODE (wtype) == ARRAY_TYPE)
48686 {
48687 /* If va_list is an array type, the argument may have decayed
48688 to a pointer type, e.g. by being passed to another function.
48689 In that case, unwrap both types so that we can compare the
48690 underlying records. */
48691 if (TREE_CODE (htype) == ARRAY_TYPE
48692 || POINTER_TYPE_P (htype))
48693 {
48694 wtype = TREE_TYPE (wtype);
48695 htype = TREE_TYPE (htype);
48696 }
48697 }
48698 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
48699 return va_list_type_node;
48700 wtype = sysv_va_list_type_node;
48701 gcc_assert (wtype != NULL_TREE);
48702 htype = type;
48703 if (TREE_CODE (wtype) == ARRAY_TYPE)
48704 {
48705 /* If va_list is an array type, the argument may have decayed
48706 to a pointer type, e.g. by being passed to another function.
48707 In that case, unwrap both types so that we can compare the
48708 underlying records. */
48709 if (TREE_CODE (htype) == ARRAY_TYPE
48710 || POINTER_TYPE_P (htype))
48711 {
48712 wtype = TREE_TYPE (wtype);
48713 htype = TREE_TYPE (htype);
48714 }
48715 }
48716 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
48717 return sysv_va_list_type_node;
48718 wtype = ms_va_list_type_node;
48719 gcc_assert (wtype != NULL_TREE);
48720 htype = type;
48721 if (TREE_CODE (wtype) == ARRAY_TYPE)
48722 {
48723 /* If va_list is an array type, the argument may have decayed
48724 to a pointer type, e.g. by being passed to another function.
48725 In that case, unwrap both types so that we can compare the
48726 underlying records. */
48727 if (TREE_CODE (htype) == ARRAY_TYPE
48728 || POINTER_TYPE_P (htype))
48729 {
48730 wtype = TREE_TYPE (wtype);
48731 htype = TREE_TYPE (htype);
48732 }
48733 }
48734 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
48735 return ms_va_list_type_node;
48736 return NULL_TREE;
48737 }
48738 return std_canonical_va_list_type (type);
48739 }
48740
48741 /* Iterate through the target-specific builtin types for va_list.
48742 IDX denotes the iterator, *PTREE is set to the result type of
48743 the va_list builtin, and *PNAME to its internal type.
48744 Returns zero if there is no element for this index, otherwise
48745 IDX should be increased upon the next call.
48746 Note, do not iterate a base builtin's name like __builtin_va_list.
48747 Used from c_common_nodes_and_builtins. */
48748
48749 static int
48750 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
48751 {
48752 if (TARGET_64BIT)
48753 {
48754 switch (idx)
48755 {
48756 default:
48757 break;
48758
48759 case 0:
48760 *ptree = ms_va_list_type_node;
48761 *pname = "__builtin_ms_va_list";
48762 return 1;
48763
48764 case 1:
48765 *ptree = sysv_va_list_type_node;
48766 *pname = "__builtin_sysv_va_list";
48767 return 1;
48768 }
48769 }
48770
48771 return 0;
48772 }
48773
48774 #undef TARGET_SCHED_DISPATCH
48775 #define TARGET_SCHED_DISPATCH has_dispatch
48776 #undef TARGET_SCHED_DISPATCH_DO
48777 #define TARGET_SCHED_DISPATCH_DO do_dispatch
48778 #undef TARGET_SCHED_REASSOCIATION_WIDTH
48779 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
48780 #undef TARGET_SCHED_REORDER
48781 #define TARGET_SCHED_REORDER ix86_sched_reorder
48782 #undef TARGET_SCHED_ADJUST_PRIORITY
48783 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
48784 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
48785 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
48786 ix86_dependencies_evaluation_hook
48787
48788 /* The size of the dispatch window is the total number of bytes of
48789 object code allowed in a window. */
48790 #define DISPATCH_WINDOW_SIZE 16
48791
48792 /* Number of dispatch windows considered for scheduling. */
48793 #define MAX_DISPATCH_WINDOWS 3
48794
48795 /* Maximum number of instructions in a window. */
48796 #define MAX_INSN 4
48797
48798 /* Maximum number of immediate operands in a window. */
48799 #define MAX_IMM 4
48800
48801 /* Maximum number of immediate bits allowed in a window. */
48802 #define MAX_IMM_SIZE 128
48803
48804 /* Maximum number of 32 bit immediates allowed in a window. */
48805 #define MAX_IMM_32 4
48806
48807 /* Maximum number of 64 bit immediates allowed in a window. */
48808 #define MAX_IMM_64 2
48809
48810 /* Maximum total of loads or prefetches allowed in a window. */
48811 #define MAX_LOAD 2
48812
48813 /* Maximum total of stores allowed in a window. */
48814 #define MAX_STORE 1
48815
48816 #undef BIG
48817 #define BIG 100
48818
48819
48820 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
48821 enum dispatch_group {
48822 disp_no_group = 0,
48823 disp_load,
48824 disp_store,
48825 disp_load_store,
48826 disp_prefetch,
48827 disp_imm,
48828 disp_imm_32,
48829 disp_imm_64,
48830 disp_branch,
48831 disp_cmp,
48832 disp_jcc,
48833 disp_last
48834 };
48835
48836 /* Number of allowable groups in a dispatch window. It is an array
48837 indexed by dispatch_group enum. 100 is used as a big number,
48838 because the number of these kinds of operations does not have any
48839 effect in a dispatch window, but we need them for other reasons in
48840 the table. */
48841 static unsigned int num_allowable_groups[disp_last] = {
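/* no_group, load, store, load_store, prefetch, imm, imm_32, imm_64,
   branch, cmp, jcc  */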
48842 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
48843 };
48844
48845 char group_name[disp_last + 1][16] = {
48846 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
48847 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
48848 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
48849 };
48850
48851 /* Instruction path. */
48852 enum insn_path {
48853 no_path = 0,
48854 path_single, /* Single micro op. */
48855 path_double, /* Double micro op. */
48856 path_multi, /* Instructions with more than 2 micro ops. */
48857 last_path
48858 };
48859
48860 /* sched_insn_info defines a window to the instructions scheduled in
48861 the basic block. It contains a pointer to the insn_info table and
48862 the instruction scheduled.
48863
48864 Windows are allocated for each basic block and are linked
48865 together. */
48866 typedef struct sched_insn_info_s {
48867 rtx insn;
48868 enum dispatch_group group;
48869 enum insn_path path;
48870 int byte_len;
48871 int imm_bytes;
48872 } sched_insn_info;
48873
48874 /* Linked list of dispatch windows. This is a two-way list of
48875 dispatch windows of a basic block. It contains information about
48876 the number of uops in the window and the total number of
48877 instructions and of bytes in the object code for this dispatch
48878 window. */
48879 typedef struct dispatch_windows_s {
48880 int num_insn; /* Number of insn in the window. */
48881 int num_uops; /* Number of uops in the window. */
48882 int window_size; /* Number of bytes in the window. */
48883 int window_num; /* Window number, either 0 or 1. */
48884 int num_imm; /* Number of immediates in an insn. */
48885 int num_imm_32; /* Number of 32 bit immediates in an insn. */
48886 int num_imm_64; /* Number of 64 bit immediates in an insn. */
48887 int imm_size; /* Total immediates in the window. */
48888 int num_loads; /* Total memory loads in the window. */
48889 int num_stores; /* Total memory stores in the window. */
48890 int violation; /* Violation exists in window. */
48891 sched_insn_info *window; /* Pointer to the window. */
48892 struct dispatch_windows_s *next;
48893 struct dispatch_windows_s *prev;
48894 } dispatch_windows;
48895
48896 /* Immediate values used in an insn. */
48897 typedef struct imm_info_s
48898 {
48899 int imm;
48900 int imm32;
48901 int imm64;
48902 } imm_info;
48903
48904 static dispatch_windows *dispatch_window_list;
48905 static dispatch_windows *dispatch_window_list1;
48906
48907 /* Get the memory-related dispatch group of INSN. */
48908
48909 static enum dispatch_group
48910 get_mem_group (rtx_insn *insn)
48911 {
48912 enum attr_memory memory;
48913
48914 if (INSN_CODE (insn) < 0)
48915 return disp_no_group;
48916 memory = get_attr_memory (insn);
48917 if (memory == MEMORY_STORE)
48918 return disp_store;
48919
48920 if (memory == MEMORY_LOAD)
48921 return disp_load;
48922
48923 if (memory == MEMORY_BOTH)
48924 return disp_load_store;
48925
48926 return disp_no_group;
48927 }
48928
48929 /* Return true if insn is a compare instruction. */
48930
48931 static bool
48932 is_cmp (rtx_insn *insn)
48933 {
48934 enum attr_type type;
48935
48936 type = get_attr_type (insn);
48937 return (type == TYPE_TEST
48938 || type == TYPE_ICMP
48939 || type == TYPE_FCMP
48940 || GET_CODE (PATTERN (insn)) == COMPARE);
48941 }
48942
48943 /* Return true if a dispatch violation was encountered. */
48944
48945 static bool
48946 dispatch_violation (void)
48947 {
48948 if (dispatch_window_list->next)
48949 return dispatch_window_list->next->violation;
48950 return dispatch_window_list->violation;
48951 }
48952
48953 /* Return true if insn is a branch instruction. */
48954
48955 static bool
48956 is_branch (rtx insn)
48957 {
48958 return (CALL_P (insn) || JUMP_P (insn));
48959 }
48960
48961 /* Return true if insn is a prefetch instruction. */
48962
48963 static bool
48964 is_prefetch (rtx insn)
48965 {
48966 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
48967 }
48968
48969 /* This function initializes a dispatch window and the list container holding a
48970 pointer to the window. */
48971
48972 static void
48973 init_window (int window_num)
48974 {
48975 int i;
48976 dispatch_windows *new_list;
48977
48978 if (window_num == 0)
48979 new_list = dispatch_window_list;
48980 else
48981 new_list = dispatch_window_list1;
48982
48983 new_list->num_insn = 0;
48984 new_list->num_uops = 0;
48985 new_list->window_size = 0;
48986 new_list->next = NULL;
48987 new_list->prev = NULL;
48988 new_list->window_num = window_num;
48989 new_list->num_imm = 0;
48990 new_list->num_imm_32 = 0;
48991 new_list->num_imm_64 = 0;
48992 new_list->imm_size = 0;
48993 new_list->num_loads = 0;
48994 new_list->num_stores = 0;
48995 new_list->violation = false;
48996
48997 for (i = 0; i < MAX_INSN; i++)
48998 {
48999 new_list->window[i].insn = NULL;
49000 new_list->window[i].group = disp_no_group;
49001 new_list->window[i].path = no_path;
49002 new_list->window[i].byte_len = 0;
49003 new_list->window[i].imm_bytes = 0;
49004 }
49005 return;
49006 }
49007
49008 /* This function allocates and initializes a dispatch window and the
49009 list container holding a pointer to the window. */
49010
49011 static dispatch_windows *
49012 allocate_window (void)
49013 {
49014 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
49015 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
49016
49017 return new_list;
49018 }
49019
49020 /* This routine initializes the dispatch scheduling information. It
49021 initiates building dispatch scheduler tables and constructs the
49022 first dispatch window. */
49023
49024 static void
49025 init_dispatch_sched (void)
49026 {
49027 /* Allocate a dispatch list and a window. */
49028 dispatch_window_list = allocate_window ();
49029 dispatch_window_list1 = allocate_window ();
49030 init_window (0);
49031 init_window (1);
49032 }
49033
49034 /* This function returns true if a branch is detected. End of a basic block
49035 does not have to be a branch, but here we assume only branches end a
49036 window. */
49037
49038 static bool
49039 is_end_basic_block (enum dispatch_group group)
49040 {
49041 return group == disp_branch;
49042 }
49043
49044 /* This function is called when the end of window processing is reached. */
49045
49046 static void
49047 process_end_window (void)
49048 {
49049 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
49050 if (dispatch_window_list->next)
49051 {
49052 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
49053 gcc_assert (dispatch_window_list->window_size
49054 + dispatch_window_list1->window_size <= 48);
49055 init_window (1);
49056 }
49057 init_window (0);
49058 }
49059
49060 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
49061 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
49062 for 48 bytes of instructions. Note that these windows are not dispatch
49063 windows whose sizes are DISPATCH_WINDOW_SIZE. */
49064
49065 static dispatch_windows *
49066 allocate_next_window (int window_num)
49067 {
49068 if (window_num == 0)
49069 {
49070 if (dispatch_window_list->next)
49071 init_window (1);
49072 init_window (0);
49073 return dispatch_window_list;
49074 }
49075
49076 dispatch_window_list->next = dispatch_window_list1;
49077 dispatch_window_list1->prev = dispatch_window_list;
49078
49079 return dispatch_window_list1;
49080 }
49081
49082 /* Count the immediate operands of IN_RTX, recording the totals in IMM_VALUES. */
49083
49084 static void
49085 find_constant (rtx in_rtx, imm_info *imm_values)
49086 {
49087 if (INSN_P (in_rtx))
49088 in_rtx = PATTERN (in_rtx);
49089 subrtx_iterator::array_type array;
49090 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
49091 if (const_rtx x = *iter)
49092 switch (GET_CODE (x))
49093 {
49094 case CONST:
49095 case SYMBOL_REF:
49096 case CONST_INT:
49097 (imm_values->imm)++;
49098 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
49099 (imm_values->imm32)++;
49100 else
49101 (imm_values->imm64)++;
49102 break;
49103
49104 case CONST_DOUBLE:
49105 (imm_values->imm)++;
49106 (imm_values->imm64)++;
49107 break;
49108
49109 case CODE_LABEL:
49110 if (LABEL_KIND (x) == LABEL_NORMAL)
49111 {
49112 (imm_values->imm)++;
49113 (imm_values->imm32)++;
49114 }
49115 break;
49116
49117 default:
49118 break;
49119 }
49120 }
49121
49122 /* Return total size of immediate operands of an instruction along with number
49123 of corresponding immediate operands. It initializes its parameters to zero
49124 before calling FIND_CONSTANT.
49125 INSN is the input instruction. IMM is the total of immediates.
49126 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
49127 bit immediates. */
49128
49129 static int
49130 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
49131 {
49132 imm_info imm_values = {0, 0, 0};
49133
49134 find_constant (insn, &imm_values);
49135 *imm = imm_values.imm;
49136 *imm32 = imm_values.imm32;
49137 *imm64 = imm_values.imm64;
49138 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
49139 }
49140
49141 /* This function indicates whether INSN has any immediate operands. */
49143
49144 static bool
49145 has_immediate (rtx insn)
49146 {
49147 int num_imm_operand;
49148 int num_imm32_operand;
49149 int num_imm64_operand;
49150
49151 if (insn)
49152 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
49153 &num_imm64_operand);
49154 return false;
49155 }
49156
49157 /* Return the decode path (single, double or multi uop) for INSN. */
49158
49159 static enum insn_path
49160 get_insn_path (rtx_insn *insn)
49161 {
49162 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
49163
49164 if ((int)path == 0)
49165 return path_single;
49166
49167 if ((int)path == 1)
49168 return path_double;
49169
49170 return path_multi;
49171 }
49172
49173 /* Return insn dispatch group. */
49174
49175 static enum dispatch_group
49176 get_insn_group (rtx_insn *insn)
49177 {
49178 enum dispatch_group group = get_mem_group (insn);
49179 if (group)
49180 return group;
49181
49182 if (is_branch (insn))
49183 return disp_branch;
49184
49185 if (is_cmp (insn))
49186 return disp_cmp;
49187
49188 if (has_immediate (insn))
49189 return disp_imm;
49190
49191 if (is_prefetch (insn))
49192 return disp_prefetch;
49193
49194 return disp_no_group;
49195 }
49196
49197 /* Count number of GROUP restricted instructions in a dispatch
49198 window WINDOW_LIST. */
49199
49200 static int
49201 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
49202 {
49203 enum dispatch_group group = get_insn_group (insn);
49204 int imm_size;
49205 int num_imm_operand;
49206 int num_imm32_operand;
49207 int num_imm64_operand;
49208
49209 if (group == disp_no_group)
49210 return 0;
49211
49212 if (group == disp_imm)
49213 {
49214 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
49215 &num_imm64_operand);
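      /* Reject the insn (count it as BIG) if its immediates would overflow
	 any per-window limit: total immediate bytes, immediate count, or the
	 32-bit/64-bit slot budget, where a 64-bit immediate consumes two
	 32-bit slots.  */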
49216 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
49217 || num_imm_operand + window_list->num_imm > MAX_IMM
49218 || (num_imm32_operand > 0
49219 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
49220 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
49221 || (num_imm64_operand > 0
49222 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
49223 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
49224 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
49225 && num_imm64_operand > 0
49226 && ((window_list->num_imm_64 > 0
49227 && window_list->num_insn >= 2)
49228 || window_list->num_insn >= 3)))
49229 return BIG;
49230
49231 return 1;
49232 }
49233
49234 if ((group == disp_load_store
49235 && (window_list->num_loads >= MAX_LOAD
49236 || window_list->num_stores >= MAX_STORE))
49237 || ((group == disp_load
49238 || group == disp_prefetch)
49239 && window_list->num_loads >= MAX_LOAD)
49240 || (group == disp_store
49241 && window_list->num_stores >= MAX_STORE))
49242 return BIG;
49243
49244 return 1;
49245 }
49246
49247 /* This function returns true if insn satisfies dispatch rules on the
49248 last window scheduled. */
49249
49250 static bool
49251 fits_dispatch_window (rtx_insn *insn)
49252 {
49253 dispatch_windows *window_list = dispatch_window_list;
49254 dispatch_windows *window_list_next = dispatch_window_list->next;
49255 unsigned int num_restrict;
49256 enum dispatch_group group = get_insn_group (insn);
49257 enum insn_path path = get_insn_path (insn);
49258 int sum;
49259
49260 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
49261 instructions should be given the lowest priority in the
49262 scheduling process in the Haifa scheduler to make sure they will be
49263 scheduled in the same dispatch window as the reference to them. */
49264 if (group == disp_jcc || group == disp_cmp)
49265 return false;
49266
49267 /* Check nonrestricted. */
49268 if (group == disp_no_group || group == disp_branch)
49269 return true;
49270
49271 /* Get last dispatch window. */
49272 if (window_list_next)
49273 window_list = window_list_next;
49274
49275 if (window_list->window_num == 1)
49276 {
49277 sum = window_list->prev->window_size + window_list->window_size;
49278
49279 if (sum == 32
49280 || (min_insn_size (insn) + sum) >= 48)
49281 /* Window 1 is full. Go for next window. */
49282 return true;
49283 }
49284
49285 num_restrict = count_num_restricted (insn, window_list);
49286
49287 if (num_restrict > num_allowable_groups[group])
49288 return false;
49289
49290 /* See if it fits in the first window. */
49291 if (window_list->window_num == 0)
49292 {
49293 /* The first window should have only single and double path
49294 uops. */
49295 if (path == path_double
49296 && (window_list->num_uops + 2) > MAX_INSN)
49297 return false;
49298 else if (path != path_single)
49299 return false;
49300 }
49301 return true;
49302 }
49303
49304 /* Add an instruction INSN with NUM_UOPS micro-operations to the
49305 dispatch window WINDOW_LIST. */
49306
49307 static void
49308 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
49309 {
49310 int byte_len = min_insn_size (insn);
49311 int num_insn = window_list->num_insn;
49312 int imm_size;
49313 sched_insn_info *window = window_list->window;
49314 enum dispatch_group group = get_insn_group (insn);
49315 enum insn_path path = get_insn_path (insn);
49316 int num_imm_operand;
49317 int num_imm32_operand;
49318 int num_imm64_operand;
49319
49320 if (!window_list->violation && group != disp_cmp
49321 && !fits_dispatch_window (insn))
49322 window_list->violation = true;
49323
49324 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
49325 &num_imm64_operand);
49326
49327 /* Initialize window with new instruction. */
49328 window[num_insn].insn = insn;
49329 window[num_insn].byte_len = byte_len;
49330 window[num_insn].group = group;
49331 window[num_insn].path = path;
49332 window[num_insn].imm_bytes = imm_size;
49333
49334 window_list->window_size += byte_len;
49335 window_list->num_insn = num_insn + 1;
49336 window_list->num_uops = window_list->num_uops + num_uops;
49337 window_list->imm_size += imm_size;
49338 window_list->num_imm += num_imm_operand;
49339 window_list->num_imm_32 += num_imm32_operand;
49340 window_list->num_imm_64 += num_imm64_operand;
49341
49342 if (group == disp_store)
49343 window_list->num_stores += 1;
49344 else if (group == disp_load
49345 || group == disp_prefetch)
49346 window_list->num_loads += 1;
49347 else if (group == disp_load_store)
49348 {
49349 window_list->num_stores += 1;
49350 window_list->num_loads += 1;
49351 }
49352 }
49353
49354 /* Adds a scheduled instruction, INSN, to the current dispatch window.
49355 If the total bytes of instructions or the number of instructions in
49356 the window exceed the allowable limits, it allocates a new window. */
49357
49358 static void
49359 add_to_dispatch_window (rtx_insn *insn)
49360 {
49361 int byte_len;
49362 dispatch_windows *window_list;
49363 dispatch_windows *next_list;
49364 dispatch_windows *window0_list;
49365 enum insn_path path;
49366 enum dispatch_group insn_group;
49367 bool insn_fits;
49368 int num_insn;
49369 int num_uops;
49370 int window_num;
49371 int insn_num_uops;
49372 int sum;
49373
49374 if (INSN_CODE (insn) < 0)
49375 return;
49376
49377 byte_len = min_insn_size (insn);
49378 window_list = dispatch_window_list;
49379 next_list = window_list->next;
49380 path = get_insn_path (insn);
49381 insn_group = get_insn_group (insn);
49382
49383 /* Get the last dispatch window. */
49384 if (next_list)
49385 window_list = dispatch_window_list->next;
49386
49387 if (path == path_single)
49388 insn_num_uops = 1;
49389 else if (path == path_double)
49390 insn_num_uops = 2;
49391 else
49392 insn_num_uops = (int) path;
49393
49394 /* If the current window is full, get a new window.
49395 Window number zero is full if MAX_INSN uops are scheduled in it.
49396 Window number one is full if window zero's bytes plus window
49397 one's bytes total 32, or if adding the bytes of the new instruction
49398 makes the total greater than 48, or if it already has MAX_INSN
49399 instructions in it. */
49400 num_insn = window_list->num_insn;
49401 num_uops = window_list->num_uops;
49402 window_num = window_list->window_num;
49403 insn_fits = fits_dispatch_window (insn);
49404
49405 if (num_insn >= MAX_INSN
49406 || num_uops + insn_num_uops > MAX_INSN
49407 || !(insn_fits))
49408 {
49409 window_num = ~window_num & 1;
49410 window_list = allocate_next_window (window_num);
49411 }
49412
49413 if (window_num == 0)
49414 {
49415 add_insn_window (insn, window_list, insn_num_uops);
49416 if (window_list->num_insn >= MAX_INSN
49417 && insn_group == disp_branch)
49418 {
49419 process_end_window ();
49420 return;
49421 }
49422 }
49423 else if (window_num == 1)
49424 {
49425 window0_list = window_list->prev;
49426 sum = window0_list->window_size + window_list->window_size;
49427 if (sum == 32
49428 || (byte_len + sum) >= 48)
49429 {
49430 process_end_window ();
49431 window_list = dispatch_window_list;
49432 }
49433
49434 add_insn_window (insn, window_list, insn_num_uops);
49435 }
49436 else
49437 gcc_unreachable ();
49438
49439 if (is_end_basic_block (insn_group))
49440 {
49441 /* End of basic block is reached; do end-of-basic-block processing. */
49442 process_end_window ();
49443 return;
49444 }
49445 }
49446
49447 /* Print the dispatch window, WINDOW_NUM, to FILE. */
49448
49449 DEBUG_FUNCTION static void
49450 debug_dispatch_window_file (FILE *file, int window_num)
49451 {
49452 dispatch_windows *list;
49453 int i;
49454
49455 if (window_num == 0)
49456 list = dispatch_window_list;
49457 else
49458 list = dispatch_window_list1;
49459
49460 fprintf (file, "Window #%d:\n", list->window_num);
49461 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
49462 list->num_insn, list->num_uops, list->window_size);
49463 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
49464 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
49465
49466 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
49467 list->num_stores);
49468 fprintf (file, " insn info:\n");
49469
49470 for (i = 0; i < MAX_INSN; i++)
49471 {
49472 if (!list->window[i].insn)
49473 break;
49474 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
49475 i, group_name[list->window[i].group],
49476 i, (void *)list->window[i].insn,
49477 i, list->window[i].path,
49478 i, list->window[i].byte_len,
49479 i, list->window[i].imm_bytes);
49480 }
49481 }
49482
49483 /* Print to stdout a dispatch window. */
49484
49485 DEBUG_FUNCTION void
49486 debug_dispatch_window (int window_num)
49487 {
49488 debug_dispatch_window_file (stdout, window_num);
49489 }
49490
49491 /* Print INSN dispatch information to FILE. */
49492
49493 DEBUG_FUNCTION static void
49494 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
49495 {
49496 int byte_len;
49497 enum insn_path path;
49498 enum dispatch_group group;
49499 int imm_size;
49500 int num_imm_operand;
49501 int num_imm32_operand;
49502 int num_imm64_operand;
49503
49504 if (INSN_CODE (insn) < 0)
49505 return;
49506
49507 byte_len = min_insn_size (insn);
49508 path = get_insn_path (insn);
49509 group = get_insn_group (insn);
49510 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
49511 &num_imm64_operand);
49512
49513 fprintf (file, " insn info:\n");
49514 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
49515 group_name[group], path, byte_len);
49516 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
49517 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
49518 }
49519
49520 /* Print to STDOUT the status of the ready list with respect to
49521 dispatch windows. */
49522
49523 DEBUG_FUNCTION void
49524 debug_ready_dispatch (void)
49525 {
49526 int i;
49527 int no_ready = number_in_ready ();
49528
49529 fprintf (stdout, "Number of ready: %d\n", no_ready);
49530
49531 for (i = 0; i < no_ready; i++)
49532 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
49533 }
49534
49535 /* This routine is the driver of the dispatch scheduler. */
49536
49537 static void
49538 do_dispatch (rtx_insn *insn, int mode)
49539 {
49540 if (mode == DISPATCH_INIT)
49541 init_dispatch_sched ();
49542 else if (mode == ADD_TO_DISPATCH_WINDOW)
49543 add_to_dispatch_window (insn);
49544 }
49545
49546 /* Return TRUE if Dispatch Scheduling is supported. */
49547
49548 static bool
49549 has_dispatch (rtx_insn *insn, int action)
49550 {
49551 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
49552 && flag_dispatch_scheduler)
49553 switch (action)
49554 {
49555 default:
49556 return false;
49557
49558 case IS_DISPATCH_ON:
49559 return true;
49560 break;
49561
49562 case IS_CMP:
49563 return is_cmp (insn);
49564
49565 case DISPATCH_VIOLATION:
49566 return dispatch_violation ();
49567
49568 case FITS_DISPATCH_WINDOW:
49569 return fits_dispatch_window (insn);
49570 }
49571
49572 return false;
49573 }
49574
49575 /* Implementation of reassociation_width target hook used by
49576 reassoc phase to identify parallelism level in reassociated
49577 tree. Statements tree_code is passed in OPC. Arguments type
49578 is passed in MODE.
49579
49580 Currently parallel reassociation is enabled for Atom
49581 processors only and we set reassociation width to be 2
49582 because Atom may issue up to 2 instructions per cycle.
49583
49584 Return value should be fixed if parallel reassociation is
49585 enabled for other processors. */
49586
49587 static int
49588 ix86_reassociation_width (unsigned int, enum machine_mode mode)
49589 {
49590 int res = 1;
49591
49592 /* Vector part. */
49593 if (VECTOR_MODE_P (mode))
49594 {
49595 if (TARGET_VECTOR_PARALLEL_EXECUTION)
49596 return 2;
49597 else
49598 return 1;
49599 }
49600
49601 /* Scalar part. */
49602 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
49603 res = 2;
49604 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
49605 res = 2;
49606
49607 return res;
49608 }
49609
49610 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
49611 place emms and femms instructions. */
49612
49613 static enum machine_mode
49614 ix86_preferred_simd_mode (enum machine_mode mode)
49615 {
49616 if (!TARGET_SSE)
49617 return word_mode;
49618
49619 switch (mode)
49620 {
49621 case QImode:
49622 return TARGET_AVX512BW ? V64QImode :
49623 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
49624 case HImode:
49625 return TARGET_AVX512BW ? V32HImode :
49626 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
49627 case SImode:
49628 return TARGET_AVX512F ? V16SImode :
49629 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
49630 case DImode:
49631 return TARGET_AVX512F ? V8DImode :
49632 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
49633
49634 case SFmode:
49635 if (TARGET_AVX512F)
49636 return V16SFmode;
49637 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
49638 return V8SFmode;
49639 else
49640 return V4SFmode;
49641
49642 case DFmode:
49643 if (!TARGET_VECTORIZE_DOUBLE)
49644 return word_mode;
49645 else if (TARGET_AVX512F)
49646 return V8DFmode;
49647 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
49648 return V4DFmode;
49649 else if (TARGET_SSE2)
49650 return V2DFmode;
49651 /* FALLTHRU */
49652
49653 default:
49654 return word_mode;
49655 }
49656 }
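
/* For illustration (not exhaustive): with -mavx2 and without
   -mprefer-avx128 the mapping above gives e.g.

       SImode -> V8SImode   (8 x 32-bit lanes in a 256-bit vector)
       DFmode -> V4DFmode   (4 x 64-bit lanes in a 256-bit vector)

   assuming the tuning enables vectorization of doubles
   (TARGET_VECTORIZE_DOUBLE); with plain -msse2 the same modes map to
   V4SImode and V2DFmode.  */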
49657
49658 /* If AVX is enabled then try vectorizing with both 256-bit and 128-bit
49659 vectors. If AVX512F is enabled then try vectorizing with 512-bit,
49660 256-bit and 128-bit vectors. */
49661
49662 static unsigned int
49663 ix86_autovectorize_vector_sizes (void)
49664 {
49665 return TARGET_AVX512F ? 64 | 32 | 16 :
49666 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
49667 }
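
/* The value returned above is a bit mask of vector sizes in bytes that the
   vectorizer may try (64 | 32 | 16 meaning 512-, 256- and 128-bit vectors);
   returning 0 tells it to use only the preferred SIMD mode.  */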
49668
49669 \f
49670
49671 /* Return the class of registers which could be used for a pseudo of MODE
49672 and of class RCLASS for spilling instead of memory. Return NO_REGS
49673 if it is not possible or not profitable. */
49674 static reg_class_t
49675 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
49676 {
49677 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
49678 && (mode == SImode || (TARGET_64BIT && mode == DImode))
49679 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
49680 return ALL_SSE_REGS;
49681 return NO_REGS;
49682 }
49683
49684 /* Implement targetm.vectorize.init_cost. */
49685
49686 static void *
49687 ix86_init_cost (struct loop *)
49688 {
49689 unsigned *cost = XNEWVEC (unsigned, 3);
49690 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
49691 return cost;
49692 }
49693
49694 /* Implement targetm.vectorize.add_stmt_cost. */
49695
49696 static unsigned
49697 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
49698 struct _stmt_vec_info *stmt_info, int misalign,
49699 enum vect_cost_model_location where)
49700 {
49701 unsigned *cost = (unsigned *) data;
49702 unsigned retval = 0;
49703
49704 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
49705 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
49706
49707 /* Statements in an inner loop relative to the loop being
49708 vectorized are weighted more heavily. The value here is
49709 arbitrary and could potentially be improved with analysis. */
49710 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
49711 count *= 50; /* FIXME. */
49712
49713 retval = (unsigned) (count * stmt_cost);
49714
49715 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
49716 for Silvermont, as it has an out-of-order integer pipeline and can execute
49717 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
49718 if (TARGET_SILVERMONT || TARGET_INTEL)
49719 if (stmt_info && stmt_info->stmt)
49720 {
49721 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
49722 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
49723 retval = (retval * 17) / 10;
49724 }
49725
49726 cost[where] += retval;
49727
49728 return retval;
49729 }
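
/* Worked example (illustration only): a vector statement in the loop body
   with COUNT = 1 and a base STMT_COST of 4 adds 4 to cost[vect_body] on most
   tunings; on Silvermont/Intel, if the statement produces an integer result,
   the 1.7 factor above makes that (4 * 17) / 10 = 6.  */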
49730
49731 /* Implement targetm.vectorize.finish_cost. */
49732
49733 static void
49734 ix86_finish_cost (void *data, unsigned *prologue_cost,
49735 unsigned *body_cost, unsigned *epilogue_cost)
49736 {
49737 unsigned *cost = (unsigned *) data;
49738 *prologue_cost = cost[vect_prologue];
49739 *body_cost = cost[vect_body];
49740 *epilogue_cost = cost[vect_epilogue];
49741 }
49742
49743 /* Implement targetm.vectorize.destroy_cost_data. */
49744
49745 static void
49746 ix86_destroy_cost_data (void *data)
49747 {
49748 free (data);
49749 }
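
/* A rough sketch (for orientation only; the real call sites are in the
   vectorizer) of how the four cost hooks above cooperate when costing one
   loop, assuming a loop structure LOOP and a statement STMT_INFO:

     void *data = targetm.vectorize.init_cost (loop);
     targetm.vectorize.add_stmt_cost (data, 1, vector_stmt, stmt_info,
                                      0, vect_body);
     unsigned prologue_cost, body_cost, epilogue_cost;
     targetm.vectorize.finish_cost (data, &prologue_cost, &body_cost,
                                    &epilogue_cost);
     targetm.vectorize.destroy_cost_data (data);  */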
49750
49751 /* Validate target specific memory model bits in VAL. */
49752
49753 static unsigned HOST_WIDE_INT
49754 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
49755 {
49756 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
49757 bool strong;
49758
49759 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
49760 |MEMMODEL_MASK)
49761 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
49762 {
49763 warning (OPT_Winvalid_memory_model,
49764 "Unknown architecture specific memory model");
49765 return MEMMODEL_SEQ_CST;
49766 }
49767 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
49768 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
49769 {
49770 warning (OPT_Winvalid_memory_model,
49771 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
49772 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
49773 }
49774 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
49775 {
49776 warning (OPT_Winvalid_memory_model,
49777 "HLE_RELEASE not used with RELEASE or stronger memory model");
49778 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
49779 }
49780 return val;
49781 }
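
/* Usage sketch (user-level view, not part of this file): the HLE bits
   validated above correspond to the user-visible __ATOMIC_HLE_ACQUIRE and
   __ATOMIC_HLE_RELEASE macros, which are ORed into an ordinary memory
   model, e.g.

     while (__atomic_exchange_n (&lock, 1,
                                 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;
     ...critical section...
     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Invalid combinations (both HLE bits at once, HLE_ACQUIRE without an
   acquire-or-stronger model, HLE_RELEASE without a release-or-stronger
   model) are diagnosed with -Winvalid-memory-model and a safe model is
   substituted.  */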
49782
49783 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
49784 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
49785 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
49786 or the number of vecsize_mangle variants that should be emitted. */
49787
49788 static int
49789 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
49790 struct cgraph_simd_clone *clonei,
49791 tree base_type, int num)
49792 {
49793 int ret = 1;
49794
49795 if (clonei->simdlen
49796 && (clonei->simdlen < 2
49797 || clonei->simdlen > 16
49798 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
49799 {
49800 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
49801 "unsupported simdlen %d", clonei->simdlen);
49802 return 0;
49803 }
49804
49805 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
49806 if (TREE_CODE (ret_type) != VOID_TYPE)
49807 switch (TYPE_MODE (ret_type))
49808 {
49809 case QImode:
49810 case HImode:
49811 case SImode:
49812 case DImode:
49813 case SFmode:
49814 case DFmode:
49815 /* case SCmode: */
49816 /* case DCmode: */
49817 break;
49818 default:
49819 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
49820 "unsupported return type %qT for simd\n", ret_type);
49821 return 0;
49822 }
49823
49824 tree t;
49825 int i;
49826
49827 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
49828 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
49829 switch (TYPE_MODE (TREE_TYPE (t)))
49830 {
49831 case QImode:
49832 case HImode:
49833 case SImode:
49834 case DImode:
49835 case SFmode:
49836 case DFmode:
49837 /* case SCmode: */
49838 /* case DCmode: */
49839 break;
49840 default:
49841 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
49842 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
49843 return 0;
49844 }
49845
49846 if (clonei->cilk_elemental)
49847 {
49848 /* Parse the processor clause here. If not present, default to 'b'. */
49849 clonei->vecsize_mangle = 'b';
49850 }
49851 else if (!TREE_PUBLIC (node->decl))
49852 {
49853 /* If the function isn't exported, we can pick just one ISA
49854 for the clones. */
49855 if (TARGET_AVX2)
49856 clonei->vecsize_mangle = 'd';
49857 else if (TARGET_AVX)
49858 clonei->vecsize_mangle = 'c';
49859 else
49860 clonei->vecsize_mangle = 'b';
49861 ret = 1;
49862 }
49863 else
49864 {
49865 clonei->vecsize_mangle = "bcd"[num];
49866 ret = 3;
49867 }
49868 switch (clonei->vecsize_mangle)
49869 {
49870 case 'b':
49871 clonei->vecsize_int = 128;
49872 clonei->vecsize_float = 128;
49873 break;
49874 case 'c':
49875 clonei->vecsize_int = 128;
49876 clonei->vecsize_float = 256;
49877 break;
49878 case 'd':
49879 clonei->vecsize_int = 256;
49880 clonei->vecsize_float = 256;
49881 break;
49882 }
49883 if (clonei->simdlen == 0)
49884 {
49885 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
49886 clonei->simdlen = clonei->vecsize_int;
49887 else
49888 clonei->simdlen = clonei->vecsize_float;
49889 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
49890 if (clonei->simdlen > 16)
49891 clonei->simdlen = 16;
49892 }
49893 return ret;
49894 }
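
/* Illustration of the resulting geometry: for an exported "declare simd"
   function whose characteristic (base) type is double, the three clones
   'b', 'c' and 'd' get vecsize_float of 128, 256 and 256 bits, so an
   unspecified simdlen becomes 128/64 = 2, 256/64 = 4 and 256/64 = 4 lanes
   respectively; with an int base type the simdlens are 4, 4 and 8.  */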
49895
49896 /* Add target attribute to SIMD clone NODE if needed. */
49897
49898 static void
49899 ix86_simd_clone_adjust (struct cgraph_node *node)
49900 {
49901 const char *str = NULL;
49902 gcc_assert (node->decl == cfun->decl);
49903 switch (node->simdclone->vecsize_mangle)
49904 {
49905 case 'b':
49906 if (!TARGET_SSE2)
49907 str = "sse2";
49908 break;
49909 case 'c':
49910 if (!TARGET_AVX)
49911 str = "avx";
49912 break;
49913 case 'd':
49914 if (!TARGET_AVX2)
49915 str = "avx2";
49916 break;
49917 default:
49918 gcc_unreachable ();
49919 }
49920 if (str == NULL)
49921 return;
49922 push_cfun (NULL);
49923 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
49924 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
49925 gcc_assert (ok);
49926 pop_cfun ();
49927 ix86_previous_fndecl = NULL_TREE;
49928 ix86_set_current_function (node->decl);
49929 }
49930
49931 /* If SIMD clone NODE can't be used in a vectorized loop
49932 in the current function, return -1; otherwise return the badness of using it
49933 (0 if it is most desirable from the vecsize_mangle point of view, 1
49934 slightly less desirable, etc.). */
49935
49936 static int
49937 ix86_simd_clone_usable (struct cgraph_node *node)
49938 {
49939 switch (node->simdclone->vecsize_mangle)
49940 {
49941 case 'b':
49942 if (!TARGET_SSE2)
49943 return -1;
49944 if (!TARGET_AVX)
49945 return 0;
49946 return TARGET_AVX2 ? 2 : 1;
49947 case 'c':
49948 if (!TARGET_AVX)
49949 return -1;
49950 return TARGET_AVX2 ? 1 : 0;
49951 break;
49952 case 'd':
49953 if (!TARGET_AVX2)
49954 return -1;
49955 return 0;
49956 default:
49957 gcc_unreachable ();
49958 }
49959 }
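
/* Example: when compiling with -mavx2, the 'd' (AVX2) clone is preferred
   (badness 0), the 'c' (AVX) clone comes next (1) and the 'b' (SSE2) clone
   last (2); with only -msse2, the 'b' clone is the sole usable one and the
   other two return -1.  */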
49960
49961 /* This function adjusts the unroll factor based on
49962 hardware capabilities. For example, bdver3 has
49963 a loop buffer which makes unrolling of smaller
49964 loops less important. This function decides the
49965 unroll factor using the number of memory references
49966 in the loop body (the value 32 is used) as a heuristic. */
49967
49968 static unsigned
49969 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
49970 {
49971 basic_block *bbs;
49972 rtx_insn *insn;
49973 unsigned i;
49974 unsigned mem_count = 0;
49975
49976 if (!TARGET_ADJUST_UNROLL)
49977 return nunroll;
49978
49979 /* Count the number of memory references within the loop body.
49980 This value determines the unrolling factor for bdver3 and bdver4
49981 architectures. */
49982 subrtx_iterator::array_type array;
49983 bbs = get_loop_body (loop);
49984 for (i = 0; i < loop->num_nodes; i++)
49985 FOR_BB_INSNS (bbs[i], insn)
49986 if (NONDEBUG_INSN_P (insn))
49987 FOR_EACH_SUBRTX (iter, array, insn, NONCONST)
49988 if (const_rtx x = *iter)
49989 if (MEM_P (x))
49990 {
49991 enum machine_mode mode = GET_MODE (x);
49992 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
49993 if (n_words > 4)
49994 mem_count += 2;
49995 else
49996 mem_count += 1;
49997 }
49998 free (bbs);
49999
50000 if (mem_count && mem_count <= 32)
50001 return 32 / mem_count;
50002
50003 return nunroll;
50004 }
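
/* Example of the heuristic above: a loop body with 8 word-sized memory
   references gets an unroll factor of 32/8 = 4 regardless of NUNROLL;
   references wider than 4 words count twice, and once more than 32 counted
   references are seen the generic NUNROLL is kept.  */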
50005
50006
50007 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
50008
50009 static bool
50010 ix86_float_exceptions_rounding_supported_p (void)
50011 {
50012 /* For x87 floating point with standard excess precision handling,
50013 there is no adddf3 pattern (since x87 floating point only has
50014 XFmode operations) so the default hook implementation gets this
50015 wrong. */
50016 return TARGET_80387 || TARGET_SSE_MATH;
50017 }
50018
50019 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
50020
50021 static void
50022 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
50023 {
50024 if (!TARGET_80387 && !TARGET_SSE_MATH)
50025 return;
50026 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
50027 if (TARGET_80387)
50028 {
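/* The x87 environment stored by fnstenv is 28 bytes in protected mode,
   i.e. seven 32-bit words, hence the [0..6] index type below.  */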
50029 tree fenv_index_type = build_index_type (size_int (6));
50030 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
50031 tree fenv_var = create_tmp_var (fenv_type, NULL);
50032 mark_addressable (fenv_var);
50033 tree fenv_ptr = build_pointer_type (fenv_type);
50034 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
50035 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
50036 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
50037 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
50038 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
50039 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
50040 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
50041 tree hold_fnclex = build_call_expr (fnclex, 0);
50042 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
50043 hold_fnclex);
50044 *clear = build_call_expr (fnclex, 0);
50045 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
50046 tree fnstsw_call = build_call_expr (fnstsw, 0);
50047 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
50048 sw_var, fnstsw_call);
50049 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
50050 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
50051 exceptions_var, exceptions_x87);
50052 *update = build2 (COMPOUND_EXPR, integer_type_node,
50053 sw_mod, update_mod);
50054 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
50055 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
50056 }
50057 if (TARGET_SSE_MATH)
50058 {
50059 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
50060 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
50061 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
50062 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
50063 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
50064 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
50065 mxcsr_orig_var, stmxcsr_hold_call);
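/* Build the MXCSR value used while the operation runs: OR in 0x1f80 to set
   all six exception mask bits (bits 7-12) so nothing traps, then AND with
   0xffffffc0 to clear the six exception flag bits (bits 0-5).  */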
50066 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
50067 mxcsr_orig_var,
50068 build_int_cst (unsigned_type_node, 0x1f80));
50069 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
50070 build_int_cst (unsigned_type_node, 0xffffffc0));
50071 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
50072 mxcsr_mod_var, hold_mod_val);
50073 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
50074 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
50075 hold_assign_orig, hold_assign_mod);
50076 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
50077 ldmxcsr_hold_call);
50078 if (*hold)
50079 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
50080 else
50081 *hold = hold_all;
50082 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
50083 if (*clear)
50084 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
50085 ldmxcsr_clear_call);
50086 else
50087 *clear = ldmxcsr_clear_call;
50088 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
50089 tree exceptions_sse = fold_convert (integer_type_node,
50090 stxmcsr_update_call);
50091 if (*update)
50092 {
50093 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
50094 exceptions_var, exceptions_sse);
50095 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
50096 exceptions_var, exceptions_mod);
50097 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
50098 exceptions_assign);
50099 }
50100 else
50101 *update = build2 (MODIFY_EXPR, integer_type_node,
50102 exceptions_var, exceptions_sse);
50103 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
50104 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
50105 ldmxcsr_update_call);
50106 }
50107 tree atomic_feraiseexcept
50108 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
50109 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
50110 1, exceptions_var);
50111 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
50112 atomic_feraiseexcept_call);
50113 }
50114
50115 /* Initialize the GCC target structure. */
50116 #undef TARGET_RETURN_IN_MEMORY
50117 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
50118
50119 #undef TARGET_LEGITIMIZE_ADDRESS
50120 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
50121
50122 #undef TARGET_ATTRIBUTE_TABLE
50123 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
50124 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
50125 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
50126 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
50127 # undef TARGET_MERGE_DECL_ATTRIBUTES
50128 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
50129 #endif
50130
50131 #undef TARGET_COMP_TYPE_ATTRIBUTES
50132 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
50133
50134 #undef TARGET_INIT_BUILTINS
50135 #define TARGET_INIT_BUILTINS ix86_init_builtins
50136 #undef TARGET_BUILTIN_DECL
50137 #define TARGET_BUILTIN_DECL ix86_builtin_decl
50138 #undef TARGET_EXPAND_BUILTIN
50139 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
50140
50141 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
50142 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
50143 ix86_builtin_vectorized_function
50144
50145 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
50146 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
50147
50148 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
50149 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
50150
50151 #undef TARGET_VECTORIZE_BUILTIN_GATHER
50152 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
50153
50154 #undef TARGET_BUILTIN_RECIPROCAL
50155 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
50156
50157 #undef TARGET_ASM_FUNCTION_EPILOGUE
50158 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
50159
50160 #undef TARGET_ENCODE_SECTION_INFO
50161 #ifndef SUBTARGET_ENCODE_SECTION_INFO
50162 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
50163 #else
50164 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
50165 #endif
50166
50167 #undef TARGET_ASM_OPEN_PAREN
50168 #define TARGET_ASM_OPEN_PAREN ""
50169 #undef TARGET_ASM_CLOSE_PAREN
50170 #define TARGET_ASM_CLOSE_PAREN ""
50171
50172 #undef TARGET_ASM_BYTE_OP
50173 #define TARGET_ASM_BYTE_OP ASM_BYTE
50174
50175 #undef TARGET_ASM_ALIGNED_HI_OP
50176 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
50177 #undef TARGET_ASM_ALIGNED_SI_OP
50178 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
50179 #ifdef ASM_QUAD
50180 #undef TARGET_ASM_ALIGNED_DI_OP
50181 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
50182 #endif
50183
50184 #undef TARGET_PROFILE_BEFORE_PROLOGUE
50185 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
50186
50187 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
50188 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
50189
50190 #undef TARGET_ASM_UNALIGNED_HI_OP
50191 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
50192 #undef TARGET_ASM_UNALIGNED_SI_OP
50193 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
50194 #undef TARGET_ASM_UNALIGNED_DI_OP
50195 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
50196
50197 #undef TARGET_PRINT_OPERAND
50198 #define TARGET_PRINT_OPERAND ix86_print_operand
50199 #undef TARGET_PRINT_OPERAND_ADDRESS
50200 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
50201 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
50202 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
50203 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
50204 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
50205
50206 #undef TARGET_SCHED_INIT_GLOBAL
50207 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
50208 #undef TARGET_SCHED_ADJUST_COST
50209 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
50210 #undef TARGET_SCHED_ISSUE_RATE
50211 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
50212 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
50213 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
50214 ia32_multipass_dfa_lookahead
50215 #undef TARGET_SCHED_MACRO_FUSION_P
50216 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
50217 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
50218 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
50219
50220 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
50221 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
50222
50223 #undef TARGET_MEMMODEL_CHECK
50224 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
50225
50226 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
50227 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
50228
50229 #ifdef HAVE_AS_TLS
50230 #undef TARGET_HAVE_TLS
50231 #define TARGET_HAVE_TLS true
50232 #endif
50233 #undef TARGET_CANNOT_FORCE_CONST_MEM
50234 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
50235 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
50236 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
50237
50238 #undef TARGET_DELEGITIMIZE_ADDRESS
50239 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
50240
50241 #undef TARGET_MS_BITFIELD_LAYOUT_P
50242 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
50243
50244 #if TARGET_MACHO
50245 #undef TARGET_BINDS_LOCAL_P
50246 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
50247 #endif
50248 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
50249 #undef TARGET_BINDS_LOCAL_P
50250 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
50251 #endif
50252
50253 #undef TARGET_ASM_OUTPUT_MI_THUNK
50254 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
50255 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
50256 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
50257
50258 #undef TARGET_ASM_FILE_START
50259 #define TARGET_ASM_FILE_START x86_file_start
50260
50261 #undef TARGET_OPTION_OVERRIDE
50262 #define TARGET_OPTION_OVERRIDE ix86_option_override
50263
50264 #undef TARGET_REGISTER_MOVE_COST
50265 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
50266 #undef TARGET_MEMORY_MOVE_COST
50267 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
50268 #undef TARGET_RTX_COSTS
50269 #define TARGET_RTX_COSTS ix86_rtx_costs
50270 #undef TARGET_ADDRESS_COST
50271 #define TARGET_ADDRESS_COST ix86_address_cost
50272
50273 #undef TARGET_FIXED_CONDITION_CODE_REGS
50274 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
50275 #undef TARGET_CC_MODES_COMPATIBLE
50276 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
50277
50278 #undef TARGET_MACHINE_DEPENDENT_REORG
50279 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
50280
50281 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
50282 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
50283
50284 #undef TARGET_BUILD_BUILTIN_VA_LIST
50285 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
50286
50287 #undef TARGET_FOLD_BUILTIN
50288 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
50289
50290 #undef TARGET_COMPARE_VERSION_PRIORITY
50291 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
50292
50293 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
50294 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
50295 ix86_generate_version_dispatcher_body
50296
50297 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
50298 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
50299 ix86_get_function_versions_dispatcher
50300
50301 #undef TARGET_ENUM_VA_LIST_P
50302 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
50303
50304 #undef TARGET_FN_ABI_VA_LIST
50305 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
50306
50307 #undef TARGET_CANONICAL_VA_LIST_TYPE
50308 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
50309
50310 #undef TARGET_EXPAND_BUILTIN_VA_START
50311 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
50312
50313 #undef TARGET_MD_ASM_CLOBBERS
50314 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
50315
50316 #undef TARGET_PROMOTE_PROTOTYPES
50317 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
50318 #undef TARGET_SETUP_INCOMING_VARARGS
50319 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
50320 #undef TARGET_MUST_PASS_IN_STACK
50321 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
50322 #undef TARGET_FUNCTION_ARG_ADVANCE
50323 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
50324 #undef TARGET_FUNCTION_ARG
50325 #define TARGET_FUNCTION_ARG ix86_function_arg
50326 #undef TARGET_INIT_PIC_REG
50327 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
50328 #undef TARGET_USE_PSEUDO_PIC_REG
50329 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
50330 #undef TARGET_FUNCTION_ARG_BOUNDARY
50331 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
50332 #undef TARGET_PASS_BY_REFERENCE
50333 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
50334 #undef TARGET_INTERNAL_ARG_POINTER
50335 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
50336 #undef TARGET_UPDATE_STACK_BOUNDARY
50337 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
50338 #undef TARGET_GET_DRAP_RTX
50339 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
50340 #undef TARGET_STRICT_ARGUMENT_NAMING
50341 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
50342 #undef TARGET_STATIC_CHAIN
50343 #define TARGET_STATIC_CHAIN ix86_static_chain
50344 #undef TARGET_TRAMPOLINE_INIT
50345 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
50346 #undef TARGET_RETURN_POPS_ARGS
50347 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
50348
50349 #undef TARGET_LEGITIMATE_COMBINED_INSN
50350 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
50351
50352 #undef TARGET_ASAN_SHADOW_OFFSET
50353 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
50354
50355 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
50356 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
50357
50358 #undef TARGET_SCALAR_MODE_SUPPORTED_P
50359 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
50360
50361 #undef TARGET_VECTOR_MODE_SUPPORTED_P
50362 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
50363
50364 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
50365 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
50366 ix86_libgcc_floating_mode_supported_p
50367
50368 #undef TARGET_C_MODE_FOR_SUFFIX
50369 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
50370
50371 #ifdef HAVE_AS_TLS
50372 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
50373 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
50374 #endif
50375
50376 #ifdef SUBTARGET_INSERT_ATTRIBUTES
50377 #undef TARGET_INSERT_ATTRIBUTES
50378 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
50379 #endif
50380
50381 #undef TARGET_MANGLE_TYPE
50382 #define TARGET_MANGLE_TYPE ix86_mangle_type
50383
50384 #if !TARGET_MACHO
50385 #undef TARGET_STACK_PROTECT_FAIL
50386 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
50387 #endif
50388
50389 #undef TARGET_FUNCTION_VALUE
50390 #define TARGET_FUNCTION_VALUE ix86_function_value
50391
50392 #undef TARGET_FUNCTION_VALUE_REGNO_P
50393 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
50394
50395 #undef TARGET_PROMOTE_FUNCTION_MODE
50396 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
50397
50398 #undef TARGET_MEMBER_TYPE_FORCES_BLK
50399 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
50400
50401 #undef TARGET_INSTANTIATE_DECLS
50402 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
50403
50404 #undef TARGET_SECONDARY_RELOAD
50405 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
50406
50407 #undef TARGET_CLASS_MAX_NREGS
50408 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
50409
50410 #undef TARGET_PREFERRED_RELOAD_CLASS
50411 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
50412 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
50413 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
50414 #undef TARGET_CLASS_LIKELY_SPILLED_P
50415 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
50416
50417 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
50418 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
50419 ix86_builtin_vectorization_cost
50420 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
50421 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
50422 ix86_vectorize_vec_perm_const_ok
50423 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
50424 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
50425 ix86_preferred_simd_mode
50426 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
50427 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
50428 ix86_autovectorize_vector_sizes
50429 #undef TARGET_VECTORIZE_INIT_COST
50430 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
50431 #undef TARGET_VECTORIZE_ADD_STMT_COST
50432 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
50433 #undef TARGET_VECTORIZE_FINISH_COST
50434 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
50435 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
50436 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
50437
50438 #undef TARGET_SET_CURRENT_FUNCTION
50439 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
50440
50441 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
50442 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
50443
50444 #undef TARGET_OPTION_SAVE
50445 #define TARGET_OPTION_SAVE ix86_function_specific_save
50446
50447 #undef TARGET_OPTION_RESTORE
50448 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
50449
50450 #undef TARGET_OPTION_PRINT
50451 #define TARGET_OPTION_PRINT ix86_function_specific_print
50452
50453 #undef TARGET_OPTION_FUNCTION_VERSIONS
50454 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
50455
50456 #undef TARGET_CAN_INLINE_P
50457 #define TARGET_CAN_INLINE_P ix86_can_inline_p
50458
50459 #undef TARGET_EXPAND_TO_RTL_HOOK
50460 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
50461
50462 #undef TARGET_LEGITIMATE_ADDRESS_P
50463 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
50464
50465 #undef TARGET_LRA_P
50466 #define TARGET_LRA_P hook_bool_void_true
50467
50468 #undef TARGET_REGISTER_PRIORITY
50469 #define TARGET_REGISTER_PRIORITY ix86_register_priority
50470
50471 #undef TARGET_REGISTER_USAGE_LEVELING_P
50472 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
50473
50474 #undef TARGET_LEGITIMATE_CONSTANT_P
50475 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
50476
50477 #undef TARGET_FRAME_POINTER_REQUIRED
50478 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
50479
50480 #undef TARGET_CAN_ELIMINATE
50481 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
50482
50483 #undef TARGET_EXTRA_LIVE_ON_ENTRY
50484 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
50485
50486 #undef TARGET_ASM_CODE_END
50487 #define TARGET_ASM_CODE_END ix86_code_end
50488
50489 #undef TARGET_CONDITIONAL_REGISTER_USAGE
50490 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
50491
50492 #if TARGET_MACHO
50493 #undef TARGET_INIT_LIBFUNCS
50494 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
50495 #endif
50496
50497 #undef TARGET_LOOP_UNROLL_ADJUST
50498 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
50499
50500 #undef TARGET_SPILL_CLASS
50501 #define TARGET_SPILL_CLASS ix86_spill_class
50502
50503 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
50504 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
50505 ix86_simd_clone_compute_vecsize_and_simdlen
50506
50507 #undef TARGET_SIMD_CLONE_ADJUST
50508 #define TARGET_SIMD_CLONE_ADJUST \
50509 ix86_simd_clone_adjust
50510
50511 #undef TARGET_SIMD_CLONE_USABLE
50512 #define TARGET_SIMD_CLONE_USABLE \
50513 ix86_simd_clone_usable
50514
50515 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
50516 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
50517 ix86_float_exceptions_rounding_supported_p
50518
50519 #undef TARGET_MODE_EMIT
50520 #define TARGET_MODE_EMIT ix86_emit_mode_set
50521
50522 #undef TARGET_MODE_NEEDED
50523 #define TARGET_MODE_NEEDED ix86_mode_needed
50524
50525 #undef TARGET_MODE_AFTER
50526 #define TARGET_MODE_AFTER ix86_mode_after
50527
50528 #undef TARGET_MODE_ENTRY
50529 #define TARGET_MODE_ENTRY ix86_mode_entry
50530
50531 #undef TARGET_MODE_EXIT
50532 #define TARGET_MODE_EXIT ix86_mode_exit
50533
50534 #undef TARGET_MODE_PRIORITY
50535 #define TARGET_MODE_PRIORITY ix86_mode_priority
50536
50537 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
50538 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
50539
50540 struct gcc_target targetm = TARGET_INITIALIZER;
50541 \f
50542 #include "gt-i386.h"