AVX-512. Update float unspecs: storeu, rcp14, rsqrt14, scalef, getexp, fixupimm,...
[gcc.git] / gcc / config / i386 / i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "function.h"
42 #include "recog.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "diagnostic-core.h"
46 #include "toplev.h"
47 #include "basic-block.h"
48 #include "ggc.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "reload.h"
54 #include "cgraph.h"
55 #include "hash-table.h"
56 #include "vec.h"
57 #include "basic-block.h"
58 #include "tree-ssa-alias.h"
59 #include "internal-fn.h"
60 #include "gimple-fold.h"
61 #include "tree-eh.h"
62 #include "gimple-expr.h"
63 #include "is-a.h"
64 #include "gimple.h"
65 #include "gimplify.h"
66 #include "cfgloop.h"
67 #include "dwarf2.h"
68 #include "df.h"
69 #include "tm-constrs.h"
70 #include "params.h"
71 #include "cselib.h"
72 #include "debug.h"
73 #include "sched-int.h"
74 #include "sbitmap.h"
75 #include "fibheap.h"
76 #include "opts.h"
77 #include "diagnostic.h"
78 #include "dumpfile.h"
79 #include "tree-pass.h"
80 #include "wide-int.h"
81 #include "context.h"
82 #include "pass_manager.h"
83 #include "target-globals.h"
84 #include "tree-vectorizer.h"
85 #include "shrink-wrap.h"
86 #include "builtins.h"
87
88 static rtx legitimize_dllimport_symbol (rtx, bool);
89 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
90 static rtx legitimize_pe_coff_symbol (rtx, bool);
91
92 #ifndef CHECK_STACK_LIMIT
93 #define CHECK_STACK_LIMIT (-1)
94 #endif
95
96 /* Return index of given mode in mult and division cost tables. */
97 #define MODE_INDEX(mode) \
98 ((mode) == QImode ? 0 \
99 : (mode) == HImode ? 1 \
100 : (mode) == SImode ? 2 \
101 : (mode) == DImode ? 3 \
102 : 4)
103
104 /* Processor costs (relative to an add) */
105 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
106 #define COSTS_N_BYTES(N) ((N) * 2)
107
108 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
109
110 static stringop_algs ix86_size_memcpy[2] = {
111 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
112 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
113 static stringop_algs ix86_size_memset[2] = {
114 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
115 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
116
117 const
118 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
119 COSTS_N_BYTES (2), /* cost of an add instruction */
120 COSTS_N_BYTES (3), /* cost of a lea instruction */
121 COSTS_N_BYTES (2), /* variable shift costs */
122 COSTS_N_BYTES (3), /* constant shift costs */
123 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
124 COSTS_N_BYTES (3), /* HI */
125 COSTS_N_BYTES (3), /* SI */
126 COSTS_N_BYTES (3), /* DI */
127 COSTS_N_BYTES (5)}, /* other */
128 0, /* cost of multiply per each bit set */
129 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
130 COSTS_N_BYTES (3), /* HI */
131 COSTS_N_BYTES (3), /* SI */
132 COSTS_N_BYTES (3), /* DI */
133 COSTS_N_BYTES (5)}, /* other */
134 COSTS_N_BYTES (3), /* cost of movsx */
135 COSTS_N_BYTES (3), /* cost of movzx */
136 0, /* "large" insn */
137 2, /* MOVE_RATIO */
138 2, /* cost for loading QImode using movzbl */
139 {2, 2, 2}, /* cost of loading integer registers
140 in QImode, HImode and SImode.
141 Relative to reg-reg move (2). */
142 {2, 2, 2}, /* cost of storing integer registers */
143 2, /* cost of reg,reg fld/fst */
144 {2, 2, 2}, /* cost of loading fp registers
145 in SFmode, DFmode and XFmode */
146 {2, 2, 2}, /* cost of storing fp registers
147 in SFmode, DFmode and XFmode */
148 3, /* cost of moving MMX register */
149 {3, 3}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {3, 3}, /* cost of storing MMX registers
152 in SImode and DImode */
153 3, /* cost of moving SSE register */
154 {3, 3, 3}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {3, 3, 3}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of l1 cache */
160 0, /* size of l2 cache */
161 0, /* size of prefetch block */
162 0, /* number of parallel prefetches */
163 2, /* Branch cost */
164 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
165 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
166 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
167 COSTS_N_BYTES (2), /* cost of FABS instruction. */
168 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
169 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
170 ix86_size_memcpy,
171 ix86_size_memset,
172 1, /* scalar_stmt_cost. */
173 1, /* scalar load_cost. */
174 1, /* scalar_store_cost. */
175 1, /* vec_stmt_cost. */
176 1, /* vec_to_scalar_cost. */
177 1, /* scalar_to_vec_cost. */
178 1, /* vec_align_load_cost. */
179 1, /* vec_unalign_load_cost. */
180 1, /* vec_store_cost. */
181 1, /* cond_taken_branch_cost. */
182 1, /* cond_not_taken_branch_cost. */
183 };
184
185 /* Processor costs (relative to an add) */
186 static stringop_algs i386_memcpy[2] = {
187 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
188 DUMMY_STRINGOP_ALGS};
189 static stringop_algs i386_memset[2] = {
190 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
191 DUMMY_STRINGOP_ALGS};
192
193 static const
194 struct processor_costs i386_cost = { /* 386 specific costs */
195 COSTS_N_INSNS (1), /* cost of an add instruction */
196 COSTS_N_INSNS (1), /* cost of a lea instruction */
197 COSTS_N_INSNS (3), /* variable shift costs */
198 COSTS_N_INSNS (2), /* constant shift costs */
199 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
200 COSTS_N_INSNS (6), /* HI */
201 COSTS_N_INSNS (6), /* SI */
202 COSTS_N_INSNS (6), /* DI */
203 COSTS_N_INSNS (6)}, /* other */
204 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
205 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
206 COSTS_N_INSNS (23), /* HI */
207 COSTS_N_INSNS (23), /* SI */
208 COSTS_N_INSNS (23), /* DI */
209 COSTS_N_INSNS (23)}, /* other */
210 COSTS_N_INSNS (3), /* cost of movsx */
211 COSTS_N_INSNS (2), /* cost of movzx */
212 15, /* "large" insn */
213 3, /* MOVE_RATIO */
214 4, /* cost for loading QImode using movzbl */
215 {2, 4, 2}, /* cost of loading integer registers
216 in QImode, HImode and SImode.
217 Relative to reg-reg move (2). */
218 {2, 4, 2}, /* cost of storing integer registers */
219 2, /* cost of reg,reg fld/fst */
220 {8, 8, 8}, /* cost of loading fp registers
221 in SFmode, DFmode and XFmode */
222 {8, 8, 8}, /* cost of storing fp registers
223 in SFmode, DFmode and XFmode */
224 2, /* cost of moving MMX register */
225 {4, 8}, /* cost of loading MMX registers
226 in SImode and DImode */
227 {4, 8}, /* cost of storing MMX registers
228 in SImode and DImode */
229 2, /* cost of moving SSE register */
230 {4, 8, 16}, /* cost of loading SSE registers
231 in SImode, DImode and TImode */
232 {4, 8, 16}, /* cost of storing SSE registers
233 in SImode, DImode and TImode */
234 3, /* MMX or SSE register to integer */
235 0, /* size of l1 cache */
236 0, /* size of l2 cache */
237 0, /* size of prefetch block */
238 0, /* number of parallel prefetches */
239 1, /* Branch cost */
240 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
241 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
242 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
243 COSTS_N_INSNS (22), /* cost of FABS instruction. */
244 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
245 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
246 i386_memcpy,
247 i386_memset,
248 1, /* scalar_stmt_cost. */
249 1, /* scalar load_cost. */
250 1, /* scalar_store_cost. */
251 1, /* vec_stmt_cost. */
252 1, /* vec_to_scalar_cost. */
253 1, /* scalar_to_vec_cost. */
254 1, /* vec_align_load_cost. */
255 2, /* vec_unalign_load_cost. */
256 1, /* vec_store_cost. */
257 3, /* cond_taken_branch_cost. */
258 1, /* cond_not_taken_branch_cost. */
259 };
260
261 static stringop_algs i486_memcpy[2] = {
262 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
263 DUMMY_STRINGOP_ALGS};
264 static stringop_algs i486_memset[2] = {
265 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
266 DUMMY_STRINGOP_ALGS};
267
268 static const
269 struct processor_costs i486_cost = { /* 486 specific costs */
270 COSTS_N_INSNS (1), /* cost of an add instruction */
271 COSTS_N_INSNS (1), /* cost of a lea instruction */
272 COSTS_N_INSNS (3), /* variable shift costs */
273 COSTS_N_INSNS (2), /* constant shift costs */
274 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
275 COSTS_N_INSNS (12), /* HI */
276 COSTS_N_INSNS (12), /* SI */
277 COSTS_N_INSNS (12), /* DI */
278 COSTS_N_INSNS (12)}, /* other */
279 1, /* cost of multiply per each bit set */
280 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
281 COSTS_N_INSNS (40), /* HI */
282 COSTS_N_INSNS (40), /* SI */
283 COSTS_N_INSNS (40), /* DI */
284 COSTS_N_INSNS (40)}, /* other */
285 COSTS_N_INSNS (3), /* cost of movsx */
286 COSTS_N_INSNS (2), /* cost of movzx */
287 15, /* "large" insn */
288 3, /* MOVE_RATIO */
289 4, /* cost for loading QImode using movzbl */
290 {2, 4, 2}, /* cost of loading integer registers
291 in QImode, HImode and SImode.
292 Relative to reg-reg move (2). */
293 {2, 4, 2}, /* cost of storing integer registers */
294 2, /* cost of reg,reg fld/fst */
295 {8, 8, 8}, /* cost of loading fp registers
296 in SFmode, DFmode and XFmode */
297 {8, 8, 8}, /* cost of storing fp registers
298 in SFmode, DFmode and XFmode */
299 2, /* cost of moving MMX register */
300 {4, 8}, /* cost of loading MMX registers
301 in SImode and DImode */
302 {4, 8}, /* cost of storing MMX registers
303 in SImode and DImode */
304 2, /* cost of moving SSE register */
305 {4, 8, 16}, /* cost of loading SSE registers
306 in SImode, DImode and TImode */
307 {4, 8, 16}, /* cost of storing SSE registers
308 in SImode, DImode and TImode */
309 3, /* MMX or SSE register to integer */
310 4, /* size of l1 cache. 486 has 8kB cache
311 shared for code and data, so 4kB is
312 not really precise. */
313 4, /* size of l2 cache */
314 0, /* size of prefetch block */
315 0, /* number of parallel prefetches */
316 1, /* Branch cost */
317 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
318 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
319 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
320 COSTS_N_INSNS (3), /* cost of FABS instruction. */
321 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
322 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
323 i486_memcpy,
324 i486_memset,
325 1, /* scalar_stmt_cost. */
326 1, /* scalar load_cost. */
327 1, /* scalar_store_cost. */
328 1, /* vec_stmt_cost. */
329 1, /* vec_to_scalar_cost. */
330 1, /* scalar_to_vec_cost. */
331 1, /* vec_align_load_cost. */
332 2, /* vec_unalign_load_cost. */
333 1, /* vec_store_cost. */
334 3, /* cond_taken_branch_cost. */
335 1, /* cond_not_taken_branch_cost. */
336 };
337
338 static stringop_algs pentium_memcpy[2] = {
339 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
340 DUMMY_STRINGOP_ALGS};
341 static stringop_algs pentium_memset[2] = {
342 {libcall, {{-1, rep_prefix_4_byte, false}}},
343 DUMMY_STRINGOP_ALGS};
344
345 static const
346 struct processor_costs pentium_cost = {
347 COSTS_N_INSNS (1), /* cost of an add instruction */
348 COSTS_N_INSNS (1), /* cost of a lea instruction */
349 COSTS_N_INSNS (4), /* variable shift costs */
350 COSTS_N_INSNS (1), /* constant shift costs */
351 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
352 COSTS_N_INSNS (11), /* HI */
353 COSTS_N_INSNS (11), /* SI */
354 COSTS_N_INSNS (11), /* DI */
355 COSTS_N_INSNS (11)}, /* other */
356 0, /* cost of multiply per each bit set */
357 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
358 COSTS_N_INSNS (25), /* HI */
359 COSTS_N_INSNS (25), /* SI */
360 COSTS_N_INSNS (25), /* DI */
361 COSTS_N_INSNS (25)}, /* other */
362 COSTS_N_INSNS (3), /* cost of movsx */
363 COSTS_N_INSNS (2), /* cost of movzx */
364 8, /* "large" insn */
365 6, /* MOVE_RATIO */
366 6, /* cost for loading QImode using movzbl */
367 {2, 4, 2}, /* cost of loading integer registers
368 in QImode, HImode and SImode.
369 Relative to reg-reg move (2). */
370 {2, 4, 2}, /* cost of storing integer registers */
371 2, /* cost of reg,reg fld/fst */
372 {2, 2, 6}, /* cost of loading fp registers
373 in SFmode, DFmode and XFmode */
374 {4, 4, 6}, /* cost of storing fp registers
375 in SFmode, DFmode and XFmode */
376 8, /* cost of moving MMX register */
377 {8, 8}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {8, 8}, /* cost of storing MMX registers
380 in SImode and DImode */
381 2, /* cost of moving SSE register */
382 {4, 8, 16}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {4, 8, 16}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 3, /* MMX or SSE register to integer */
387 8, /* size of l1 cache. */
388 8, /* size of l2 cache */
389 0, /* size of prefetch block */
390 0, /* number of parallel prefetches */
391 2, /* Branch cost */
392 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
393 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
394 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
395 COSTS_N_INSNS (1), /* cost of FABS instruction. */
396 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
397 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
398 pentium_memcpy,
399 pentium_memset,
400 1, /* scalar_stmt_cost. */
401 1, /* scalar load_cost. */
402 1, /* scalar_store_cost. */
403 1, /* vec_stmt_cost. */
404 1, /* vec_to_scalar_cost. */
405 1, /* scalar_to_vec_cost. */
406 1, /* vec_align_load_cost. */
407 2, /* vec_unalign_load_cost. */
408 1, /* vec_store_cost. */
409 3, /* cond_taken_branch_cost. */
410 1, /* cond_not_taken_branch_cost. */
411 };
412
413 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
414 (we ensure the alignment). For small blocks inline loop is still a
415 noticeable win, for bigger blocks either rep movsl or rep movsb is
416 way to go. Rep movsb has apparently more expensive startup time in CPU,
417 but after 4K the difference is down in the noise. */
418 static stringop_algs pentiumpro_memcpy[2] = {
419 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
420 {8192, rep_prefix_4_byte, false},
421 {-1, rep_prefix_1_byte, false}}},
422 DUMMY_STRINGOP_ALGS};
423 static stringop_algs pentiumpro_memset[2] = {
424 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
425 {8192, rep_prefix_4_byte, false},
426 {-1, libcall, false}}},
427 DUMMY_STRINGOP_ALGS};
428 static const
429 struct processor_costs pentiumpro_cost = {
430 COSTS_N_INSNS (1), /* cost of an add instruction */
431 COSTS_N_INSNS (1), /* cost of a lea instruction */
432 COSTS_N_INSNS (1), /* variable shift costs */
433 COSTS_N_INSNS (1), /* constant shift costs */
434 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
435 COSTS_N_INSNS (4), /* HI */
436 COSTS_N_INSNS (4), /* SI */
437 COSTS_N_INSNS (4), /* DI */
438 COSTS_N_INSNS (4)}, /* other */
439 0, /* cost of multiply per each bit set */
440 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
441 COSTS_N_INSNS (17), /* HI */
442 COSTS_N_INSNS (17), /* SI */
443 COSTS_N_INSNS (17), /* DI */
444 COSTS_N_INSNS (17)}, /* other */
445 COSTS_N_INSNS (1), /* cost of movsx */
446 COSTS_N_INSNS (1), /* cost of movzx */
447 8, /* "large" insn */
448 6, /* MOVE_RATIO */
449 2, /* cost for loading QImode using movzbl */
450 {4, 4, 4}, /* cost of loading integer registers
451 in QImode, HImode and SImode.
452 Relative to reg-reg move (2). */
453 {2, 2, 2}, /* cost of storing integer registers */
454 2, /* cost of reg,reg fld/fst */
455 {2, 2, 6}, /* cost of loading fp registers
456 in SFmode, DFmode and XFmode */
457 {4, 4, 6}, /* cost of storing fp registers
458 in SFmode, DFmode and XFmode */
459 2, /* cost of moving MMX register */
460 {2, 2}, /* cost of loading MMX registers
461 in SImode and DImode */
462 {2, 2}, /* cost of storing MMX registers
463 in SImode and DImode */
464 2, /* cost of moving SSE register */
465 {2, 2, 8}, /* cost of loading SSE registers
466 in SImode, DImode and TImode */
467 {2, 2, 8}, /* cost of storing SSE registers
468 in SImode, DImode and TImode */
469 3, /* MMX or SSE register to integer */
470 8, /* size of l1 cache. */
471 256, /* size of l2 cache */
472 32, /* size of prefetch block */
473 6, /* number of parallel prefetches */
474 2, /* Branch cost */
475 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
476 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
477 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
478 COSTS_N_INSNS (2), /* cost of FABS instruction. */
479 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
480 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
481 pentiumpro_memcpy,
482 pentiumpro_memset,
483 1, /* scalar_stmt_cost. */
484 1, /* scalar load_cost. */
485 1, /* scalar_store_cost. */
486 1, /* vec_stmt_cost. */
487 1, /* vec_to_scalar_cost. */
488 1, /* scalar_to_vec_cost. */
489 1, /* vec_align_load_cost. */
490 2, /* vec_unalign_load_cost. */
491 1, /* vec_store_cost. */
492 3, /* cond_taken_branch_cost. */
493 1, /* cond_not_taken_branch_cost. */
494 };
495
496 static stringop_algs geode_memcpy[2] = {
497 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
498 DUMMY_STRINGOP_ALGS};
499 static stringop_algs geode_memset[2] = {
500 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
501 DUMMY_STRINGOP_ALGS};
502 static const
503 struct processor_costs geode_cost = {
504 COSTS_N_INSNS (1), /* cost of an add instruction */
505 COSTS_N_INSNS (1), /* cost of a lea instruction */
506 COSTS_N_INSNS (2), /* variable shift costs */
507 COSTS_N_INSNS (1), /* constant shift costs */
508 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
509 COSTS_N_INSNS (4), /* HI */
510 COSTS_N_INSNS (7), /* SI */
511 COSTS_N_INSNS (7), /* DI */
512 COSTS_N_INSNS (7)}, /* other */
513 0, /* cost of multiply per each bit set */
514 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
515 COSTS_N_INSNS (23), /* HI */
516 COSTS_N_INSNS (39), /* SI */
517 COSTS_N_INSNS (39), /* DI */
518 COSTS_N_INSNS (39)}, /* other */
519 COSTS_N_INSNS (1), /* cost of movsx */
520 COSTS_N_INSNS (1), /* cost of movzx */
521 8, /* "large" insn */
522 4, /* MOVE_RATIO */
523 1, /* cost for loading QImode using movzbl */
524 {1, 1, 1}, /* cost of loading integer registers
525 in QImode, HImode and SImode.
526 Relative to reg-reg move (2). */
527 {1, 1, 1}, /* cost of storing integer registers */
528 1, /* cost of reg,reg fld/fst */
529 {1, 1, 1}, /* cost of loading fp registers
530 in SFmode, DFmode and XFmode */
531 {4, 6, 6}, /* cost of storing fp registers
532 in SFmode, DFmode and XFmode */
533
534 1, /* cost of moving MMX register */
535 {1, 1}, /* cost of loading MMX registers
536 in SImode and DImode */
537 {1, 1}, /* cost of storing MMX registers
538 in SImode and DImode */
539 1, /* cost of moving SSE register */
540 {1, 1, 1}, /* cost of loading SSE registers
541 in SImode, DImode and TImode */
542 {1, 1, 1}, /* cost of storing SSE registers
543 in SImode, DImode and TImode */
544 1, /* MMX or SSE register to integer */
545 64, /* size of l1 cache. */
546 128, /* size of l2 cache. */
547 32, /* size of prefetch block */
548 1, /* number of parallel prefetches */
549 1, /* Branch cost */
550 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
551 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
552 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
553 COSTS_N_INSNS (1), /* cost of FABS instruction. */
554 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
555 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
556 geode_memcpy,
557 geode_memset,
558 1, /* scalar_stmt_cost. */
559 1, /* scalar load_cost. */
560 1, /* scalar_store_cost. */
561 1, /* vec_stmt_cost. */
562 1, /* vec_to_scalar_cost. */
563 1, /* scalar_to_vec_cost. */
564 1, /* vec_align_load_cost. */
565 2, /* vec_unalign_load_cost. */
566 1, /* vec_store_cost. */
567 3, /* cond_taken_branch_cost. */
568 1, /* cond_not_taken_branch_cost. */
569 };
570
571 static stringop_algs k6_memcpy[2] = {
572 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
573 DUMMY_STRINGOP_ALGS};
574 static stringop_algs k6_memset[2] = {
575 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
576 DUMMY_STRINGOP_ALGS};
577 static const
578 struct processor_costs k6_cost = {
579 COSTS_N_INSNS (1), /* cost of an add instruction */
580 COSTS_N_INSNS (2), /* cost of a lea instruction */
581 COSTS_N_INSNS (1), /* variable shift costs */
582 COSTS_N_INSNS (1), /* constant shift costs */
583 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
584 COSTS_N_INSNS (3), /* HI */
585 COSTS_N_INSNS (3), /* SI */
586 COSTS_N_INSNS (3), /* DI */
587 COSTS_N_INSNS (3)}, /* other */
588 0, /* cost of multiply per each bit set */
589 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
590 COSTS_N_INSNS (18), /* HI */
591 COSTS_N_INSNS (18), /* SI */
592 COSTS_N_INSNS (18), /* DI */
593 COSTS_N_INSNS (18)}, /* other */
594 COSTS_N_INSNS (2), /* cost of movsx */
595 COSTS_N_INSNS (2), /* cost of movzx */
596 8, /* "large" insn */
597 4, /* MOVE_RATIO */
598 3, /* cost for loading QImode using movzbl */
599 {4, 5, 4}, /* cost of loading integer registers
600 in QImode, HImode and SImode.
601 Relative to reg-reg move (2). */
602 {2, 3, 2}, /* cost of storing integer registers */
603 4, /* cost of reg,reg fld/fst */
604 {6, 6, 6}, /* cost of loading fp registers
605 in SFmode, DFmode and XFmode */
606 {4, 4, 4}, /* cost of storing fp registers
607 in SFmode, DFmode and XFmode */
608 2, /* cost of moving MMX register */
609 {2, 2}, /* cost of loading MMX registers
610 in SImode and DImode */
611 {2, 2}, /* cost of storing MMX registers
612 in SImode and DImode */
613 2, /* cost of moving SSE register */
614 {2, 2, 8}, /* cost of loading SSE registers
615 in SImode, DImode and TImode */
616 {2, 2, 8}, /* cost of storing SSE registers
617 in SImode, DImode and TImode */
618 6, /* MMX or SSE register to integer */
619 32, /* size of l1 cache. */
620 32, /* size of l2 cache. Some models
621 have integrated l2 cache, but
622 optimizing for k6 is not important
623 enough to worry about that. */
624 32, /* size of prefetch block */
625 1, /* number of parallel prefetches */
626 1, /* Branch cost */
627 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
628 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
629 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
630 COSTS_N_INSNS (2), /* cost of FABS instruction. */
631 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
632 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
633 k6_memcpy,
634 k6_memset,
635 1, /* scalar_stmt_cost. */
636 1, /* scalar load_cost. */
637 1, /* scalar_store_cost. */
638 1, /* vec_stmt_cost. */
639 1, /* vec_to_scalar_cost. */
640 1, /* scalar_to_vec_cost. */
641 1, /* vec_align_load_cost. */
642 2, /* vec_unalign_load_cost. */
643 1, /* vec_store_cost. */
644 3, /* cond_taken_branch_cost. */
645 1, /* cond_not_taken_branch_cost. */
646 };
647
648 /* For some reason, Athlon deals better with REP prefix (relative to loops)
649 compared to K8. Alignment becomes important after 8 bytes for memcpy and
650 128 bytes for memset. */
651 static stringop_algs athlon_memcpy[2] = {
652 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
653 DUMMY_STRINGOP_ALGS};
654 static stringop_algs athlon_memset[2] = {
655 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
656 DUMMY_STRINGOP_ALGS};
657 static const
658 struct processor_costs athlon_cost = {
659 COSTS_N_INSNS (1), /* cost of an add instruction */
660 COSTS_N_INSNS (2), /* cost of a lea instruction */
661 COSTS_N_INSNS (1), /* variable shift costs */
662 COSTS_N_INSNS (1), /* constant shift costs */
663 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
664 COSTS_N_INSNS (5), /* HI */
665 COSTS_N_INSNS (5), /* SI */
666 COSTS_N_INSNS (5), /* DI */
667 COSTS_N_INSNS (5)}, /* other */
668 0, /* cost of multiply per each bit set */
669 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
670 COSTS_N_INSNS (26), /* HI */
671 COSTS_N_INSNS (42), /* SI */
672 COSTS_N_INSNS (74), /* DI */
673 COSTS_N_INSNS (74)}, /* other */
674 COSTS_N_INSNS (1), /* cost of movsx */
675 COSTS_N_INSNS (1), /* cost of movzx */
676 8, /* "large" insn */
677 9, /* MOVE_RATIO */
678 4, /* cost for loading QImode using movzbl */
679 {3, 4, 3}, /* cost of loading integer registers
680 in QImode, HImode and SImode.
681 Relative to reg-reg move (2). */
682 {3, 4, 3}, /* cost of storing integer registers */
683 4, /* cost of reg,reg fld/fst */
684 {4, 4, 12}, /* cost of loading fp registers
685 in SFmode, DFmode and XFmode */
686 {6, 6, 8}, /* cost of storing fp registers
687 in SFmode, DFmode and XFmode */
688 2, /* cost of moving MMX register */
689 {4, 4}, /* cost of loading MMX registers
690 in SImode and DImode */
691 {4, 4}, /* cost of storing MMX registers
692 in SImode and DImode */
693 2, /* cost of moving SSE register */
694 {4, 4, 6}, /* cost of loading SSE registers
695 in SImode, DImode and TImode */
696 {4, 4, 5}, /* cost of storing SSE registers
697 in SImode, DImode and TImode */
698 5, /* MMX or SSE register to integer */
699 64, /* size of l1 cache. */
700 256, /* size of l2 cache. */
701 64, /* size of prefetch block */
702 6, /* number of parallel prefetches */
703 5, /* Branch cost */
704 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
705 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
706 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
707 COSTS_N_INSNS (2), /* cost of FABS instruction. */
708 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
709 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
710 athlon_memcpy,
711 athlon_memset,
712 1, /* scalar_stmt_cost. */
713 1, /* scalar load_cost. */
714 1, /* scalar_store_cost. */
715 1, /* vec_stmt_cost. */
716 1, /* vec_to_scalar_cost. */
717 1, /* scalar_to_vec_cost. */
718 1, /* vec_align_load_cost. */
719 2, /* vec_unalign_load_cost. */
720 1, /* vec_store_cost. */
721 3, /* cond_taken_branch_cost. */
722 1, /* cond_not_taken_branch_cost. */
723 };
724
725 /* K8 has optimized REP instruction for medium sized blocks, but for very
726 small blocks it is better to use loop. For large blocks, libcall can
727 do nontemporary accesses and beat inline considerably. */
728 static stringop_algs k8_memcpy[2] = {
729 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
730 {-1, rep_prefix_4_byte, false}}},
731 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
732 {-1, libcall, false}}}};
733 static stringop_algs k8_memset[2] = {
734 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
735 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
736 {libcall, {{48, unrolled_loop, false},
737 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
738 static const
739 struct processor_costs k8_cost = {
740 COSTS_N_INSNS (1), /* cost of an add instruction */
741 COSTS_N_INSNS (2), /* cost of a lea instruction */
742 COSTS_N_INSNS (1), /* variable shift costs */
743 COSTS_N_INSNS (1), /* constant shift costs */
744 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
745 COSTS_N_INSNS (4), /* HI */
746 COSTS_N_INSNS (3), /* SI */
747 COSTS_N_INSNS (4), /* DI */
748 COSTS_N_INSNS (5)}, /* other */
749 0, /* cost of multiply per each bit set */
750 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
751 COSTS_N_INSNS (26), /* HI */
752 COSTS_N_INSNS (42), /* SI */
753 COSTS_N_INSNS (74), /* DI */
754 COSTS_N_INSNS (74)}, /* other */
755 COSTS_N_INSNS (1), /* cost of movsx */
756 COSTS_N_INSNS (1), /* cost of movzx */
757 8, /* "large" insn */
758 9, /* MOVE_RATIO */
759 4, /* cost for loading QImode using movzbl */
760 {3, 4, 3}, /* cost of loading integer registers
761 in QImode, HImode and SImode.
762 Relative to reg-reg move (2). */
763 {3, 4, 3}, /* cost of storing integer registers */
764 4, /* cost of reg,reg fld/fst */
765 {4, 4, 12}, /* cost of loading fp registers
766 in SFmode, DFmode and XFmode */
767 {6, 6, 8}, /* cost of storing fp registers
768 in SFmode, DFmode and XFmode */
769 2, /* cost of moving MMX register */
770 {3, 3}, /* cost of loading MMX registers
771 in SImode and DImode */
772 {4, 4}, /* cost of storing MMX registers
773 in SImode and DImode */
774 2, /* cost of moving SSE register */
775 {4, 3, 6}, /* cost of loading SSE registers
776 in SImode, DImode and TImode */
777 {4, 4, 5}, /* cost of storing SSE registers
778 in SImode, DImode and TImode */
779 5, /* MMX or SSE register to integer */
780 64, /* size of l1 cache. */
781 512, /* size of l2 cache. */
782 64, /* size of prefetch block */
783 /* New AMD processors never drop prefetches; if they cannot be performed
784 immediately, they are queued. We set number of simultaneous prefetches
785 to a large constant to reflect this (it probably is not a good idea not
786 to limit number of prefetches at all, as their execution also takes some
787 time). */
788 100, /* number of parallel prefetches */
789 3, /* Branch cost */
790 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
791 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
792 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
793 COSTS_N_INSNS (2), /* cost of FABS instruction. */
794 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
795 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
796
797 k8_memcpy,
798 k8_memset,
799 4, /* scalar_stmt_cost. */
800 2, /* scalar load_cost. */
801 2, /* scalar_store_cost. */
802 5, /* vec_stmt_cost. */
803 0, /* vec_to_scalar_cost. */
804 2, /* scalar_to_vec_cost. */
805 2, /* vec_align_load_cost. */
806 3, /* vec_unalign_load_cost. */
807 3, /* vec_store_cost. */
808 3, /* cond_taken_branch_cost. */
809 2, /* cond_not_taken_branch_cost. */
810 };
811
812 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
813 very small blocks it is better to use loop. For large blocks, libcall can
814 do nontemporary accesses and beat inline considerably. */
815 static stringop_algs amdfam10_memcpy[2] = {
816 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
817 {-1, rep_prefix_4_byte, false}}},
818 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
819 {-1, libcall, false}}}};
820 static stringop_algs amdfam10_memset[2] = {
821 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
822 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
823 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
824 {-1, libcall, false}}}};
825 struct processor_costs amdfam10_cost = {
826 COSTS_N_INSNS (1), /* cost of an add instruction */
827 COSTS_N_INSNS (2), /* cost of a lea instruction */
828 COSTS_N_INSNS (1), /* variable shift costs */
829 COSTS_N_INSNS (1), /* constant shift costs */
830 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
831 COSTS_N_INSNS (4), /* HI */
832 COSTS_N_INSNS (3), /* SI */
833 COSTS_N_INSNS (4), /* DI */
834 COSTS_N_INSNS (5)}, /* other */
835 0, /* cost of multiply per each bit set */
836 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
837 COSTS_N_INSNS (35), /* HI */
838 COSTS_N_INSNS (51), /* SI */
839 COSTS_N_INSNS (83), /* DI */
840 COSTS_N_INSNS (83)}, /* other */
841 COSTS_N_INSNS (1), /* cost of movsx */
842 COSTS_N_INSNS (1), /* cost of movzx */
843 8, /* "large" insn */
844 9, /* MOVE_RATIO */
845 4, /* cost for loading QImode using movzbl */
846 {3, 4, 3}, /* cost of loading integer registers
847 in QImode, HImode and SImode.
848 Relative to reg-reg move (2). */
849 {3, 4, 3}, /* cost of storing integer registers */
850 4, /* cost of reg,reg fld/fst */
851 {4, 4, 12}, /* cost of loading fp registers
852 in SFmode, DFmode and XFmode */
853 {6, 6, 8}, /* cost of storing fp registers
854 in SFmode, DFmode and XFmode */
855 2, /* cost of moving MMX register */
856 {3, 3}, /* cost of loading MMX registers
857 in SImode and DImode */
858 {4, 4}, /* cost of storing MMX registers
859 in SImode and DImode */
860 2, /* cost of moving SSE register */
861 {4, 4, 3}, /* cost of loading SSE registers
862 in SImode, DImode and TImode */
863 {4, 4, 5}, /* cost of storing SSE registers
864 in SImode, DImode and TImode */
865 3, /* MMX or SSE register to integer */
866 /* On K8:
867 MOVD reg64, xmmreg Double FSTORE 4
868 MOVD reg32, xmmreg Double FSTORE 4
869 On AMDFAM10:
870 MOVD reg64, xmmreg Double FADD 3
871 1/1 1/1
872 MOVD reg32, xmmreg Double FADD 3
873 1/1 1/1 */
874 64, /* size of l1 cache. */
875 512, /* size of l2 cache. */
876 64, /* size of prefetch block */
877 /* New AMD processors never drop prefetches; if they cannot be performed
878 immediately, they are queued. We set number of simultaneous prefetches
879 to a large constant to reflect this (it probably is not a good idea not
880 to limit number of prefetches at all, as their execution also takes some
881 time). */
882 100, /* number of parallel prefetches */
883 2, /* Branch cost */
884 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
885 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
886 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
887 COSTS_N_INSNS (2), /* cost of FABS instruction. */
888 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
889 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
890
891 amdfam10_memcpy,
892 amdfam10_memset,
893 4, /* scalar_stmt_cost. */
894 2, /* scalar load_cost. */
895 2, /* scalar_store_cost. */
896 6, /* vec_stmt_cost. */
897 0, /* vec_to_scalar_cost. */
898 2, /* scalar_to_vec_cost. */
899 2, /* vec_align_load_cost. */
900 2, /* vec_unalign_load_cost. */
901 2, /* vec_store_cost. */
902 2, /* cond_taken_branch_cost. */
903 1, /* cond_not_taken_branch_cost. */
904 };
905
906 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
907 very small blocks it is better to use loop. For large blocks, libcall
908 can do nontemporary accesses and beat inline considerably. */
909 static stringop_algs bdver1_memcpy[2] = {
910 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
911 {-1, rep_prefix_4_byte, false}}},
912 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
913 {-1, libcall, false}}}};
914 static stringop_algs bdver1_memset[2] = {
915 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
916 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
917 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
918 {-1, libcall, false}}}};
919
920 const struct processor_costs bdver1_cost = {
921 COSTS_N_INSNS (1), /* cost of an add instruction */
922 COSTS_N_INSNS (1), /* cost of a lea instruction */
923 COSTS_N_INSNS (1), /* variable shift costs */
924 COSTS_N_INSNS (1), /* constant shift costs */
925 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
926 COSTS_N_INSNS (4), /* HI */
927 COSTS_N_INSNS (4), /* SI */
928 COSTS_N_INSNS (6), /* DI */
929 COSTS_N_INSNS (6)}, /* other */
930 0, /* cost of multiply per each bit set */
931 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
932 COSTS_N_INSNS (35), /* HI */
933 COSTS_N_INSNS (51), /* SI */
934 COSTS_N_INSNS (83), /* DI */
935 COSTS_N_INSNS (83)}, /* other */
936 COSTS_N_INSNS (1), /* cost of movsx */
937 COSTS_N_INSNS (1), /* cost of movzx */
938 8, /* "large" insn */
939 9, /* MOVE_RATIO */
940 4, /* cost for loading QImode using movzbl */
941 {5, 5, 4}, /* cost of loading integer registers
942 in QImode, HImode and SImode.
943 Relative to reg-reg move (2). */
944 {4, 4, 4}, /* cost of storing integer registers */
945 2, /* cost of reg,reg fld/fst */
946 {5, 5, 12}, /* cost of loading fp registers
947 in SFmode, DFmode and XFmode */
948 {4, 4, 8}, /* cost of storing fp registers
949 in SFmode, DFmode and XFmode */
950 2, /* cost of moving MMX register */
951 {4, 4}, /* cost of loading MMX registers
952 in SImode and DImode */
953 {4, 4}, /* cost of storing MMX registers
954 in SImode and DImode */
955 2, /* cost of moving SSE register */
956 {4, 4, 4}, /* cost of loading SSE registers
957 in SImode, DImode and TImode */
958 {4, 4, 4}, /* cost of storing SSE registers
959 in SImode, DImode and TImode */
960 2, /* MMX or SSE register to integer */
961 /* On K8:
962 MOVD reg64, xmmreg Double FSTORE 4
963 MOVD reg32, xmmreg Double FSTORE 4
964 On AMDFAM10:
965 MOVD reg64, xmmreg Double FADD 3
966 1/1 1/1
967 MOVD reg32, xmmreg Double FADD 3
968 1/1 1/1 */
969 16, /* size of l1 cache. */
970 2048, /* size of l2 cache. */
971 64, /* size of prefetch block */
972 /* New AMD processors never drop prefetches; if they cannot be performed
973 immediately, they are queued. We set number of simultaneous prefetches
974 to a large constant to reflect this (it probably is not a good idea not
975 to limit number of prefetches at all, as their execution also takes some
976 time). */
977 100, /* number of parallel prefetches */
978 2, /* Branch cost */
979 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
980 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
981 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
982 COSTS_N_INSNS (2), /* cost of FABS instruction. */
983 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
984 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
985
986 bdver1_memcpy,
987 bdver1_memset,
988 6, /* scalar_stmt_cost. */
989 4, /* scalar load_cost. */
990 4, /* scalar_store_cost. */
991 6, /* vec_stmt_cost. */
992 0, /* vec_to_scalar_cost. */
993 2, /* scalar_to_vec_cost. */
994 4, /* vec_align_load_cost. */
995 4, /* vec_unalign_load_cost. */
996 4, /* vec_store_cost. */
997 2, /* cond_taken_branch_cost. */
998 1, /* cond_not_taken_branch_cost. */
999 };
1000
1001 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
1002 very small blocks it is better to use loop. For large blocks, libcall
1003 can do nontemporary accesses and beat inline considerably. */
1004
1005 static stringop_algs bdver2_memcpy[2] = {
1006 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1007 {-1, rep_prefix_4_byte, false}}},
1008 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1009 {-1, libcall, false}}}};
1010 static stringop_algs bdver2_memset[2] = {
1011 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1012 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1013 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1014 {-1, libcall, false}}}};
1015
1016 const struct processor_costs bdver2_cost = {
1017 COSTS_N_INSNS (1), /* cost of an add instruction */
1018 COSTS_N_INSNS (1), /* cost of a lea instruction */
1019 COSTS_N_INSNS (1), /* variable shift costs */
1020 COSTS_N_INSNS (1), /* constant shift costs */
1021 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1022 COSTS_N_INSNS (4), /* HI */
1023 COSTS_N_INSNS (4), /* SI */
1024 COSTS_N_INSNS (6), /* DI */
1025 COSTS_N_INSNS (6)}, /* other */
1026 0, /* cost of multiply per each bit set */
1027 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1028 COSTS_N_INSNS (35), /* HI */
1029 COSTS_N_INSNS (51), /* SI */
1030 COSTS_N_INSNS (83), /* DI */
1031 COSTS_N_INSNS (83)}, /* other */
1032 COSTS_N_INSNS (1), /* cost of movsx */
1033 COSTS_N_INSNS (1), /* cost of movzx */
1034 8, /* "large" insn */
1035 9, /* MOVE_RATIO */
1036 4, /* cost for loading QImode using movzbl */
1037 {5, 5, 4}, /* cost of loading integer registers
1038 in QImode, HImode and SImode.
1039 Relative to reg-reg move (2). */
1040 {4, 4, 4}, /* cost of storing integer registers */
1041 2, /* cost of reg,reg fld/fst */
1042 {5, 5, 12}, /* cost of loading fp registers
1043 in SFmode, DFmode and XFmode */
1044 {4, 4, 8}, /* cost of storing fp registers
1045 in SFmode, DFmode and XFmode */
1046 2, /* cost of moving MMX register */
1047 {4, 4}, /* cost of loading MMX registers
1048 in SImode and DImode */
1049 {4, 4}, /* cost of storing MMX registers
1050 in SImode and DImode */
1051 2, /* cost of moving SSE register */
1052 {4, 4, 4}, /* cost of loading SSE registers
1053 in SImode, DImode and TImode */
1054 {4, 4, 4}, /* cost of storing SSE registers
1055 in SImode, DImode and TImode */
1056 2, /* MMX or SSE register to integer */
1057 /* On K8:
1058 MOVD reg64, xmmreg Double FSTORE 4
1059 MOVD reg32, xmmreg Double FSTORE 4
1060 On AMDFAM10:
1061 MOVD reg64, xmmreg Double FADD 3
1062 1/1 1/1
1063 MOVD reg32, xmmreg Double FADD 3
1064 1/1 1/1 */
1065 16, /* size of l1 cache. */
1066 2048, /* size of l2 cache. */
1067 64, /* size of prefetch block */
1068 /* New AMD processors never drop prefetches; if they cannot be performed
1069 immediately, they are queued. We set number of simultaneous prefetches
1070 to a large constant to reflect this (it probably is not a good idea not
1071 to limit number of prefetches at all, as their execution also takes some
1072 time). */
1073 100, /* number of parallel prefetches */
1074 2, /* Branch cost */
1075 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1076 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1077 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1078 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1079 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1080 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1081
1082 bdver2_memcpy,
1083 bdver2_memset,
1084 6, /* scalar_stmt_cost. */
1085 4, /* scalar load_cost. */
1086 4, /* scalar_store_cost. */
1087 6, /* vec_stmt_cost. */
1088 0, /* vec_to_scalar_cost. */
1089 2, /* scalar_to_vec_cost. */
1090 4, /* vec_align_load_cost. */
1091 4, /* vec_unalign_load_cost. */
1092 4, /* vec_store_cost. */
1093 2, /* cond_taken_branch_cost. */
1094 1, /* cond_not_taken_branch_cost. */
1095 };
1096
1097
1098 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1099 very small blocks it is better to use loop. For large blocks, libcall
1100 can do nontemporary accesses and beat inline considerably. */
1101 static stringop_algs bdver3_memcpy[2] = {
1102 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1103 {-1, rep_prefix_4_byte, false}}},
1104 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1105 {-1, libcall, false}}}};
1106 static stringop_algs bdver3_memset[2] = {
1107 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1108 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1109 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1110 {-1, libcall, false}}}};
1111 struct processor_costs bdver3_cost = {
1112 COSTS_N_INSNS (1), /* cost of an add instruction */
1113 COSTS_N_INSNS (1), /* cost of a lea instruction */
1114 COSTS_N_INSNS (1), /* variable shift costs */
1115 COSTS_N_INSNS (1), /* constant shift costs */
1116 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1117 COSTS_N_INSNS (4), /* HI */
1118 COSTS_N_INSNS (4), /* SI */
1119 COSTS_N_INSNS (6), /* DI */
1120 COSTS_N_INSNS (6)}, /* other */
1121 0, /* cost of multiply per each bit set */
1122 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1123 COSTS_N_INSNS (35), /* HI */
1124 COSTS_N_INSNS (51), /* SI */
1125 COSTS_N_INSNS (83), /* DI */
1126 COSTS_N_INSNS (83)}, /* other */
1127 COSTS_N_INSNS (1), /* cost of movsx */
1128 COSTS_N_INSNS (1), /* cost of movzx */
1129 8, /* "large" insn */
1130 9, /* MOVE_RATIO */
1131 4, /* cost for loading QImode using movzbl */
1132 {5, 5, 4}, /* cost of loading integer registers
1133 in QImode, HImode and SImode.
1134 Relative to reg-reg move (2). */
1135 {4, 4, 4}, /* cost of storing integer registers */
1136 2, /* cost of reg,reg fld/fst */
1137 {5, 5, 12}, /* cost of loading fp registers
1138 in SFmode, DFmode and XFmode */
1139 {4, 4, 8}, /* cost of storing fp registers
1140 in SFmode, DFmode and XFmode */
1141 2, /* cost of moving MMX register */
1142 {4, 4}, /* cost of loading MMX registers
1143 in SImode and DImode */
1144 {4, 4}, /* cost of storing MMX registers
1145 in SImode and DImode */
1146 2, /* cost of moving SSE register */
1147 {4, 4, 4}, /* cost of loading SSE registers
1148 in SImode, DImode and TImode */
1149 {4, 4, 4}, /* cost of storing SSE registers
1150 in SImode, DImode and TImode */
1151 2, /* MMX or SSE register to integer */
1152 16, /* size of l1 cache. */
1153 2048, /* size of l2 cache. */
1154 64, /* size of prefetch block */
1155 /* New AMD processors never drop prefetches; if they cannot be performed
1156 immediately, they are queued. We set number of simultaneous prefetches
1157 to a large constant to reflect this (it probably is not a good idea not
1158 to limit number of prefetches at all, as their execution also takes some
1159 time). */
1160 100, /* number of parallel prefetches */
1161 2, /* Branch cost */
1162 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1163 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1164 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1165 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1166 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1167 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1168
1169 bdver3_memcpy,
1170 bdver3_memset,
1171 6, /* scalar_stmt_cost. */
1172 4, /* scalar load_cost. */
1173 4, /* scalar_store_cost. */
1174 6, /* vec_stmt_cost. */
1175 0, /* vec_to_scalar_cost. */
1176 2, /* scalar_to_vec_cost. */
1177 4, /* vec_align_load_cost. */
1178 4, /* vec_unalign_load_cost. */
1179 4, /* vec_store_cost. */
1180 2, /* cond_taken_branch_cost. */
1181 1, /* cond_not_taken_branch_cost. */
1182 };
1183
1184 /* BDVER4 has optimized REP instruction for medium sized blocks, but for
1185 very small blocks it is better to use loop. For large blocks, libcall
1186 can do nontemporary accesses and beat inline considerably. */
1187 static stringop_algs bdver4_memcpy[2] = {
1188 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1189 {-1, rep_prefix_4_byte, false}}},
1190 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1191 {-1, libcall, false}}}};
1192 static stringop_algs bdver4_memset[2] = {
1193 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1194 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1195 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1196 {-1, libcall, false}}}};
1197 struct processor_costs bdver4_cost = {
1198 COSTS_N_INSNS (1), /* cost of an add instruction */
1199 COSTS_N_INSNS (1), /* cost of a lea instruction */
1200 COSTS_N_INSNS (1), /* variable shift costs */
1201 COSTS_N_INSNS (1), /* constant shift costs */
1202 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1203 COSTS_N_INSNS (4), /* HI */
1204 COSTS_N_INSNS (4), /* SI */
1205 COSTS_N_INSNS (6), /* DI */
1206 COSTS_N_INSNS (6)}, /* other */
1207 0, /* cost of multiply per each bit set */
1208 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1209 COSTS_N_INSNS (35), /* HI */
1210 COSTS_N_INSNS (51), /* SI */
1211 COSTS_N_INSNS (83), /* DI */
1212 COSTS_N_INSNS (83)}, /* other */
1213 COSTS_N_INSNS (1), /* cost of movsx */
1214 COSTS_N_INSNS (1), /* cost of movzx */
1215 8, /* "large" insn */
1216 9, /* MOVE_RATIO */
1217 4, /* cost for loading QImode using movzbl */
1218 {5, 5, 4}, /* cost of loading integer registers
1219 in QImode, HImode and SImode.
1220 Relative to reg-reg move (2). */
1221 {4, 4, 4}, /* cost of storing integer registers */
1222 2, /* cost of reg,reg fld/fst */
1223 {5, 5, 12}, /* cost of loading fp registers
1224 in SFmode, DFmode and XFmode */
1225 {4, 4, 8}, /* cost of storing fp registers
1226 in SFmode, DFmode and XFmode */
1227 2, /* cost of moving MMX register */
1228 {4, 4}, /* cost of loading MMX registers
1229 in SImode and DImode */
1230 {4, 4}, /* cost of storing MMX registers
1231 in SImode and DImode */
1232 2, /* cost of moving SSE register */
1233 {4, 4, 4}, /* cost of loading SSE registers
1234 in SImode, DImode and TImode */
1235 {4, 4, 4}, /* cost of storing SSE registers
1236 in SImode, DImode and TImode */
1237 2, /* MMX or SSE register to integer */
1238 16, /* size of l1 cache. */
1239 2048, /* size of l2 cache. */
1240 64, /* size of prefetch block */
1241 /* New AMD processors never drop prefetches; if they cannot be performed
1242 immediately, they are queued. We set number of simultaneous prefetches
1243 to a large constant to reflect this (it probably is not a good idea not
1244 to limit number of prefetches at all, as their execution also takes some
1245 time). */
1246 100, /* number of parallel prefetches */
1247 2, /* Branch cost */
1248 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1249 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1250 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1251 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1252 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1253 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1254
1255 bdver4_memcpy,
1256 bdver4_memset,
1257 6, /* scalar_stmt_cost. */
1258 4, /* scalar load_cost. */
1259 4, /* scalar_store_cost. */
1260 6, /* vec_stmt_cost. */
1261 0, /* vec_to_scalar_cost. */
1262 2, /* scalar_to_vec_cost. */
1263 4, /* vec_align_load_cost. */
1264 4, /* vec_unalign_load_cost. */
1265 4, /* vec_store_cost. */
1266 2, /* cond_taken_branch_cost. */
1267 1, /* cond_not_taken_branch_cost. */
1268 };
1269
1270 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1271 very small blocks it is better to use loop. For large blocks, libcall can
1272 do nontemporary accesses and beat inline considerably. */
1273 static stringop_algs btver1_memcpy[2] = {
1274 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1275 {-1, rep_prefix_4_byte, false}}},
1276 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1277 {-1, libcall, false}}}};
1278 static stringop_algs btver1_memset[2] = {
1279 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1280 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1281 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1282 {-1, libcall, false}}}};
1283 const struct processor_costs btver1_cost = {
1284 COSTS_N_INSNS (1), /* cost of an add instruction */
1285 COSTS_N_INSNS (2), /* cost of a lea instruction */
1286 COSTS_N_INSNS (1), /* variable shift costs */
1287 COSTS_N_INSNS (1), /* constant shift costs */
1288 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1289 COSTS_N_INSNS (4), /* HI */
1290 COSTS_N_INSNS (3), /* SI */
1291 COSTS_N_INSNS (4), /* DI */
1292 COSTS_N_INSNS (5)}, /* other */
1293 0, /* cost of multiply per each bit set */
1294 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1295 COSTS_N_INSNS (35), /* HI */
1296 COSTS_N_INSNS (51), /* SI */
1297 COSTS_N_INSNS (83), /* DI */
1298 COSTS_N_INSNS (83)}, /* other */
1299 COSTS_N_INSNS (1), /* cost of movsx */
1300 COSTS_N_INSNS (1), /* cost of movzx */
1301 8, /* "large" insn */
1302 9, /* MOVE_RATIO */
1303 4, /* cost for loading QImode using movzbl */
1304 {3, 4, 3}, /* cost of loading integer registers
1305 in QImode, HImode and SImode.
1306 Relative to reg-reg move (2). */
1307 {3, 4, 3}, /* cost of storing integer registers */
1308 4, /* cost of reg,reg fld/fst */
1309 {4, 4, 12}, /* cost of loading fp registers
1310 in SFmode, DFmode and XFmode */
1311 {6, 6, 8}, /* cost of storing fp registers
1312 in SFmode, DFmode and XFmode */
1313 2, /* cost of moving MMX register */
1314 {3, 3}, /* cost of loading MMX registers
1315 in SImode and DImode */
1316 {4, 4}, /* cost of storing MMX registers
1317 in SImode and DImode */
1318 2, /* cost of moving SSE register */
1319 {4, 4, 3}, /* cost of loading SSE registers
1320 in SImode, DImode and TImode */
1321 {4, 4, 5}, /* cost of storing SSE registers
1322 in SImode, DImode and TImode */
1323 3, /* MMX or SSE register to integer */
1324 /* On K8:
1325 MOVD reg64, xmmreg Double FSTORE 4
1326 MOVD reg32, xmmreg Double FSTORE 4
1327 On AMDFAM10:
1328 MOVD reg64, xmmreg Double FADD 3
1329 1/1 1/1
1330 MOVD reg32, xmmreg Double FADD 3
1331 1/1 1/1 */
1332 32, /* size of l1 cache. */
1333 512, /* size of l2 cache. */
1334 64, /* size of prefetch block */
1335 100, /* number of parallel prefetches */
1336 2, /* Branch cost */
1337 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1338 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1339 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1340 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1341 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1342 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1343
1344 btver1_memcpy,
1345 btver1_memset,
1346 4, /* scalar_stmt_cost. */
1347 2, /* scalar load_cost. */
1348 2, /* scalar_store_cost. */
1349 6, /* vec_stmt_cost. */
1350 0, /* vec_to_scalar_cost. */
1351 2, /* scalar_to_vec_cost. */
1352 2, /* vec_align_load_cost. */
1353 2, /* vec_unalign_load_cost. */
1354 2, /* vec_store_cost. */
1355 2, /* cond_taken_branch_cost. */
1356 1, /* cond_not_taken_branch_cost. */
1357 };
1358
1359 static stringop_algs btver2_memcpy[2] = {
1360 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1361 {-1, rep_prefix_4_byte, false}}},
1362 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1363 {-1, libcall, false}}}};
1364 static stringop_algs btver2_memset[2] = {
1365 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1366 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1367 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1368 {-1, libcall, false}}}};
1369 const struct processor_costs btver2_cost = {
1370 COSTS_N_INSNS (1), /* cost of an add instruction */
1371 COSTS_N_INSNS (2), /* cost of a lea instruction */
1372 COSTS_N_INSNS (1), /* variable shift costs */
1373 COSTS_N_INSNS (1), /* constant shift costs */
1374 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1375 COSTS_N_INSNS (4), /* HI */
1376 COSTS_N_INSNS (3), /* SI */
1377 COSTS_N_INSNS (4), /* DI */
1378 COSTS_N_INSNS (5)}, /* other */
1379 0, /* cost of multiply per each bit set */
1380 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1381 COSTS_N_INSNS (35), /* HI */
1382 COSTS_N_INSNS (51), /* SI */
1383 COSTS_N_INSNS (83), /* DI */
1384 COSTS_N_INSNS (83)}, /* other */
1385 COSTS_N_INSNS (1), /* cost of movsx */
1386 COSTS_N_INSNS (1), /* cost of movzx */
1387 8, /* "large" insn */
1388 9, /* MOVE_RATIO */
1389 4, /* cost for loading QImode using movzbl */
1390 {3, 4, 3}, /* cost of loading integer registers
1391 in QImode, HImode and SImode.
1392 Relative to reg-reg move (2). */
1393 {3, 4, 3}, /* cost of storing integer registers */
1394 4, /* cost of reg,reg fld/fst */
1395 {4, 4, 12}, /* cost of loading fp registers
1396 in SFmode, DFmode and XFmode */
1397 {6, 6, 8}, /* cost of storing fp registers
1398 in SFmode, DFmode and XFmode */
1399 2, /* cost of moving MMX register */
1400 {3, 3}, /* cost of loading MMX registers
1401 in SImode and DImode */
1402 {4, 4}, /* cost of storing MMX registers
1403 in SImode and DImode */
1404 2, /* cost of moving SSE register */
1405 {4, 4, 3}, /* cost of loading SSE registers
1406 in SImode, DImode and TImode */
1407 {4, 4, 5}, /* cost of storing SSE registers
1408 in SImode, DImode and TImode */
1409 3, /* MMX or SSE register to integer */
1410 /* On K8:
1411 MOVD reg64, xmmreg Double FSTORE 4
1412 MOVD reg32, xmmreg Double FSTORE 4
1413 On AMDFAM10:
1414 MOVD reg64, xmmreg Double FADD 3
1415 1/1 1/1
1416 MOVD reg32, xmmreg Double FADD 3
1417 1/1 1/1 */
1418 32, /* size of l1 cache. */
1419 2048, /* size of l2 cache. */
1420 64, /* size of prefetch block */
1421 100, /* number of parallel prefetches */
1422 2, /* Branch cost */
1423 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1424 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1425 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1426 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1427 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1428 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1429 btver2_memcpy,
1430 btver2_memset,
1431 4, /* scalar_stmt_cost. */
1432 2, /* scalar_load_cost. */
1433 2, /* scalar_store_cost. */
1434 6, /* vec_stmt_cost. */
1435 0, /* vec_to_scalar_cost. */
1436 2, /* scalar_to_vec_cost. */
1437 2, /* vec_align_load_cost. */
1438 2, /* vec_unalign_load_cost. */
1439 2, /* vec_store_cost. */
1440 2, /* cond_taken_branch_cost. */
1441 1, /* cond_not_taken_branch_cost. */
1442 };
1443
1444 static stringop_algs pentium4_memcpy[2] = {
1445 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1446 DUMMY_STRINGOP_ALGS};
1447 static stringop_algs pentium4_memset[2] = {
1448 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1449 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1450 DUMMY_STRINGOP_ALGS};
1451
1452 static const
1453 struct processor_costs pentium4_cost = {
1454 COSTS_N_INSNS (1), /* cost of an add instruction */
1455 COSTS_N_INSNS (3), /* cost of a lea instruction */
1456 COSTS_N_INSNS (4), /* variable shift costs */
1457 COSTS_N_INSNS (4), /* constant shift costs */
1458 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1459 COSTS_N_INSNS (15), /* HI */
1460 COSTS_N_INSNS (15), /* SI */
1461 COSTS_N_INSNS (15), /* DI */
1462 COSTS_N_INSNS (15)}, /* other */
1463 0, /* cost of multiply per each bit set */
1464 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1465 COSTS_N_INSNS (56), /* HI */
1466 COSTS_N_INSNS (56), /* SI */
1467 COSTS_N_INSNS (56), /* DI */
1468 COSTS_N_INSNS (56)}, /* other */
1469 COSTS_N_INSNS (1), /* cost of movsx */
1470 COSTS_N_INSNS (1), /* cost of movzx */
1471 16, /* "large" insn */
1472 6, /* MOVE_RATIO */
1473 2, /* cost for loading QImode using movzbl */
1474 {4, 5, 4}, /* cost of loading integer registers
1475 in QImode, HImode and SImode.
1476 Relative to reg-reg move (2). */
1477 {2, 3, 2}, /* cost of storing integer registers */
1478 2, /* cost of reg,reg fld/fst */
1479 {2, 2, 6}, /* cost of loading fp registers
1480 in SFmode, DFmode and XFmode */
1481 {4, 4, 6}, /* cost of storing fp registers
1482 in SFmode, DFmode and XFmode */
1483 2, /* cost of moving MMX register */
1484 {2, 2}, /* cost of loading MMX registers
1485 in SImode and DImode */
1486 {2, 2}, /* cost of storing MMX registers
1487 in SImode and DImode */
1488 12, /* cost of moving SSE register */
1489 {12, 12, 12}, /* cost of loading SSE registers
1490 in SImode, DImode and TImode */
1491 {2, 2, 8}, /* cost of storing SSE registers
1492 in SImode, DImode and TImode */
1493 10, /* MMX or SSE register to integer */
1494 8, /* size of l1 cache. */
1495 256, /* size of l2 cache. */
1496 64, /* size of prefetch block */
1497 6, /* number of parallel prefetches */
1498 2, /* Branch cost */
1499 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1500 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1501 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1502 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1503 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1504 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1505 pentium4_memcpy,
1506 pentium4_memset,
1507 1, /* scalar_stmt_cost. */
1508 1, /* scalar_load_cost. */
1509 1, /* scalar_store_cost. */
1510 1, /* vec_stmt_cost. */
1511 1, /* vec_to_scalar_cost. */
1512 1, /* scalar_to_vec_cost. */
1513 1, /* vec_align_load_cost. */
1514 2, /* vec_unalign_load_cost. */
1515 1, /* vec_store_cost. */
1516 3, /* cond_taken_branch_cost. */
1517 1, /* cond_not_taken_branch_cost. */
1518 };
1519
1520 static stringop_algs nocona_memcpy[2] = {
1521 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1522 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1523 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1524
1525 static stringop_algs nocona_memset[2] = {
1526 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1527 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1528 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1529 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1530
1531 static const
1532 struct processor_costs nocona_cost = {
1533 COSTS_N_INSNS (1), /* cost of an add instruction */
1534 COSTS_N_INSNS (1), /* cost of a lea instruction */
1535 COSTS_N_INSNS (1), /* variable shift costs */
1536 COSTS_N_INSNS (1), /* constant shift costs */
1537 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1538 COSTS_N_INSNS (10), /* HI */
1539 COSTS_N_INSNS (10), /* SI */
1540 COSTS_N_INSNS (10), /* DI */
1541 COSTS_N_INSNS (10)}, /* other */
1542 0, /* cost of multiply per each bit set */
1543 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1544 COSTS_N_INSNS (66), /* HI */
1545 COSTS_N_INSNS (66), /* SI */
1546 COSTS_N_INSNS (66), /* DI */
1547 COSTS_N_INSNS (66)}, /* other */
1548 COSTS_N_INSNS (1), /* cost of movsx */
1549 COSTS_N_INSNS (1), /* cost of movzx */
1550 16, /* "large" insn */
1551 17, /* MOVE_RATIO */
1552 4, /* cost for loading QImode using movzbl */
1553 {4, 4, 4}, /* cost of loading integer registers
1554 in QImode, HImode and SImode.
1555 Relative to reg-reg move (2). */
1556 {4, 4, 4}, /* cost of storing integer registers */
1557 3, /* cost of reg,reg fld/fst */
1558 {12, 12, 12}, /* cost of loading fp registers
1559 in SFmode, DFmode and XFmode */
1560 {4, 4, 4}, /* cost of storing fp registers
1561 in SFmode, DFmode and XFmode */
1562 6, /* cost of moving MMX register */
1563 {12, 12}, /* cost of loading MMX registers
1564 in SImode and DImode */
1565 {12, 12}, /* cost of storing MMX registers
1566 in SImode and DImode */
1567 6, /* cost of moving SSE register */
1568 {12, 12, 12}, /* cost of loading SSE registers
1569 in SImode, DImode and TImode */
1570 {12, 12, 12}, /* cost of storing SSE registers
1571 in SImode, DImode and TImode */
1572 8, /* MMX or SSE register to integer */
1573 8, /* size of l1 cache. */
1574 1024, /* size of l2 cache. */
1575 64, /* size of prefetch block */
1576 8, /* number of parallel prefetches */
1577 1, /* Branch cost */
1578 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1579 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1580 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1581 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1582 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1583 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1584 nocona_memcpy,
1585 nocona_memset,
1586 1, /* scalar_stmt_cost. */
1587 1, /* scalar_load_cost. */
1588 1, /* scalar_store_cost. */
1589 1, /* vec_stmt_cost. */
1590 1, /* vec_to_scalar_cost. */
1591 1, /* scalar_to_vec_cost. */
1592 1, /* vec_align_load_cost. */
1593 2, /* vec_unalign_load_cost. */
1594 1, /* vec_store_cost. */
1595 3, /* cond_taken_branch_cost. */
1596 1, /* cond_not_taken_branch_cost. */
1597 };
1598
1599 static stringop_algs atom_memcpy[2] = {
1600 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1601 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1602 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1603 static stringop_algs atom_memset[2] = {
1604 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1605 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1606 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1607 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1608 static const
1609 struct processor_costs atom_cost = {
1610 COSTS_N_INSNS (1), /* cost of an add instruction */
1611 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1612 COSTS_N_INSNS (1), /* variable shift costs */
1613 COSTS_N_INSNS (1), /* constant shift costs */
1614 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1615 COSTS_N_INSNS (4), /* HI */
1616 COSTS_N_INSNS (3), /* SI */
1617 COSTS_N_INSNS (4), /* DI */
1618 COSTS_N_INSNS (2)}, /* other */
1619 0, /* cost of multiply per each bit set */
1620 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1621 COSTS_N_INSNS (26), /* HI */
1622 COSTS_N_INSNS (42), /* SI */
1623 COSTS_N_INSNS (74), /* DI */
1624 COSTS_N_INSNS (74)}, /* other */
1625 COSTS_N_INSNS (1), /* cost of movsx */
1626 COSTS_N_INSNS (1), /* cost of movzx */
1627 8, /* "large" insn */
1628 17, /* MOVE_RATIO */
1629 4, /* cost for loading QImode using movzbl */
1630 {4, 4, 4}, /* cost of loading integer registers
1631 in QImode, HImode and SImode.
1632 Relative to reg-reg move (2). */
1633 {4, 4, 4}, /* cost of storing integer registers */
1634 4, /* cost of reg,reg fld/fst */
1635 {12, 12, 12}, /* cost of loading fp registers
1636 in SFmode, DFmode and XFmode */
1637 {6, 6, 8}, /* cost of storing fp registers
1638 in SFmode, DFmode and XFmode */
1639 2, /* cost of moving MMX register */
1640 {8, 8}, /* cost of loading MMX registers
1641 in SImode and DImode */
1642 {8, 8}, /* cost of storing MMX registers
1643 in SImode and DImode */
1644 2, /* cost of moving SSE register */
1645 {8, 8, 8}, /* cost of loading SSE registers
1646 in SImode, DImode and TImode */
1647 {8, 8, 8}, /* cost of storing SSE registers
1648 in SImode, DImode and TImode */
1649 5, /* MMX or SSE register to integer */
1650 32, /* size of l1 cache. */
1651 256, /* size of l2 cache. */
1652 64, /* size of prefetch block */
1653 6, /* number of parallel prefetches */
1654 3, /* Branch cost */
1655 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1656 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1657 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1658 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1659 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1660 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1661 atom_memcpy,
1662 atom_memset,
1663 1, /* scalar_stmt_cost. */
1664 1, /* scalar_load_cost. */
1665 1, /* scalar_store_cost. */
1666 1, /* vec_stmt_cost. */
1667 1, /* vec_to_scalar_cost. */
1668 1, /* scalar_to_vec_cost. */
1669 1, /* vec_align_load_cost. */
1670 2, /* vec_unalign_load_cost. */
1671 1, /* vec_store_cost. */
1672 3, /* cond_taken_branch_cost. */
1673 1, /* cond_not_taken_branch_cost. */
1674 };
1675
1676 static stringop_algs slm_memcpy[2] = {
1677 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1678 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1679 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1680 static stringop_algs slm_memset[2] = {
1681 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1682 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1683 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1684 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1685 static const
1686 struct processor_costs slm_cost = {
1687 COSTS_N_INSNS (1), /* cost of an add instruction */
1688 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1689 COSTS_N_INSNS (1), /* variable shift costs */
1690 COSTS_N_INSNS (1), /* constant shift costs */
1691 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1692 COSTS_N_INSNS (3), /* HI */
1693 COSTS_N_INSNS (3), /* SI */
1694 COSTS_N_INSNS (4), /* DI */
1695 COSTS_N_INSNS (2)}, /* other */
1696 0, /* cost of multiply per each bit set */
1697 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1698 COSTS_N_INSNS (26), /* HI */
1699 COSTS_N_INSNS (42), /* SI */
1700 COSTS_N_INSNS (74), /* DI */
1701 COSTS_N_INSNS (74)}, /* other */
1702 COSTS_N_INSNS (1), /* cost of movsx */
1703 COSTS_N_INSNS (1), /* cost of movzx */
1704 8, /* "large" insn */
1705 17, /* MOVE_RATIO */
1706 4, /* cost for loading QImode using movzbl */
1707 {4, 4, 4}, /* cost of loading integer registers
1708 in QImode, HImode and SImode.
1709 Relative to reg-reg move (2). */
1710 {4, 4, 4}, /* cost of storing integer registers */
1711 4, /* cost of reg,reg fld/fst */
1712 {12, 12, 12}, /* cost of loading fp registers
1713 in SFmode, DFmode and XFmode */
1714 {6, 6, 8}, /* cost of storing fp registers
1715 in SFmode, DFmode and XFmode */
1716 2, /* cost of moving MMX register */
1717 {8, 8}, /* cost of loading MMX registers
1718 in SImode and DImode */
1719 {8, 8}, /* cost of storing MMX registers
1720 in SImode and DImode */
1721 2, /* cost of moving SSE register */
1722 {8, 8, 8}, /* cost of loading SSE registers
1723 in SImode, DImode and TImode */
1724 {8, 8, 8}, /* cost of storing SSE registers
1725 in SImode, DImode and TImode */
1726 5, /* MMX or SSE register to integer */
1727 32, /* size of l1 cache. */
1728 256, /* size of l2 cache. */
1729 64, /* size of prefetch block */
1730 6, /* number of parallel prefetches */
1731 3, /* Branch cost */
1732 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1733 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1734 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1735 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1736 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1737 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1738 slm_memcpy,
1739 slm_memset,
1740 1, /* scalar_stmt_cost. */
1741 1, /* scalar_load_cost. */
1742 1, /* scalar_store_cost. */
1743 1, /* vec_stmt_cost. */
1744 4, /* vec_to_scalar_cost. */
1745 1, /* scalar_to_vec_cost. */
1746 1, /* vec_align_load_cost. */
1747 2, /* vec_unalign_load_cost. */
1748 1, /* vec_store_cost. */
1749 3, /* cond_taken_branch_cost. */
1750 1, /* cond_not_taken_branch_cost. */
1751 };
1752
1753 static stringop_algs intel_memcpy[2] = {
1754 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1755 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1756 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1757 static stringop_algs intel_memset[2] = {
1758 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1759 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1760 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1761 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1762 static const
1763 struct processor_costs intel_cost = {
1764 COSTS_N_INSNS (1), /* cost of an add instruction */
1765 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1766 COSTS_N_INSNS (1), /* variable shift costs */
1767 COSTS_N_INSNS (1), /* constant shift costs */
1768 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1769 COSTS_N_INSNS (3), /* HI */
1770 COSTS_N_INSNS (3), /* SI */
1771 COSTS_N_INSNS (4), /* DI */
1772 COSTS_N_INSNS (2)}, /* other */
1773 0, /* cost of multiply per each bit set */
1774 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1775 COSTS_N_INSNS (26), /* HI */
1776 COSTS_N_INSNS (42), /* SI */
1777 COSTS_N_INSNS (74), /* DI */
1778 COSTS_N_INSNS (74)}, /* other */
1779 COSTS_N_INSNS (1), /* cost of movsx */
1780 COSTS_N_INSNS (1), /* cost of movzx */
1781 8, /* "large" insn */
1782 17, /* MOVE_RATIO */
1783 4, /* cost for loading QImode using movzbl */
1784 {4, 4, 4}, /* cost of loading integer registers
1785 in QImode, HImode and SImode.
1786 Relative to reg-reg move (2). */
1787 {4, 4, 4}, /* cost of storing integer registers */
1788 4, /* cost of reg,reg fld/fst */
1789 {12, 12, 12}, /* cost of loading fp registers
1790 in SFmode, DFmode and XFmode */
1791 {6, 6, 8}, /* cost of storing fp registers
1792 in SFmode, DFmode and XFmode */
1793 2, /* cost of moving MMX register */
1794 {8, 8}, /* cost of loading MMX registers
1795 in SImode and DImode */
1796 {8, 8}, /* cost of storing MMX registers
1797 in SImode and DImode */
1798 2, /* cost of moving SSE register */
1799 {8, 8, 8}, /* cost of loading SSE registers
1800 in SImode, DImode and TImode */
1801 {8, 8, 8}, /* cost of storing SSE registers
1802 in SImode, DImode and TImode */
1803 5, /* MMX or SSE register to integer */
1804 32, /* size of l1 cache. */
1805 256, /* size of l2 cache. */
1806 64, /* size of prefetch block */
1807 6, /* number of parallel prefetches */
1808 3, /* Branch cost */
1809 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1810 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1811 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1812 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1813 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1814 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1815 intel_memcpy,
1816 intel_memset,
1817 1, /* scalar_stmt_cost. */
1818 1, /* scalar_load_cost. */
1819 1, /* scalar_store_cost. */
1820 1, /* vec_stmt_cost. */
1821 4, /* vec_to_scalar_cost. */
1822 1, /* scalar_to_vec_cost. */
1823 1, /* vec_align_load_cost. */
1824 2, /* vec_unalign_load_cost. */
1825 1, /* vec_store_cost. */
1826 3, /* cond_taken_branch_cost. */
1827 1, /* cond_not_taken_branch_cost. */
1828 };
1829
1830 /* Generic should produce code tuned for Core-i7 (and newer chips)
1831 and btver1 (and newer chips). */
1832
1833 static stringop_algs generic_memcpy[2] = {
1834 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1835 {-1, libcall, false}}},
1836 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1837 {-1, libcall, false}}}};
1838 static stringop_algs generic_memset[2] = {
1839 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1840 {-1, libcall, false}}},
1841 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1842 {-1, libcall, false}}}};
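/* A rough guide to reading these stringop_algs initializers (field names
   as declared in i386.h; see also ix86_parse_stringop_strategy_string
   below, which indexes and overrides the same tables): the first field is
   the algorithm used when the copy/set size is unknown at compile time,
   followed by {max_size, algorithm, noalign} ranges tried in order for
   known sizes and terminated by a max_size of -1.  Element [0] of each
   two-element array is used for 32-bit code and element [1] for 64-bit
   code.  So with generic_memcpy above, a known 64-bit copy of up to 32
   bytes uses an inline loop, up to 8192 bytes uses a rep prefix moving
   8 bytes at a time, and anything larger calls the library.  */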
1843 static const
1844 struct processor_costs generic_cost = {
1845 COSTS_N_INSNS (1), /* cost of an add instruction */
1846 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1847 this cost, however, our current implementation of synth_mult results in
1848 the use of unnecessary temporary registers, causing regressions on several
1849 SPECfp benchmarks. */
1850 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1851 COSTS_N_INSNS (1), /* variable shift costs */
1852 COSTS_N_INSNS (1), /* constant shift costs */
1853 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1854 COSTS_N_INSNS (4), /* HI */
1855 COSTS_N_INSNS (3), /* SI */
1856 COSTS_N_INSNS (4), /* DI */
1857 COSTS_N_INSNS (2)}, /* other */
1858 0, /* cost of multiply per each bit set */
1859 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1860 COSTS_N_INSNS (26), /* HI */
1861 COSTS_N_INSNS (42), /* SI */
1862 COSTS_N_INSNS (74), /* DI */
1863 COSTS_N_INSNS (74)}, /* other */
1864 COSTS_N_INSNS (1), /* cost of movsx */
1865 COSTS_N_INSNS (1), /* cost of movzx */
1866 8, /* "large" insn */
1867 17, /* MOVE_RATIO */
1868 4, /* cost for loading QImode using movzbl */
1869 {4, 4, 4}, /* cost of loading integer registers
1870 in QImode, HImode and SImode.
1871 Relative to reg-reg move (2). */
1872 {4, 4, 4}, /* cost of storing integer registers */
1873 4, /* cost of reg,reg fld/fst */
1874 {12, 12, 12}, /* cost of loading fp registers
1875 in SFmode, DFmode and XFmode */
1876 {6, 6, 8}, /* cost of storing fp registers
1877 in SFmode, DFmode and XFmode */
1878 2, /* cost of moving MMX register */
1879 {8, 8}, /* cost of loading MMX registers
1880 in SImode and DImode */
1881 {8, 8}, /* cost of storing MMX registers
1882 in SImode and DImode */
1883 2, /* cost of moving SSE register */
1884 {8, 8, 8}, /* cost of loading SSE registers
1885 in SImode, DImode and TImode */
1886 {8, 8, 8}, /* cost of storing SSE registers
1887 in SImode, DImode and TImode */
1888 5, /* MMX or SSE register to integer */
1889 32, /* size of l1 cache. */
1890 512, /* size of l2 cache. */
1891 64, /* size of prefetch block */
1892 6, /* number of parallel prefetches */
1893 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1894 value is increased to the perhaps more appropriate value of 5. */
1895 3, /* Branch cost */
1896 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1897 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1898 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1899 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1900 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1901 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1902 generic_memcpy,
1903 generic_memset,
1904 1, /* scalar_stmt_cost. */
1905 1, /* scalar_load_cost. */
1906 1, /* scalar_store_cost. */
1907 1, /* vec_stmt_cost. */
1908 1, /* vec_to_scalar_cost. */
1909 1, /* scalar_to_vec_cost. */
1910 1, /* vec_align_load_cost. */
1911 2, /* vec_unalign_load_cost. */
1912 1, /* vec_store_cost. */
1913 3, /* cond_taken_branch_cost. */
1914 1, /* cond_not_taken_branch_cost. */
1915 };
1916
1917 /* core_cost should produce code tuned for the Core family of CPUs. */
1918 static stringop_algs core_memcpy[2] = {
1919 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1920 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1921 {-1, libcall, false}}}};
1922 static stringop_algs core_memset[2] = {
1923 {libcall, {{6, loop_1_byte, true},
1924 {24, loop, true},
1925 {8192, rep_prefix_4_byte, true},
1926 {-1, libcall, false}}},
1927 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1928 {-1, libcall, false}}}};
1929
1930 static const
1931 struct processor_costs core_cost = {
1932 COSTS_N_INSNS (1), /* cost of an add instruction */
1933 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1934 this cost, however, our current implementation of synth_mult results in
1935 the use of unnecessary temporary registers, causing regressions on several
1936 SPECfp benchmarks. */
1937 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1938 COSTS_N_INSNS (1), /* variable shift costs */
1939 COSTS_N_INSNS (1), /* constant shift costs */
1940 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1941 COSTS_N_INSNS (4), /* HI */
1942 COSTS_N_INSNS (3), /* SI */
1943 COSTS_N_INSNS (4), /* DI */
1944 COSTS_N_INSNS (2)}, /* other */
1945 0, /* cost of multiply per each bit set */
1946 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1947 COSTS_N_INSNS (26), /* HI */
1948 COSTS_N_INSNS (42), /* SI */
1949 COSTS_N_INSNS (74), /* DI */
1950 COSTS_N_INSNS (74)}, /* other */
1951 COSTS_N_INSNS (1), /* cost of movsx */
1952 COSTS_N_INSNS (1), /* cost of movzx */
1953 8, /* "large" insn */
1954 17, /* MOVE_RATIO */
1955 4, /* cost for loading QImode using movzbl */
1956 {4, 4, 4}, /* cost of loading integer registers
1957 in QImode, HImode and SImode.
1958 Relative to reg-reg move (2). */
1959 {4, 4, 4}, /* cost of storing integer registers */
1960 4, /* cost of reg,reg fld/fst */
1961 {12, 12, 12}, /* cost of loading fp registers
1962 in SFmode, DFmode and XFmode */
1963 {6, 6, 8}, /* cost of storing fp registers
1964 in SFmode, DFmode and XFmode */
1965 2, /* cost of moving MMX register */
1966 {8, 8}, /* cost of loading MMX registers
1967 in SImode and DImode */
1968 {8, 8}, /* cost of storing MMX registers
1969 in SImode and DImode */
1970 2, /* cost of moving SSE register */
1971 {8, 8, 8}, /* cost of loading SSE registers
1972 in SImode, DImode and TImode */
1973 {8, 8, 8}, /* cost of storing SSE registers
1974 in SImode, DImode and TImode */
1975 5, /* MMX or SSE register to integer */
1976 64, /* size of l1 cache. */
1977 512, /* size of l2 cache. */
1978 64, /* size of prefetch block */
1979 6, /* number of parallel prefetches */
1980 /* FIXME perhaps more appropriate value is 5. */
1981 3, /* Branch cost */
1982 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1983 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1984 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1985 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1986 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1987 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1988 core_memcpy,
1989 core_memset,
1990 1, /* scalar_stmt_cost. */
1991 1, /* scalar_load_cost. */
1992 1, /* scalar_store_cost. */
1993 1, /* vec_stmt_cost. */
1994 1, /* vec_to_scalar_cost. */
1995 1, /* scalar_to_vec_cost. */
1996 1, /* vec_align_load_cost. */
1997 2, /* vec_unalign_load_cost. */
1998 1, /* vec_store_cost. */
1999 3, /* cond_taken_branch_cost. */
2000 1, /* cond_not_taken_branch_cost. */
2001 };
2002
2003
2004 /* Set by -mtune. */
2005 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2006
2007 /* Set by -mtune or -Os. */
2008 const struct processor_costs *ix86_cost = &pentium_cost;
2009
2010 /* Processor feature/optimization bitmasks. */
2011 #define m_386 (1<<PROCESSOR_I386)
2012 #define m_486 (1<<PROCESSOR_I486)
2013 #define m_PENT (1<<PROCESSOR_PENTIUM)
2014 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2015 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2016 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2017 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2018 #define m_CORE2 (1<<PROCESSOR_CORE2)
2019 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2020 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2021 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2022 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2023 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2024 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2025 #define m_INTEL (1<<PROCESSOR_INTEL)
2026
2027 #define m_GEODE (1<<PROCESSOR_GEODE)
2028 #define m_K6 (1<<PROCESSOR_K6)
2029 #define m_K6_GEODE (m_K6 | m_GEODE)
2030 #define m_K8 (1<<PROCESSOR_K8)
2031 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2032 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2033 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2034 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2035 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2036 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2037 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2038 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2039 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2040 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2041 #define m_BTVER (m_BTVER1 | m_BTVER2)
2042 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2043
2044 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2045
2046 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2047 #undef DEF_TUNE
2048 #define DEF_TUNE(tune, name, selector) name,
2049 #include "x86-tune.def"
2050 #undef DEF_TUNE
2051 };
2052
2053 /* Feature tests against the various tunings. */
2054 unsigned char ix86_tune_features[X86_TUNE_LAST];
2055
2056 /* Feature tests against the various tunings used to create ix86_tune_features
2057 based on the processor mask. */
2058 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2059 #undef DEF_TUNE
2060 #define DEF_TUNE(tune, name, selector) selector,
2061 #include "x86-tune.def"
2062 #undef DEF_TUNE
2063 };
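/* Illustrative sketch only -- the entry below is hypothetical; the real
   entries live in x86-tune.def.  A line such as

     DEF_TUNE (X86_TUNE_EXAMPLE, "example", m_CORE_ALL | m_GENERIC)

   contributes the string "example" to ix86_tune_feature_names and the
   selector mask (m_CORE_ALL | m_GENERIC) to initial_ix86_tune_features.
   set_ix86_tune_features below then tests that mask against
   1u << ix86_tune, so the feature ends up enabled for the Core and
   generic tunings and disabled elsewhere, unless -mtune-ctrl= overrides
   it.  */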
2064
2065 /* Feature tests against the various architecture variations. */
2066 unsigned char ix86_arch_features[X86_ARCH_LAST];
2067
2068 /* Feature tests against the various architecture variations, used to create
2069 ix86_arch_features based on the processor mask. */
2070 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2071 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2072 ~(m_386 | m_486 | m_PENT | m_K6),
2073
2074 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2075 ~m_386,
2076
2077 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2078 ~(m_386 | m_486),
2079
2080 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2081 ~m_386,
2082
2083 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2084 ~m_386,
2085 };
2086
2087 /* In case the average insn count for a single function invocation is
2088 lower than this constant, emit fast (but longer) prologue and
2089 epilogue code. */
2090 #define FAST_PROLOGUE_INSN_COUNT 20
2091
2092 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2093 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2094 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2095 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2096
2097 /* Array of the smallest class containing reg number REGNO, indexed by
2098 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2099
2100 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2101 {
2102 /* ax, dx, cx, bx */
2103 AREG, DREG, CREG, BREG,
2104 /* si, di, bp, sp */
2105 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2106 /* FP registers */
2107 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2108 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2109 /* arg pointer */
2110 NON_Q_REGS,
2111 /* flags, fpsr, fpcr, frame */
2112 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2113 /* SSE registers */
2114 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2115 SSE_REGS, SSE_REGS,
2116 /* MMX registers */
2117 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2118 MMX_REGS, MMX_REGS,
2119 /* REX registers */
2120 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2121 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2122 /* SSE REX registers */
2123 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2124 SSE_REGS, SSE_REGS,
2125 /* AVX-512 SSE registers */
2126 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2127 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2128 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2129 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2130 /* Mask registers. */
2131 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2132 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2133 };
2134
2135 /* The "default" register map used in 32bit mode. */
2136
2137 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2138 {
2139 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2140 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2141 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2142 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2143 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2144 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2145 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2146 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23 */
2147 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31 */
2148 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2149 };
2150
2151 /* The "default" register map used in 64bit mode. */
2152
2153 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2154 {
2155 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2156 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2157 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2158 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2159 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2160 8,9,10,11,12,13,14,15, /* extended integer registers */
2161 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2162 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2163 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2164 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2165 };
2166
2167 /* Define the register numbers to be used in Dwarf debugging information.
2168 The SVR4 reference port C compiler uses the following register numbers
2169 in its Dwarf output code:
2170 0 for %eax (gcc regno = 0)
2171 1 for %ecx (gcc regno = 2)
2172 2 for %edx (gcc regno = 1)
2173 3 for %ebx (gcc regno = 3)
2174 4 for %esp (gcc regno = 7)
2175 5 for %ebp (gcc regno = 6)
2176 6 for %esi (gcc regno = 4)
2177 7 for %edi (gcc regno = 5)
2178 The following three DWARF register numbers are never generated by
2179 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2180 believes these numbers have these meanings.
2181 8 for %eip (no gcc equivalent)
2182 9 for %eflags (gcc regno = 17)
2183 10 for %trapno (no gcc equivalent)
2184 It is not at all clear how we should number the FP stack registers
2185 for the x86 architecture. If the version of SDB on x86/svr4 were
2186 a bit less brain dead with respect to floating-point then we would
2187 have a precedent to follow with respect to DWARF register numbers
2188 for x86 FP registers, but the SDB on x86/svr4 is so completely
2189 broken with respect to FP registers that it is hardly worth thinking
2190 of it as something to strive for compatibility with.
2191 The version of x86/svr4 SDB I have at the moment does (partially)
2192 seem to believe that DWARF register number 11 is associated with
2193 the x86 register %st(0), but that's about all. Higher DWARF
2194 register numbers don't seem to be associated with anything in
2195 particular, and even for DWARF regno 11, SDB only seems to under-
2196 stand that it should say that a variable lives in %st(0) (when
2197 asked via an `=' command) if we said it was in DWARF regno 11,
2198 but SDB still prints garbage when asked for the value of the
2199 variable in question (via a `/' command).
2200 (Also note that the labels SDB prints for various FP stack regs
2201 when doing an `x' command are all wrong.)
2202 Note that these problems generally don't affect the native SVR4
2203 C compiler because it doesn't allow the use of -O with -g and
2204 because when it is *not* optimizing, it allocates a memory
2205 location for each floating-point variable, and the memory
2206 location is what gets described in the DWARF AT_location
2207 attribute for the variable in question.
2208 Regardless of the severe mental illness of the x86/svr4 SDB, we
2209 do something sensible here and we use the following DWARF
2210 register numbers. Note that these are all stack-top-relative
2211 numbers.
2212 11 for %st(0) (gcc regno = 8)
2213 12 for %st(1) (gcc regno = 9)
2214 13 for %st(2) (gcc regno = 10)
2215 14 for %st(3) (gcc regno = 11)
2216 15 for %st(4) (gcc regno = 12)
2217 16 for %st(5) (gcc regno = 13)
2218 17 for %st(6) (gcc regno = 14)
2219 18 for %st(7) (gcc regno = 15)
2220 */
2221 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2222 {
2223 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2224 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2225 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2226 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2227 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2228 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2229 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2230 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23 */
2231 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31 */
2232 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2233 };
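/* Cross-checking the table against the numbering described above: gcc
   regno 0 (%eax) maps to DWARF 0, gcc regno 1 (%edx) to DWARF 2, gcc
   regno 2 (%ecx) to DWARF 1, and so on through %edi, while gcc regnos
   8-15 (the FP stack) map to DWARF 11-18 -- exactly the first two rows
   of svr4_dbx_register_map.  */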
2234
2235 /* Define parameter passing and return registers. */
2236
2237 static int const x86_64_int_parameter_registers[6] =
2238 {
2239 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2240 };
2241
2242 static int const x86_64_ms_abi_int_parameter_registers[4] =
2243 {
2244 CX_REG, DX_REG, R8_REG, R9_REG
2245 };
2246
2247 static int const x86_64_int_return_registers[4] =
2248 {
2249 AX_REG, DX_REG, DI_REG, SI_REG
2250 };
2251
2252 /* Additional registers that are clobbered by SYSV calls. */
2253
2254 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2255 {
2256 SI_REG, DI_REG,
2257 XMM6_REG, XMM7_REG,
2258 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2259 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2260 };
2261
2262 /* Define the structure for the machine field in struct function. */
2263
2264 struct GTY(()) stack_local_entry {
2265 unsigned short mode;
2266 unsigned short n;
2267 rtx rtl;
2268 struct stack_local_entry *next;
2269 };
2270
2271 /* Structure describing stack frame layout.
2272 Stack grows downward:
2273
2274 [arguments]
2275 <- ARG_POINTER
2276 saved pc
2277
2278 saved static chain if ix86_static_chain_on_stack
2279
2280 saved frame pointer if frame_pointer_needed
2281 <- HARD_FRAME_POINTER
2282 [saved regs]
2283 <- regs_save_offset
2284 [padding0]
2285
2286 [saved SSE regs]
2287 <- sse_regs_save_offset
2288 [padding1] |
2289 | <- FRAME_POINTER
2290 [va_arg registers] |
2291 |
2292 [frame] |
2293 |
2294 [padding2] | = to_allocate
2295 <- STACK_POINTER
2296 */
2297 struct ix86_frame
2298 {
2299 int nsseregs;
2300 int nregs;
2301 int va_arg_size;
2302 int red_zone_size;
2303 int outgoing_arguments_size;
2304
2305 /* The offsets relative to ARG_POINTER. */
2306 HOST_WIDE_INT frame_pointer_offset;
2307 HOST_WIDE_INT hard_frame_pointer_offset;
2308 HOST_WIDE_INT stack_pointer_offset;
2309 HOST_WIDE_INT hfp_save_offset;
2310 HOST_WIDE_INT reg_save_offset;
2311 HOST_WIDE_INT sse_reg_save_offset;
2312
2313 /* When save_regs_using_mov is set, emit prologue using
2314 move instead of push instructions. */
2315 bool save_regs_using_mov;
2316 };
2317
2318 /* Which cpu are we scheduling for. */
2319 enum attr_cpu ix86_schedule;
2320
2321 /* Which cpu are we optimizing for. */
2322 enum processor_type ix86_tune;
2323
2324 /* Which instruction set architecture to use. */
2325 enum processor_type ix86_arch;
2326
2327 /* True if processor has SSE prefetch instruction. */
2328 unsigned char x86_prefetch_sse;
2329
2330 /* -mstackrealign option */
2331 static const char ix86_force_align_arg_pointer_string[]
2332 = "force_align_arg_pointer";
2333
2334 static rtx (*ix86_gen_leave) (void);
2335 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2336 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2337 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2338 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2339 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2340 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2341 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2342 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2343 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2344 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2345 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2346
2347 /* Preferred alignment for stack boundary in bits. */
2348 unsigned int ix86_preferred_stack_boundary;
2349
2350 /* Alignment for incoming stack boundary in bits, as specified on the
2351 command line. */
2352 static unsigned int ix86_user_incoming_stack_boundary;
2353
2354 /* Default alignment for incoming stack boundary in bits. */
2355 static unsigned int ix86_default_incoming_stack_boundary;
2356
2357 /* Alignment for incoming stack boundary in bits. */
2358 unsigned int ix86_incoming_stack_boundary;
2359
2360 /* Calling abi specific va_list type nodes. */
2361 static GTY(()) tree sysv_va_list_type_node;
2362 static GTY(()) tree ms_va_list_type_node;
2363
2364 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2365 char internal_label_prefix[16];
2366 int internal_label_prefix_len;
2367
2368 /* Fence to use after loop using movnt. */
2369 tree x86_mfence;
2370
2371 /* Register class used for passing a given 64-bit part of the argument.
2372 These represent classes as documented by the psABI, with the exception
2373 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2374 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2375 
2376 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2377 whenever possible (the upper half does contain padding). */
2378 enum x86_64_reg_class
2379 {
2380 X86_64_NO_CLASS,
2381 X86_64_INTEGER_CLASS,
2382 X86_64_INTEGERSI_CLASS,
2383 X86_64_SSE_CLASS,
2384 X86_64_SSESF_CLASS,
2385 X86_64_SSEDF_CLASS,
2386 X86_64_SSEUP_CLASS,
2387 X86_64_X87_CLASS,
2388 X86_64_X87UP_CLASS,
2389 X86_64_COMPLEX_X87_CLASS,
2390 X86_64_MEMORY_CLASS
2391 };
2392
2393 #define MAX_CLASSES 8
2394
2395 /* Table of constants used by fldpi, fldln2, etc.... */
2396 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2397 static bool ext_80387_constants_init = 0;
2398
2399 \f
2400 static struct machine_function * ix86_init_machine_status (void);
2401 static rtx ix86_function_value (const_tree, const_tree, bool);
2402 static bool ix86_function_value_regno_p (const unsigned int);
2403 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2404 const_tree);
2405 static rtx ix86_static_chain (const_tree, bool);
2406 static int ix86_function_regparm (const_tree, const_tree);
2407 static void ix86_compute_frame_layout (struct ix86_frame *);
2408 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2409 rtx, rtx, int);
2410 static void ix86_add_new_builtins (HOST_WIDE_INT);
2411 static tree ix86_canonical_va_list_type (tree);
2412 static void predict_jump (int);
2413 static unsigned int split_stack_prologue_scratch_regno (void);
2414 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2415
2416 enum ix86_function_specific_strings
2417 {
2418 IX86_FUNCTION_SPECIFIC_ARCH,
2419 IX86_FUNCTION_SPECIFIC_TUNE,
2420 IX86_FUNCTION_SPECIFIC_MAX
2421 };
2422
2423 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2424 const char *, enum fpmath_unit, bool);
2425 static void ix86_function_specific_save (struct cl_target_option *,
2426 struct gcc_options *opts);
2427 static void ix86_function_specific_restore (struct gcc_options *opts,
2428 struct cl_target_option *);
2429 static void ix86_function_specific_print (FILE *, int,
2430 struct cl_target_option *);
2431 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2432 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2433 struct gcc_options *,
2434 struct gcc_options *,
2435 struct gcc_options *);
2436 static bool ix86_can_inline_p (tree, tree);
2437 static void ix86_set_current_function (tree);
2438 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2439
2440 static enum calling_abi ix86_function_abi (const_tree);
2441
2442 \f
2443 #ifndef SUBTARGET32_DEFAULT_CPU
2444 #define SUBTARGET32_DEFAULT_CPU "i386"
2445 #endif
2446
2447 /* Whether -mtune= or -march= were specified */
2448 static int ix86_tune_defaulted;
2449 static int ix86_arch_specified;
2450
2451 /* Vectorization library interface and handlers. */
2452 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2453
2454 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2455 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2456
2457 /* Processor target table, indexed by processor number */
2458 struct ptt
2459 {
2460 const char *const name; /* processor name */
2461 const struct processor_costs *cost; /* Processor costs */
2462 const int align_loop; /* Default alignments. */
2463 const int align_loop_max_skip;
2464 const int align_jump;
2465 const int align_jump_max_skip;
2466 const int align_func;
2467 };
2468
2469 /* This table must be in sync with enum processor_type in i386.h. */
2470 static const struct ptt processor_target_table[PROCESSOR_max] =
2471 {
2472 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2473 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2474 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2475 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2476 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2477 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2478 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2479 {"core2", &core_cost, 16, 10, 16, 10, 16},
2480 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2481 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2482 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2483 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2484 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2485 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2486 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2487 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2488 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2489 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2490 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2491 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2492 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2493 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2494 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2495 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2496 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2497 };
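/* Reading an entry against struct ptt above: for example

     {"haswell", &core_cost, 16, 10, 16, 10, 16}

   selects core_cost as the cost table and requests 16-byte alignment for
   loops, jump targets, and functions, skipping at most 10 bytes of padding
   for loop and jump alignment.  */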
2498 \f
2499 static unsigned int
2500 rest_of_handle_insert_vzeroupper (void)
2501 {
2502 int i;
2503
2504 /* vzeroupper instructions are inserted immediately after reload to
2505 account for possible spills from 256-bit registers. The pass
2506 reuses the mode switching infrastructure by re-running the mode insertion
2507 pass, so disable entities that have already been processed. */
2508 for (i = 0; i < MAX_386_ENTITIES; i++)
2509 ix86_optimize_mode_switching[i] = 0;
2510
2511 ix86_optimize_mode_switching[AVX_U128] = 1;
2512
2513 /* Call optimize_mode_switching. */
2514 g->get_passes ()->execute_pass_mode_switching ();
2515 return 0;
2516 }
2517
2518 namespace {
2519
2520 const pass_data pass_data_insert_vzeroupper =
2521 {
2522 RTL_PASS, /* type */
2523 "vzeroupper", /* name */
2524 OPTGROUP_NONE, /* optinfo_flags */
2525 TV_NONE, /* tv_id */
2526 0, /* properties_required */
2527 0, /* properties_provided */
2528 0, /* properties_destroyed */
2529 0, /* todo_flags_start */
2530 TODO_df_finish, /* todo_flags_finish */
2531 };
2532
2533 class pass_insert_vzeroupper : public rtl_opt_pass
2534 {
2535 public:
2536 pass_insert_vzeroupper(gcc::context *ctxt)
2537 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2538 {}
2539
2540 /* opt_pass methods: */
2541 virtual bool gate (function *)
2542 {
2543 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2544 }
2545
2546 virtual unsigned int execute (function *)
2547 {
2548 return rest_of_handle_insert_vzeroupper ();
2549 }
2550
2551 }; // class pass_insert_vzeroupper
2552
2553 } // anon namespace
2554
2555 rtl_opt_pass *
2556 make_pass_insert_vzeroupper (gcc::context *ctxt)
2557 {
2558 return new pass_insert_vzeroupper (ctxt);
2559 }
2560
2561 /* Return true if a red-zone is in use. */
2562
2563 static inline bool
2564 ix86_using_red_zone (void)
2565 {
2566 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2567 }
2568 \f
2569 /* Return a string that documents the current -m options. The caller is
2570 responsible for freeing the string. */
2571
2572 static char *
2573 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2574 const char *tune, enum fpmath_unit fpmath,
2575 bool add_nl_p)
2576 {
2577 struct ix86_target_opts
2578 {
2579 const char *option; /* option string */
2580 HOST_WIDE_INT mask; /* isa mask options */
2581 };
2582
2583 /* This table is ordered so that options like -msse4.2 that imply
2584 other options are matched first. */
2585 static struct ix86_target_opts isa_opts[] =
2586 {
2587 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2588 { "-mfma", OPTION_MASK_ISA_FMA },
2589 { "-mxop", OPTION_MASK_ISA_XOP },
2590 { "-mlwp", OPTION_MASK_ISA_LWP },
2591 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2592 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2593 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2594 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2595 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2596 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2597 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2598 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2599 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2600 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2601 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2602 { "-msse3", OPTION_MASK_ISA_SSE3 },
2603 { "-msse2", OPTION_MASK_ISA_SSE2 },
2604 { "-msse", OPTION_MASK_ISA_SSE },
2605 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2606 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2607 { "-mmmx", OPTION_MASK_ISA_MMX },
2608 { "-mabm", OPTION_MASK_ISA_ABM },
2609 { "-mbmi", OPTION_MASK_ISA_BMI },
2610 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2611 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2612 { "-mhle", OPTION_MASK_ISA_HLE },
2613 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2614 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2615 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2616 { "-madx", OPTION_MASK_ISA_ADX },
2617 { "-mtbm", OPTION_MASK_ISA_TBM },
2618 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2619 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2620 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2621 { "-maes", OPTION_MASK_ISA_AES },
2622 { "-msha", OPTION_MASK_ISA_SHA },
2623 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2624 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2625 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2626 { "-mf16c", OPTION_MASK_ISA_F16C },
2627 { "-mrtm", OPTION_MASK_ISA_RTM },
2628 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2629 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2630 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2631 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2632 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2633 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2634 };
2635
2636 /* Flag options. */
2637 static struct ix86_target_opts flag_opts[] =
2638 {
2639 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2640 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2641 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2642 { "-m80387", MASK_80387 },
2643 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2644 { "-malign-double", MASK_ALIGN_DOUBLE },
2645 { "-mcld", MASK_CLD },
2646 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2647 { "-mieee-fp", MASK_IEEE_FP },
2648 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2649 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2650 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2651 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2652 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2653 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2654 { "-mno-red-zone", MASK_NO_RED_ZONE },
2655 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2656 { "-mrecip", MASK_RECIP },
2657 { "-mrtd", MASK_RTD },
2658 { "-msseregparm", MASK_SSEREGPARM },
2659 { "-mstack-arg-probe", MASK_STACK_PROBE },
2660 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2661 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2662 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2663 { "-mvzeroupper", MASK_VZEROUPPER },
2664 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2665 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2666 { "-mprefer-avx128", MASK_PREFER_AVX128},
2667 };
2668
2669 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2670
2671 char isa_other[40];
2672 char target_other[40];
2673 unsigned num = 0;
2674 unsigned i, j;
2675 char *ret;
2676 char *ptr;
2677 size_t len;
2678 size_t line_len;
2679 size_t sep_len;
2680 const char *abi;
2681
2682 memset (opts, '\0', sizeof (opts));
2683
2684 /* Add -march= option. */
2685 if (arch)
2686 {
2687 opts[num][0] = "-march=";
2688 opts[num++][1] = arch;
2689 }
2690
2691 /* Add -mtune= option. */
2692 if (tune)
2693 {
2694 opts[num][0] = "-mtune=";
2695 opts[num++][1] = tune;
2696 }
2697
2698 /* Add -m32/-m64/-mx32. */
2699 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2700 {
2701 if ((isa & OPTION_MASK_ABI_64) != 0)
2702 abi = "-m64";
2703 else
2704 abi = "-mx32";
2705 isa &= ~ (OPTION_MASK_ISA_64BIT
2706 | OPTION_MASK_ABI_64
2707 | OPTION_MASK_ABI_X32);
2708 }
2709 else
2710 abi = "-m32";
2711 opts[num++][0] = abi;
2712
2713 /* Pick out the options in isa options. */
2714 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2715 {
2716 if ((isa & isa_opts[i].mask) != 0)
2717 {
2718 opts[num++][0] = isa_opts[i].option;
2719 isa &= ~ isa_opts[i].mask;
2720 }
2721 }
2722
2723 if (isa && add_nl_p)
2724 {
2725 opts[num++][0] = isa_other;
2726 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2727 isa);
2728 }
2729
2730 /* Add flag options. */
2731 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2732 {
2733 if ((flags & flag_opts[i].mask) != 0)
2734 {
2735 opts[num++][0] = flag_opts[i].option;
2736 flags &= ~ flag_opts[i].mask;
2737 }
2738 }
2739
2740 if (flags && add_nl_p)
2741 {
2742 opts[num++][0] = target_other;
2743 sprintf (target_other, "(other flags: %#x)", flags);
2744 }
2745
2746 /* Add -fpmath= option. */
2747 if (fpmath)
2748 {
2749 opts[num][0] = "-mfpmath=";
2750 switch ((int) fpmath)
2751 {
2752 case FPMATH_387:
2753 opts[num++][1] = "387";
2754 break;
2755
2756 case FPMATH_SSE:
2757 opts[num++][1] = "sse";
2758 break;
2759
2760 case FPMATH_387 | FPMATH_SSE:
2761 opts[num++][1] = "sse+387";
2762 break;
2763
2764 default:
2765 gcc_unreachable ();
2766 }
2767 }
2768
2769 /* Any options? */
2770 if (num == 0)
2771 return NULL;
2772
2773 gcc_assert (num < ARRAY_SIZE (opts));
2774
2775 /* Size the string. */
2776 len = 0;
2777 sep_len = (add_nl_p) ? 3 : 1;
2778 for (i = 0; i < num; i++)
2779 {
2780 len += sep_len;
2781 for (j = 0; j < 2; j++)
2782 if (opts[i][j])
2783 len += strlen (opts[i][j]);
2784 }
2785
2786 /* Build the string. */
2787 ret = ptr = (char *) xmalloc (len);
2788 line_len = 0;
2789
2790 for (i = 0; i < num; i++)
2791 {
2792 size_t len2[2];
2793
2794 for (j = 0; j < 2; j++)
2795 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2796
2797 if (i != 0)
2798 {
2799 *ptr++ = ' ';
2800 line_len++;
2801
2802 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2803 {
2804 *ptr++ = '\\';
2805 *ptr++ = '\n';
2806 line_len = 0;
2807 }
2808 }
2809
2810 for (j = 0; j < 2; j++)
2811 if (opts[i][j])
2812 {
2813 memcpy (ptr, opts[i][j], len2[j]);
2814 ptr += len2[j];
2815 line_len += len2[j];
2816 }
2817 }
2818
2819 *ptr = '\0';
2820 gcc_assert (ret + len >= ptr);
2821
2822 return ret;
2823 }
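/* Purely illustrative example of the kind of string built above (the
   exact set and order of options depends on the flags in effect): for a
   64-bit SSE4.2 target the result might look something like

     "-march=corei7 -mtune=corei7 -m64 -msse4.2 -msse4.1 -mssse3 -msse3
      -msse2 -msse -mmmx -mpopcnt -mfpmath=sse"

   with a '\' plus newline inserted whenever ADD_NL_P is true and a line
   would grow past roughly 70 characters.  */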
2824
2825 /* Return true if profiling code should be emitted before the
2826 prologue, and false otherwise.
2827 Note: for x86 this is the case when "hotfix"-style (-mfentry) profiling is used. */
2828 static bool
2829 ix86_profile_before_prologue (void)
2830 {
2831 return flag_fentry != 0;
2832 }
2833
2834 /* Function that is callable from the debugger to print the current
2835 options. */
2836 void ATTRIBUTE_UNUSED
2837 ix86_debug_options (void)
2838 {
2839 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2840 ix86_arch_string, ix86_tune_string,
2841 ix86_fpmath, true);
2842
2843 if (opts)
2844 {
2845 fprintf (stderr, "%s\n\n", opts);
2846 free (opts);
2847 }
2848 else
2849 fputs ("<no options>\n\n", stderr);
2850
2851 return;
2852 }
2853
2854 static const char *stringop_alg_names[] = {
2855 #define DEF_ENUM
2856 #define DEF_ALG(alg, name) #name,
2857 #include "stringop.def"
2858 #undef DEF_ENUM
2859 #undef DEF_ALG
2860 };
2861
2862 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2863 The string is of the following form (or comma separated list of it):
2864
2865 strategy_alg:max_size:[align|noalign]
2866
2867 where the full size range for the strategy is either [0, max_size] or
2868 [min_size, max_size], in which min_size is the max_size + 1 of the
2869 preceding range. The last size range must have max_size == -1.
2870
2871 Examples:
2872
2873 1.
2874 -mmemcpy-strategy=libcall:-1:noalign
2875
2876 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2877
2878
2879 2.
2880 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2881
2882 This is to tell the compiler to use the following strategy for memset
2883 1) when the expected size is between [1, 16], use rep_8byte strategy;
2884 2) when the size is between [17, 2048], use vector_loop;
2885 3) when the size is > 2048, use libcall. */
2886
2887 struct stringop_size_range
2888 {
2889 int max;
2890 stringop_alg alg;
2891 bool noalign;
2892 };
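/* As an illustration, parsing the memset strategy from example 2 above,
   "rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign",
   fills input_ranges in the function below with three entries:

     { 16,   rep_8byte,   true  }
     { 2048, vector_loop, false }
     { -1,   libcall,     true  }

   which are then copied over the default stringop_algs size table for
   memset (index [1] of that table when compiling for 64-bit).  */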
2893
2894 static void
2895 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2896 {
2897 const struct stringop_algs *default_algs;
2898 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2899 char *curr_range_str, *next_range_str;
2900 int i = 0, n = 0;
2901
2902 if (is_memset)
2903 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2904 else
2905 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2906
2907 curr_range_str = strategy_str;
2908
2909 do
2910 {
2911 int maxs;
2912 char alg_name[128];
2913 char align[16];
2914 next_range_str = strchr (curr_range_str, ',');
2915 if (next_range_str)
2916 *next_range_str++ = '\0';
2917
2918 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2919 alg_name, &maxs, align))
2920 {
2921 error ("wrong arg %s to option %s", curr_range_str,
2922 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2923 return;
2924 }
2925
2926 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2927 {
2928 error ("size ranges of option %s should be increasing",
2929 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2930 return;
2931 }
2932
2933 for (i = 0; i < last_alg; i++)
2934 if (!strcmp (alg_name, stringop_alg_names[i]))
2935 break;
2936
2937 if (i == last_alg)
2938 {
2939 error ("wrong stringop strategy name %s specified for option %s",
2940 alg_name,
2941 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2942 return;
2943 }
2944
2945 input_ranges[n].max = maxs;
2946 input_ranges[n].alg = (stringop_alg) i;
2947 if (!strcmp (align, "align"))
2948 input_ranges[n].noalign = false;
2949 else if (!strcmp (align, "noalign"))
2950 input_ranges[n].noalign = true;
2951 else
2952 {
2953 error ("unknown alignment %s specified for option %s",
2954 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2955 return;
2956 }
2957 n++;
2958 curr_range_str = next_range_str;
2959 }
2960 while (curr_range_str);
2961
2962 if (input_ranges[n - 1].max != -1)
2963 {
2964 error ("the max value for the last size range should be -1"
2965 " for option %s",
2966 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2967 return;
2968 }
2969
2970 if (n > MAX_STRINGOP_ALGS)
2971 {
2972 error ("too many size ranges specified in option %s",
2973 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2974 return;
2975 }
2976
2977 /* Now override the default algs array. */
2978 for (i = 0; i < n; i++)
2979 {
2980 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2981 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2982 = input_ranges[i].alg;
2983 *const_cast<int *>(&default_algs->size[i].noalign)
2984 = input_ranges[i].noalign;
2985 }
2986 }
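/* Illustrative sketch of the override performed above (not a statement about
the shipped cost tables): parsing

-mmemset-strategy=rep_8byte:16:noalign,libcall:-1:noalign

leaves default_algs->size[0] = {16, rep_prefix_8_byte, noalign = true} and
default_algs->size[1] = {-1, libcall, noalign = true}, i.e. known-size memset
expansions of at most 16 bytes use the rep_8byte strategy and anything larger
falls back to a library call.  */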
2987
2988 \f
2989 /* Parse the -mtune-ctrl= option. When DUMP is true,
2990 print the features that are explicitly set. */
2991
2992 static void
2993 parse_mtune_ctrl_str (bool dump)
2994 {
2995 if (!ix86_tune_ctrl_string)
2996 return;
2997
2998 char *next_feature_string = NULL;
2999 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3000 char *orig = curr_feature_string;
3001 int i;
3002 do
3003 {
3004 bool clear = false;
3005
3006 next_feature_string = strchr (curr_feature_string, ',');
3007 if (next_feature_string)
3008 *next_feature_string++ = '\0';
3009 if (*curr_feature_string == '^')
3010 {
3011 curr_feature_string++;
3012 clear = true;
3013 }
3014 for (i = 0; i < X86_TUNE_LAST; i++)
3015 {
3016 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3017 {
3018 ix86_tune_features[i] = !clear;
3019 if (dump)
3020 fprintf (stderr, "Explicitly %s feature %s\n",
3021 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3022 break;
3023 }
3024 }
3025 if (i == X86_TUNE_LAST)
3026 error ("unknown parameter to option -mtune-ctrl: %s",
3027 clear ? curr_feature_string - 1 : curr_feature_string);
3028 curr_feature_string = next_feature_string;
3029 }
3030 while (curr_feature_string);
3031 free (orig);
3032 }
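/* Minimal usage sketch with hypothetical feature names: given
-mtune-ctrl=foo,^bar the loop above sets the ix86_tune_features entry for
"foo" and clears the one for "bar"; a leading '^' requests clearing, and any
name not found in ix86_tune_feature_names is reported with an error.  The
real feature names are the DEF_TUNE names from x86-tune.def.  */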
3033
3034 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3035 processor type. */
3036
3037 static void
3038 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3039 {
3040 unsigned int ix86_tune_mask = 1u << ix86_tune;
3041 int i;
3042
3043 for (i = 0; i < X86_TUNE_LAST; ++i)
3044 {
3045 if (ix86_tune_no_default)
3046 ix86_tune_features[i] = 0;
3047 else
3048 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3049 }
3050
3051 if (dump)
3052 {
3053 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3054 for (i = 0; i < X86_TUNE_LAST; i++)
3055 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3056 ix86_tune_features[i] ? "on" : "off");
3057 }
3058
3059 parse_mtune_ctrl_str (dump);
3060 }
3061
3062
3063 /* Override various settings based on options. If MAIN_ARGS_P, the
3064 options are from the command line, otherwise they are from
3065 attributes. */
3066
3067 static void
3068 ix86_option_override_internal (bool main_args_p,
3069 struct gcc_options *opts,
3070 struct gcc_options *opts_set)
3071 {
3072 int i;
3073 unsigned int ix86_arch_mask;
3074 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3075 const char *prefix;
3076 const char *suffix;
3077 const char *sw;
3078
3079 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3080 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3081 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3082 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3083 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3084 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3085 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3086 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3087 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3088 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3089 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3090 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3091 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3092 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3093 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3094 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3095 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3096 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3097 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3098 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3099 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3100 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3101 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3102 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3103 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3104 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3105 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3106 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3107 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3108 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3109 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3110 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3111 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3112 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3113 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3114 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3115 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3116 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3117 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3118 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3119 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3120 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3121 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3122 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3123 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3124 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3125 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3126 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3127 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3128 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3129 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3130 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3131
3132 #define PTA_CORE2 \
3133 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3134 | PTA_CX16 | PTA_FXSR)
3135 #define PTA_NEHALEM \
3136 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3137 #define PTA_WESTMERE \
3138 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3139 #define PTA_SANDYBRIDGE \
3140 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3141 #define PTA_IVYBRIDGE \
3142 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3143 #define PTA_HASWELL \
3144 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3145 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3146 #define PTA_BROADWELL \
3147 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3148 #define PTA_BONNELL \
3149 (PTA_CORE2 | PTA_MOVBE)
3150 #define PTA_SILVERMONT \
3151 (PTA_WESTMERE | PTA_MOVBE)
3152
3153 /* If this reaches 64, we need to widen the struct pta flags field below. */
3154
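/* Illustrative expansion of the composite PTA_* masks above: the -march
table below stores one flags word per CPU, so PTA_WESTMERE unfolds to
PTA_NEHALEM | PTA_AES | PTA_PCLMUL, which in turn carries the PTA_CORE2
baseline (PTA_64BIT, MMX, SSE through SSSE3, CX16, FXSR) plus SSE4.1, SSE4.2
and POPCNT.  The lookup loop following the table translates each PTA_* bit
into the matching OPTION_MASK_ISA_* flag unless the user set that ISA flag
explicitly.  */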
3155 static struct pta
3156 {
3157 const char *const name; /* processor name or nickname. */
3158 const enum processor_type processor;
3159 const enum attr_cpu schedule;
3160 const unsigned HOST_WIDE_INT flags;
3161 }
3162 const processor_alias_table[] =
3163 {
3164 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3165 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3166 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3167 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3168 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3169 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3170 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3171 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3172 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3173 PTA_MMX | PTA_SSE | PTA_FXSR},
3174 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3175 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3176 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3177 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3178 PTA_MMX | PTA_SSE | PTA_FXSR},
3179 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3180 PTA_MMX | PTA_SSE | PTA_FXSR},
3181 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3182 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3183 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3184 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3185 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3186 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3187 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3188 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3189 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3190 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3191 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3192 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3193 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3194 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3195 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3196 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3197 PTA_SANDYBRIDGE},
3198 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3199 PTA_SANDYBRIDGE},
3200 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3201 PTA_IVYBRIDGE},
3202 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3203 PTA_IVYBRIDGE},
3204 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3205 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3206 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3207 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3208 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3209 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3210 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3211 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3212 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3213 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3214 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3215 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3216 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3217 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3218 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3219 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3220 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3221 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3222 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3223 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3224 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3225 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3226 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3227 {"x86-64", PROCESSOR_K8, CPU_K8,
3228 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3229 {"k8", PROCESSOR_K8, CPU_K8,
3230 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3231 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3232 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3233 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3234 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3235 {"opteron", PROCESSOR_K8, CPU_K8,
3236 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3237 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3238 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3239 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3240 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3241 {"athlon64", PROCESSOR_K8, CPU_K8,
3242 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3243 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3244 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3245 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3246 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3247 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3248 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3249 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3250 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3251 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3252 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3253 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3254 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3255 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3256 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3257 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3258 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3259 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3260 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3261 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3262 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3263 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3264 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3265 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3266 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3267 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3268 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3269 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3270 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3271 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3272 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3273 | PTA_XSAVEOPT | PTA_FSGSBASE},
3274 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3275 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3276 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3277 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3278 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3279 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3280 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3281 | PTA_MOVBE},
3282 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3283 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3284 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3285 | PTA_FXSR | PTA_XSAVE},
3286 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3287 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3288 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3289 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3290 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3291 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3292
3293 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3294 PTA_64BIT
3295 | PTA_HLE /* flags are only used for -march switch. */ },
3296 };
3297
3298 /* -mrecip options. */
3299 static struct
3300 {
3301 const char *string; /* option name */
3302 unsigned int mask; /* mask bits to set */
3303 }
3304 const recip_options[] =
3305 {
3306 { "all", RECIP_MASK_ALL },
3307 { "none", RECIP_MASK_NONE },
3308 { "div", RECIP_MASK_DIV },
3309 { "sqrt", RECIP_MASK_SQRT },
3310 { "vec-div", RECIP_MASK_VEC_DIV },
3311 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3312 };
3313
3314 int const pta_size = ARRAY_SIZE (processor_alias_table);
3315
3316 /* Set up prefix/suffix so the error messages refer to either the command
3317 line argument, or the attribute(target). */
3318 if (main_args_p)
3319 {
3320 prefix = "-m";
3321 suffix = "";
3322 sw = "switch";
3323 }
3324 else
3325 {
3326 prefix = "option(\"";
3327 suffix = "\")";
3328 sw = "attribute";
3329 }
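/* Sketch of the resulting diagnostic wording: for command-line options the
pieces combine into strings like "-mtune=... switch", while for
attribute(target) they combine into "option(\"tune=...\") attribute",
letting the error () calls below share a single format string.  */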
3330
3331 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3332 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3333 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3334 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3335 #ifdef TARGET_BI_ARCH
3336 else
3337 {
3338 #if TARGET_BI_ARCH == 1
3339 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3340 is on and OPTION_MASK_ABI_X32 is off. We turn off
3341 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3342 -mx32. */
3343 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3344 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3345 #else
3346 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3347 on and OPTION_MASK_ABI_64 is off. We turn off
3348 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3349 -m64. */
3350 if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3351 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3352 #endif
3353 }
3354 #endif
3355
3356 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3357 {
3358 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3359 OPTION_MASK_ABI_64 for TARGET_X32. */
3360 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3361 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3362 }
3363 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3364 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3365 | OPTION_MASK_ABI_X32
3366 | OPTION_MASK_ABI_64);
3367 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3368 {
3369 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3370 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3371 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3372 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3373 }
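/* Worked example of the fix-ups above, assuming a bi-arch x86-64 compiler:
-mx32 forces OPTION_MASK_ISA_64BIT on and clears OPTION_MASK_ABI_64 so that
only the x32 ABI stays selected, while -m16 clears ISA_64BIT, ABI_X32 and
ABI_64 altogether since -m16 output is generated through the 32-bit code
paths.  */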
3374
3375 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3376 SUBTARGET_OVERRIDE_OPTIONS;
3377 #endif
3378
3379 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3380 SUBSUBTARGET_OVERRIDE_OPTIONS;
3381 #endif
3382
3383 /* -fPIC is the default for x86_64. */
3384 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3385 opts->x_flag_pic = 2;
3386
3387 /* Need to check -mtune=generic first. */
3388 if (opts->x_ix86_tune_string)
3389 {
3390 /* As special support for cross compilers we read -mtune=native
3391 as -mtune=generic. With native compilers we won't see the
3392 -mtune=native, as it was changed by the driver. */
3393 if (!strcmp (opts->x_ix86_tune_string, "native"))
3394 {
3395 opts->x_ix86_tune_string = "generic";
3396 }
3397 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3398 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3399 "%stune=k8%s or %stune=generic%s instead as appropriate",
3400 prefix, suffix, prefix, suffix, prefix, suffix);
3401 }
3402 else
3403 {
3404 if (opts->x_ix86_arch_string)
3405 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3406 if (!opts->x_ix86_tune_string)
3407 {
3408 opts->x_ix86_tune_string
3409 = processor_target_table[TARGET_CPU_DEFAULT].name;
3410 ix86_tune_defaulted = 1;
3411 }
3412
3413 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3414 or defaulted. We need to use a sensible tune option. */
3415 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3416 {
3417 opts->x_ix86_tune_string = "generic";
3418 }
3419 }
3420
3421 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3422 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3423 {
3424 /* rep; movq isn't available in 32-bit code. */
3425 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3426 opts->x_ix86_stringop_alg = no_stringop;
3427 }
3428
3429 if (!opts->x_ix86_arch_string)
3430 opts->x_ix86_arch_string
3431 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3432 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3433 else
3434 ix86_arch_specified = 1;
3435
3436 if (opts_set->x_ix86_pmode)
3437 {
3438 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3439 && opts->x_ix86_pmode == PMODE_SI)
3440 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3441 && opts->x_ix86_pmode == PMODE_DI))
3442 error ("address mode %qs not supported in the %s bit mode",
3443 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3444 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3445 }
3446 else
3447 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3448 ? PMODE_DI : PMODE_SI;
3449
3450 if (!opts_set->x_ix86_abi)
3451 opts->x_ix86_abi = DEFAULT_ABI;
3452
3453 /* For targets using the MS ABI, enable MS extensions unless they were
3454 explicitly turned off. For non-MS ABIs we turn this option
3455 off. */
3456 if (!opts_set->x_flag_ms_extensions)
3457 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3458
3459 if (opts_set->x_ix86_cmodel)
3460 {
3461 switch (opts->x_ix86_cmodel)
3462 {
3463 case CM_SMALL:
3464 case CM_SMALL_PIC:
3465 if (opts->x_flag_pic)
3466 opts->x_ix86_cmodel = CM_SMALL_PIC;
3467 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3468 error ("code model %qs not supported in the %s bit mode",
3469 "small", "32");
3470 break;
3471
3472 case CM_MEDIUM:
3473 case CM_MEDIUM_PIC:
3474 if (opts->x_flag_pic)
3475 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3476 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3477 error ("code model %qs not supported in the %s bit mode",
3478 "medium", "32");
3479 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3480 error ("code model %qs not supported in x32 mode",
3481 "medium");
3482 break;
3483
3484 case CM_LARGE:
3485 case CM_LARGE_PIC:
3486 if (opts->x_flag_pic)
3487 opts->x_ix86_cmodel = CM_LARGE_PIC;
3488 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3489 error ("code model %qs not supported in the %s bit mode",
3490 "large", "32");
3491 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3492 error ("code model %qs not supported in x32 mode",
3493 "large");
3494 break;
3495
3496 case CM_32:
3497 if (opts->x_flag_pic)
3498 error ("code model %s does not support PIC mode", "32");
3499 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3500 error ("code model %qs not supported in the %s bit mode",
3501 "32", "64");
3502 break;
3503
3504 case CM_KERNEL:
3505 if (opts->x_flag_pic)
3506 {
3507 error ("code model %s does not support PIC mode", "kernel");
3508 opts->x_ix86_cmodel = CM_32;
3509 }
3510 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3511 error ("code model %qs not supported in the %s bit mode",
3512 "kernel", "32");
3513 break;
3514
3515 default:
3516 gcc_unreachable ();
3517 }
3518 }
3519 else
3520 {
3521 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3522 use of rip-relative addressing. This eliminates fixups that
3523 would otherwise be needed if this object is to be placed in a
3524 DLL, and is essentially just as efficient as direct addressing. */
3525 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3526 && (TARGET_RDOS || TARGET_PECOFF))
3527 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3528 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3529 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3530 else
3531 opts->x_ix86_cmodel = CM_32;
3532 }
3533 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3534 {
3535 error ("-masm=intel not supported in this configuration");
3536 opts->x_ix86_asm_dialect = ASM_ATT;
3537 }
3538 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3539 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3540 sorry ("%i-bit mode not compiled in",
3541 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3542
3543 for (i = 0; i < pta_size; i++)
3544 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3545 {
3546 ix86_schedule = processor_alias_table[i].schedule;
3547 ix86_arch = processor_alias_table[i].processor;
3548 /* Default cpu tuning to the architecture. */
3549 ix86_tune = ix86_arch;
3550
3551 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3552 && !(processor_alias_table[i].flags & PTA_64BIT))
3553 error ("CPU you selected does not support x86-64 "
3554 "instruction set");
3555
3556 if (processor_alias_table[i].flags & PTA_MMX
3557 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3558 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3559 if (processor_alias_table[i].flags & PTA_3DNOW
3560 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3561 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3562 if (processor_alias_table[i].flags & PTA_3DNOW_A
3563 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3564 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3565 if (processor_alias_table[i].flags & PTA_SSE
3566 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3567 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3568 if (processor_alias_table[i].flags & PTA_SSE2
3569 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3570 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3571 if (processor_alias_table[i].flags & PTA_SSE3
3572 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3573 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3574 if (processor_alias_table[i].flags & PTA_SSSE3
3575 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3576 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3577 if (processor_alias_table[i].flags & PTA_SSE4_1
3578 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3579 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3580 if (processor_alias_table[i].flags & PTA_SSE4_2
3581 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3582 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3583 if (processor_alias_table[i].flags & PTA_AVX
3584 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3585 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3586 if (processor_alias_table[i].flags & PTA_AVX2
3587 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3588 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3589 if (processor_alias_table[i].flags & PTA_FMA
3590 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3591 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3592 if (processor_alias_table[i].flags & PTA_SSE4A
3593 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3594 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3595 if (processor_alias_table[i].flags & PTA_FMA4
3596 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3597 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3598 if (processor_alias_table[i].flags & PTA_XOP
3599 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3600 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3601 if (processor_alias_table[i].flags & PTA_LWP
3602 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3603 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3604 if (processor_alias_table[i].flags & PTA_ABM
3605 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3606 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3607 if (processor_alias_table[i].flags & PTA_BMI
3608 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3609 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3610 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3611 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3612 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3613 if (processor_alias_table[i].flags & PTA_TBM
3614 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3615 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3616 if (processor_alias_table[i].flags & PTA_BMI2
3617 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3618 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3619 if (processor_alias_table[i].flags & PTA_CX16
3620 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3621 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3622 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3623 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3624 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3625 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3626 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3627 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3628 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3629 if (processor_alias_table[i].flags & PTA_MOVBE
3630 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3631 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3632 if (processor_alias_table[i].flags & PTA_AES
3633 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3634 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3635 if (processor_alias_table[i].flags & PTA_SHA
3636 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3637 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3638 if (processor_alias_table[i].flags & PTA_PCLMUL
3639 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3640 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3641 if (processor_alias_table[i].flags & PTA_FSGSBASE
3642 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3643 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3644 if (processor_alias_table[i].flags & PTA_RDRND
3645 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3646 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3647 if (processor_alias_table[i].flags & PTA_F16C
3648 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3649 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3650 if (processor_alias_table[i].flags & PTA_RTM
3651 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3652 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3653 if (processor_alias_table[i].flags & PTA_HLE
3654 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3655 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3656 if (processor_alias_table[i].flags & PTA_PRFCHW
3657 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3658 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3659 if (processor_alias_table[i].flags & PTA_RDSEED
3660 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3661 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3662 if (processor_alias_table[i].flags & PTA_ADX
3663 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3664 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3665 if (processor_alias_table[i].flags & PTA_FXSR
3666 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3667 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3668 if (processor_alias_table[i].flags & PTA_XSAVE
3669 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3670 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3671 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3672 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3673 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3674 if (processor_alias_table[i].flags & PTA_AVX512F
3675 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3676 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3677 if (processor_alias_table[i].flags & PTA_AVX512ER
3678 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3679 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3680 if (processor_alias_table[i].flags & PTA_AVX512PF
3681 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3682 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3683 if (processor_alias_table[i].flags & PTA_AVX512CD
3684 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3685 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3686 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3687 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3688 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3689 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3690 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3691 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3692 if (processor_alias_table[i].flags & PTA_XSAVEC
3693 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3694 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3695 if (processor_alias_table[i].flags & PTA_XSAVES
3696 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3697 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3698 if (processor_alias_table[i].flags & PTA_AVX512DQ
3699 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3700 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3701 if (processor_alias_table[i].flags & PTA_AVX512BW
3702 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3703 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3704 if (processor_alias_table[i].flags & PTA_AVX512VL
3705 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3706 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3707 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3708 x86_prefetch_sse = true;
3709
3710 break;
3711 }
3712
3713 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3714 error ("generic CPU can be used only for %stune=%s %s",
3715 prefix, suffix, sw);
3716 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3717 error ("intel CPU can be used only for %stune=%s %s",
3718 prefix, suffix, sw);
3719 else if (i == pta_size)
3720 error ("bad value (%s) for %sarch=%s %s",
3721 opts->x_ix86_arch_string, prefix, suffix, sw);
3722
3723 ix86_arch_mask = 1u << ix86_arch;
3724 for (i = 0; i < X86_ARCH_LAST; ++i)
3725 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3726
3727 for (i = 0; i < pta_size; i++)
3728 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3729 {
3730 ix86_schedule = processor_alias_table[i].schedule;
3731 ix86_tune = processor_alias_table[i].processor;
3732 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3733 {
3734 if (!(processor_alias_table[i].flags & PTA_64BIT))
3735 {
3736 if (ix86_tune_defaulted)
3737 {
3738 opts->x_ix86_tune_string = "x86-64";
3739 for (i = 0; i < pta_size; i++)
3740 if (! strcmp (opts->x_ix86_tune_string,
3741 processor_alias_table[i].name))
3742 break;
3743 ix86_schedule = processor_alias_table[i].schedule;
3744 ix86_tune = processor_alias_table[i].processor;
3745 }
3746 else
3747 error ("CPU you selected does not support x86-64 "
3748 "instruction set");
3749 }
3750 }
3751 /* Intel CPUs have always interpreted SSE prefetch instructions as
3752 NOPs; so, we can enable SSE prefetch instructions even when
3753 -mtune (rather than -march) points us to a processor that has them.
3754 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3755 higher processors. */
3756 if (TARGET_CMOV
3757 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3758 x86_prefetch_sse = true;
3759 break;
3760 }
3761
3762 if (ix86_tune_specified && i == pta_size)
3763 error ("bad value (%s) for %stune=%s %s",
3764 opts->x_ix86_tune_string, prefix, suffix, sw);
3765
3766 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3767
3768 #ifndef USE_IX86_FRAME_POINTER
3769 #define USE_IX86_FRAME_POINTER 0
3770 #endif
3771
3772 #ifndef USE_X86_64_FRAME_POINTER
3773 #define USE_X86_64_FRAME_POINTER 0
3774 #endif
3775
3776 /* Set the default values for switches whose default depends on TARGET_64BIT
3777 in case they weren't overwritten by command line options. */
3778 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3779 {
3780 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3781 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3782 if (opts->x_flag_asynchronous_unwind_tables
3783 && !opts_set->x_flag_unwind_tables
3784 && TARGET_64BIT_MS_ABI)
3785 opts->x_flag_unwind_tables = 1;
3786 if (opts->x_flag_asynchronous_unwind_tables == 2)
3787 opts->x_flag_unwind_tables
3788 = opts->x_flag_asynchronous_unwind_tables = 1;
3789 if (opts->x_flag_pcc_struct_return == 2)
3790 opts->x_flag_pcc_struct_return = 0;
3791 }
3792 else
3793 {
3794 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3795 opts->x_flag_omit_frame_pointer
3796 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3797 if (opts->x_flag_asynchronous_unwind_tables == 2)
3798 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3799 if (opts->x_flag_pcc_struct_return == 2)
3800 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3801 }
3802
3803 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3804 if (opts->x_optimize_size)
3805 ix86_cost = &ix86_size_cost;
3806 else
3807 ix86_cost = ix86_tune_cost;
3808
3809 /* Arrange to set up i386_stack_locals for all functions. */
3810 init_machine_status = ix86_init_machine_status;
3811
3812 /* Validate -mregparm= value. */
3813 if (opts_set->x_ix86_regparm)
3814 {
3815 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3816 warning (0, "-mregparm is ignored in 64-bit mode");
3817 if (opts->x_ix86_regparm > REGPARM_MAX)
3818 {
3819 error ("-mregparm=%d is not between 0 and %d",
3820 opts->x_ix86_regparm, REGPARM_MAX);
3821 opts->x_ix86_regparm = 0;
3822 }
3823 }
3824 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3825 opts->x_ix86_regparm = REGPARM_MAX;
3826
3827 /* Default align_* from the processor table. */
3828 if (opts->x_align_loops == 0)
3829 {
3830 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3831 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3832 }
3833 if (opts->x_align_jumps == 0)
3834 {
3835 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3836 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3837 }
3838 if (opts->x_align_functions == 0)
3839 {
3840 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3841 }
3842
3843 /* Provide default for -mbranch-cost= value. */
3844 if (!opts_set->x_ix86_branch_cost)
3845 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3846
3847 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3848 {
3849 opts->x_target_flags
3850 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3851
3852 /* Enable by default the SSE and MMX builtins. Do allow the user to
3853 explicitly disable any of these. In particular, disabling SSE and
3854 MMX for kernel code is extremely useful. */
3855 if (!ix86_arch_specified)
3856 opts->x_ix86_isa_flags
3857 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3858 | TARGET_SUBTARGET64_ISA_DEFAULT)
3859 & ~opts->x_ix86_isa_flags_explicit);
3860
3861 if (TARGET_RTD_P (opts->x_target_flags))
3862 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3863 }
3864 else
3865 {
3866 opts->x_target_flags
3867 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3868
3869 if (!ix86_arch_specified)
3870 opts->x_ix86_isa_flags
3871 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3872
3873 /* The i386 ABI does not specify a red zone. It still makes sense to use
3874 it when the programmer takes care to keep the stack from being destroyed. */
3875 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3876 opts->x_target_flags |= MASK_NO_RED_ZONE;
3877 }
3878
3879 /* Keep nonleaf frame pointers. */
3880 if (opts->x_flag_omit_frame_pointer)
3881 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3882 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3883 opts->x_flag_omit_frame_pointer = 1;
3884
3885 /* If we're doing fast math, we don't care about comparison order
3886 wrt NaNs. This lets us use a shorter comparison sequence. */
3887 if (opts->x_flag_finite_math_only)
3888 opts->x_target_flags &= ~MASK_IEEE_FP;
3889
3890 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3891 since the insns won't need emulation. */
3892 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3893 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3894
3895 /* Likewise, if the target doesn't have a 387, or we've specified
3896 software floating point, don't use 387 inline intrinsics. */
3897 if (!TARGET_80387_P (opts->x_target_flags))
3898 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3899
3900 /* Turn on MMX builtins for -msse. */
3901 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3902 opts->x_ix86_isa_flags
3903 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3904
3905 /* Enable SSE prefetch. */
3906 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3907 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3908 x86_prefetch_sse = true;
3909
3910 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3911 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3912 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3913 opts->x_ix86_isa_flags
3914 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3915
3916 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3917 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3918 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3919 opts->x_ix86_isa_flags
3920 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3921
3922 /* Enable lzcnt instruction for -mabm. */
3923 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3924 opts->x_ix86_isa_flags
3925 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3926
3927 /* Validate -mpreferred-stack-boundary= value or default it to
3928 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3929 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3930 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3931 {
3932 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3933 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3934 int max = (TARGET_SEH ? 4 : 12);
3935
3936 if (opts->x_ix86_preferred_stack_boundary_arg < min
3937 || opts->x_ix86_preferred_stack_boundary_arg > max)
3938 {
3939 if (min == max)
3940 error ("-mpreferred-stack-boundary is not supported "
3941 "for this target");
3942 else
3943 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3944 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3945 }
3946 else
3947 ix86_preferred_stack_boundary
3948 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3949 }
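/* Worked example of the computation above: -mpreferred-stack-boundary=4
(the minimum accepted in 64-bit mode with SSE enabled) gives
(1 << 4) * BITS_PER_UNIT = 128 bits, i.e. a 16-byte preferred stack
alignment, so the option argument is the log2 of the byte alignment.  */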
3950
3951 /* Set the default value for -mstackrealign. */
3952 if (opts->x_ix86_force_align_arg_pointer == -1)
3953 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3954
3955 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3956
3957 /* Validate -mincoming-stack-boundary= value or default it to
3958 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3959 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3960 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3961 {
3962 if (opts->x_ix86_incoming_stack_boundary_arg
3963 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3964 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3965 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3966 opts->x_ix86_incoming_stack_boundary_arg,
3967 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3968 else
3969 {
3970 ix86_user_incoming_stack_boundary
3971 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3972 ix86_incoming_stack_boundary
3973 = ix86_user_incoming_stack_boundary;
3974 }
3975 }
3976
3977 /* Accept -msseregparm only if at least SSE support is enabled. */
3978 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3979 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3980 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3981
3982 if (opts_set->x_ix86_fpmath)
3983 {
3984 if (opts->x_ix86_fpmath & FPMATH_SSE)
3985 {
3986 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3987 {
3988 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3989 opts->x_ix86_fpmath = FPMATH_387;
3990 }
3991 else if ((opts->x_ix86_fpmath & FPMATH_387)
3992 && !TARGET_80387_P (opts->x_target_flags))
3993 {
3994 warning (0, "387 instruction set disabled, using SSE arithmetics");
3995 opts->x_ix86_fpmath = FPMATH_SSE;
3996 }
3997 }
3998 }
3999 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4000 -mfpmath=387. The latter is however the default on many targets since
4001 the extra 80-bit precision of temporaries is considered part of the ABI.
4002 Overwrite the default at least for -ffast-math.
4003 TODO: -mfpmath=both seems to produce similarly performing code with
4004 slightly smaller binaries. It is however not clear if register
4005 allocation is ready for this setting.
4006 Also -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
4007 codegen. We may switch to 387 with -ffast-math for size-optimized
4008 functions. */
4009 else if (fast_math_flags_set_p (&global_options)
4010 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4011 opts->x_ix86_fpmath = FPMATH_SSE;
4012 else
4013 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
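/* Summary sketch of the selection chain above: an explicit -mfpmath=
request wins, degrading to the other unit with a warning when SSE or the
387 is unavailable; otherwise -ffast-math on an SSE2-capable target picks
FPMATH_SSE, and everything else falls back to TARGET_FPMATH_DEFAULT_P for
the current ISA flags.  */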
4014
4015 /* If the i387 is disabled, then do not return values in it. */
4016 if (!TARGET_80387_P (opts->x_target_flags))
4017 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4018
4019 /* Use external vectorized library in vectorizing intrinsics. */
4020 if (opts_set->x_ix86_veclibabi_type)
4021 switch (opts->x_ix86_veclibabi_type)
4022 {
4023 case ix86_veclibabi_type_svml:
4024 ix86_veclib_handler = ix86_veclibabi_svml;
4025 break;
4026
4027 case ix86_veclibabi_type_acml:
4028 ix86_veclib_handler = ix86_veclibabi_acml;
4029 break;
4030
4031 default:
4032 gcc_unreachable ();
4033 }
4034
4035 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4036 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4037 && !opts->x_optimize_size)
4038 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4039
4040 /* If stack probes are required, the space used for large function
4041 arguments on the stack must also be probed, so enable
4042 -maccumulate-outgoing-args so this happens in the prologue. */
4043 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4044 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4045 {
4046 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4047 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4048 "for correctness", prefix, suffix);
4049 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4050 }
4051
4052 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4053 {
4054 char *p;
4055 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4056 p = strchr (internal_label_prefix, 'X');
4057 internal_label_prefix_len = p - internal_label_prefix;
4058 *p = '\0';
4059 }
4060
4061 /* When scheduling description is not available, disable scheduler pass
4062 so it won't slow down the compilation and make x87 code slower. */
4063 if (!TARGET_SCHEDULE)
4064 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4065
4066 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4067 ix86_tune_cost->simultaneous_prefetches,
4068 opts->x_param_values,
4069 opts_set->x_param_values);
4070 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4071 ix86_tune_cost->prefetch_block,
4072 opts->x_param_values,
4073 opts_set->x_param_values);
4074 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4075 ix86_tune_cost->l1_cache_size,
4076 opts->x_param_values,
4077 opts_set->x_param_values);
4078 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4079 ix86_tune_cost->l2_cache_size,
4080 opts->x_param_values,
4081 opts_set->x_param_values);
4082
4083 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4084 if (opts->x_flag_prefetch_loop_arrays < 0
4085 && HAVE_prefetch
4086 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4087 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4088 opts->x_flag_prefetch_loop_arrays = 1;
4089
4090 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4091 can be optimized to ap = __builtin_next_arg (0). */
4092 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4093 targetm.expand_builtin_va_start = NULL;
4094
4095 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4096 {
4097 ix86_gen_leave = gen_leave_rex64;
4098 if (Pmode == DImode)
4099 {
4100 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4101 ix86_gen_tls_local_dynamic_base_64
4102 = gen_tls_local_dynamic_base_64_di;
4103 }
4104 else
4105 {
4106 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4107 ix86_gen_tls_local_dynamic_base_64
4108 = gen_tls_local_dynamic_base_64_si;
4109 }
4110 }
4111 else
4112 ix86_gen_leave = gen_leave;
4113
4114 if (Pmode == DImode)
4115 {
4116 ix86_gen_add3 = gen_adddi3;
4117 ix86_gen_sub3 = gen_subdi3;
4118 ix86_gen_sub3_carry = gen_subdi3_carry;
4119 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4120 ix86_gen_andsp = gen_anddi3;
4121 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4122 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4123 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4124 ix86_gen_monitor = gen_sse3_monitor_di;
4125 }
4126 else
4127 {
4128 ix86_gen_add3 = gen_addsi3;
4129 ix86_gen_sub3 = gen_subsi3;
4130 ix86_gen_sub3_carry = gen_subsi3_carry;
4131 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4132 ix86_gen_andsp = gen_andsi3;
4133 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4134 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4135 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4136 ix86_gen_monitor = gen_sse3_monitor_si;
4137 }
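/* Note on the split above: the leave and TLS patterns depend on whether the
target is 64-bit, while the add/sub, stack-probe and monitor patterns depend
on Pmode, which by default is SImode even for 64-bit code when targeting the
x32 ABI; hence the separate Pmode == DImode test.  */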
4138
4139 #ifdef USE_IX86_CLD
4140 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4141 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4142 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4143 #endif
4144
4145 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4146 {
4147 if (opts->x_flag_fentry > 0)
4148 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4149 "with -fpic");
4150 opts->x_flag_fentry = 0;
4151 }
4152 else if (TARGET_SEH)
4153 {
4154 if (opts->x_flag_fentry == 0)
4155 sorry ("-mno-fentry isn%'t compatible with SEH");
4156 opts->x_flag_fentry = 1;
4157 }
4158 else if (opts->x_flag_fentry < 0)
4159 {
4160 #if defined(PROFILE_BEFORE_PROLOGUE)
4161 opts->x_flag_fentry = 1;
4162 #else
4163 opts->x_flag_fentry = 0;
4164 #endif
4165 }
4166
4167 /* When not optimizing for size, enable vzeroupper optimization for
4168 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4169 AVX unaligned load/store. */
4170 if (!opts->x_optimize_size)
4171 {
4172 if (flag_expensive_optimizations
4173 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4174 opts->x_target_flags |= MASK_VZEROUPPER;
4175 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4176 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4177 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4178 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4179 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4180 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4181 /* Enable 128-bit AVX instruction generation
4182 for the auto-vectorizer. */
4183 if (TARGET_AVX128_OPTIMAL
4184 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4185 opts->x_target_flags |= MASK_PREFER_AVX128;
4186 }
4187
4188 if (opts->x_ix86_recip_name)
4189 {
4190 char *p = ASTRDUP (opts->x_ix86_recip_name);
4191 char *q;
4192 unsigned int mask, i;
4193 bool invert;
4194
4195 while ((q = strtok (p, ",")) != NULL)
4196 {
4197 p = NULL;
4198 if (*q == '!')
4199 {
4200 invert = true;
4201 q++;
4202 }
4203 else
4204 invert = false;
4205
4206 if (!strcmp (q, "default"))
4207 mask = RECIP_MASK_ALL;
4208 else
4209 {
4210 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4211 if (!strcmp (q, recip_options[i].string))
4212 {
4213 mask = recip_options[i].mask;
4214 break;
4215 }
4216
4217 if (i == ARRAY_SIZE (recip_options))
4218 {
4219 error ("unknown option for -mrecip=%s", q);
4220 invert = false;
4221 mask = RECIP_MASK_NONE;
4222 }
4223 }
4224
4225 opts->x_recip_mask_explicit |= mask;
4226 if (invert)
4227 opts->x_recip_mask &= ~mask;
4228 else
4229 opts->x_recip_mask |= mask;
4230 }
4231 }
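/* Worked example of the parsing above: -mrecip=all,!sqrt first ORs
RECIP_MASK_ALL into x_recip_mask, then the "!sqrt" entry clears
RECIP_MASK_SQRT again; both masks are also recorded in
x_recip_mask_explicit, so the -mrecip handling below does not re-enable
them.  */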
4232
4233 if (TARGET_RECIP_P (opts->x_target_flags))
4234 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4235 else if (opts_set->x_target_flags & MASK_RECIP)
4236 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4237
4238 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4239 for 64-bit Bionic. */
4240 if (TARGET_HAS_BIONIC
4241 && !(opts_set->x_target_flags
4242 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4243 opts->x_target_flags |= (TARGET_64BIT
4244 ? MASK_LONG_DOUBLE_128
4245 : MASK_LONG_DOUBLE_64);
4246
4247 /* Only one of them can be active. */
4248 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4249 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4250
4251 /* Save the initial options in case the user does function specific
4252 options. */
4253 if (main_args_p)
4254 target_option_default_node = target_option_current_node
4255 = build_target_option_node (opts);
4256
4257 /* Handle stack protector */
4258 if (!opts_set->x_ix86_stack_protector_guard)
4259 opts->x_ix86_stack_protector_guard
4260 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4261
4262 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4263 if (opts->x_ix86_tune_memcpy_strategy)
4264 {
4265 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4266 ix86_parse_stringop_strategy_string (str, false);
4267 free (str);
4268 }
4269
4270 if (opts->x_ix86_tune_memset_strategy)
4271 {
4272 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4273 ix86_parse_stringop_strategy_string (str, true);
4274 free (str);
4275 }
4276 }
4277
4278 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4279
4280 static void
4281 ix86_option_override (void)
4282 {
4283 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4284 static struct register_pass_info insert_vzeroupper_info
4285 = { pass_insert_vzeroupper, "reload",
4286 1, PASS_POS_INSERT_AFTER
4287 };
4288
4289 ix86_option_override_internal (true, &global_options, &global_options_set);
4290
4291
4292 /* This needs to be done at start up. It's convenient to do it here. */
4293 register_pass (&insert_vzeroupper_info);
4294 }
4295
4296 /* Update register usage after having seen the compiler flags. */
4297
4298 static void
4299 ix86_conditional_register_usage (void)
4300 {
4301 int i, c_mask;
4302 unsigned int j;
4303
4304 /* The PIC register, if it exists, is fixed. */
4305 j = PIC_OFFSET_TABLE_REGNUM;
4306 if (j != INVALID_REGNUM)
4307 fixed_regs[j] = call_used_regs[j] = 1;
4308
4309 /* For 32-bit targets, squash the REX registers. */
4310 if (! TARGET_64BIT)
4311 {
4312 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4313 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4314 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4315 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4316 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4317 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4318 }
4319
4320 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4321 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4322 : TARGET_64BIT ? (1 << 2)
4323 : (1 << 1));
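/* The c_mask bits mirror the encoding of the CALL_USED_REGISTERS
initializer in i386.h.  Illustrative sketch: an entry whose value has bits 1
and 2 set (i.e. 6) becomes call-used for 32-bit and 64-bit SysV targets but
call-saved under the 64-bit MS ABI, because the loop below reduces
call_used_regs[i] to !!(call_used_regs[i] & c_mask).  */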
4324
4325 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4326
4327 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4328 {
4329 /* Set/reset conditionally defined registers from
4330 CALL_USED_REGISTERS initializer. */
4331 if (call_used_regs[i] > 1)
4332 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4333
4334 /* Calculate registers of CLOBBERED_REGS register set
4335 as call used registers from GENERAL_REGS register set. */
4336 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4337 && call_used_regs[i])
4338 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4339 }
4340
4341 /* If MMX is disabled, squash the registers. */
4342 if (! TARGET_MMX)
4343 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4344 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4345 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4346
4347 /* If SSE is disabled, squash the registers. */
4348 if (! TARGET_SSE)
4349 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4350 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4351 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4352
4353 /* If the FPU is disabled, squash the registers. */
4354 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4355 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4356 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4357 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4358
4359 /* If AVX512F is disabled, squash the registers. */
4360 if (! TARGET_AVX512F)
4361 {
4362 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4363 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4364
4365 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4366 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4367 }
4368 }
4369
4370 \f
4371 /* Save the current options */
4372
4373 static void
4374 ix86_function_specific_save (struct cl_target_option *ptr,
4375 struct gcc_options *opts)
4376 {
4377 ptr->arch = ix86_arch;
4378 ptr->schedule = ix86_schedule;
4379 ptr->tune = ix86_tune;
4380 ptr->branch_cost = ix86_branch_cost;
4381 ptr->tune_defaulted = ix86_tune_defaulted;
4382 ptr->arch_specified = ix86_arch_specified;
4383 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4384 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4385 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4386 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4387 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4388 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4389 ptr->x_ix86_abi = opts->x_ix86_abi;
4390 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4391 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4392 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4393 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4394 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4395 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4396 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4397 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4398 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4399 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4400 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4401 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4402 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4403 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4404 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4405 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4406 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4407 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4408 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4409 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4410
4411 /* The fields are char but the variables are not; make sure the
4412 values fit in the fields. */
4413 gcc_assert (ptr->arch == ix86_arch);
4414 gcc_assert (ptr->schedule == ix86_schedule);
4415 gcc_assert (ptr->tune == ix86_tune);
4416 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4417 }
4418
4419 /* Restore the current options */
4420
4421 static void
4422 ix86_function_specific_restore (struct gcc_options *opts,
4423 struct cl_target_option *ptr)
4424 {
4425 enum processor_type old_tune = ix86_tune;
4426 enum processor_type old_arch = ix86_arch;
4427 unsigned int ix86_arch_mask;
4428 int i;
4429
4430 /* We don't change -fPIC. */
4431 opts->x_flag_pic = flag_pic;
4432
4433 ix86_arch = (enum processor_type) ptr->arch;
4434 ix86_schedule = (enum attr_cpu) ptr->schedule;
4435 ix86_tune = (enum processor_type) ptr->tune;
4436 opts->x_ix86_branch_cost = ptr->branch_cost;
4437 ix86_tune_defaulted = ptr->tune_defaulted;
4438 ix86_arch_specified = ptr->arch_specified;
4439 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4440 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4441 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4442 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4443 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4444 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4445 opts->x_ix86_abi = ptr->x_ix86_abi;
4446 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4447 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4448 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4449 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4450 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4451 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4452 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4453 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4454 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4455 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4456 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4457 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4458 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4459 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4460 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4461 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4462 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4463 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4464 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4465 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4466
4467 /* Recreate the arch feature tests if the arch changed */
4468 if (old_arch != ix86_arch)
4469 {
4470 ix86_arch_mask = 1u << ix86_arch;
4471 for (i = 0; i < X86_ARCH_LAST; ++i)
4472 ix86_arch_features[i]
4473 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4474 }
4475
4476 /* Recreate the tune optimization tests */
4477 if (old_tune != ix86_tune)
4478 set_ix86_tune_features (ix86_tune, false);
4479 }
4480
4481 /* Print the current options */
4482
4483 static void
4484 ix86_function_specific_print (FILE *file, int indent,
4485 struct cl_target_option *ptr)
4486 {
4487 char *target_string
4488 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4489 NULL, NULL, ptr->x_ix86_fpmath, false);
4490
4491 gcc_assert (ptr->arch < PROCESSOR_max);
4492 fprintf (file, "%*sarch = %d (%s)\n",
4493 indent, "",
4494 ptr->arch, processor_target_table[ptr->arch].name);
4495
4496 gcc_assert (ptr->tune < PROCESSOR_max);
4497 fprintf (file, "%*stune = %d (%s)\n",
4498 indent, "",
4499 ptr->tune, processor_target_table[ptr->tune].name);
4500
4501 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4502
4503 if (target_string)
4504 {
4505 fprintf (file, "%*s%s\n", indent, "", target_string);
4506 free (target_string);
4507 }
4508 }
4509
4510 \f
4511 /* Inner function to process the attribute((target(...))), take an argument and
4512 set the current options from the argument. If we have a list, recursively go
4513 over the list. */
4514
4515 static bool
4516 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4517 struct gcc_options *opts,
4518 struct gcc_options *opts_set,
4519 struct gcc_options *enum_opts_set)
4520 {
4521 char *next_optstr;
4522 bool ret = true;
4523
4524 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4525 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4526 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4527 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4528 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4529
4530 enum ix86_opt_type
4531 {
4532 ix86_opt_unknown,
4533 ix86_opt_yes,
4534 ix86_opt_no,
4535 ix86_opt_str,
4536 ix86_opt_enum,
4537 ix86_opt_isa
4538 };
4539
4540 static const struct
4541 {
4542 const char *string;
4543 size_t len;
4544 enum ix86_opt_type type;
4545 int opt;
4546 int mask;
4547 } attrs[] = {
4548 /* isa options */
4549 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4550 IX86_ATTR_ISA ("abm", OPT_mabm),
4551 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4552 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4553 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4554 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4555 IX86_ATTR_ISA ("aes", OPT_maes),
4556 IX86_ATTR_ISA ("sha", OPT_msha),
4557 IX86_ATTR_ISA ("avx", OPT_mavx),
4558 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4559 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4560 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4561 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4562 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4563 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4564 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4565 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4566 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4567 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4568 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4569 IX86_ATTR_ISA ("sse", OPT_msse),
4570 IX86_ATTR_ISA ("sse2", OPT_msse2),
4571 IX86_ATTR_ISA ("sse3", OPT_msse3),
4572 IX86_ATTR_ISA ("sse4", OPT_msse4),
4573 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4574 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4575 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4576 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4577 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4578 IX86_ATTR_ISA ("fma", OPT_mfma),
4579 IX86_ATTR_ISA ("xop", OPT_mxop),
4580 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4581 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4582 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4583 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4584 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4585 IX86_ATTR_ISA ("hle", OPT_mhle),
4586 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4587 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4588 IX86_ATTR_ISA ("adx", OPT_madx),
4589 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4590 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4591 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4592 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4593 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4594 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4595 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4596
4597 /* enum options */
4598 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4599
4600 /* string options */
4601 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4602 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4603
4604 /* flag options */
4605 IX86_ATTR_YES ("cld",
4606 OPT_mcld,
4607 MASK_CLD),
4608
4609 IX86_ATTR_NO ("fancy-math-387",
4610 OPT_mfancy_math_387,
4611 MASK_NO_FANCY_MATH_387),
4612
4613 IX86_ATTR_YES ("ieee-fp",
4614 OPT_mieee_fp,
4615 MASK_IEEE_FP),
4616
4617 IX86_ATTR_YES ("inline-all-stringops",
4618 OPT_minline_all_stringops,
4619 MASK_INLINE_ALL_STRINGOPS),
4620
4621 IX86_ATTR_YES ("inline-stringops-dynamically",
4622 OPT_minline_stringops_dynamically,
4623 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4624
4625 IX86_ATTR_NO ("align-stringops",
4626 OPT_mno_align_stringops,
4627 MASK_NO_ALIGN_STRINGOPS),
4628
4629 IX86_ATTR_YES ("recip",
4630 OPT_mrecip,
4631 MASK_RECIP),
4632
4633 };
4634
4635 /* If this is a list, recurse to get the options. */
4636 if (TREE_CODE (args) == TREE_LIST)
4637 {
4638 bool ret = true;
4639
4640 for (; args; args = TREE_CHAIN (args))
4641 if (TREE_VALUE (args)
4642 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4643 p_strings, opts, opts_set,
4644 enum_opts_set))
4645 ret = false;
4646
4647 return ret;
4648 }
4649
4650 else if (TREE_CODE (args) != STRING_CST)
4651 {
4652 error ("attribute %<target%> argument not a string");
4653 return false;
4654 }
4655
4656 /* Handle multiple arguments separated by commas. */
4657 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4658
4659 while (next_optstr && *next_optstr != '\0')
4660 {
4661 char *p = next_optstr;
4662 char *orig_p = p;
4663 char *comma = strchr (next_optstr, ',');
4664 const char *opt_string;
4665 size_t len, opt_len;
4666 int opt;
4667 bool opt_set_p;
4668 char ch;
4669 unsigned i;
4670 enum ix86_opt_type type = ix86_opt_unknown;
4671 int mask = 0;
4672
4673 if (comma)
4674 {
4675 *comma = '\0';
4676 len = comma - next_optstr;
4677 next_optstr = comma + 1;
4678 }
4679 else
4680 {
4681 len = strlen (p);
4682 next_optstr = NULL;
4683 }
4684
4685 /* Recognize no-xxx. */
4686 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4687 {
4688 opt_set_p = false;
4689 p += 3;
4690 len -= 3;
4691 }
4692 else
4693 opt_set_p = true;
4694
4695 /* Find the option. */
4696 ch = *p;
4697 opt = N_OPTS;
4698 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4699 {
4700 type = attrs[i].type;
4701 opt_len = attrs[i].len;
4702 if (ch == attrs[i].string[0]
4703 && ((type != ix86_opt_str && type != ix86_opt_enum)
4704 ? len == opt_len
4705 : len > opt_len)
4706 && memcmp (p, attrs[i].string, opt_len) == 0)
4707 {
4708 opt = attrs[i].opt;
4709 mask = attrs[i].mask;
4710 opt_string = attrs[i].string;
4711 break;
4712 }
4713 }
4714
4715 /* Process the option. */
4716 if (opt == N_OPTS)
4717 {
4718 error ("attribute(target(\"%s\")) is unknown", orig_p);
4719 ret = false;
4720 }
4721
4722 else if (type == ix86_opt_isa)
4723 {
4724 struct cl_decoded_option decoded;
4725
4726 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4727 ix86_handle_option (opts, opts_set,
4728 &decoded, input_location);
4729 }
4730
4731 else if (type == ix86_opt_yes || type == ix86_opt_no)
4732 {
4733 if (type == ix86_opt_no)
4734 opt_set_p = !opt_set_p;
4735
4736 if (opt_set_p)
4737 opts->x_target_flags |= mask;
4738 else
4739 opts->x_target_flags &= ~mask;
4740 }
4741
4742 else if (type == ix86_opt_str)
4743 {
4744 if (p_strings[opt])
4745 {
4746 error ("option(\"%s\") was already specified", opt_string);
4747 ret = false;
4748 }
4749 else
4750 p_strings[opt] = xstrdup (p + opt_len);
4751 }
4752
4753 else if (type == ix86_opt_enum)
4754 {
4755 bool arg_ok;
4756 int value;
4757
4758 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4759 if (arg_ok)
4760 set_option (opts, enum_opts_set, opt, value,
4761 p + opt_len, DK_UNSPECIFIED, input_location,
4762 global_dc);
4763 else
4764 {
4765 error ("attribute(target(\"%s\")) is unknown", orig_p);
4766 ret = false;
4767 }
4768 }
4769
4770 else
4771 gcc_unreachable ();
4772 }
4773
4774 return ret;
4775 }
4776
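/* Illustrative use of the parser above (the declaration and names are
   hypothetical): a target attribute may mix ISA names, the "no-" prefix,
   and the string/enum options from the table, separated by commas:

     __attribute__((target ("sse4.2,no-avx,fpmath=sse,arch=core2")))
     int hot_loop (int *p, int n);

   Unrecognized names are reported through the N_OPTS error path above.  */
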
4777 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4778
4779 tree
4780 ix86_valid_target_attribute_tree (tree args,
4781 struct gcc_options *opts,
4782 struct gcc_options *opts_set)
4783 {
4784 const char *orig_arch_string = opts->x_ix86_arch_string;
4785 const char *orig_tune_string = opts->x_ix86_tune_string;
4786 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4787 int orig_tune_defaulted = ix86_tune_defaulted;
4788 int orig_arch_specified = ix86_arch_specified;
4789 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4790 tree t = NULL_TREE;
4791 int i;
4792 struct cl_target_option *def
4793 = TREE_TARGET_OPTION (target_option_default_node);
4794 struct gcc_options enum_opts_set;
4795
4796 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4797
4798 /* Process each of the options on the chain. */
4799 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4800 opts_set, &enum_opts_set))
4801 return error_mark_node;
4802
4803 /* If the changed options are different from the default, rerun
4804 ix86_option_override_internal, and then save the options away.
4805      The string options are attribute options, and will be undone
4806 when we copy the save structure. */
4807 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4808 || opts->x_target_flags != def->x_target_flags
4809 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4810 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4811 || enum_opts_set.x_ix86_fpmath)
4812 {
4813 /* If we are using the default tune= or arch=, undo the string assigned,
4814 and use the default. */
4815 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4816 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4817 else if (!orig_arch_specified)
4818 opts->x_ix86_arch_string = NULL;
4819
4820 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4821 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4822 else if (orig_tune_defaulted)
4823 opts->x_ix86_tune_string = NULL;
4824
4825 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4826 if (enum_opts_set.x_ix86_fpmath)
4827 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4828 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4829 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4830 {
4831 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4832 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4833 }
4834
4835 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4836 ix86_option_override_internal (false, opts, opts_set);
4837
4838 /* Add any builtin functions with the new isa if any. */
4839 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4840
4841 /* Save the current options unless we are validating options for
4842 #pragma. */
4843 t = build_target_option_node (opts);
4844
4845 opts->x_ix86_arch_string = orig_arch_string;
4846 opts->x_ix86_tune_string = orig_tune_string;
4847 opts_set->x_ix86_fpmath = orig_fpmath_set;
4848
4849 /* Free up memory allocated to hold the strings */
4850 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4851 free (option_strings[i]);
4852 }
4853
4854 return t;
4855 }
4856
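/* A hedged sketch of the other way these option strings can reach this
   routine: besides the function attribute, the same syntax may be used
   with the target pragma, e.g.

     #pragma GCC push_options
     #pragma GCC target ("avx2")
     ...
     #pragma GCC pop_options

   which is why the code above distinguishes attribute and #pragma
   validation when saving the options.  */
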
4857 /* Hook to validate attribute((target("string"))). */
4858
4859 static bool
4860 ix86_valid_target_attribute_p (tree fndecl,
4861 tree ARG_UNUSED (name),
4862 tree args,
4863 int ARG_UNUSED (flags))
4864 {
4865 struct gcc_options func_options;
4866 tree new_target, new_optimize;
4867 bool ret = true;
4868
4869 /* attribute((target("default"))) does nothing, beyond
4870 affecting multi-versioning. */
4871 if (TREE_VALUE (args)
4872 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4873 && TREE_CHAIN (args) == NULL_TREE
4874 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4875 return true;
4876
4877 tree old_optimize = build_optimization_node (&global_options);
4878
4879 /* Get the optimization options of the current function. */
4880 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4881
4882 if (!func_optimize)
4883 func_optimize = old_optimize;
4884
4885 /* Init func_options. */
4886 memset (&func_options, 0, sizeof (func_options));
4887 init_options_struct (&func_options, NULL);
4888 lang_hooks.init_options_struct (&func_options);
4889
4890 cl_optimization_restore (&func_options,
4891 TREE_OPTIMIZATION (func_optimize));
4892
4893 /* Initialize func_options to the default before its target options can
4894 be set. */
4895 cl_target_option_restore (&func_options,
4896 TREE_TARGET_OPTION (target_option_default_node));
4897
4898 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4899 &global_options_set);
4900
4901 new_optimize = build_optimization_node (&func_options);
4902
4903 if (new_target == error_mark_node)
4904 ret = false;
4905
4906 else if (fndecl && new_target)
4907 {
4908 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4909
4910 if (old_optimize != new_optimize)
4911 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4912 }
4913
4914 return ret;
4915 }
4916
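/* Illustrative example of the "default" special case above, as used by
   C++ function multi-versioning (names hypothetical):

     __attribute__((target ("default"))) int dispatch (void);
     __attribute__((target ("avx2")))    int dispatch (void);

   The "default" version carries no target options of its own; it only
   participates in version dispatch, which is why it is accepted here
   without further processing.  */
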
4917 \f
4918 /* Hook to determine if one function can safely inline another. */
4919
4920 static bool
4921 ix86_can_inline_p (tree caller, tree callee)
4922 {
4923 bool ret = false;
4924 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4925 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4926
4927 /* If callee has no option attributes, then it is ok to inline. */
4928 if (!callee_tree)
4929 ret = true;
4930
4931 /* If caller has no option attributes, but callee does then it is not ok to
4932 inline. */
4933 else if (!caller_tree)
4934 ret = false;
4935
4936 else
4937 {
4938 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4939 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4940
4941 	  /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
4942 	     function can inline an SSE2 function but an SSE2 function can't
4943 	     inline an SSE4 function.  */
4944 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4945 != callee_opts->x_ix86_isa_flags)
4946 ret = false;
4947
4948 /* See if we have the same non-isa options. */
4949 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4950 ret = false;
4951
4952 /* See if arch, tune, etc. are the same. */
4953 else if (caller_opts->arch != callee_opts->arch)
4954 ret = false;
4955
4956 else if (caller_opts->tune != callee_opts->tune)
4957 ret = false;
4958
4959 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4960 ret = false;
4961
4962 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4963 ret = false;
4964
4965 else
4966 ret = true;
4967 }
4968
4969 return ret;
4970 }
4971
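/* Illustrative example of the subset rule above (hypothetical code):

     __attribute__((target ("sse2")))   static int callee (void);
     __attribute__((target ("sse4.2"))) int caller (void);

   caller may inline callee because callee's ISA flags are a subset of
   caller's; swapping the two attributes would block the inlining.  */
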
4972 \f
4973 /* Remember the last target of ix86_set_current_function. */
4974 static GTY(()) tree ix86_previous_fndecl;
4975
4976 /* Invalidate ix86_previous_fndecl cache. */
4977 void
4978 ix86_reset_previous_fndecl (void)
4979 {
4980 ix86_previous_fndecl = NULL_TREE;
4981 }
4982
4983 /* Establish appropriate back-end context for processing the function
4984 FNDECL. The argument might be NULL to indicate processing at top
4985 level, outside of any function scope. */
4986 static void
4987 ix86_set_current_function (tree fndecl)
4988 {
4989 /* Only change the context if the function changes. This hook is called
4990 several times in the course of compiling a function, and we don't want to
4991 slow things down too much or call target_reinit when it isn't safe. */
4992 if (fndecl && fndecl != ix86_previous_fndecl)
4993 {
4994 tree old_tree = (ix86_previous_fndecl
4995 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4996 : NULL_TREE);
4997
4998 tree new_tree = (fndecl
4999 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
5000 : NULL_TREE);
5001
5002 ix86_previous_fndecl = fndecl;
5003 if (old_tree == new_tree)
5004 ;
5005
5006 else if (new_tree)
5007 {
5008 cl_target_option_restore (&global_options,
5009 TREE_TARGET_OPTION (new_tree));
5010 if (TREE_TARGET_GLOBALS (new_tree))
5011 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5012 else
5013 TREE_TARGET_GLOBALS (new_tree)
5014 = save_target_globals_default_opts ();
5015 }
5016
5017 else if (old_tree)
5018 {
5019 new_tree = target_option_current_node;
5020 cl_target_option_restore (&global_options,
5021 TREE_TARGET_OPTION (new_tree));
5022 if (TREE_TARGET_GLOBALS (new_tree))
5023 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5024 else if (new_tree == target_option_default_node)
5025 restore_target_globals (&default_target_globals);
5026 else
5027 TREE_TARGET_GLOBALS (new_tree)
5028 = save_target_globals_default_opts ();
5029 }
5030 }
5031 }
5032
5033 \f
5034 /* Return true if this goes in large data/bss. */
5035
5036 static bool
5037 ix86_in_large_data_p (tree exp)
5038 {
5039 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5040 return false;
5041
5042 /* Functions are never large data. */
5043 if (TREE_CODE (exp) == FUNCTION_DECL)
5044 return false;
5045
5046 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5047 {
5048 const char *section = DECL_SECTION_NAME (exp);
5049 if (strcmp (section, ".ldata") == 0
5050 || strcmp (section, ".lbss") == 0)
5051 return true;
5052 return false;
5053 }
5054 else
5055 {
5056 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5057
5058 /* If this is an incomplete type with size 0, then we can't put it
5059 in data because it might be too big when completed. Also,
5060 	 int_size_in_bytes returns -1 if the size can vary or is larger than
5061 	 an integer, in which case it is also safer to assume that it goes in
5062 	 large data.  */
5063 if (size <= 0 || size > ix86_section_threshold)
5064 return true;
5065 }
5066
5067 return false;
5068 }
5069
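/* For example (numbers are illustrative), with
   -mcmodel=medium -mlarge-data-threshold=65536 an object larger than
   64 KiB, or one whose size cannot be determined, is treated as large
   data and placed in .ldata/.lbss by the section hooks below.  */
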
5070 /* Switch to the appropriate section for output of DECL.
5071 DECL is either a `VAR_DECL' node or a constant of some sort.
5072 RELOC indicates whether forming the initial value of DECL requires
5073 link-time relocations. */
5074
5075 ATTRIBUTE_UNUSED static section *
5076 x86_64_elf_select_section (tree decl, int reloc,
5077 unsigned HOST_WIDE_INT align)
5078 {
5079 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5080 && ix86_in_large_data_p (decl))
5081 {
5082 const char *sname = NULL;
5083 unsigned int flags = SECTION_WRITE;
5084 switch (categorize_decl_for_section (decl, reloc))
5085 {
5086 case SECCAT_DATA:
5087 sname = ".ldata";
5088 break;
5089 case SECCAT_DATA_REL:
5090 sname = ".ldata.rel";
5091 break;
5092 case SECCAT_DATA_REL_LOCAL:
5093 sname = ".ldata.rel.local";
5094 break;
5095 case SECCAT_DATA_REL_RO:
5096 sname = ".ldata.rel.ro";
5097 break;
5098 case SECCAT_DATA_REL_RO_LOCAL:
5099 sname = ".ldata.rel.ro.local";
5100 break;
5101 case SECCAT_BSS:
5102 sname = ".lbss";
5103 flags |= SECTION_BSS;
5104 break;
5105 case SECCAT_RODATA:
5106 case SECCAT_RODATA_MERGE_STR:
5107 case SECCAT_RODATA_MERGE_STR_INIT:
5108 case SECCAT_RODATA_MERGE_CONST:
5109 sname = ".lrodata";
5110 flags = 0;
5111 break;
5112 case SECCAT_SRODATA:
5113 case SECCAT_SDATA:
5114 case SECCAT_SBSS:
5115 gcc_unreachable ();
5116 case SECCAT_TEXT:
5117 case SECCAT_TDATA:
5118 case SECCAT_TBSS:
5119 	  /* We don't split these for the medium model.  Place them into
5120 	     default sections and hope for the best.  */
5121 break;
5122 }
5123 if (sname)
5124 {
5125 /* We might get called with string constants, but get_named_section
5126 doesn't like them as they are not DECLs. Also, we need to set
5127 flags in that case. */
5128 if (!DECL_P (decl))
5129 return get_section (sname, flags, NULL);
5130 return get_named_section (decl, sname, reloc);
5131 }
5132 }
5133 return default_elf_select_section (decl, reloc, align);
5134 }
5135
5136 /* Select a set of attributes for section NAME based on the properties
5137 of DECL and whether or not RELOC indicates that DECL's initializer
5138 might contain runtime relocations. */
5139
5140 static unsigned int ATTRIBUTE_UNUSED
5141 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5142 {
5143 unsigned int flags = default_section_type_flags (decl, name, reloc);
5144
5145 if (decl == NULL_TREE
5146 && (strcmp (name, ".ldata.rel.ro") == 0
5147 || strcmp (name, ".ldata.rel.ro.local") == 0))
5148 flags |= SECTION_RELRO;
5149
5150 if (strcmp (name, ".lbss") == 0
5151       || strncmp (name, ".lbss.", 6) == 0
5152       || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5153 flags |= SECTION_BSS;
5154
5155 return flags;
5156 }
5157
5158 /* Build up a unique section name, expressed as a
5159 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5160 RELOC indicates whether the initial value of EXP requires
5161 link-time relocations. */
5162
5163 static void ATTRIBUTE_UNUSED
5164 x86_64_elf_unique_section (tree decl, int reloc)
5165 {
5166 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5167 && ix86_in_large_data_p (decl))
5168 {
5169 const char *prefix = NULL;
5170 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5171 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5172
5173 switch (categorize_decl_for_section (decl, reloc))
5174 {
5175 case SECCAT_DATA:
5176 case SECCAT_DATA_REL:
5177 case SECCAT_DATA_REL_LOCAL:
5178 case SECCAT_DATA_REL_RO:
5179 case SECCAT_DATA_REL_RO_LOCAL:
5180 prefix = one_only ? ".ld" : ".ldata";
5181 break;
5182 case SECCAT_BSS:
5183 prefix = one_only ? ".lb" : ".lbss";
5184 break;
5185 case SECCAT_RODATA:
5186 case SECCAT_RODATA_MERGE_STR:
5187 case SECCAT_RODATA_MERGE_STR_INIT:
5188 case SECCAT_RODATA_MERGE_CONST:
5189 prefix = one_only ? ".lr" : ".lrodata";
5190 break;
5191 case SECCAT_SRODATA:
5192 case SECCAT_SDATA:
5193 case SECCAT_SBSS:
5194 gcc_unreachable ();
5195 case SECCAT_TEXT:
5196 case SECCAT_TDATA:
5197 case SECCAT_TBSS:
5198 	  /* We don't split these for the medium model.  Place them into
5199 	     default sections and hope for the best.  */
5200 break;
5201 }
5202 if (prefix)
5203 {
5204 const char *name, *linkonce;
5205 char *string;
5206
5207 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5208 name = targetm.strip_name_encoding (name);
5209
5210 /* If we're using one_only, then there needs to be a .gnu.linkonce
5211 prefix to the section name. */
5212 linkonce = one_only ? ".gnu.linkonce" : "";
5213
5214 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5215
5216 set_decl_section_name (decl, string);
5217 return;
5218 }
5219 }
5220 default_unique_section (decl, reloc);
5221 }
5222
5223 #ifdef COMMON_ASM_OP
5224 /* This says how to output assembler code to declare an
5225 uninitialized external linkage data object.
5226
5227    For the x86-64 medium model we need to use the .largecomm directive
5228    for large objects.  */
5229 void
5230 x86_elf_aligned_common (FILE *file,
5231 const char *name, unsigned HOST_WIDE_INT size,
5232 int align)
5233 {
5234 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5235 && size > (unsigned int)ix86_section_threshold)
5236 fputs (".largecomm\t", file);
5237 else
5238 fputs (COMMON_ASM_OP, file);
5239 assemble_name (file, name);
5240 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5241 size, align / BITS_PER_UNIT);
5242 }
5243 #endif
5244
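/* Illustrative output of the routine above for a hypothetical large
   object under the medium code model:

	.largecomm	big_buf,1048576,32

   i.e. the name, the size in bytes and the alignment in bytes, matching
   the fputs/fprintf calls above.  */
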
5245 /* Utility function for targets to use in implementing
5246 ASM_OUTPUT_ALIGNED_BSS. */
5247
5248 void
5249 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5250 unsigned HOST_WIDE_INT size, int align)
5251 {
5252 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5253 && size > (unsigned int)ix86_section_threshold)
5254 switch_to_section (get_named_section (decl, ".lbss", 0));
5255 else
5256 switch_to_section (bss_section);
5257 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5258 #ifdef ASM_DECLARE_OBJECT_NAME
5259 last_assemble_variable_decl = decl;
5260 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5261 #else
5262 /* Standard thing is just output label for the object. */
5263 ASM_OUTPUT_LABEL (file, name);
5264 #endif /* ASM_DECLARE_OBJECT_NAME */
5265 ASM_OUTPUT_SKIP (file, size ? size : 1);
5266 }
5267 \f
5268 /* Decide whether we must probe the stack before any space allocation
5269 on this target. It's essentially TARGET_STACK_PROBE except when
5270 -fstack-check causes the stack to be already probed differently. */
5271
5272 bool
5273 ix86_target_stack_probe (void)
5274 {
5275 /* Do not probe the stack twice if static stack checking is enabled. */
5276 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5277 return false;
5278
5279 return TARGET_STACK_PROBE;
5280 }
5281 \f
5282 /* Decide whether we can make a sibling call to a function. DECL is the
5283 declaration of the function being targeted by the call and EXP is the
5284 CALL_EXPR representing the call. */
5285
5286 static bool
5287 ix86_function_ok_for_sibcall (tree decl, tree exp)
5288 {
5289 tree type, decl_or_type;
5290 rtx a, b;
5291
5292 /* If we are generating position-independent code, we cannot sibcall
5293 optimize any indirect call, or a direct call to a global function,
5294 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5295 if (!TARGET_MACHO
5296 && !TARGET_64BIT
5297 && flag_pic
5298 && (!decl || !targetm.binds_local_p (decl)))
5299 return false;
5300
5301 /* If we need to align the outgoing stack, then sibcalling would
5302 unalign the stack, which may break the called function. */
5303 if (ix86_minimum_incoming_stack_boundary (true)
5304 < PREFERRED_STACK_BOUNDARY)
5305 return false;
5306
5307 if (decl)
5308 {
5309 decl_or_type = decl;
5310 type = TREE_TYPE (decl);
5311 }
5312 else
5313 {
5314 /* We're looking at the CALL_EXPR, we need the type of the function. */
5315 type = CALL_EXPR_FN (exp); /* pointer expression */
5316 type = TREE_TYPE (type); /* pointer type */
5317 type = TREE_TYPE (type); /* function type */
5318 decl_or_type = type;
5319 }
5320
5321 /* Check that the return value locations are the same. Like
5322 if we are returning floats on the 80387 register stack, we cannot
5323 make a sibcall from a function that doesn't return a float to a
5324 function that does or, conversely, from a function that does return
5325 a float to a function that doesn't; the necessary stack adjustment
5326 would not be executed. This is also the place we notice
5327 differences in the return value ABI. Note that it is ok for one
5328 of the functions to have void return type as long as the return
5329 value of the other is passed in a register. */
5330 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5331 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5332 cfun->decl, false);
5333 if (STACK_REG_P (a) || STACK_REG_P (b))
5334 {
5335 if (!rtx_equal_p (a, b))
5336 return false;
5337 }
5338 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5339 ;
5340 else if (!rtx_equal_p (a, b))
5341 return false;
5342
5343 if (TARGET_64BIT)
5344 {
5345 /* The SYSV ABI has more call-clobbered registers;
5346 disallow sibcalls from MS to SYSV. */
5347 if (cfun->machine->call_abi == MS_ABI
5348 && ix86_function_type_abi (type) == SYSV_ABI)
5349 return false;
5350 }
5351 else
5352 {
5353 /* If this call is indirect, we'll need to be able to use a
5354 call-clobbered register for the address of the target function.
5355 Make sure that all such registers are not used for passing
5356 parameters. Note that DLLIMPORT functions are indirect. */
5357 if (!decl
5358 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5359 {
5360 if (ix86_function_regparm (type, NULL) >= 3)
5361 {
5362 /* ??? Need to count the actual number of registers to be used,
5363 not the possible number of registers. Fix later. */
5364 return false;
5365 }
5366 }
5367 }
5368
5369 /* Otherwise okay. That also includes certain types of indirect calls. */
5370 return true;
5371 }
5372
5373 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5374 and "sseregparm" calling convention attributes;
5375 arguments as in struct attribute_spec.handler. */
5376
5377 static tree
5378 ix86_handle_cconv_attribute (tree *node, tree name,
5379 tree args,
5380 int,
5381 bool *no_add_attrs)
5382 {
5383 if (TREE_CODE (*node) != FUNCTION_TYPE
5384 && TREE_CODE (*node) != METHOD_TYPE
5385 && TREE_CODE (*node) != FIELD_DECL
5386 && TREE_CODE (*node) != TYPE_DECL)
5387 {
5388 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5389 name);
5390 *no_add_attrs = true;
5391 return NULL_TREE;
5392 }
5393
5394   /* Can combine regparm with all attributes but fastcall and thiscall.  */
5395 if (is_attribute_p ("regparm", name))
5396 {
5397 tree cst;
5398
5399 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5400 {
5401 error ("fastcall and regparm attributes are not compatible");
5402 }
5403
5404 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5405 {
5406 	  error ("regparm and thiscall attributes are not compatible");
5407 }
5408
5409 cst = TREE_VALUE (args);
5410 if (TREE_CODE (cst) != INTEGER_CST)
5411 {
5412 warning (OPT_Wattributes,
5413 "%qE attribute requires an integer constant argument",
5414 name);
5415 *no_add_attrs = true;
5416 }
5417 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5418 {
5419 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5420 name, REGPARM_MAX);
5421 *no_add_attrs = true;
5422 }
5423
5424 return NULL_TREE;
5425 }
5426
5427 if (TARGET_64BIT)
5428 {
5429 /* Do not warn when emulating the MS ABI. */
5430 if ((TREE_CODE (*node) != FUNCTION_TYPE
5431 && TREE_CODE (*node) != METHOD_TYPE)
5432 || ix86_function_type_abi (*node) != MS_ABI)
5433 warning (OPT_Wattributes, "%qE attribute ignored",
5434 name);
5435 *no_add_attrs = true;
5436 return NULL_TREE;
5437 }
5438
5439 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5440 if (is_attribute_p ("fastcall", name))
5441 {
5442 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5443 {
5444 error ("fastcall and cdecl attributes are not compatible");
5445 }
5446 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5447 {
5448 error ("fastcall and stdcall attributes are not compatible");
5449 }
5450 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5451 {
5452 error ("fastcall and regparm attributes are not compatible");
5453 }
5454 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5455 {
5456 error ("fastcall and thiscall attributes are not compatible");
5457 }
5458 }
5459
5460 /* Can combine stdcall with fastcall (redundant), regparm and
5461 sseregparm. */
5462 else if (is_attribute_p ("stdcall", name))
5463 {
5464 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5465 {
5466 error ("stdcall and cdecl attributes are not compatible");
5467 }
5468 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5469 {
5470 error ("stdcall and fastcall attributes are not compatible");
5471 }
5472 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5473 {
5474 error ("stdcall and thiscall attributes are not compatible");
5475 }
5476 }
5477
5478 /* Can combine cdecl with regparm and sseregparm. */
5479 else if (is_attribute_p ("cdecl", name))
5480 {
5481 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5482 {
5483 error ("stdcall and cdecl attributes are not compatible");
5484 }
5485 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5486 {
5487 error ("fastcall and cdecl attributes are not compatible");
5488 }
5489 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5490 {
5491 error ("cdecl and thiscall attributes are not compatible");
5492 }
5493 }
5494 else if (is_attribute_p ("thiscall", name))
5495 {
5496 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5497 	warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5498 name);
5499 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5500 {
5501 error ("stdcall and thiscall attributes are not compatible");
5502 }
5503 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5504 {
5505 error ("fastcall and thiscall attributes are not compatible");
5506 }
5507 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5508 {
5509 error ("cdecl and thiscall attributes are not compatible");
5510 }
5511 }
5512
5513 /* Can combine sseregparm with all attributes. */
5514
5515 return NULL_TREE;
5516 }
5517
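/* Illustrative uses of the checks above (declarations hypothetical):

     int __attribute__((regparm (3), stdcall)) f (int, int);
       accepted: regparm combines with stdcall
     int __attribute__((fastcall, regparm (2))) g (int);
       rejected: "fastcall and regparm attributes are not compatible"  */
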
5518 /* The transactional memory builtins are implicitly regparm or fastcall
5519 depending on the ABI. Override the generic do-nothing attribute that
5520 these builtins were declared with, and replace it with one of the two
5521 attributes that we expect elsewhere. */
5522
5523 static tree
5524 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5525 int flags, bool *no_add_attrs)
5526 {
5527 tree alt;
5528
5529 /* In no case do we want to add the placeholder attribute. */
5530 *no_add_attrs = true;
5531
5532 /* The 64-bit ABI is unchanged for transactional memory. */
5533 if (TARGET_64BIT)
5534 return NULL_TREE;
5535
5536 /* ??? Is there a better way to validate 32-bit windows? We have
5537 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5538 if (CHECK_STACK_LIMIT > 0)
5539 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5540 else
5541 {
5542 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5543 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5544 }
5545 decl_attributes (node, alt, flags);
5546
5547 return NULL_TREE;
5548 }
5549
5550 /* This function determines the calling convention from TYPE.  */
5551
5552 unsigned int
5553 ix86_get_callcvt (const_tree type)
5554 {
5555 unsigned int ret = 0;
5556 bool is_stdarg;
5557 tree attrs;
5558
5559 if (TARGET_64BIT)
5560 return IX86_CALLCVT_CDECL;
5561
5562 attrs = TYPE_ATTRIBUTES (type);
5563 if (attrs != NULL_TREE)
5564 {
5565 if (lookup_attribute ("cdecl", attrs))
5566 ret |= IX86_CALLCVT_CDECL;
5567 else if (lookup_attribute ("stdcall", attrs))
5568 ret |= IX86_CALLCVT_STDCALL;
5569 else if (lookup_attribute ("fastcall", attrs))
5570 ret |= IX86_CALLCVT_FASTCALL;
5571 else if (lookup_attribute ("thiscall", attrs))
5572 ret |= IX86_CALLCVT_THISCALL;
5573
5574       /* Regparm isn't allowed for thiscall and fastcall.  */
5575 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5576 {
5577 if (lookup_attribute ("regparm", attrs))
5578 ret |= IX86_CALLCVT_REGPARM;
5579 if (lookup_attribute ("sseregparm", attrs))
5580 ret |= IX86_CALLCVT_SSEREGPARM;
5581 }
5582
5583 if (IX86_BASE_CALLCVT(ret) != 0)
5584 return ret;
5585 }
5586
5587 is_stdarg = stdarg_p (type);
5588 if (TARGET_RTD && !is_stdarg)
5589 return IX86_CALLCVT_STDCALL | ret;
5590
5591 if (ret != 0
5592 || is_stdarg
5593 || TREE_CODE (type) != METHOD_TYPE
5594 || ix86_function_type_abi (type) != MS_ABI)
5595 return IX86_CALLCVT_CDECL | ret;
5596
5597 return IX86_CALLCVT_THISCALL;
5598 }
5599
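/* Summary of the TARGET_RTD case above (no new behavior): with -mrtd a
   non-variadic function type defaults to the stdcall convention, so the
   callee pops its own arguments, while variadic functions remain
   cdecl.  */
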
5600 /* Return 0 if the attributes for two types are incompatible, 1 if they
5601 are compatible, and 2 if they are nearly compatible (which causes a
5602 warning to be generated). */
5603
5604 static int
5605 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5606 {
5607 unsigned int ccvt1, ccvt2;
5608
5609 if (TREE_CODE (type1) != FUNCTION_TYPE
5610 && TREE_CODE (type1) != METHOD_TYPE)
5611 return 1;
5612
5613 ccvt1 = ix86_get_callcvt (type1);
5614 ccvt2 = ix86_get_callcvt (type2);
5615 if (ccvt1 != ccvt2)
5616 return 0;
5617 if (ix86_function_regparm (type1, NULL)
5618 != ix86_function_regparm (type2, NULL))
5619 return 0;
5620
5621 return 1;
5622 }
5623 \f
5624 /* Return the regparm value for a function with the indicated TYPE and DECL.
5625 DECL may be NULL when calling function indirectly
5626 or considering a libcall. */
5627
5628 static int
5629 ix86_function_regparm (const_tree type, const_tree decl)
5630 {
5631 tree attr;
5632 int regparm;
5633 unsigned int ccvt;
5634
5635 if (TARGET_64BIT)
5636 return (ix86_function_type_abi (type) == SYSV_ABI
5637 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5638 ccvt = ix86_get_callcvt (type);
5639 regparm = ix86_regparm;
5640
5641 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5642 {
5643 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5644 if (attr)
5645 {
5646 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5647 return regparm;
5648 }
5649 }
5650 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5651 return 2;
5652 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5653 return 1;
5654
5655 /* Use register calling convention for local functions when possible. */
5656 if (decl
5657 && TREE_CODE (decl) == FUNCTION_DECL
5658 /* Caller and callee must agree on the calling convention, so
5659 	 checking just `optimize' here would mean that with
5660 	 __attribute__((optimize (...))) the caller could use the regparm
5661 	 convention and the callee not, or vice versa.  Instead look at
5662 	 whether the callee is optimized or not.  */
5663 && opt_for_fn (decl, optimize)
5664 && !(profile_flag && !flag_fentry))
5665 {
5666 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5667 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
5668 if (i && i->local && i->can_change_signature)
5669 {
5670 int local_regparm, globals = 0, regno;
5671
5672 /* Make sure no regparm register is taken by a
5673 fixed register variable. */
5674 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5675 if (fixed_regs[local_regparm])
5676 break;
5677
5678 /* We don't want to use regparm(3) for nested functions as
5679 these use a static chain pointer in the third argument. */
5680 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5681 local_regparm = 2;
5682
5683 /* In 32-bit mode save a register for the split stack. */
5684 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5685 local_regparm = 2;
5686
5687 /* Each fixed register usage increases register pressure,
5688 	       so fewer registers should be used for argument passing.
5689 	       This functionality can be overridden by an explicit
5690 regparm value. */
5691 for (regno = AX_REG; regno <= DI_REG; regno++)
5692 if (fixed_regs[regno])
5693 globals++;
5694
5695 local_regparm
5696 = globals < local_regparm ? local_regparm - globals : 0;
5697
5698 if (local_regparm > regparm)
5699 regparm = local_regparm;
5700 }
5701 }
5702
5703 return regparm;
5704 }
5705
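/* Register usage implied by the values returned above, 32-bit only
   (the example declaration is hypothetical):

     regparm (3)  up to three integer args in %eax, %edx, %ecx
     fastcall     up to two integer args in %ecx, %edx
     thiscall     `this' in %ecx, remaining args on the stack

     int __attribute__((regparm (3))) add3 (int a, int b, int c);  */
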
5706 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5707 DFmode (2) arguments in SSE registers for a function with the
5708 indicated TYPE and DECL. DECL may be NULL when calling function
5709 indirectly or considering a libcall. Otherwise return 0. */
5710
5711 static int
5712 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5713 {
5714 gcc_assert (!TARGET_64BIT);
5715
5716 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5717 by the sseregparm attribute. */
5718 if (TARGET_SSEREGPARM
5719 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5720 {
5721 if (!TARGET_SSE)
5722 {
5723 if (warn)
5724 {
5725 if (decl)
5726 error ("calling %qD with attribute sseregparm without "
5727 "SSE/SSE2 enabled", decl);
5728 else
5729 error ("calling %qT with attribute sseregparm without "
5730 "SSE/SSE2 enabled", type);
5731 }
5732 return 0;
5733 }
5734
5735 return 2;
5736 }
5737
5738 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5739 (and DFmode for SSE2) arguments in SSE registers. */
5740 if (decl && TARGET_SSE_MATH && optimize
5741 && !(profile_flag && !flag_fentry))
5742 {
5743 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5744 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5745 if (i && i->local && i->can_change_signature)
5746 return TARGET_SSE2 ? 2 : 1;
5747 }
5748
5749 return 0;
5750 }
5751
5752 /* Return true if EAX is live at the start of the function. Used by
5753 ix86_expand_prologue to determine if we need special help before
5754 calling allocate_stack_worker. */
5755
5756 static bool
5757 ix86_eax_live_at_start_p (void)
5758 {
5759 /* Cheat. Don't bother working forward from ix86_function_regparm
5760 to the function type to whether an actual argument is located in
5761 eax. Instead just look at cfg info, which is still close enough
5762 to correct at this point. This gives false positives for broken
5763 functions that might use uninitialized data that happens to be
5764 allocated in eax, but who cares? */
5765 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5766 }
5767
5768 static bool
5769 ix86_keep_aggregate_return_pointer (tree fntype)
5770 {
5771 tree attr;
5772
5773 if (!TARGET_64BIT)
5774 {
5775 attr = lookup_attribute ("callee_pop_aggregate_return",
5776 TYPE_ATTRIBUTES (fntype));
5777 if (attr)
5778 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5779
5780       /* For the 32-bit MS ABI the default is to keep the aggregate
5781 	 return pointer.  */
5782 if (ix86_function_type_abi (fntype) == MS_ABI)
5783 return true;
5784 }
5785 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5786 }
5787
5788 /* Value is the number of bytes of arguments automatically
5789 popped when returning from a subroutine call.
5790 FUNDECL is the declaration node of the function (as a tree),
5791 FUNTYPE is the data type of the function (as a tree),
5792 or for a library call it is an identifier node for the subroutine name.
5793 SIZE is the number of bytes of arguments passed on the stack.
5794
5795 On the 80386, the RTD insn may be used to pop them if the number
5796 of args is fixed, but if the number is variable then the caller
5797 must pop them all. RTD can't be used for library calls now
5798 because the library is compiled with the Unix compiler.
5799 Use of RTD is a selectable option, since it is incompatible with
5800 standard Unix calling sequences. If the option is not selected,
5801 the caller must always pop the args.
5802
5803 The attribute stdcall is equivalent to RTD on a per module basis. */
5804
5805 static int
5806 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5807 {
5808 unsigned int ccvt;
5809
5810 /* None of the 64-bit ABIs pop arguments. */
5811 if (TARGET_64BIT)
5812 return 0;
5813
5814 ccvt = ix86_get_callcvt (funtype);
5815
5816 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5817 | IX86_CALLCVT_THISCALL)) != 0
5818 && ! stdarg_p (funtype))
5819 return size;
5820
5821 /* Lose any fake structure return argument if it is passed on the stack. */
5822 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5823 && !ix86_keep_aggregate_return_pointer (funtype))
5824 {
5825 int nregs = ix86_function_regparm (funtype, fundecl);
5826 if (nregs == 0)
5827 return GET_MODE_SIZE (Pmode);
5828 }
5829
5830 return 0;
5831 }
5832
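/* Worked example of the size returned above (prototype hypothetical):

     void __attribute__((stdcall)) f (int a, int b);

   Two int arguments occupy 8 bytes on the stack, so this function
   returns 8 and the callee pops them with "ret $8"; a cdecl function
   would return 0 and leave the cleanup to the caller.  */
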
5833 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5834
5835 static bool
5836 ix86_legitimate_combined_insn (rtx_insn *insn)
5837 {
5838 /* Check operand constraints in case hard registers were propagated
5839 into insn pattern. This check prevents combine pass from
5840 generating insn patterns with invalid hard register operands.
5841 These invalid insns can eventually confuse reload to error out
5842 with a spill failure. See also PRs 46829 and 46843. */
5843 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5844 {
5845 int i;
5846
5847 extract_insn (insn);
5848 preprocess_constraints (insn);
5849
5850 int n_operands = recog_data.n_operands;
5851 int n_alternatives = recog_data.n_alternatives;
5852 for (i = 0; i < n_operands; i++)
5853 {
5854 rtx op = recog_data.operand[i];
5855 enum machine_mode mode = GET_MODE (op);
5856 const operand_alternative *op_alt;
5857 int offset = 0;
5858 bool win;
5859 int j;
5860
5861 /* For pre-AVX disallow unaligned loads/stores where the
5862 instructions don't support it. */
5863 if (!TARGET_AVX
5864 && VECTOR_MODE_P (GET_MODE (op))
5865 && misaligned_operand (op, GET_MODE (op)))
5866 {
5867 int min_align = get_attr_ssememalign (insn);
5868 if (min_align == 0)
5869 return false;
5870 }
5871
5872 /* A unary operator may be accepted by the predicate, but it
5873 is irrelevant for matching constraints. */
5874 if (UNARY_P (op))
5875 op = XEXP (op, 0);
5876
5877 if (GET_CODE (op) == SUBREG)
5878 {
5879 if (REG_P (SUBREG_REG (op))
5880 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5881 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5882 GET_MODE (SUBREG_REG (op)),
5883 SUBREG_BYTE (op),
5884 GET_MODE (op));
5885 op = SUBREG_REG (op);
5886 }
5887
5888 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5889 continue;
5890
5891 op_alt = recog_op_alt;
5892
5893 /* Operand has no constraints, anything is OK. */
5894 win = !n_alternatives;
5895
5896 alternative_mask enabled = recog_data.enabled_alternatives;
5897 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
5898 {
5899 if (!TEST_BIT (enabled, j))
5900 continue;
5901 if (op_alt[i].anything_ok
5902 || (op_alt[i].matches != -1
5903 && operands_match_p
5904 (recog_data.operand[i],
5905 recog_data.operand[op_alt[i].matches]))
5906 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
5907 {
5908 win = true;
5909 break;
5910 }
5911 }
5912
5913 if (!win)
5914 return false;
5915 }
5916 }
5917
5918 return true;
5919 }
5920 \f
5921 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5922
5923 static unsigned HOST_WIDE_INT
5924 ix86_asan_shadow_offset (void)
5925 {
5926 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5927 : HOST_WIDE_INT_C (0x7fff8000))
5928 : (HOST_WIDE_INT_1 << 29);
5929 }
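
/* Sketch of how the offset above is used by the address sanitizer (the
   mapping itself lives outside this file):
   shadow_address = (address >> 3) + ix86_asan_shadow_offset (), e.g.
   0x7fff8000 for 64-bit Linux (LP64) and 1 << 29 for 32-bit.  */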
5930 \f
5931 /* Argument support functions. */
5932
5933 /* Return true when register may be used to pass function parameters. */
5934 bool
5935 ix86_function_arg_regno_p (int regno)
5936 {
5937 int i;
5938 const int *parm_regs;
5939
5940 if (!TARGET_64BIT)
5941 {
5942 if (TARGET_MACHO)
5943 return (regno < REGPARM_MAX
5944 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5945 else
5946 return (regno < REGPARM_MAX
5947 || (TARGET_MMX && MMX_REGNO_P (regno)
5948 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5949 || (TARGET_SSE && SSE_REGNO_P (regno)
5950 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5951 }
5952
5953 if (TARGET_SSE && SSE_REGNO_P (regno)
5954 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5955 return true;
5956
5957 /* TODO: The function should depend on current function ABI but
5958 builtins.c would need updating then. Therefore we use the
5959 default ABI. */
5960
5961 /* RAX is used as hidden argument to va_arg functions. */
5962 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5963 return true;
5964
5965 if (ix86_abi == MS_ABI)
5966 parm_regs = x86_64_ms_abi_int_parameter_registers;
5967 else
5968 parm_regs = x86_64_int_parameter_registers;
5969 for (i = 0; i < (ix86_abi == MS_ABI
5970 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5971 if (regno == parm_regs[i])
5972 return true;
5973 return false;
5974 }
5975
5976 /* Return true if we do not know how to pass TYPE solely in registers.  */
5977
5978 static bool
5979 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5980 {
5981 if (must_pass_in_stack_var_size_or_pad (mode, type))
5982 return true;
5983
5984 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5985 The layout_type routine is crafty and tries to trick us into passing
5986 currently unsupported vector types on the stack by using TImode. */
5987 return (!TARGET_64BIT && mode == TImode
5988 && type && TREE_CODE (type) != VECTOR_TYPE);
5989 }
5990
5991 /* Return the size, in bytes, of the area reserved for arguments passed
5992    in registers for the function represented by FNDECL, depending on the
5993    ABI used.  */
5994 int
5995 ix86_reg_parm_stack_space (const_tree fndecl)
5996 {
5997 enum calling_abi call_abi = SYSV_ABI;
5998 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5999 call_abi = ix86_function_abi (fndecl);
6000 else
6001 call_abi = ix86_function_type_abi (fndecl);
6002 if (TARGET_64BIT && call_abi == MS_ABI)
6003 return 32;
6004 return 0;
6005 }
6006
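/* For example, for a 64-bit MS_ABI function the 32 bytes returned above
   correspond to the four 8-byte "home" slots the caller reserves for
   the register arguments passed in %rcx, %rdx, %r8 and %r9.  */
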
6007 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
6008    call ABI used.  */
6009 enum calling_abi
6010 ix86_function_type_abi (const_tree fntype)
6011 {
6012 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6013 {
6014 enum calling_abi abi = ix86_abi;
6015 if (abi == SYSV_ABI)
6016 {
6017 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6018 abi = MS_ABI;
6019 }
6020 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6021 abi = SYSV_ABI;
6022 return abi;
6023 }
6024 return ix86_abi;
6025 }
6026
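/* Illustrative use of the attributes checked above (declaration
   hypothetical): on a target whose default is SYSV_ABI,

     void __attribute__((ms_abi)) win64_callback (void *);

   is reported as MS_ABI by this function, and sysv_abi works the other
   way around.  */
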
6027 /* We add this as a workaround in order to use libc_has_function
6028 hook in i386.md. */
6029 bool
6030 ix86_libc_has_function (enum function_class fn_class)
6031 {
6032 return targetm.libc_has_function (fn_class);
6033 }
6034
6035 static bool
6036 ix86_function_ms_hook_prologue (const_tree fn)
6037 {
6038 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6039 {
6040 if (decl_function_context (fn) != NULL_TREE)
6041 error_at (DECL_SOURCE_LOCATION (fn),
6042 "ms_hook_prologue is not compatible with nested function");
6043 else
6044 return true;
6045 }
6046 return false;
6047 }
6048
6049 static enum calling_abi
6050 ix86_function_abi (const_tree fndecl)
6051 {
6052 if (! fndecl)
6053 return ix86_abi;
6054 return ix86_function_type_abi (TREE_TYPE (fndecl));
6055 }
6056
6057 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
6058    call ABI used.  */
6059 enum calling_abi
6060 ix86_cfun_abi (void)
6061 {
6062 if (! cfun)
6063 return ix86_abi;
6064 return cfun->machine->call_abi;
6065 }
6066
6067 /* Write the extra assembler code needed to declare a function properly. */
6068
6069 void
6070 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6071 tree decl)
6072 {
6073 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6074
6075 if (is_ms_hook)
6076 {
6077 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6078 unsigned int filler_cc = 0xcccccccc;
6079
6080 for (i = 0; i < filler_count; i += 4)
6081 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6082 }
6083
6084 #ifdef SUBTARGET_ASM_UNWIND_INIT
6085 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6086 #endif
6087
6088 ASM_OUTPUT_LABEL (asm_out_file, fname);
6089
6090 /* Output magic byte marker, if hot-patch attribute is set. */
6091 if (is_ms_hook)
6092 {
6093 if (TARGET_64BIT)
6094 {
6095 /* leaq [%rsp + 0], %rsp */
6096 asm_fprintf (asm_out_file, ASM_BYTE
6097 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6098 }
6099 else
6100 {
6101 /* movl.s %edi, %edi
6102 push %ebp
6103 movl.s %esp, %ebp */
6104 asm_fprintf (asm_out_file, ASM_BYTE
6105 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6106 }
6107 }
6108 }
6109
6110 /* regclass.c */
6111 extern void init_regs (void);
6112
6113 /* Implementation of the call ABI switching target hook.  The call-used
6114    register set specific to FNDECL is selected.  See also
6115 ix86_conditional_register_usage for more details. */
6116 void
6117 ix86_call_abi_override (const_tree fndecl)
6118 {
6119 if (fndecl == NULL_TREE)
6120 cfun->machine->call_abi = ix86_abi;
6121 else
6122 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6123 }
6124
6125 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
6126    Avoid expensive re-initialization of init_regs each time we switch
6127    function context, since this is needed only during RTL expansion.  */
6128 static void
6129 ix86_maybe_switch_abi (void)
6130 {
6131 if (TARGET_64BIT &&
6132 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6133 reinit_regs ();
6134 }
6135
6136 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6137 for a call to a function whose data type is FNTYPE.
6138 For a library call, FNTYPE is 0. */
6139
6140 void
6141 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6142 tree fntype, /* tree ptr for function decl */
6143 rtx libname, /* SYMBOL_REF of library name or 0 */
6144 tree fndecl,
6145 int caller)
6146 {
6147 struct cgraph_local_info *i;
6148
6149 memset (cum, 0, sizeof (*cum));
6150
6151 if (fndecl)
6152 {
6153 i = cgraph_node::local_info (fndecl);
6154 cum->call_abi = ix86_function_abi (fndecl);
6155 }
6156 else
6157 {
6158 i = NULL;
6159 cum->call_abi = ix86_function_type_abi (fntype);
6160 }
6161
6162 cum->caller = caller;
6163
6164 /* Set up the number of registers to use for passing arguments. */
6165 cum->nregs = ix86_regparm;
6166 if (TARGET_64BIT)
6167 {
6168 cum->nregs = (cum->call_abi == SYSV_ABI
6169 ? X86_64_REGPARM_MAX
6170 : X86_64_MS_REGPARM_MAX);
6171 }
6172 if (TARGET_SSE)
6173 {
6174 cum->sse_nregs = SSE_REGPARM_MAX;
6175 if (TARGET_64BIT)
6176 {
6177 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6178 ? X86_64_SSE_REGPARM_MAX
6179 : X86_64_MS_SSE_REGPARM_MAX);
6180 }
6181 }
6182 if (TARGET_MMX)
6183 cum->mmx_nregs = MMX_REGPARM_MAX;
6184 cum->warn_avx512f = true;
6185 cum->warn_avx = true;
6186 cum->warn_sse = true;
6187 cum->warn_mmx = true;
6188
6189 /* Because the type might mismatch between caller and callee, we need to
6190 use the actual type of the function for local calls.
6191 FIXME: cgraph_analyze can be told to actually record if a function uses
6192 va_start, so for local functions maybe_vaarg can be made aggressive
6193 enough to help K&R code.
6194 FIXME: once the type system is fixed, we won't need this code anymore. */
6195 if (i && i->local && i->can_change_signature)
6196 fntype = TREE_TYPE (fndecl);
6197 cum->maybe_vaarg = (fntype
6198 ? (!prototype_p (fntype) || stdarg_p (fntype))
6199 : !libname);
6200
6201 if (!TARGET_64BIT)
6202 {
6203 /* If there are variable arguments, then we won't pass anything
6204 in registers in 32-bit mode. */
6205 if (stdarg_p (fntype))
6206 {
6207 cum->nregs = 0;
6208 cum->sse_nregs = 0;
6209 cum->mmx_nregs = 0;
6210 cum->warn_avx512f = false;
6211 cum->warn_avx = false;
6212 cum->warn_sse = false;
6213 cum->warn_mmx = false;
6214 return;
6215 }
6216
6217 /* Use ecx and edx registers if function has fastcall attribute,
6218 else look for regparm information. */
6219 if (fntype)
6220 {
6221 unsigned int ccvt = ix86_get_callcvt (fntype);
6222 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6223 {
6224 cum->nregs = 1;
6225 cum->fastcall = 1; /* Same first register as in fastcall. */
6226 }
6227 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6228 {
6229 cum->nregs = 2;
6230 cum->fastcall = 1;
6231 }
6232 else
6233 cum->nregs = ix86_function_regparm (fntype, fndecl);
6234 }
6235
6236 /* Set up the number of SSE registers used for passing SFmode
6237 and DFmode arguments. Warn for mismatching ABI. */
6238 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6239 }
6240 }
6241
6242 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6243 But in the case of vector types, it is some vector mode.
6244
6245 When we have only some of our vector isa extensions enabled, then there
6246 are some modes for which vector_mode_supported_p is false. For these
6247 modes, the generic vector support in gcc will choose some non-vector mode
6248 in order to implement the type. By computing the natural mode, we'll
6249 select the proper ABI location for the operand and not depend on whatever
6250 the middle-end decides to do with these vector types.
6251
6252 The middle-end can't deal with vector types > 16 bytes. In this
6253 case, we return the original mode and warn about the ABI change if CUM
6254 isn't NULL.
6255
6256 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6257 available for the function return value. */
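/* As an illustration (a rough sketch, not an exhaustive description): with
   -mno-avx, a hypothetical

       typedef double v4df __attribute__ ((vector_size (32)));

   argument typically has a non-vector TYPE_MODE (often BLKmode).  The loop
   below finds the matching V4DFmode, but because the size is 32 bytes and
   AVX is disabled, the original mode is returned and the -Wpsabi warning is
   issued instead.  */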
6258
6259 static enum machine_mode
6260 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6261 bool in_return)
6262 {
6263 enum machine_mode mode = TYPE_MODE (type);
6264
6265 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6266 {
6267 HOST_WIDE_INT size = int_size_in_bytes (type);
6268 if ((size == 8 || size == 16 || size == 32 || size == 64)
6269 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6270 && TYPE_VECTOR_SUBPARTS (type) > 1)
6271 {
6272 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6273
6274 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6275 mode = MIN_MODE_VECTOR_FLOAT;
6276 else
6277 mode = MIN_MODE_VECTOR_INT;
6278
6279 /* Get the mode which has this inner mode and number of units. */
6280 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6281 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6282 && GET_MODE_INNER (mode) == innermode)
6283 {
6284 if (size == 64 && !TARGET_AVX512F)
6285 {
6286 static bool warnedavx512f;
6287 static bool warnedavx512f_ret;
6288
6289 if (cum && cum->warn_avx512f && !warnedavx512f)
6290 {
6291 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6292 "without AVX512F enabled changes the ABI"))
6293 warnedavx512f = true;
6294 }
6295 else if (in_return && !warnedavx512f_ret)
6296 {
6297 if (warning (OPT_Wpsabi, "AVX512F vector return "
6298 "without AVX512F enabled changes the ABI"))
6299 warnedavx512f_ret = true;
6300 }
6301
6302 return TYPE_MODE (type);
6303 }
6304 else if (size == 32 && !TARGET_AVX)
6305 {
6306 static bool warnedavx;
6307 static bool warnedavx_ret;
6308
6309 if (cum && cum->warn_avx && !warnedavx)
6310 {
6311 if (warning (OPT_Wpsabi, "AVX vector argument "
6312 "without AVX enabled changes the ABI"))
6313 warnedavx = true;
6314 }
6315 else if (in_return && !warnedavx_ret)
6316 {
6317 if (warning (OPT_Wpsabi, "AVX vector return "
6318 "without AVX enabled changes the ABI"))
6319 warnedavx_ret = true;
6320 }
6321
6322 return TYPE_MODE (type);
6323 }
6324 else if (((size == 8 && TARGET_64BIT) || size == 16)
6325 && !TARGET_SSE)
6326 {
6327 static bool warnedsse;
6328 static bool warnedsse_ret;
6329
6330 if (cum && cum->warn_sse && !warnedsse)
6331 {
6332 if (warning (OPT_Wpsabi, "SSE vector argument "
6333 "without SSE enabled changes the ABI"))
6334 warnedsse = true;
6335 }
6336 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6337 {
6338 if (warning (OPT_Wpsabi, "SSE vector return "
6339 "without SSE enabled changes the ABI"))
6340 warnedsse_ret = true;
6341 }
6342 }
6343 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6344 {
6345 static bool warnedmmx;
6346 static bool warnedmmx_ret;
6347
6348 if (cum && cum->warn_mmx && !warnedmmx)
6349 {
6350 if (warning (OPT_Wpsabi, "MMX vector argument "
6351 "without MMX enabled changes the ABI"))
6352 warnedmmx = true;
6353 }
6354 else if (in_return && !warnedmmx_ret)
6355 {
6356 if (warning (OPT_Wpsabi, "MMX vector return "
6357 "without MMX enabled changes the ABI"))
6358 warnedmmx_ret = true;
6359 }
6360 }
6361 return mode;
6362 }
6363
6364 gcc_unreachable ();
6365 }
6366 }
6367
6368 return mode;
6369 }
6370
6371 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6372 this may not agree with the mode that the type system has chosen for the
6373 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6374 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6375
6376 static rtx
6377 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6378 unsigned int regno)
6379 {
6380 rtx tmp;
6381
6382 if (orig_mode != BLKmode)
6383 tmp = gen_rtx_REG (orig_mode, regno);
6384 else
6385 {
6386 tmp = gen_rtx_REG (mode, regno);
6387 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6388 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6389 }
6390
6391 return tmp;
6392 }
6393
6394 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6395 of this code is to classify each 8bytes of incoming argument by the register
6396 class and assign registers accordingly. */
6397
6398 /* Return the union class of CLASS1 and CLASS2.
6399 See the x86-64 PS ABI for details. */
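/* Worked example (a sketch): for a hypothetical

       struct s { int i; float f; };

   both fields share one eightbyte; the int contributes an integer class and
   the float an SSE class, so rule #4 below makes the whole eightbyte an
   integer class and the struct is passed in a single general-purpose
   register.  */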
6400
6401 static enum x86_64_reg_class
6402 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6403 {
6404 /* Rule #1: If both classes are equal, this is the resulting class. */
6405 if (class1 == class2)
6406 return class1;
6407
6408 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6409 the other class. */
6410 if (class1 == X86_64_NO_CLASS)
6411 return class2;
6412 if (class2 == X86_64_NO_CLASS)
6413 return class1;
6414
6415 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6416 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6417 return X86_64_MEMORY_CLASS;
6418
6419 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6420 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6421 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6422 return X86_64_INTEGERSI_CLASS;
6423 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6424 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6425 return X86_64_INTEGER_CLASS;
6426
6427 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6428 MEMORY is used. */
6429 if (class1 == X86_64_X87_CLASS
6430 || class1 == X86_64_X87UP_CLASS
6431 || class1 == X86_64_COMPLEX_X87_CLASS
6432 || class2 == X86_64_X87_CLASS
6433 || class2 == X86_64_X87UP_CLASS
6434 || class2 == X86_64_COMPLEX_X87_CLASS)
6435 return X86_64_MEMORY_CLASS;
6436
6437 /* Rule #6: Otherwise class SSE is used. */
6438 return X86_64_SSE_CLASS;
6439 }
6440
6441 /* Classify the argument of type TYPE and mode MODE.
6442 CLASSES will be filled by the register class used to pass each word
6443 of the operand. The number of words is returned. In case the parameter
6444 should be passed in memory, 0 is returned. As a special case for zero
6445 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6446
6447 BIT_OFFSET is used internally for handling records and specifies the
6448 offset in bits modulo 512 to avoid overflow cases.
6449
6450 See the x86-64 PS ABI for details.
6451 */
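/* Worked example (a sketch): a hypothetical

       struct s { double d; long l; };

   spans two eightbytes.  The double classifies the first word as
   X86_64_SSEDF_CLASS and the long classifies the second word as
   X86_64_INTEGER_CLASS, so 2 is returned with classes[] = { SSEDF, INTEGER }
   and the struct ends up split between an SSE register and an integer
   register.  */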
6452
6453 static int
6454 classify_argument (enum machine_mode mode, const_tree type,
6455 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6456 {
6457 HOST_WIDE_INT bytes =
6458 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6459 int words
6460 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6461
6462 /* Variable sized entities are always passed/returned in memory. */
6463 if (bytes < 0)
6464 return 0;
6465
6466 if (mode != VOIDmode
6467 && targetm.calls.must_pass_in_stack (mode, type))
6468 return 0;
6469
6470 if (type && AGGREGATE_TYPE_P (type))
6471 {
6472 int i;
6473 tree field;
6474 enum x86_64_reg_class subclasses[MAX_CLASSES];
6475
6476 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6477 if (bytes > 64)
6478 return 0;
6479
6480 for (i = 0; i < words; i++)
6481 classes[i] = X86_64_NO_CLASS;
6482
6483 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6484 signal the memory class, so handle them as a special case. */
6485 if (!words)
6486 {
6487 classes[0] = X86_64_NO_CLASS;
6488 return 1;
6489 }
6490
6491 /* Classify each field of record and merge classes. */
6492 switch (TREE_CODE (type))
6493 {
6494 case RECORD_TYPE:
6495 /* And now merge the fields of structure. */
6496 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6497 {
6498 if (TREE_CODE (field) == FIELD_DECL)
6499 {
6500 int num;
6501
6502 if (TREE_TYPE (field) == error_mark_node)
6503 continue;
6504
6505 /* Bitfields are always classified as integer. Handle them
6506 early, since later code would consider them to be
6507 misaligned integers. */
6508 if (DECL_BIT_FIELD (field))
6509 {
6510 for (i = (int_bit_position (field)
6511 + (bit_offset % 64)) / 8 / 8;
6512 i < ((int_bit_position (field) + (bit_offset % 64))
6513 + tree_to_shwi (DECL_SIZE (field))
6514 + 63) / 8 / 8; i++)
6515 classes[i] =
6516 merge_classes (X86_64_INTEGER_CLASS,
6517 classes[i]);
6518 }
6519 else
6520 {
6521 int pos;
6522
6523 type = TREE_TYPE (field);
6524
6525 /* Flexible array member is ignored. */
6526 if (TYPE_MODE (type) == BLKmode
6527 && TREE_CODE (type) == ARRAY_TYPE
6528 && TYPE_SIZE (type) == NULL_TREE
6529 && TYPE_DOMAIN (type) != NULL_TREE
6530 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6531 == NULL_TREE))
6532 {
6533 static bool warned;
6534
6535 if (!warned && warn_psabi)
6536 {
6537 warned = true;
6538 inform (input_location,
6539 "the ABI of passing struct with"
6540 " a flexible array member has"
6541 " changed in GCC 4.4");
6542 }
6543 continue;
6544 }
6545 num = classify_argument (TYPE_MODE (type), type,
6546 subclasses,
6547 (int_bit_position (field)
6548 + bit_offset) % 512);
6549 if (!num)
6550 return 0;
6551 pos = (int_bit_position (field)
6552 + (bit_offset % 64)) / 8 / 8;
6553 for (i = 0; i < num && (i + pos) < words; i++)
6554 classes[i + pos] =
6555 merge_classes (subclasses[i], classes[i + pos]);
6556 }
6557 }
6558 }
6559 break;
6560
6561 case ARRAY_TYPE:
6562 /* Arrays are handled as small records. */
6563 {
6564 int num;
6565 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6566 TREE_TYPE (type), subclasses, bit_offset);
6567 if (!num)
6568 return 0;
6569
6570 /* The partial classes are now full classes. */
6571 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6572 subclasses[0] = X86_64_SSE_CLASS;
6573 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6574 && !((bit_offset % 64) == 0 && bytes == 4))
6575 subclasses[0] = X86_64_INTEGER_CLASS;
6576
6577 for (i = 0; i < words; i++)
6578 classes[i] = subclasses[i % num];
6579
6580 break;
6581 }
6582 case UNION_TYPE:
6583 case QUAL_UNION_TYPE:
6584 /* Unions are similar to RECORD_TYPE but offset is always 0.
6585 */
6586 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6587 {
6588 if (TREE_CODE (field) == FIELD_DECL)
6589 {
6590 int num;
6591
6592 if (TREE_TYPE (field) == error_mark_node)
6593 continue;
6594
6595 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6596 TREE_TYPE (field), subclasses,
6597 bit_offset);
6598 if (!num)
6599 return 0;
6600 for (i = 0; i < num && i < words; i++)
6601 classes[i] = merge_classes (subclasses[i], classes[i]);
6602 }
6603 }
6604 break;
6605
6606 default:
6607 gcc_unreachable ();
6608 }
6609
6610 if (words > 2)
6611 {
6612 /* When size > 16 bytes, if the first class isn't
6613 X86_64_SSE_CLASS or any of the other classes isn't
6614 X86_64_SSEUP_CLASS, everything should be passed in
6615 memory. */
6616 if (classes[0] != X86_64_SSE_CLASS)
6617 return 0;
6618
6619 for (i = 1; i < words; i++)
6620 if (classes[i] != X86_64_SSEUP_CLASS)
6621 return 0;
6622 }
6623
6624 /* Final merger cleanup. */
6625 for (i = 0; i < words; i++)
6626 {
6627 /* If one class is MEMORY, everything should be passed in
6628 memory. */
6629 if (classes[i] == X86_64_MEMORY_CLASS)
6630 return 0;
6631
6632 /* The X86_64_SSEUP_CLASS should be always preceded by
6633 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6634 if (classes[i] == X86_64_SSEUP_CLASS
6635 && classes[i - 1] != X86_64_SSE_CLASS
6636 && classes[i - 1] != X86_64_SSEUP_CLASS)
6637 {
6638 /* The first one should never be X86_64_SSEUP_CLASS. */
6639 gcc_assert (i != 0);
6640 classes[i] = X86_64_SSE_CLASS;
6641 }
6642
6643 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6644 everything should be passed in memory. */
6645 if (classes[i] == X86_64_X87UP_CLASS
6646 && (classes[i - 1] != X86_64_X87_CLASS))
6647 {
6648 static bool warned;
6649
6650 /* The first one should never be X86_64_X87UP_CLASS. */
6651 gcc_assert (i != 0);
6652 if (!warned && warn_psabi)
6653 {
6654 warned = true;
6655 inform (input_location,
6656 "the ABI of passing union with long double"
6657 " has changed in GCC 4.4");
6658 }
6659 return 0;
6660 }
6661 }
6662 return words;
6663 }
6664
6665 /* Compute alignment needed. We align all types to natural boundaries with
6666 exception of XFmode that is aligned to 64bits. */
6667 if (mode != VOIDmode && mode != BLKmode)
6668 {
6669 int mode_alignment = GET_MODE_BITSIZE (mode);
6670
6671 if (mode == XFmode)
6672 mode_alignment = 128;
6673 else if (mode == XCmode)
6674 mode_alignment = 256;
6675 if (COMPLEX_MODE_P (mode))
6676 mode_alignment /= 2;
6677 /* Misaligned fields are always returned in memory. */
6678 if (bit_offset % mode_alignment)
6679 return 0;
6680 }
6681
6682 /* For V1xx modes, just use the base mode. */
6683 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6684 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6685 mode = GET_MODE_INNER (mode);
6686
6687 /* Classification of atomic types. */
6688 switch (mode)
6689 {
6690 case SDmode:
6691 case DDmode:
6692 classes[0] = X86_64_SSE_CLASS;
6693 return 1;
6694 case TDmode:
6695 classes[0] = X86_64_SSE_CLASS;
6696 classes[1] = X86_64_SSEUP_CLASS;
6697 return 2;
6698 case DImode:
6699 case SImode:
6700 case HImode:
6701 case QImode:
6702 case CSImode:
6703 case CHImode:
6704 case CQImode:
6705 {
6706 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6707
6708 /* Analyze last 128 bits only. */
6709 size = (size - 1) & 0x7f;
6710
6711 if (size < 32)
6712 {
6713 classes[0] = X86_64_INTEGERSI_CLASS;
6714 return 1;
6715 }
6716 else if (size < 64)
6717 {
6718 classes[0] = X86_64_INTEGER_CLASS;
6719 return 1;
6720 }
6721 else if (size < 64+32)
6722 {
6723 classes[0] = X86_64_INTEGER_CLASS;
6724 classes[1] = X86_64_INTEGERSI_CLASS;
6725 return 2;
6726 }
6727 else if (size < 64+64)
6728 {
6729 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6730 return 2;
6731 }
6732 else
6733 gcc_unreachable ();
6734 }
6735 case CDImode:
6736 case TImode:
6737 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6738 return 2;
6739 case COImode:
6740 case OImode:
6741 /* OImode shouldn't be used directly. */
6742 gcc_unreachable ();
6743 case CTImode:
6744 return 0;
6745 case SFmode:
6746 if (!(bit_offset % 64))
6747 classes[0] = X86_64_SSESF_CLASS;
6748 else
6749 classes[0] = X86_64_SSE_CLASS;
6750 return 1;
6751 case DFmode:
6752 classes[0] = X86_64_SSEDF_CLASS;
6753 return 1;
6754 case XFmode:
6755 classes[0] = X86_64_X87_CLASS;
6756 classes[1] = X86_64_X87UP_CLASS;
6757 return 2;
6758 case TFmode:
6759 classes[0] = X86_64_SSE_CLASS;
6760 classes[1] = X86_64_SSEUP_CLASS;
6761 return 2;
6762 case SCmode:
6763 classes[0] = X86_64_SSE_CLASS;
6764 if (!(bit_offset % 64))
6765 return 1;
6766 else
6767 {
6768 static bool warned;
6769
6770 if (!warned && warn_psabi)
6771 {
6772 warned = true;
6773 inform (input_location,
6774 "the ABI of passing structure with complex float"
6775 " member has changed in GCC 4.4");
6776 }
6777 classes[1] = X86_64_SSESF_CLASS;
6778 return 2;
6779 }
6780 case DCmode:
6781 classes[0] = X86_64_SSEDF_CLASS;
6782 classes[1] = X86_64_SSEDF_CLASS;
6783 return 2;
6784 case XCmode:
6785 classes[0] = X86_64_COMPLEX_X87_CLASS;
6786 return 1;
6787 case TCmode:
6788 /* This mode is larger than 16 bytes. */
6789 return 0;
6790 case V8SFmode:
6791 case V8SImode:
6792 case V32QImode:
6793 case V16HImode:
6794 case V4DFmode:
6795 case V4DImode:
6796 classes[0] = X86_64_SSE_CLASS;
6797 classes[1] = X86_64_SSEUP_CLASS;
6798 classes[2] = X86_64_SSEUP_CLASS;
6799 classes[3] = X86_64_SSEUP_CLASS;
6800 return 4;
6801 case V8DFmode:
6802 case V16SFmode:
6803 case V8DImode:
6804 case V16SImode:
6805 case V32HImode:
6806 case V64QImode:
6807 classes[0] = X86_64_SSE_CLASS;
6808 classes[1] = X86_64_SSEUP_CLASS;
6809 classes[2] = X86_64_SSEUP_CLASS;
6810 classes[3] = X86_64_SSEUP_CLASS;
6811 classes[4] = X86_64_SSEUP_CLASS;
6812 classes[5] = X86_64_SSEUP_CLASS;
6813 classes[6] = X86_64_SSEUP_CLASS;
6814 classes[7] = X86_64_SSEUP_CLASS;
6815 return 8;
6816 case V4SFmode:
6817 case V4SImode:
6818 case V16QImode:
6819 case V8HImode:
6820 case V2DFmode:
6821 case V2DImode:
6822 classes[0] = X86_64_SSE_CLASS;
6823 classes[1] = X86_64_SSEUP_CLASS;
6824 return 2;
6825 case V1TImode:
6826 case V1DImode:
6827 case V2SFmode:
6828 case V2SImode:
6829 case V4HImode:
6830 case V8QImode:
6831 classes[0] = X86_64_SSE_CLASS;
6832 return 1;
6833 case BLKmode:
6834 case VOIDmode:
6835 return 0;
6836 default:
6837 gcc_assert (VECTOR_MODE_P (mode));
6838
6839 if (bytes > 16)
6840 return 0;
6841
6842 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6843
6844 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6845 classes[0] = X86_64_INTEGERSI_CLASS;
6846 else
6847 classes[0] = X86_64_INTEGER_CLASS;
6848 classes[1] = X86_64_INTEGER_CLASS;
6849 return 1 + (bytes > 8);
6850 }
6851 }
6852
6853 /* Examine the argument and compute the number of registers required in each
6854 class. Return true iff the parameter should be passed in memory. */
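/* For the struct { double d; long l; } sketch above, this sets
   *int_nregs = 1 and *sse_nregs = 1 and returns false, i.e. the argument
   fits in registers (an illustration only).  */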
6855
6856 static bool
6857 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6858 int *int_nregs, int *sse_nregs)
6859 {
6860 enum x86_64_reg_class regclass[MAX_CLASSES];
6861 int n = classify_argument (mode, type, regclass, 0);
6862
6863 *int_nregs = 0;
6864 *sse_nregs = 0;
6865
6866 if (!n)
6867 return true;
6868 for (n--; n >= 0; n--)
6869 switch (regclass[n])
6870 {
6871 case X86_64_INTEGER_CLASS:
6872 case X86_64_INTEGERSI_CLASS:
6873 (*int_nregs)++;
6874 break;
6875 case X86_64_SSE_CLASS:
6876 case X86_64_SSESF_CLASS:
6877 case X86_64_SSEDF_CLASS:
6878 (*sse_nregs)++;
6879 break;
6880 case X86_64_NO_CLASS:
6881 case X86_64_SSEUP_CLASS:
6882 break;
6883 case X86_64_X87_CLASS:
6884 case X86_64_X87UP_CLASS:
6885 case X86_64_COMPLEX_X87_CLASS:
6886 if (!in_return)
6887 return true;
6888 break;
6889 case X86_64_MEMORY_CLASS:
6890 gcc_unreachable ();
6891 }
6892
6893 return false;
6894 }
6895
6896 /* Construct container for the argument used by GCC interface. See
6897 FUNCTION_ARG for the detailed description. */
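/* Sketch of the result for the struct { double d; long l; } example above
   (an illustration; the exact registers depend on the intreg and sse_regno
   arguments):

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI rdi) (const_int 8))])

   i.e. the double travels in an SSE register and the long in an integer
   register, each tagged with its byte offset within the struct.  */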
6898
6899 static rtx
6900 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6901 const_tree type, int in_return, int nintregs, int nsseregs,
6902 const int *intreg, int sse_regno)
6903 {
6904 /* The following variables hold the static issued_error state. */
6905 static bool issued_sse_arg_error;
6906 static bool issued_sse_ret_error;
6907 static bool issued_x87_ret_error;
6908
6909 enum machine_mode tmpmode;
6910 int bytes =
6911 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6912 enum x86_64_reg_class regclass[MAX_CLASSES];
6913 int n;
6914 int i;
6915 int nexps = 0;
6916 int needed_sseregs, needed_intregs;
6917 rtx exp[MAX_CLASSES];
6918 rtx ret;
6919
6920 n = classify_argument (mode, type, regclass, 0);
6921 if (!n)
6922 return NULL;
6923 if (examine_argument (mode, type, in_return, &needed_intregs,
6924 &needed_sseregs))
6925 return NULL;
6926 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6927 return NULL;
6928
6929 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6930 some less clueful developer tries to use floating-point anyway. */
6931 if (needed_sseregs && !TARGET_SSE)
6932 {
6933 if (in_return)
6934 {
6935 if (!issued_sse_ret_error)
6936 {
6937 error ("SSE register return with SSE disabled");
6938 issued_sse_ret_error = true;
6939 }
6940 }
6941 else if (!issued_sse_arg_error)
6942 {
6943 error ("SSE register argument with SSE disabled");
6944 issued_sse_arg_error = true;
6945 }
6946 return NULL;
6947 }
6948
6949 /* Likewise, error if the ABI requires us to return values in the
6950 x87 registers and the user specified -mno-80387. */
6951 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6952 for (i = 0; i < n; i++)
6953 if (regclass[i] == X86_64_X87_CLASS
6954 || regclass[i] == X86_64_X87UP_CLASS
6955 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6956 {
6957 if (!issued_x87_ret_error)
6958 {
6959 error ("x87 register return with x87 disabled");
6960 issued_x87_ret_error = true;
6961 }
6962 return NULL;
6963 }
6964
6965 /* First construct simple cases. Avoid SCmode, since we want to use
6966 single register to pass this type. */
6967 if (n == 1 && mode != SCmode)
6968 switch (regclass[0])
6969 {
6970 case X86_64_INTEGER_CLASS:
6971 case X86_64_INTEGERSI_CLASS:
6972 return gen_rtx_REG (mode, intreg[0]);
6973 case X86_64_SSE_CLASS:
6974 case X86_64_SSESF_CLASS:
6975 case X86_64_SSEDF_CLASS:
6976 if (mode != BLKmode)
6977 return gen_reg_or_parallel (mode, orig_mode,
6978 SSE_REGNO (sse_regno));
6979 break;
6980 case X86_64_X87_CLASS:
6981 case X86_64_COMPLEX_X87_CLASS:
6982 return gen_rtx_REG (mode, FIRST_STACK_REG);
6983 case X86_64_NO_CLASS:
6984 /* Zero sized array, struct or class. */
6985 return NULL;
6986 default:
6987 gcc_unreachable ();
6988 }
6989 if (n == 2
6990 && regclass[0] == X86_64_SSE_CLASS
6991 && regclass[1] == X86_64_SSEUP_CLASS
6992 && mode != BLKmode)
6993 return gen_reg_or_parallel (mode, orig_mode,
6994 SSE_REGNO (sse_regno));
6995 if (n == 4
6996 && regclass[0] == X86_64_SSE_CLASS
6997 && regclass[1] == X86_64_SSEUP_CLASS
6998 && regclass[2] == X86_64_SSEUP_CLASS
6999 && regclass[3] == X86_64_SSEUP_CLASS
7000 && mode != BLKmode)
7001 return gen_reg_or_parallel (mode, orig_mode,
7002 SSE_REGNO (sse_regno));
7003 if (n == 8
7004 && regclass[0] == X86_64_SSE_CLASS
7005 && regclass[1] == X86_64_SSEUP_CLASS
7006 && regclass[2] == X86_64_SSEUP_CLASS
7007 && regclass[3] == X86_64_SSEUP_CLASS
7008 && regclass[4] == X86_64_SSEUP_CLASS
7009 && regclass[5] == X86_64_SSEUP_CLASS
7010 && regclass[6] == X86_64_SSEUP_CLASS
7011 && regclass[7] == X86_64_SSEUP_CLASS
7012 && mode != BLKmode)
7013 return gen_reg_or_parallel (mode, orig_mode,
7014 SSE_REGNO (sse_regno));
7015 if (n == 2
7016 && regclass[0] == X86_64_X87_CLASS
7017 && regclass[1] == X86_64_X87UP_CLASS)
7018 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7019
7020 if (n == 2
7021 && regclass[0] == X86_64_INTEGER_CLASS
7022 && regclass[1] == X86_64_INTEGER_CLASS
7023 && (mode == CDImode || mode == TImode)
7024 && intreg[0] + 1 == intreg[1])
7025 return gen_rtx_REG (mode, intreg[0]);
7026
7027 /* Otherwise figure out the entries of the PARALLEL. */
7028 for (i = 0; i < n; i++)
7029 {
7030 int pos;
7031
7032 switch (regclass[i])
7033 {
7034 case X86_64_NO_CLASS:
7035 break;
7036 case X86_64_INTEGER_CLASS:
7037 case X86_64_INTEGERSI_CLASS:
7038 /* Merge TImodes on aligned occasions here too. */
7039 if (i * 8 + 8 > bytes)
7040 tmpmode
7041 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7042 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7043 tmpmode = SImode;
7044 else
7045 tmpmode = DImode;
7046 /* We've requested 24 bytes for which we
7047 don't have a mode. Use DImode. */
7048 if (tmpmode == BLKmode)
7049 tmpmode = DImode;
7050 exp [nexps++]
7051 = gen_rtx_EXPR_LIST (VOIDmode,
7052 gen_rtx_REG (tmpmode, *intreg),
7053 GEN_INT (i*8));
7054 intreg++;
7055 break;
7056 case X86_64_SSESF_CLASS:
7057 exp [nexps++]
7058 = gen_rtx_EXPR_LIST (VOIDmode,
7059 gen_rtx_REG (SFmode,
7060 SSE_REGNO (sse_regno)),
7061 GEN_INT (i*8));
7062 sse_regno++;
7063 break;
7064 case X86_64_SSEDF_CLASS:
7065 exp [nexps++]
7066 = gen_rtx_EXPR_LIST (VOIDmode,
7067 gen_rtx_REG (DFmode,
7068 SSE_REGNO (sse_regno)),
7069 GEN_INT (i*8));
7070 sse_regno++;
7071 break;
7072 case X86_64_SSE_CLASS:
7073 pos = i;
7074 switch (n)
7075 {
7076 case 1:
7077 tmpmode = DImode;
7078 break;
7079 case 2:
7080 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7081 {
7082 tmpmode = TImode;
7083 i++;
7084 }
7085 else
7086 tmpmode = DImode;
7087 break;
7088 case 4:
7089 gcc_assert (i == 0
7090 && regclass[1] == X86_64_SSEUP_CLASS
7091 && regclass[2] == X86_64_SSEUP_CLASS
7092 && regclass[3] == X86_64_SSEUP_CLASS);
7093 tmpmode = OImode;
7094 i += 3;
7095 break;
7096 case 8:
7097 gcc_assert (i == 0
7098 && regclass[1] == X86_64_SSEUP_CLASS
7099 && regclass[2] == X86_64_SSEUP_CLASS
7100 && regclass[3] == X86_64_SSEUP_CLASS
7101 && regclass[4] == X86_64_SSEUP_CLASS
7102 && regclass[5] == X86_64_SSEUP_CLASS
7103 && regclass[6] == X86_64_SSEUP_CLASS
7104 && regclass[7] == X86_64_SSEUP_CLASS);
7105 tmpmode = XImode;
7106 i += 7;
7107 break;
7108 default:
7109 gcc_unreachable ();
7110 }
7111 exp [nexps++]
7112 = gen_rtx_EXPR_LIST (VOIDmode,
7113 gen_rtx_REG (tmpmode,
7114 SSE_REGNO (sse_regno)),
7115 GEN_INT (pos*8));
7116 sse_regno++;
7117 break;
7118 default:
7119 gcc_unreachable ();
7120 }
7121 }
7122
7123 /* Empty aligned struct, union or class. */
7124 if (nexps == 0)
7125 return NULL;
7126
7127 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7128 for (i = 0; i < nexps; i++)
7129 XVECEXP (ret, 0, i) = exp [i];
7130 return ret;
7131 }
7132
7133 /* Update the data in CUM to advance over an argument of mode MODE
7134 and data type TYPE. (TYPE is null for libcalls where that information
7135 may not be available.) */
7136
7137 static void
7138 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7139 const_tree type, HOST_WIDE_INT bytes,
7140 HOST_WIDE_INT words)
7141 {
7142 switch (mode)
7143 {
7144 default:
7145 break;
7146
7147 case BLKmode:
7148 if (bytes < 0)
7149 break;
7150 /* FALLTHRU */
7151
7152 case DImode:
7153 case SImode:
7154 case HImode:
7155 case QImode:
7156 cum->words += words;
7157 cum->nregs -= words;
7158 cum->regno += words;
7159
7160 if (cum->nregs <= 0)
7161 {
7162 cum->nregs = 0;
7163 cum->regno = 0;
7164 }
7165 break;
7166
7167 case OImode:
7168 /* OImode shouldn't be used directly. */
7169 gcc_unreachable ();
7170
7171 case DFmode:
7172 if (cum->float_in_sse < 2)
7173 break;
7174 case SFmode:
7175 if (cum->float_in_sse < 1)
7176 break;
7177 /* FALLTHRU */
7178
7179 case V8SFmode:
7180 case V8SImode:
7181 case V64QImode:
7182 case V32HImode:
7183 case V16SImode:
7184 case V8DImode:
7185 case V16SFmode:
7186 case V8DFmode:
7187 case V32QImode:
7188 case V16HImode:
7189 case V4DFmode:
7190 case V4DImode:
7191 case TImode:
7192 case V16QImode:
7193 case V8HImode:
7194 case V4SImode:
7195 case V2DImode:
7196 case V4SFmode:
7197 case V2DFmode:
7198 if (!type || !AGGREGATE_TYPE_P (type))
7199 {
7200 cum->sse_words += words;
7201 cum->sse_nregs -= 1;
7202 cum->sse_regno += 1;
7203 if (cum->sse_nregs <= 0)
7204 {
7205 cum->sse_nregs = 0;
7206 cum->sse_regno = 0;
7207 }
7208 }
7209 break;
7210
7211 case V8QImode:
7212 case V4HImode:
7213 case V2SImode:
7214 case V2SFmode:
7215 case V1TImode:
7216 case V1DImode:
7217 if (!type || !AGGREGATE_TYPE_P (type))
7218 {
7219 cum->mmx_words += words;
7220 cum->mmx_nregs -= 1;
7221 cum->mmx_regno += 1;
7222 if (cum->mmx_nregs <= 0)
7223 {
7224 cum->mmx_nregs = 0;
7225 cum->mmx_regno = 0;
7226 }
7227 }
7228 break;
7229 }
7230 }
7231
7232 static void
7233 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7234 const_tree type, HOST_WIDE_INT words, bool named)
7235 {
7236 int int_nregs, sse_nregs;
7237
7238 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
7239 if (!named && (VALID_AVX512F_REG_MODE (mode)
7240 || VALID_AVX256_REG_MODE (mode)))
7241 return;
7242
7243 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7244 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7245 {
7246 cum->nregs -= int_nregs;
7247 cum->sse_nregs -= sse_nregs;
7248 cum->regno += int_nregs;
7249 cum->sse_regno += sse_nregs;
7250 }
7251 else
7252 {
7253 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7254 cum->words = (cum->words + align - 1) & ~(align - 1);
7255 cum->words += words;
7256 }
7257 }
7258
7259 static void
7260 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7261 HOST_WIDE_INT words)
7262 {
7263 /* Otherwise, this should be passed indirectly. */
7264 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7265
7266 cum->words += words;
7267 if (cum->nregs > 0)
7268 {
7269 cum->nregs -= 1;
7270 cum->regno += 1;
7271 }
7272 }
7273
7274 /* Update the data in CUM to advance over an argument of mode MODE and
7275 data type TYPE. (TYPE is null for libcalls where that information
7276 may not be available.) */
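/* Illustration (a sketch, continuing the example above): after the
   struct { double d; long l; } argument is passed on 64-bit SysV,
   function_arg_advance_64 consumes one integer and one SSE register, so
   cum->nregs and cum->sse_nregs each drop by one and cum->regno and
   cum->sse_regno each advance by one.  */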
7277
7278 static void
7279 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7280 const_tree type, bool named)
7281 {
7282 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7283 HOST_WIDE_INT bytes, words;
7284
7285 if (mode == BLKmode)
7286 bytes = int_size_in_bytes (type);
7287 else
7288 bytes = GET_MODE_SIZE (mode);
7289 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7290
7291 if (type)
7292 mode = type_natural_mode (type, NULL, false);
7293
7294 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7295 function_arg_advance_ms_64 (cum, bytes, words);
7296 else if (TARGET_64BIT)
7297 function_arg_advance_64 (cum, mode, type, words, named);
7298 else
7299 function_arg_advance_32 (cum, mode, type, bytes, words);
7300 }
7301
7302 /* Define where to put the arguments to a function.
7303 Value is zero to push the argument on the stack,
7304 or a hard register in which to store the argument.
7305
7306 MODE is the argument's machine mode.
7307 TYPE is the data type of the argument (as a tree).
7308 This is null for libcalls where that information may
7309 not be available.
7310 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7311 the preceding args and about the function being called.
7312 NAMED is nonzero if this argument is a named parameter
7313 (otherwise it is an extra parameter matching an ellipsis). */
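/* Illustration of the 32-bit register conventions handled below (a sketch):
   with __attribute__ ((fastcall)), the first two named integer arguments of
   SImode or smaller go in ECX and EDX (the AX_REG starting point is remapped
   to CX_REG below), while DImode, BLKmode and aggregate arguments fall back
   to the stack.  */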
7314
7315 static rtx
7316 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7317 enum machine_mode orig_mode, const_tree type,
7318 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7319 {
7320 /* Avoid the AL settings for the Unix64 ABI. */
7321 if (mode == VOIDmode)
7322 return constm1_rtx;
7323
7324 switch (mode)
7325 {
7326 default:
7327 break;
7328
7329 case BLKmode:
7330 if (bytes < 0)
7331 break;
7332 /* FALLTHRU */
7333 case DImode:
7334 case SImode:
7335 case HImode:
7336 case QImode:
7337 if (words <= cum->nregs)
7338 {
7339 int regno = cum->regno;
7340
7341 /* Fastcall allocates the first two DWORD (SImode) or
7342 smaller arguments to ECX and EDX if they aren't
7343 aggregate types. */
7344 if (cum->fastcall)
7345 {
7346 if (mode == BLKmode
7347 || mode == DImode
7348 || (type && AGGREGATE_TYPE_P (type)))
7349 break;
7350
7351 /* ECX not EAX is the first allocated register. */
7352 if (regno == AX_REG)
7353 regno = CX_REG;
7354 }
7355 return gen_rtx_REG (mode, regno);
7356 }
7357 break;
7358
7359 case DFmode:
7360 if (cum->float_in_sse < 2)
7361 break;
7362 case SFmode:
7363 if (cum->float_in_sse < 1)
7364 break;
7365 /* FALLTHRU */
7366 case TImode:
7367 /* In 32bit, we pass TImode in xmm registers. */
7368 case V16QImode:
7369 case V8HImode:
7370 case V4SImode:
7371 case V2DImode:
7372 case V4SFmode:
7373 case V2DFmode:
7374 if (!type || !AGGREGATE_TYPE_P (type))
7375 {
7376 if (cum->sse_nregs)
7377 return gen_reg_or_parallel (mode, orig_mode,
7378 cum->sse_regno + FIRST_SSE_REG);
7379 }
7380 break;
7381
7382 case OImode:
7383 case XImode:
7384 /* OImode and XImode shouldn't be used directly. */
7385 gcc_unreachable ();
7386
7387 case V64QImode:
7388 case V32HImode:
7389 case V16SImode:
7390 case V8DImode:
7391 case V16SFmode:
7392 case V8DFmode:
7393 case V8SFmode:
7394 case V8SImode:
7395 case V32QImode:
7396 case V16HImode:
7397 case V4DFmode:
7398 case V4DImode:
7399 if (!type || !AGGREGATE_TYPE_P (type))
7400 {
7401 if (cum->sse_nregs)
7402 return gen_reg_or_parallel (mode, orig_mode,
7403 cum->sse_regno + FIRST_SSE_REG);
7404 }
7405 break;
7406
7407 case V8QImode:
7408 case V4HImode:
7409 case V2SImode:
7410 case V2SFmode:
7411 case V1TImode:
7412 case V1DImode:
7413 if (!type || !AGGREGATE_TYPE_P (type))
7414 {
7415 if (cum->mmx_nregs)
7416 return gen_reg_or_parallel (mode, orig_mode,
7417 cum->mmx_regno + FIRST_MMX_REG);
7418 }
7419 break;
7420 }
7421
7422 return NULL_RTX;
7423 }
7424
7425 static rtx
7426 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7427 enum machine_mode orig_mode, const_tree type, bool named)
7428 {
7429 /* Handle a hidden AL argument containing number of registers
7430 for varargs x86-64 functions. */
7431 if (mode == VOIDmode)
7432 return GEN_INT (cum->maybe_vaarg
7433 ? (cum->sse_nregs < 0
7434 ? X86_64_SSE_REGPARM_MAX
7435 : cum->sse_regno)
7436 : -1);
7437
7438 switch (mode)
7439 {
7440 default:
7441 break;
7442
7443 case V8SFmode:
7444 case V8SImode:
7445 case V32QImode:
7446 case V16HImode:
7447 case V4DFmode:
7448 case V4DImode:
7449 case V16SFmode:
7450 case V16SImode:
7451 case V64QImode:
7452 case V32HImode:
7453 case V8DFmode:
7454 case V8DImode:
7455 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
7456 if (!named)
7457 return NULL;
7458 break;
7459 }
7460
7461 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7462 cum->sse_nregs,
7463 &x86_64_int_parameter_registers [cum->regno],
7464 cum->sse_regno);
7465 }
7466
7467 static rtx
7468 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7469 enum machine_mode orig_mode, bool named,
7470 HOST_WIDE_INT bytes)
7471 {
7472 unsigned int regno;
7473
7474 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7475 We use the value -2 to specify that the current function call is MS ABI. */
7476 if (mode == VOIDmode)
7477 return GEN_INT (-2);
7478
7479 /* If we've run out of registers, it goes on the stack. */
7480 if (cum->nregs == 0)
7481 return NULL_RTX;
7482
7483 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7484
7485 /* Only floating point modes are passed in anything but integer regs. */
7486 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7487 {
7488 if (named)
7489 regno = cum->regno + FIRST_SSE_REG;
7490 else
7491 {
7492 rtx t1, t2;
7493
7494 /* Unnamed floating parameters are passed in both the
7495 SSE and integer registers. */
7496 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7497 t2 = gen_rtx_REG (mode, regno);
7498 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7499 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7500 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7501 }
7502 }
7503 /* Handle aggregate types passed in registers. */
7504 if (orig_mode == BLKmode)
7505 {
7506 if (bytes > 0 && bytes <= 8)
7507 mode = (bytes > 4 ? DImode : SImode);
7508 if (mode == BLKmode)
7509 mode = DImode;
7510 }
7511
7512 return gen_reg_or_parallel (mode, orig_mode, regno);
7513 }
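/* Illustration for the MS ABI varargs case above (a sketch): an unnamed
   double in the third argument slot is described by a PARALLEL pairing
   (reg:DF xmm2) with (reg:DF r8), so the caller materializes the value in
   both the SSE and the integer register as the convention requires.  */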
7514
7515 /* Return where to put the arguments to a function.
7516 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7517
7518 MODE is the argument's machine mode. TYPE is the data type of the
7519 argument. It is null for libcalls where that information may not be
7520 available. CUM gives information about the preceding args and about
7521 the function being called. NAMED is nonzero if this argument is a
7522 named parameter (otherwise it is an extra parameter matching an
7523 ellipsis). */
7524
7525 static rtx
7526 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7527 const_tree type, bool named)
7528 {
7529 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7530 enum machine_mode mode = omode;
7531 HOST_WIDE_INT bytes, words;
7532 rtx arg;
7533
7534 if (mode == BLKmode)
7535 bytes = int_size_in_bytes (type);
7536 else
7537 bytes = GET_MODE_SIZE (mode);
7538 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7539
7540 /* To simplify the code below, represent vector types with a vector mode
7541 even if MMX/SSE are not active. */
7542 if (type && TREE_CODE (type) == VECTOR_TYPE)
7543 mode = type_natural_mode (type, cum, false);
7544
7545 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7546 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7547 else if (TARGET_64BIT)
7548 arg = function_arg_64 (cum, mode, omode, type, named);
7549 else
7550 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7551
7552 return arg;
7553 }
7554
7555 /* A C expression that indicates when an argument must be passed by
7556 reference. If nonzero for an argument, a copy of that argument is
7557 made in memory and a pointer to the argument is passed instead of
7558 the argument itself. The pointer is passed in whatever way is
7559 appropriate for passing a pointer to that type. */
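/* Illustration (a sketch): under the MS x64 convention handled below, __m128
   values, arrays and any aggregate whose size is not 1, 2, 4 or 8 bytes
   (say a hypothetical 24-byte struct) are passed by reference; for the
   SysV x86-64 ABI only variable-sized types take this path.  */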
7560
7561 static bool
7562 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7563 const_tree type, bool)
7564 {
7565 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7566
7567 /* See Windows x64 Software Convention. */
7568 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7569 {
7570 int msize = (int) GET_MODE_SIZE (mode);
7571 if (type)
7572 {
7573 /* Arrays are passed by reference. */
7574 if (TREE_CODE (type) == ARRAY_TYPE)
7575 return true;
7576
7577 if (AGGREGATE_TYPE_P (type))
7578 {
7579 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7580 are passed by reference. */
7581 msize = int_size_in_bytes (type);
7582 }
7583 }
7584
7585 /* __m128 is passed by reference. */
7586 switch (msize) {
7587 case 1: case 2: case 4: case 8:
7588 break;
7589 default:
7590 return true;
7591 }
7592 }
7593 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7594 return 1;
7595
7596 return 0;
7597 }
7598
7599 /* Return true when TYPE should be 128bit aligned for 32bit argument
7600 passing ABI. XXX: This function is obsolete and is only used for
7601 checking psABI compatibility with previous versions of GCC. */
7602
7603 static bool
7604 ix86_compat_aligned_value_p (const_tree type)
7605 {
7606 enum machine_mode mode = TYPE_MODE (type);
7607 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7608 || mode == TDmode
7609 || mode == TFmode
7610 || mode == TCmode)
7611 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7612 return true;
7613 if (TYPE_ALIGN (type) < 128)
7614 return false;
7615
7616 if (AGGREGATE_TYPE_P (type))
7617 {
7618 /* Walk the aggregates recursively. */
7619 switch (TREE_CODE (type))
7620 {
7621 case RECORD_TYPE:
7622 case UNION_TYPE:
7623 case QUAL_UNION_TYPE:
7624 {
7625 tree field;
7626
7627 /* Walk all the structure fields. */
7628 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7629 {
7630 if (TREE_CODE (field) == FIELD_DECL
7631 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7632 return true;
7633 }
7634 break;
7635 }
7636
7637 case ARRAY_TYPE:
7638 /* Just for use if some languages pass arrays by value. */
7639 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7640 return true;
7641 break;
7642
7643 default:
7644 gcc_unreachable ();
7645 }
7646 }
7647 return false;
7648 }
7649
7650 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7651 XXX: This function is obsolete and is only used for checking psABI
7652 compatibility with previous versions of GCC. */
7653
7654 static unsigned int
7655 ix86_compat_function_arg_boundary (enum machine_mode mode,
7656 const_tree type, unsigned int align)
7657 {
7658 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7659 natural boundaries. */
7660 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7661 {
7662 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7663 make an exception for SSE modes since these require 128bit
7664 alignment.
7665
7666 The handling here differs from field_alignment. ICC aligns MMX
7667 arguments to 4 byte boundaries, while structure fields are aligned
7668 to 8 byte boundaries. */
7669 if (!type)
7670 {
7671 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7672 align = PARM_BOUNDARY;
7673 }
7674 else
7675 {
7676 if (!ix86_compat_aligned_value_p (type))
7677 align = PARM_BOUNDARY;
7678 }
7679 }
7680 if (align > BIGGEST_ALIGNMENT)
7681 align = BIGGEST_ALIGNMENT;
7682 return align;
7683 }
7684
7685 /* Return true when TYPE should be 128bit aligned for 32bit argument
7686 passing ABI. */
7687
7688 static bool
7689 ix86_contains_aligned_value_p (const_tree type)
7690 {
7691 enum machine_mode mode = TYPE_MODE (type);
7692
7693 if (mode == XFmode || mode == XCmode)
7694 return false;
7695
7696 if (TYPE_ALIGN (type) < 128)
7697 return false;
7698
7699 if (AGGREGATE_TYPE_P (type))
7700 {
7701 /* Walk the aggregates recursively. */
7702 switch (TREE_CODE (type))
7703 {
7704 case RECORD_TYPE:
7705 case UNION_TYPE:
7706 case QUAL_UNION_TYPE:
7707 {
7708 tree field;
7709
7710 /* Walk all the structure fields. */
7711 for (field = TYPE_FIELDS (type);
7712 field;
7713 field = DECL_CHAIN (field))
7714 {
7715 if (TREE_CODE (field) == FIELD_DECL
7716 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7717 return true;
7718 }
7719 break;
7720 }
7721
7722 case ARRAY_TYPE:
7723 /* Just for use if some languages pass arrays by value. */
7724 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7725 return true;
7726 break;
7727
7728 default:
7729 gcc_unreachable ();
7730 }
7731 }
7732 else
7733 return TYPE_ALIGN (type) >= 128;
7734
7735 return false;
7736 }
7737
7738 /* Gives the alignment boundary, in bits, of an argument with the
7739 specified mode and type. */
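/* Illustration (a sketch): on 32-bit targets a double argument still gets
   the 32-bit PARM_BOUNDARY, whereas an __m128 (or any type requiring 128-bit
   alignment) is aligned to 128 bits; on x86-64 the type's own alignment is
   used, but never less than PARM_BOUNDARY.  */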
7740
7741 static unsigned int
7742 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7743 {
7744 unsigned int align;
7745 if (type)
7746 {
7747 /* Since the main variant type is used for the call, convert
7748 the passed type to its main variant. */
7749 type = TYPE_MAIN_VARIANT (type);
7750 align = TYPE_ALIGN (type);
7751 }
7752 else
7753 align = GET_MODE_ALIGNMENT (mode);
7754 if (align < PARM_BOUNDARY)
7755 align = PARM_BOUNDARY;
7756 else
7757 {
7758 static bool warned;
7759 unsigned int saved_align = align;
7760
7761 if (!TARGET_64BIT)
7762 {
7763 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7764 if (!type)
7765 {
7766 if (mode == XFmode || mode == XCmode)
7767 align = PARM_BOUNDARY;
7768 }
7769 else if (!ix86_contains_aligned_value_p (type))
7770 align = PARM_BOUNDARY;
7771
7772 if (align < 128)
7773 align = PARM_BOUNDARY;
7774 }
7775
7776 if (warn_psabi
7777 && !warned
7778 && align != ix86_compat_function_arg_boundary (mode, type,
7779 saved_align))
7780 {
7781 warned = true;
7782 inform (input_location,
7783 "The ABI for passing parameters with %d-byte"
7784 " alignment has changed in GCC 4.6",
7785 align / BITS_PER_UNIT);
7786 }
7787 }
7788
7789 return align;
7790 }
7791
7792 /* Return true if N is a possible register number of function value. */
7793
7794 static bool
7795 ix86_function_value_regno_p (const unsigned int regno)
7796 {
7797 switch (regno)
7798 {
7799 case AX_REG:
7800 return true;
7801 case DX_REG:
7802 return (!TARGET_64BIT || ix86_abi != MS_ABI);
7803 case DI_REG:
7804 case SI_REG:
7805 return TARGET_64BIT && ix86_abi != MS_ABI;
7806
7807 /* Complex values are returned in %st(0)/%st(1) pair. */
7808 case ST0_REG:
7809 case ST1_REG:
7810 /* TODO: The function should depend on current function ABI but
7811 builtins.c would need updating then. Therefore we use the
7812 default ABI. */
7813 if (TARGET_64BIT && ix86_abi == MS_ABI)
7814 return false;
7815 return TARGET_FLOAT_RETURNS_IN_80387;
7816
7817 /* Complex values are returned in %xmm0/%xmm1 pair. */
7818 case XMM0_REG:
7819 case XMM1_REG:
7820 return TARGET_SSE;
7821
7822 case MM0_REG:
7823 if (TARGET_MACHO || TARGET_64BIT)
7824 return false;
7825 return TARGET_MMX;
7826 }
7827
7828 return false;
7829 }
7830
7831 /* Define how to find the value returned by a function.
7832 VALTYPE is the data type of the value (as a tree).
7833 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7834 otherwise, FUNC is 0. */
7835
7836 static rtx
7837 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7838 const_tree fntype, const_tree fn)
7839 {
7840 unsigned int regno;
7841
7842 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7843 we normally prevent this case when mmx is not available. However
7844 some ABIs may require the result to be returned like DImode. */
7845 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7846 regno = FIRST_MMX_REG;
7847
7848 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7849 we prevent this case when sse is not available. However some ABIs
7850 may require the result to be returned like integer TImode. */
7851 else if (mode == TImode
7852 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7853 regno = FIRST_SSE_REG;
7854
7855 /* 32-byte vector modes in %ymm0. */
7856 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7857 regno = FIRST_SSE_REG;
7858
7859 /* 64-byte vector modes in %zmm0. */
7860 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7861 regno = FIRST_SSE_REG;
7862
7863 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7864 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7865 regno = FIRST_FLOAT_REG;
7866 else
7867 /* Most things go in %eax. */
7868 regno = AX_REG;
7869
7870 /* Override FP return register with %xmm0 for local functions when
7871 SSE math is enabled or for functions with sseregparm attribute. */
7872 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7873 {
7874 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7875 if ((sse_level >= 1 && mode == SFmode)
7876 || (sse_level == 2 && mode == DFmode))
7877 regno = FIRST_SSE_REG;
7878 }
7879
7880 /* OImode shouldn't be used directly. */
7881 gcc_assert (mode != OImode);
7882
7883 return gen_rtx_REG (orig_mode, regno);
7884 }
7885
7886 static rtx
7887 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7888 const_tree valtype)
7889 {
7890 rtx ret;
7891
7892 /* Handle libcalls, which don't provide a type node. */
7893 if (valtype == NULL)
7894 {
7895 unsigned int regno;
7896
7897 switch (mode)
7898 {
7899 case SFmode:
7900 case SCmode:
7901 case DFmode:
7902 case DCmode:
7903 case TFmode:
7904 case SDmode:
7905 case DDmode:
7906 case TDmode:
7907 regno = FIRST_SSE_REG;
7908 break;
7909 case XFmode:
7910 case XCmode:
7911 regno = FIRST_FLOAT_REG;
7912 break;
7913 case TCmode:
7914 return NULL;
7915 default:
7916 regno = AX_REG;
7917 }
7918
7919 return gen_rtx_REG (mode, regno);
7920 }
7921 else if (POINTER_TYPE_P (valtype))
7922 {
7923 /* Pointers are always returned in word_mode. */
7924 mode = word_mode;
7925 }
7926
7927 ret = construct_container (mode, orig_mode, valtype, 1,
7928 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7929 x86_64_int_return_registers, 0);
7930
7931 /* For zero sized structures, construct_container returns NULL, but we
7932 need to keep the rest of the compiler happy by returning a meaningful value. */
7933 if (!ret)
7934 ret = gen_rtx_REG (orig_mode, AX_REG);
7935
7936 return ret;
7937 }
7938
7939 static rtx
7940 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7941 const_tree valtype)
7942 {
7943 unsigned int regno = AX_REG;
7944
7945 if (TARGET_SSE)
7946 {
7947 switch (GET_MODE_SIZE (mode))
7948 {
7949 case 16:
7950 if (valtype != NULL_TREE
7951 && !VECTOR_INTEGER_TYPE_P (valtype)
7953 && !INTEGRAL_TYPE_P (valtype)
7954 && !VECTOR_FLOAT_TYPE_P (valtype))
7955 break;
7956 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7957 && !COMPLEX_MODE_P (mode))
7958 regno = FIRST_SSE_REG;
7959 break;
7960 case 8:
7961 case 4:
7962 if (mode == SFmode || mode == DFmode)
7963 regno = FIRST_SSE_REG;
7964 break;
7965 default:
7966 break;
7967 }
7968 }
7969 return gen_rtx_REG (orig_mode, regno);
7970 }
7971
7972 static rtx
7973 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7974 enum machine_mode orig_mode, enum machine_mode mode)
7975 {
7976 const_tree fn, fntype;
7977
7978 fn = NULL_TREE;
7979 if (fntype_or_decl && DECL_P (fntype_or_decl))
7980 fn = fntype_or_decl;
7981 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7982
7983 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7984 return function_value_ms_64 (orig_mode, mode, valtype);
7985 else if (TARGET_64BIT)
7986 return function_value_64 (orig_mode, mode, valtype);
7987 else
7988 return function_value_32 (orig_mode, mode, fntype, fn);
7989 }
7990
7991 static rtx
7992 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
7993 {
7994 enum machine_mode mode, orig_mode;
7995
7996 orig_mode = TYPE_MODE (valtype);
7997 mode = type_natural_mode (valtype, NULL, true);
7998 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7999 }
8000
8001 /* Pointer function arguments and return values are promoted to
8002 word_mode. */
8003
8004 static enum machine_mode
8005 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
8006 int *punsignedp, const_tree fntype,
8007 int for_return)
8008 {
8009 if (type != NULL_TREE && POINTER_TYPE_P (type))
8010 {
8011 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8012 return word_mode;
8013 }
8014 return default_promote_function_mode (type, mode, punsignedp, fntype,
8015 for_return);
8016 }
8017
8018 /* Return true if a structure, union or array with MODE containing FIELD
8019 should be accessed using BLKmode. */
8020
8021 static bool
8022 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
8023 {
8024 /* Union with XFmode must be in BLKmode. */
8025 return (mode == XFmode
8026 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8027 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8028 }
8029
8030 rtx
8031 ix86_libcall_value (enum machine_mode mode)
8032 {
8033 return ix86_function_value_1 (NULL, NULL, mode, mode);
8034 }
8035
8036 /* Return true iff type is returned in memory. */
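/* Illustration of the 32-bit rules below (a sketch): a 16-byte plain struct
   is returned in memory, while a 16-byte vector such as __m128 is returned
   in XMM0 whenever SSE is enabled; on 64-bit SysV the decision simply
   follows examine_argument.  */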
8037
8038 static bool
8039 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8040 {
8041 #ifdef SUBTARGET_RETURN_IN_MEMORY
8042 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8043 #else
8044 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8045 HOST_WIDE_INT size;
8046
8047 if (TARGET_64BIT)
8048 {
8049 if (ix86_function_type_abi (fntype) == MS_ABI)
8050 {
8051 size = int_size_in_bytes (type);
8052
8053 /* __m128 is returned in xmm0. */
8054 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8055 || INTEGRAL_TYPE_P (type)
8056 || VECTOR_FLOAT_TYPE_P (type))
8057 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8058 && !COMPLEX_MODE_P (mode)
8059 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8060 return false;
8061
8062 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8063 return size != 1 && size != 2 && size != 4 && size != 8;
8064 }
8065 else
8066 {
8067 int needed_intregs, needed_sseregs;
8068
8069 return examine_argument (mode, type, 1,
8070 &needed_intregs, &needed_sseregs);
8071 }
8072 }
8073 else
8074 {
8075 if (mode == BLKmode)
8076 return true;
8077
8078 size = int_size_in_bytes (type);
8079
8080 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8081 return false;
8082
8083 if (VECTOR_MODE_P (mode) || mode == TImode)
8084 {
8085 /* User-created vectors small enough to fit in EAX. */
8086 if (size < 8)
8087 return false;
8088
8089 /* Unless the ABI prescribes otherwise,
8090 MMX/3dNow values are returned in MM0 if available. */
8091
8092 if (size == 8)
8093 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8094
8095 /* SSE values are returned in XMM0 if available. */
8096 if (size == 16)
8097 return !TARGET_SSE;
8098
8099 /* AVX values are returned in YMM0 if available. */
8100 if (size == 32)
8101 return !TARGET_AVX;
8102
8103 /* AVX512F values are returned in ZMM0 if available. */
8104 if (size == 64)
8105 return !TARGET_AVX512F;
8106 }
8107
8108 if (mode == XFmode)
8109 return false;
8110
8111 if (size > 12)
8112 return true;
8113
8114 /* OImode shouldn't be used directly. */
8115 gcc_assert (mode != OImode);
8116
8117 return false;
8118 }
8119 #endif
8120 }
8121
8122 \f
8123 /* Create the va_list data type. */
8124
8125 /* Returns the calling convention specific va_list data type.
8126 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
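/* For reference, the record built below matches the well-known SysV x86-64
   va_list layout (shown here only as an illustration):

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } va_list[1];

   while the i386 and MS 64-bit ABIs use a plain character pointer.  */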
8127
8128 static tree
8129 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8130 {
8131 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8132
8133 /* For i386 we use plain pointer to argument area. */
8134 if (!TARGET_64BIT || abi == MS_ABI)
8135 return build_pointer_type (char_type_node);
8136
8137 record = lang_hooks.types.make_type (RECORD_TYPE);
8138 type_decl = build_decl (BUILTINS_LOCATION,
8139 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8140
8141 f_gpr = build_decl (BUILTINS_LOCATION,
8142 FIELD_DECL, get_identifier ("gp_offset"),
8143 unsigned_type_node);
8144 f_fpr = build_decl (BUILTINS_LOCATION,
8145 FIELD_DECL, get_identifier ("fp_offset"),
8146 unsigned_type_node);
8147 f_ovf = build_decl (BUILTINS_LOCATION,
8148 FIELD_DECL, get_identifier ("overflow_arg_area"),
8149 ptr_type_node);
8150 f_sav = build_decl (BUILTINS_LOCATION,
8151 FIELD_DECL, get_identifier ("reg_save_area"),
8152 ptr_type_node);
8153
8154 va_list_gpr_counter_field = f_gpr;
8155 va_list_fpr_counter_field = f_fpr;
8156
8157 DECL_FIELD_CONTEXT (f_gpr) = record;
8158 DECL_FIELD_CONTEXT (f_fpr) = record;
8159 DECL_FIELD_CONTEXT (f_ovf) = record;
8160 DECL_FIELD_CONTEXT (f_sav) = record;
8161
8162 TYPE_STUB_DECL (record) = type_decl;
8163 TYPE_NAME (record) = type_decl;
8164 TYPE_FIELDS (record) = f_gpr;
8165 DECL_CHAIN (f_gpr) = f_fpr;
8166 DECL_CHAIN (f_fpr) = f_ovf;
8167 DECL_CHAIN (f_ovf) = f_sav;
8168
8169 layout_type (record);
8170
8171 /* The correct type is an array type of one element. */
8172 return build_array_type (record, build_index_type (size_zero_node));
8173 }
8174
8175 /* Setup the builtin va_list data type and for 64-bit the additional
8176 calling convention specific va_list data types. */
8177
8178 static tree
8179 ix86_build_builtin_va_list (void)
8180 {
8181 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8182
8183 /* Initialize abi specific va_list builtin types. */
8184 if (TARGET_64BIT)
8185 {
8186 tree t;
8187 if (ix86_abi == MS_ABI)
8188 {
8189 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8190 if (TREE_CODE (t) != RECORD_TYPE)
8191 t = build_variant_type_copy (t);
8192 sysv_va_list_type_node = t;
8193 }
8194 else
8195 {
8196 t = ret;
8197 if (TREE_CODE (t) != RECORD_TYPE)
8198 t = build_variant_type_copy (t);
8199 sysv_va_list_type_node = t;
8200 }
8201 if (ix86_abi != MS_ABI)
8202 {
8203 t = ix86_build_builtin_va_list_abi (MS_ABI);
8204 if (TREE_CODE (t) != RECORD_TYPE)
8205 t = build_variant_type_copy (t);
8206 ms_va_list_type_node = t;
8207 }
8208 else
8209 {
8210 t = ret;
8211 if (TREE_CODE (t) != RECORD_TYPE)
8212 t = build_variant_type_copy (t);
8213 ms_va_list_type_node = t;
8214 }
8215 }
8216
8217 return ret;
8218 }
8219
8220 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8221
8222 static void
8223 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8224 {
8225 rtx save_area, mem;
8226 alias_set_type set;
8227 int i, max;
8228
8229 /* GPR size of varargs save area. */
8230 if (cfun->va_list_gpr_size)
8231 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8232 else
8233 ix86_varargs_gpr_size = 0;
8234
8235 /* FPR size of varargs save area. We don't need it if we don't pass
8236 anything in SSE registers. */
8237 if (TARGET_SSE && cfun->va_list_fpr_size)
8238 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8239 else
8240 ix86_varargs_fpr_size = 0;
8241
8242 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8243 return;
8244
8245 save_area = frame_pointer_rtx;
8246 set = get_varargs_alias_set ();
8247
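/* For reference, the save area filled in below has one word per integer
   argument register at offsets 0, 8, ..., followed by one 16-byte slot
   per SSE argument register starting at ix86_varargs_gpr_size.  */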
8248 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8249 if (max > X86_64_REGPARM_MAX)
8250 max = X86_64_REGPARM_MAX;
8251
8252 for (i = cum->regno; i < max; i++)
8253 {
8254 mem = gen_rtx_MEM (word_mode,
8255 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8256 MEM_NOTRAP_P (mem) = 1;
8257 set_mem_alias_set (mem, set);
8258 emit_move_insn (mem,
8259 gen_rtx_REG (word_mode,
8260 x86_64_int_parameter_registers[i]));
8261 }
8262
8263 if (ix86_varargs_fpr_size)
8264 {
8265 enum machine_mode smode;
8266 rtx_code_label *label;
8267 rtx test;
8268
8269 /* Now emit code to save SSE registers. The AX parameter contains the
8270 number of SSE parameter registers used to call this function, though
8271 all we actually check here is the zero/non-zero status. */
8272
8273 label = gen_label_rtx ();
8274 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8275 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8276 label));
8277
8278 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8279 we used movdqa (i.e. TImode) instead? Perhaps even better would
8280 be if we could determine the real mode of the data, via a hook
8281 into pass_stdarg. Ignore all that for now. */
8282 smode = V4SFmode;
8283 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8284 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8285
8286 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8287 if (max > X86_64_SSE_REGPARM_MAX)
8288 max = X86_64_SSE_REGPARM_MAX;
8289
8290 for (i = cum->sse_regno; i < max; ++i)
8291 {
8292 mem = plus_constant (Pmode, save_area,
8293 i * 16 + ix86_varargs_gpr_size);
8294 mem = gen_rtx_MEM (smode, mem);
8295 MEM_NOTRAP_P (mem) = 1;
8296 set_mem_alias_set (mem, set);
8297 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8298
8299 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8300 }
8301
8302 emit_label (label);
8303 }
8304 }
8305
8306 static void
8307 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8308 {
8309 alias_set_type set = get_varargs_alias_set ();
8310 int i;
8311
8312 /* Reset to zero, as a SysV va_arg might have been used
8313 before. */
8314 ix86_varargs_gpr_size = 0;
8315 ix86_varargs_fpr_size = 0;
8316
8317 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8318 {
8319 rtx reg, mem;
8320
8321 mem = gen_rtx_MEM (Pmode,
8322 plus_constant (Pmode, virtual_incoming_args_rtx,
8323 i * UNITS_PER_WORD));
8324 MEM_NOTRAP_P (mem) = 1;
8325 set_mem_alias_set (mem, set);
8326
8327 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8328 emit_move_insn (mem, reg);
8329 }
8330 }
8331
8332 static void
8333 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8334 tree type, int *, int no_rtl)
8335 {
8336 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8337 CUMULATIVE_ARGS next_cum;
8338 tree fntype;
8339
8340 /* This argument doesn't appear to be used anymore, which is good,
8341 because the old code here didn't suppress rtl generation. */
8342 gcc_assert (!no_rtl);
8343
8344 if (!TARGET_64BIT)
8345 return;
8346
8347 fntype = TREE_TYPE (current_function_decl);
8348
8349 /* For varargs, we do not want to skip the dummy va_dcl argument.
8350 For stdargs, we do want to skip the last named argument. */
8351 next_cum = *cum;
8352 if (stdarg_p (fntype))
8353 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8354 true);
8355
8356 if (cum->call_abi == MS_ABI)
8357 setup_incoming_varargs_ms_64 (&next_cum);
8358 else
8359 setup_incoming_varargs_64 (&next_cum);
8360 }
8361
8362 /* Return true if TYPE is a va_list type that is just a plain char pointer. */
8363
8364 static bool
8365 is_va_list_char_pointer (tree type)
8366 {
8367 tree canonic;
8368
8369 /* For 32-bit it is always true. */
8370 if (!TARGET_64BIT)
8371 return true;
8372 canonic = ix86_canonical_va_list_type (type);
8373 return (canonic == ms_va_list_type_node
8374 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8375 }
8376
8377 /* Implement va_start. */
8378
8379 static void
8380 ix86_va_start (tree valist, rtx nextarg)
8381 {
8382 HOST_WIDE_INT words, n_gpr, n_fpr;
8383 tree f_gpr, f_fpr, f_ovf, f_sav;
8384 tree gpr, fpr, ovf, sav, t;
8385 tree type;
8386 rtx ovf_rtx;
8387
8388 if (flag_split_stack
8389 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8390 {
8391 unsigned int scratch_regno;
8392
8393 /* When we are splitting the stack, we can't refer to the stack
8394 arguments using internal_arg_pointer, because they may be on
8395 the old stack. The split stack prologue will arrange to
8396 leave a pointer to the old stack arguments in a scratch
8397 register, which we here copy to a pseudo-register. The split
8398 stack prologue can't set the pseudo-register directly because
8399 it (the prologue) runs before any registers have been saved. */
8400
8401 scratch_regno = split_stack_prologue_scratch_regno ();
8402 if (scratch_regno != INVALID_REGNUM)
8403 {
8404 rtx reg;
8405 rtx_insn *seq;
8406
8407 reg = gen_reg_rtx (Pmode);
8408 cfun->machine->split_stack_varargs_pointer = reg;
8409
8410 start_sequence ();
8411 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8412 seq = get_insns ();
8413 end_sequence ();
8414
8415 push_topmost_sequence ();
8416 emit_insn_after (seq, entry_of_function ());
8417 pop_topmost_sequence ();
8418 }
8419 }
8420
8421 /* Only 64-bit targets need something special. */
8422 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8423 {
8424 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8425 std_expand_builtin_va_start (valist, nextarg);
8426 else
8427 {
8428 rtx va_r, next;
8429
8430 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8431 next = expand_binop (ptr_mode, add_optab,
8432 cfun->machine->split_stack_varargs_pointer,
8433 crtl->args.arg_offset_rtx,
8434 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8435 convert_move (va_r, next, 0);
8436 }
8437 return;
8438 }
8439
8440 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8441 f_fpr = DECL_CHAIN (f_gpr);
8442 f_ovf = DECL_CHAIN (f_fpr);
8443 f_sav = DECL_CHAIN (f_ovf);
8444
8445 valist = build_simple_mem_ref (valist);
8446 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8447 /* The following should be folded into the MEM_REF offset. */
8448 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8449 f_gpr, NULL_TREE);
8450 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8451 f_fpr, NULL_TREE);
8452 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8453 f_ovf, NULL_TREE);
8454 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8455 f_sav, NULL_TREE);
8456
8457 /* Count number of gp and fp argument registers used. */
8458 words = crtl->args.info.words;
8459 n_gpr = crtl->args.info.regno;
8460 n_fpr = crtl->args.info.sse_regno;
8461
8462 if (cfun->va_list_gpr_size)
8463 {
8464 type = TREE_TYPE (gpr);
8465 t = build2 (MODIFY_EXPR, type,
8466 gpr, build_int_cst (type, n_gpr * 8));
8467 TREE_SIDE_EFFECTS (t) = 1;
8468 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8469 }
8470
8471 if (TARGET_SSE && cfun->va_list_fpr_size)
8472 {
8473 type = TREE_TYPE (fpr);
8474 t = build2 (MODIFY_EXPR, type, fpr,
8475 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8476 TREE_SIDE_EFFECTS (t) = 1;
8477 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8478 }
8479
8480 /* Find the overflow area. */
8481 type = TREE_TYPE (ovf);
8482 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8483 ovf_rtx = crtl->args.internal_arg_pointer;
8484 else
8485 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8486 t = make_tree (type, ovf_rtx);
8487 if (words != 0)
8488 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8489 t = build2 (MODIFY_EXPR, type, ovf, t);
8490 TREE_SIDE_EFFECTS (t) = 1;
8491 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8492
8493 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8494 {
8495 /* Find the register save area.
8496 The function prologue saves it right above the stack frame. */
8497 type = TREE_TYPE (sav);
8498 t = make_tree (type, frame_pointer_rtx);
8499 if (!ix86_varargs_gpr_size)
8500 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8501 t = build2 (MODIFY_EXPR, type, sav, t);
8502 TREE_SIDE_EFFECTS (t) = 1;
8503 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8504 }
8505 }
8506
8507 /* Implement va_arg. */
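
/* Roughly, for a small integer argument the gimple emitted below amounts
   to this sketch (SSE arguments use fp_offset and 16-byte slots instead,
   and aggregates spanning both register classes are first copied into a
   temporary):

       if (gp_offset >= 48) goto from_stack;
       addr = reg_save_area + gp_offset;
       gp_offset += 8;
       goto done;
     from_stack:
       addr = overflow_arg_area;          (aligned first if over-aligned)
       overflow_arg_area = addr + 8;
     done:
       result = *(TYPE *) addr;                                           */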
8508
8509 static tree
8510 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8511 gimple_seq *post_p)
8512 {
8513 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8514 tree f_gpr, f_fpr, f_ovf, f_sav;
8515 tree gpr, fpr, ovf, sav, t;
8516 int size, rsize;
8517 tree lab_false, lab_over = NULL_TREE;
8518 tree addr, t2;
8519 rtx container;
8520 int indirect_p = 0;
8521 tree ptrtype;
8522 enum machine_mode nat_mode;
8523 unsigned int arg_boundary;
8524
8525 /* Only 64-bit targets need something special. */
8526 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8527 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8528
8529 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8530 f_fpr = DECL_CHAIN (f_gpr);
8531 f_ovf = DECL_CHAIN (f_fpr);
8532 f_sav = DECL_CHAIN (f_ovf);
8533
8534 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8535 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8536 valist = build_va_arg_indirect_ref (valist);
8537 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8538 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8539 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8540
8541 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8542 if (indirect_p)
8543 type = build_pointer_type (type);
8544 size = int_size_in_bytes (type);
8545 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8546
8547 nat_mode = type_natural_mode (type, NULL, false);
8548 switch (nat_mode)
8549 {
8550 case V8SFmode:
8551 case V8SImode:
8552 case V32QImode:
8553 case V16HImode:
8554 case V4DFmode:
8555 case V4DImode:
8556 case V16SFmode:
8557 case V16SImode:
8558 case V64QImode:
8559 case V32HImode:
8560 case V8DFmode:
8561 case V8DImode:
8562 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
8563 if (!TARGET_64BIT_MS_ABI)
8564 {
8565 container = NULL;
8566 break;
8567 }
8568
8569 default:
8570 container = construct_container (nat_mode, TYPE_MODE (type),
8571 type, 0, X86_64_REGPARM_MAX,
8572 X86_64_SSE_REGPARM_MAX, intreg,
8573 0);
8574 break;
8575 }
8576
8577 /* Pull the value out of the saved registers. */
8578
8579 addr = create_tmp_var (ptr_type_node, "addr");
8580
8581 if (container)
8582 {
8583 int needed_intregs, needed_sseregs;
8584 bool need_temp;
8585 tree int_addr, sse_addr;
8586
8587 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8588 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8589
8590 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8591
8592 need_temp = (!REG_P (container)
8593 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8594 || TYPE_ALIGN (type) > 128));
8595
8596 /* If we are passing a structure, verify that it is a consecutive block
8597 in the register save area. If not, we need to do moves. */
8598 if (!need_temp && !REG_P (container))
8599 {
8600 /* Verify that all registers are strictly consecutive. */
8601 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8602 {
8603 int i;
8604
8605 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8606 {
8607 rtx slot = XVECEXP (container, 0, i);
8608 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8609 || INTVAL (XEXP (slot, 1)) != i * 16)
8610 need_temp = 1;
8611 }
8612 }
8613 else
8614 {
8615 int i;
8616
8617 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8618 {
8619 rtx slot = XVECEXP (container, 0, i);
8620 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8621 || INTVAL (XEXP (slot, 1)) != i * 8)
8622 need_temp = 1;
8623 }
8624 }
8625 }
8626 if (!need_temp)
8627 {
8628 int_addr = addr;
8629 sse_addr = addr;
8630 }
8631 else
8632 {
8633 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8634 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8635 }
8636
8637 /* First ensure that we fit completely in registers. */
8638 if (needed_intregs)
8639 {
8640 t = build_int_cst (TREE_TYPE (gpr),
8641 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8642 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8643 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8644 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8645 gimplify_and_add (t, pre_p);
8646 }
8647 if (needed_sseregs)
8648 {
8649 t = build_int_cst (TREE_TYPE (fpr),
8650 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8651 + X86_64_REGPARM_MAX * 8);
8652 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8653 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8654 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8655 gimplify_and_add (t, pre_p);
8656 }
8657
8658 /* Compute index to start of area used for integer regs. */
8659 if (needed_intregs)
8660 {
8661 /* int_addr = gpr + sav; */
8662 t = fold_build_pointer_plus (sav, gpr);
8663 gimplify_assign (int_addr, t, pre_p);
8664 }
8665 if (needed_sseregs)
8666 {
8667 /* sse_addr = fpr + sav; */
8668 t = fold_build_pointer_plus (sav, fpr);
8669 gimplify_assign (sse_addr, t, pre_p);
8670 }
8671 if (need_temp)
8672 {
8673 int i, prev_size = 0;
8674 tree temp = create_tmp_var (type, "va_arg_tmp");
8675
8676 /* addr = &temp; */
8677 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8678 gimplify_assign (addr, t, pre_p);
8679
8680 for (i = 0; i < XVECLEN (container, 0); i++)
8681 {
8682 rtx slot = XVECEXP (container, 0, i);
8683 rtx reg = XEXP (slot, 0);
8684 enum machine_mode mode = GET_MODE (reg);
8685 tree piece_type;
8686 tree addr_type;
8687 tree daddr_type;
8688 tree src_addr, src;
8689 int src_offset;
8690 tree dest_addr, dest;
8691 int cur_size = GET_MODE_SIZE (mode);
8692
8693 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8694 prev_size = INTVAL (XEXP (slot, 1));
8695 if (prev_size + cur_size > size)
8696 {
8697 cur_size = size - prev_size;
8698 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8699 if (mode == BLKmode)
8700 mode = QImode;
8701 }
8702 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8703 if (mode == GET_MODE (reg))
8704 addr_type = build_pointer_type (piece_type);
8705 else
8706 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8707 true);
8708 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8709 true);
8710
8711 if (SSE_REGNO_P (REGNO (reg)))
8712 {
8713 src_addr = sse_addr;
8714 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8715 }
8716 else
8717 {
8718 src_addr = int_addr;
8719 src_offset = REGNO (reg) * 8;
8720 }
8721 src_addr = fold_convert (addr_type, src_addr);
8722 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8723
8724 dest_addr = fold_convert (daddr_type, addr);
8725 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8726 if (cur_size == GET_MODE_SIZE (mode))
8727 {
8728 src = build_va_arg_indirect_ref (src_addr);
8729 dest = build_va_arg_indirect_ref (dest_addr);
8730
8731 gimplify_assign (dest, src, pre_p);
8732 }
8733 else
8734 {
8735 tree copy
8736 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8737 3, dest_addr, src_addr,
8738 size_int (cur_size));
8739 gimplify_and_add (copy, pre_p);
8740 }
8741 prev_size += cur_size;
8742 }
8743 }
8744
8745 if (needed_intregs)
8746 {
8747 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8748 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8749 gimplify_assign (gpr, t, pre_p);
8750 }
8751
8752 if (needed_sseregs)
8753 {
8754 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8755 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8756 gimplify_assign (fpr, t, pre_p);
8757 }
8758
8759 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8760
8761 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8762 }
8763
8764 /* ... otherwise out of the overflow area. */
8765
8766 /* When we align a parameter on the stack for the caller, if its
8767 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8768 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here
8769 with the caller. */
8770 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8771 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8772 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8773
8774 /* Care for on-stack alignment if needed. */
8775 if (arg_boundary <= 64 || size == 0)
8776 t = ovf;
8777 else
8778 {
8779 HOST_WIDE_INT align = arg_boundary / 8;
8780 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8781 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8782 build_int_cst (TREE_TYPE (t), -align));
8783 }
8784
8785 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8786 gimplify_assign (addr, t, pre_p);
8787
8788 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8789 gimplify_assign (unshare_expr (ovf), t, pre_p);
8790
8791 if (container)
8792 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8793
8794 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8795 addr = fold_convert (ptrtype, addr);
8796
8797 if (indirect_p)
8798 addr = build_va_arg_indirect_ref (addr);
8799 return build_va_arg_indirect_ref (addr);
8800 }
8801 \f
8802 /* Return true if OPNUM's MEM should be matched
8803 in movabs* patterns. */
8804
8805 bool
8806 ix86_check_movabs (rtx insn, int opnum)
8807 {
8808 rtx set, mem;
8809
8810 set = PATTERN (insn);
8811 if (GET_CODE (set) == PARALLEL)
8812 set = XVECEXP (set, 0, 0);
8813 gcc_assert (GET_CODE (set) == SET);
8814 mem = XEXP (set, opnum);
8815 while (GET_CODE (mem) == SUBREG)
8816 mem = SUBREG_REG (mem);
8817 gcc_assert (MEM_P (mem));
8818 return volatile_ok || !MEM_VOLATILE_P (mem);
8819 }
8820 \f
8821 /* Initialize the table of extra 80387 mathematical constants. */
8822
8823 static void
8824 init_ext_80387_constants (void)
8825 {
8826 static const char * cst[5] =
8827 {
8828 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8829 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8830 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8831 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8832 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8833 };
8834 int i;
8835
8836 for (i = 0; i < 5; i++)
8837 {
8838 real_from_string (&ext_80387_constants_table[i], cst[i]);
8839 /* Ensure each constant is rounded to XFmode precision. */
8840 real_convert (&ext_80387_constants_table[i],
8841 XFmode, &ext_80387_constants_table[i]);
8842 }
8843
8844 ext_80387_constants_init = 1;
8845 }
8846
8847 /* Return non-zero if the constant is something that
8848 can be loaded with a special instruction. */
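
/* The return value encodes the instruction (see
   standard_80387_constant_opcode below): 1 = fldz, 2 = fld1, 3 = fldlg2,
   4 = fldln2, 5 = fldl2e, 6 = fldl2t, 7 = fldpi, 8 = -0.0 (fldz;fchs),
   9 = -1.0 (fld1;fchs), 0 = no special instruction, and -1 means X is not
   an 80387 constant at all.  */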
8849
8850 int
8851 standard_80387_constant_p (rtx x)
8852 {
8853 enum machine_mode mode = GET_MODE (x);
8854
8855 REAL_VALUE_TYPE r;
8856
8857 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8858 return -1;
8859
8860 if (x == CONST0_RTX (mode))
8861 return 1;
8862 if (x == CONST1_RTX (mode))
8863 return 2;
8864
8865 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8866
8867 /* For XFmode constants, try to find a special 80387 instruction when
8868 optimizing for size or on those CPUs that benefit from them. */
8869 if (mode == XFmode
8870 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8871 {
8872 int i;
8873
8874 if (! ext_80387_constants_init)
8875 init_ext_80387_constants ();
8876
8877 for (i = 0; i < 5; i++)
8878 if (real_identical (&r, &ext_80387_constants_table[i]))
8879 return i + 3;
8880 }
8881
8882 /* Load of the constant -0.0 or -1.0 will be split as
8883 fldz;fchs or fld1;fchs sequence. */
8884 if (real_isnegzero (&r))
8885 return 8;
8886 if (real_identical (&r, &dconstm1))
8887 return 9;
8888
8889 return 0;
8890 }
8891
8892 /* Return the opcode of the special instruction to be used to load
8893 the constant X. */
8894
8895 const char *
8896 standard_80387_constant_opcode (rtx x)
8897 {
8898 switch (standard_80387_constant_p (x))
8899 {
8900 case 1:
8901 return "fldz";
8902 case 2:
8903 return "fld1";
8904 case 3:
8905 return "fldlg2";
8906 case 4:
8907 return "fldln2";
8908 case 5:
8909 return "fldl2e";
8910 case 6:
8911 return "fldl2t";
8912 case 7:
8913 return "fldpi";
8914 case 8:
8915 case 9:
8916 return "#";
8917 default:
8918 gcc_unreachable ();
8919 }
8920 }
8921
8922 /* Return the CONST_DOUBLE representing the 80387 constant that is
8923 loaded by the specified special instruction. The argument IDX
8924 matches the return value from standard_80387_constant_p. */
8925
8926 rtx
8927 standard_80387_constant_rtx (int idx)
8928 {
8929 int i;
8930
8931 if (! ext_80387_constants_init)
8932 init_ext_80387_constants ();
8933
8934 switch (idx)
8935 {
8936 case 3:
8937 case 4:
8938 case 5:
8939 case 6:
8940 case 7:
8941 i = idx - 3;
8942 break;
8943
8944 default:
8945 gcc_unreachable ();
8946 }
8947
8948 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8949 XFmode);
8950 }
8951
8952 /* Return 1 if X is all zeros and 2 if X is all ones,
8953 in a supported SSE/AVX vector mode. */
8954
8955 int
8956 standard_sse_constant_p (rtx x)
8957 {
8958 enum machine_mode mode = GET_MODE (x);
8959
8960 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8961 return 1;
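/* Note that the case groups below fall through; since each wider
   TARGET_* flag implies the narrower ones, a group whose check fails
   simply falls out of the switch and the function returns 0.  */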
8962 if (vector_all_ones_operand (x, mode))
8963 switch (mode)
8964 {
8965 case V16QImode:
8966 case V8HImode:
8967 case V4SImode:
8968 case V2DImode:
8969 if (TARGET_SSE2)
8970 return 2;
8971 case V32QImode:
8972 case V16HImode:
8973 case V8SImode:
8974 case V4DImode:
8975 if (TARGET_AVX2)
8976 return 2;
8977 case V64QImode:
8978 case V32HImode:
8979 case V16SImode:
8980 case V8DImode:
8981 if (TARGET_AVX512F)
8982 return 2;
8983 default:
8984 break;
8985 }
8986
8987 return 0;
8988 }
8989
8990 /* Return the opcode of the special instruction to be used to load
8991 the constant X. */
8992
8993 const char *
8994 standard_sse_constant_opcode (rtx insn, rtx x)
8995 {
8996 switch (standard_sse_constant_p (x))
8997 {
8998 case 1:
8999 switch (get_attr_mode (insn))
9000 {
9001 case MODE_XI:
9002 return "vpxord\t%g0, %g0, %g0";
9003 case MODE_V16SF:
9004 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9005 : "vpxord\t%g0, %g0, %g0";
9006 case MODE_V8DF:
9007 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9008 : "vpxorq\t%g0, %g0, %g0";
9009 case MODE_TI:
9010 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9011 : "%vpxor\t%0, %d0";
9012 case MODE_V2DF:
9013 return "%vxorpd\t%0, %d0";
9014 case MODE_V4SF:
9015 return "%vxorps\t%0, %d0";
9016
9017 case MODE_OI:
9018 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9019 : "vpxor\t%x0, %x0, %x0";
9020 case MODE_V4DF:
9021 return "vxorpd\t%x0, %x0, %x0";
9022 case MODE_V8SF:
9023 return "vxorps\t%x0, %x0, %x0";
9024
9025 default:
9026 break;
9027 }
9028
9029 case 2:
9030 if (TARGET_AVX512VL
9031 || get_attr_mode (insn) == MODE_XI
9032 || get_attr_mode (insn) == MODE_V8DF
9033 || get_attr_mode (insn) == MODE_V16SF)
9034 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9035 if (TARGET_AVX)
9036 return "vpcmpeqd\t%0, %0, %0";
9037 else
9038 return "pcmpeqd\t%0, %0";
9039
9040 default:
9041 break;
9042 }
9043 gcc_unreachable ();
9044 }
9045
9046 /* Return true if OP contains a symbol reference. */
9047
9048 bool
9049 symbolic_reference_mentioned_p (rtx op)
9050 {
9051 const char *fmt;
9052 int i;
9053
9054 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9055 return true;
9056
9057 fmt = GET_RTX_FORMAT (GET_CODE (op));
9058 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9059 {
9060 if (fmt[i] == 'E')
9061 {
9062 int j;
9063
9064 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9065 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9066 return true;
9067 }
9068
9069 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9070 return true;
9071 }
9072
9073 return false;
9074 }
9075
9076 /* Return true if it is appropriate to emit `ret' instructions in the
9077 body of a function. Do this only if the epilogue is simple, needing a
9078 couple of insns. Prior to reloading, we can't tell how many registers
9079 must be saved, so return false then. Return false if there is no frame
9080 marker to de-allocate. */
9081
9082 bool
9083 ix86_can_use_return_insn_p (void)
9084 {
9085 struct ix86_frame frame;
9086
9087 if (! reload_completed || frame_pointer_needed)
9088 return 0;
9089
9090 /* Don't allow more than 32k pop, since that's all we can do
9091 with one instruction. */
9092 if (crtl->args.pops_args && crtl->args.size >= 32768)
9093 return 0;
9094
9095 ix86_compute_frame_layout (&frame);
9096 return (frame.stack_pointer_offset == UNITS_PER_WORD
9097 && (frame.nregs + frame.nsseregs) == 0);
9098 }
9099 \f
9100 /* Value should be nonzero if functions must have frame pointers.
9101 Zero means the frame pointer need not be set up (and parms may
9102 be accessed via the stack pointer) in functions that seem suitable. */
9103
9104 static bool
9105 ix86_frame_pointer_required (void)
9106 {
9107 /* If we accessed previous frames, then the generated code expects
9108 to be able to access the saved ebp value in our frame. */
9109 if (cfun->machine->accesses_prev_frame)
9110 return true;
9111
9112 /* Several x86 OSes need a frame pointer for other reasons,
9113 usually pertaining to setjmp. */
9114 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9115 return true;
9116
9117 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
9118 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9119 return true;
9120
9121 /* For Win64 SEH, very large frames need a frame pointer, as the maximum
9122 stack allocation is 4GB. */
9123 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9124 return true;
9125
9126 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9127 turns off the frame pointer by default. Turn it back on now if
9128 we've not got a leaf function. */
9129 if (TARGET_OMIT_LEAF_FRAME_POINTER
9130 && (!crtl->is_leaf
9131 || ix86_current_function_calls_tls_descriptor))
9132 return true;
9133
9134 if (crtl->profile && !flag_fentry)
9135 return true;
9136
9137 return false;
9138 }
9139
9140 /* Record that the current function accesses previous call frames. */
9141
9142 void
9143 ix86_setup_frame_addresses (void)
9144 {
9145 cfun->machine->accesses_prev_frame = 1;
9146 }
9147 \f
9148 #ifndef USE_HIDDEN_LINKONCE
9149 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9150 # define USE_HIDDEN_LINKONCE 1
9151 # else
9152 # define USE_HIDDEN_LINKONCE 0
9153 # endif
9154 #endif
9155
9156 static int pic_labels_used;
9157
9158 /* Fill in the label name that should be used for a pc thunk for
9159 the given register. */
9160
9161 static void
9162 get_pc_thunk_name (char name[32], unsigned int regno)
9163 {
9164 gcc_assert (!TARGET_64BIT);
9165
9166 if (USE_HIDDEN_LINKONCE)
9167 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9168 else
9169 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9170 }
9171
9172
9173 /* This function emits the pc thunks used for -fpic: each thunk loads
9174 its register with the return address of the caller and then returns. */
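
/* Each emitted thunk is, in effect, this sketch:

       __x86.get_pc_thunk.REG:
           mov    (%esp), %REG
           ret

   i.e. it copies its own return address (the address of the instruction
   following the call) into REG.  */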
9175
9176 static void
9177 ix86_code_end (void)
9178 {
9179 rtx xops[2];
9180 int regno;
9181
9182 for (regno = AX_REG; regno <= SP_REG; regno++)
9183 {
9184 char name[32];
9185 tree decl;
9186
9187 if (!(pic_labels_used & (1 << regno)))
9188 continue;
9189
9190 get_pc_thunk_name (name, regno);
9191
9192 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9193 get_identifier (name),
9194 build_function_type_list (void_type_node, NULL_TREE));
9195 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9196 NULL_TREE, void_type_node);
9197 TREE_PUBLIC (decl) = 1;
9198 TREE_STATIC (decl) = 1;
9199 DECL_IGNORED_P (decl) = 1;
9200
9201 #if TARGET_MACHO
9202 if (TARGET_MACHO)
9203 {
9204 switch_to_section (darwin_sections[text_coal_section]);
9205 fputs ("\t.weak_definition\t", asm_out_file);
9206 assemble_name (asm_out_file, name);
9207 fputs ("\n\t.private_extern\t", asm_out_file);
9208 assemble_name (asm_out_file, name);
9209 putc ('\n', asm_out_file);
9210 ASM_OUTPUT_LABEL (asm_out_file, name);
9211 DECL_WEAK (decl) = 1;
9212 }
9213 else
9214 #endif
9215 if (USE_HIDDEN_LINKONCE)
9216 {
9217 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9218
9219 targetm.asm_out.unique_section (decl, 0);
9220 switch_to_section (get_named_section (decl, NULL, 0));
9221
9222 targetm.asm_out.globalize_label (asm_out_file, name);
9223 fputs ("\t.hidden\t", asm_out_file);
9224 assemble_name (asm_out_file, name);
9225 putc ('\n', asm_out_file);
9226 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9227 }
9228 else
9229 {
9230 switch_to_section (text_section);
9231 ASM_OUTPUT_LABEL (asm_out_file, name);
9232 }
9233
9234 DECL_INITIAL (decl) = make_node (BLOCK);
9235 current_function_decl = decl;
9236 init_function_start (decl);
9237 first_function_block_is_cold = false;
9238 /* Make sure unwind info is emitted for the thunk if needed. */
9239 final_start_function (emit_barrier (), asm_out_file, 1);
9240
9241 /* Pad stack IP move with 4 instructions (two NOPs count
9242 as one instruction). */
9243 if (TARGET_PAD_SHORT_FUNCTION)
9244 {
9245 int i = 8;
9246
9247 while (i--)
9248 fputs ("\tnop\n", asm_out_file);
9249 }
9250
9251 xops[0] = gen_rtx_REG (Pmode, regno);
9252 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9253 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9254 fputs ("\tret\n", asm_out_file);
9255 final_end_function ();
9256 init_insn_lengths ();
9257 free_after_compilation (cfun);
9258 set_cfun (NULL);
9259 current_function_decl = NULL;
9260 }
9261
9262 if (flag_split_stack)
9263 file_end_indicate_split_stack ();
9264 }
9265
9266 /* Emit code for the SET_GOT patterns. */
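
/* For the common ELF -fpic case this amounts to the sketch:

       call   __x86.get_pc_thunk.REG
       add    $_GLOBAL_OFFSET_TABLE_, %REG

   where _GLOBAL_OFFSET_TABLE_ is the magic symbol the assembler resolves
   as a PC-relative (GOTPC) offset, leaving REG pointing at the GOT.  */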
9267
9268 const char *
9269 output_set_got (rtx dest, rtx label)
9270 {
9271 rtx xops[3];
9272
9273 xops[0] = dest;
9274
9275 if (TARGET_VXWORKS_RTP && flag_pic)
9276 {
9277 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9278 xops[2] = gen_rtx_MEM (Pmode,
9279 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9280 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9281
9282 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9283 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9284 an unadorned address. */
9285 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9286 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9287 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9288 return "";
9289 }
9290
9291 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9292
9293 if (!flag_pic)
9294 {
9295 if (TARGET_MACHO)
9296 /* We don't need a pic base, we're not producing pic. */
9297 gcc_unreachable ();
9298
9299 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9300 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9301 targetm.asm_out.internal_label (asm_out_file, "L",
9302 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9303 }
9304 else
9305 {
9306 char name[32];
9307 get_pc_thunk_name (name, REGNO (dest));
9308 pic_labels_used |= 1 << REGNO (dest);
9309
9310 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9311 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9312 output_asm_insn ("call\t%X2", xops);
9313
9314 #if TARGET_MACHO
9315 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9316 This is what will be referenced by the Mach-O PIC subsystem. */
9317 if (machopic_should_output_picbase_label () || !label)
9318 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9319
9320 /* When we are restoring the pic base at the site of a nonlocal label,
9321 and we decided to emit the pic base above, we will still output a
9322 local label used for calculating the correction offset (even though
9323 the offset will be 0 in that case). */
9324 if (label)
9325 targetm.asm_out.internal_label (asm_out_file, "L",
9326 CODE_LABEL_NUMBER (label));
9327 #endif
9328 }
9329
9330 if (!TARGET_MACHO)
9331 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9332
9333 return "";
9334 }
9335
9336 /* Generate an "push" pattern for input ARG. */
9337
9338 static rtx
9339 gen_push (rtx arg)
9340 {
9341 struct machine_function *m = cfun->machine;
9342
9343 if (m->fs.cfa_reg == stack_pointer_rtx)
9344 m->fs.cfa_offset += UNITS_PER_WORD;
9345 m->fs.sp_offset += UNITS_PER_WORD;
9346
9347 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9348 arg = gen_rtx_REG (word_mode, REGNO (arg));
9349
9350 return gen_rtx_SET (VOIDmode,
9351 gen_rtx_MEM (word_mode,
9352 gen_rtx_PRE_DEC (Pmode,
9353 stack_pointer_rtx)),
9354 arg);
9355 }
9356
9357 /* Generate an "pop" pattern for input ARG. */
9358
9359 static rtx
9360 gen_pop (rtx arg)
9361 {
9362 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9363 arg = gen_rtx_REG (word_mode, REGNO (arg));
9364
9365 return gen_rtx_SET (VOIDmode,
9366 arg,
9367 gen_rtx_MEM (word_mode,
9368 gen_rtx_POST_INC (Pmode,
9369 stack_pointer_rtx)));
9370 }
9371
9372 /* Return the number of an unused call-clobbered register if one is
9373 available for the entire function, or INVALID_REGNUM otherwise. */
9374
9375 static unsigned int
9376 ix86_select_alt_pic_regnum (void)
9377 {
9378 if (crtl->is_leaf
9379 && !crtl->profile
9380 && !ix86_current_function_calls_tls_descriptor)
9381 {
9382 int i, drap;
9383 /* Can't use the same register for both PIC and DRAP. */
9384 if (crtl->drap_reg)
9385 drap = REGNO (crtl->drap_reg);
9386 else
9387 drap = -1;
9388 for (i = 2; i >= 0; --i)
9389 if (i != drap && !df_regs_ever_live_p (i))
9390 return i;
9391 }
9392
9393 return INVALID_REGNUM;
9394 }
9395
9396 /* Return TRUE if we need to save REGNO. */
9397
9398 static bool
9399 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9400 {
9401 if (pic_offset_table_rtx
9402 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9403 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9404 || crtl->profile
9405 || crtl->calls_eh_return
9406 || crtl->uses_const_pool
9407 || cfun->has_nonlocal_label))
9408 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9409
9410 if (crtl->calls_eh_return && maybe_eh_return)
9411 {
9412 unsigned i;
9413 for (i = 0; ; i++)
9414 {
9415 unsigned test = EH_RETURN_DATA_REGNO (i);
9416 if (test == INVALID_REGNUM)
9417 break;
9418 if (test == regno)
9419 return true;
9420 }
9421 }
9422
9423 if (crtl->drap_reg
9424 && regno == REGNO (crtl->drap_reg)
9425 && !cfun->machine->no_drap_save_restore)
9426 return true;
9427
9428 return (df_regs_ever_live_p (regno)
9429 && !call_used_regs[regno]
9430 && !fixed_regs[regno]
9431 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9432 }
9433
9434 /* Return the number of saved general purpose registers. */
9435
9436 static int
9437 ix86_nsaved_regs (void)
9438 {
9439 int nregs = 0;
9440 int regno;
9441
9442 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9443 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9444 nregs ++;
9445 return nregs;
9446 }
9447
9448 /* Return the number of saved SSE registers. */
9449
9450 static int
9451 ix86_nsaved_sseregs (void)
9452 {
9453 int nregs = 0;
9454 int regno;
9455
9456 if (!TARGET_64BIT_MS_ABI)
9457 return 0;
9458 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9459 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9460 nregs ++;
9461 return nregs;
9462 }
9463
9464 /* Given FROM and TO register numbers, say whether this elimination is
9465 allowed. If stack alignment is needed, we can only replace argument
9466 pointer with hard frame pointer, or replace frame pointer with stack
9467 pointer. Otherwise, frame pointer elimination is automatically
9468 handled and all other eliminations are valid. */
9469
9470 static bool
9471 ix86_can_eliminate (const int from, const int to)
9472 {
9473 if (stack_realign_fp)
9474 return ((from == ARG_POINTER_REGNUM
9475 && to == HARD_FRAME_POINTER_REGNUM)
9476 || (from == FRAME_POINTER_REGNUM
9477 && to == STACK_POINTER_REGNUM));
9478 else
9479 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9480 }
9481
9482 /* Return the offset between two registers, one to be eliminated, and the other
9483 its replacement, at the start of a routine. */
9484
9485 HOST_WIDE_INT
9486 ix86_initial_elimination_offset (int from, int to)
9487 {
9488 struct ix86_frame frame;
9489 ix86_compute_frame_layout (&frame);
9490
9491 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9492 return frame.hard_frame_pointer_offset;
9493 else if (from == FRAME_POINTER_REGNUM
9494 && to == HARD_FRAME_POINTER_REGNUM)
9495 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9496 else
9497 {
9498 gcc_assert (to == STACK_POINTER_REGNUM);
9499
9500 if (from == ARG_POINTER_REGNUM)
9501 return frame.stack_pointer_offset;
9502
9503 gcc_assert (from == FRAME_POINTER_REGNUM);
9504 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9505 }
9506 }
9507
9508 /* In a dynamically-aligned function, we can't know the offset from
9509 stack pointer to frame pointer, so we must ensure that setjmp
9510 eliminates fp against the hard fp (%ebp) rather than trying to
9511 index from %esp up to the top of the frame across a gap that is
9512 of unknown (at compile-time) size. */
9513 static rtx
9514 ix86_builtin_setjmp_frame_value (void)
9515 {
9516 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9517 }
9518
9519 /* When using -fsplit-stack, the allocation routines set a field in
9520 the TCB to the bottom of the stack plus this much space, measured
9521 in bytes. */
9522
9523 #define SPLIT_STACK_AVAILABLE 256
9524
9525 /* Fill the structure ix86_frame describing the frame of the currently compiled function. */
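
/* A rough sketch of the layout computed below, from the CFA downwards:

       return address
       [pushed static chain]
       [saved frame pointer]
       GP register save area
       SSE register save area (16-byte aligned)
       va_arg register save area
       local variables
       outgoing argument area

   Bracketed items are present only when needed.  The frame->*_offset
   fields record the boundaries between these regions; the red zone and
   SEH constraints further adjust the final offsets below.  */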
9526
9527 static void
9528 ix86_compute_frame_layout (struct ix86_frame *frame)
9529 {
9530 unsigned HOST_WIDE_INT stack_alignment_needed;
9531 HOST_WIDE_INT offset;
9532 unsigned HOST_WIDE_INT preferred_alignment;
9533 HOST_WIDE_INT size = get_frame_size ();
9534 HOST_WIDE_INT to_allocate;
9535
9536 frame->nregs = ix86_nsaved_regs ();
9537 frame->nsseregs = ix86_nsaved_sseregs ();
9538
9539 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
9540 except in function prologues and leaf functions. */
9541 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9542 && (!crtl->is_leaf || cfun->calls_alloca != 0
9543 || ix86_current_function_calls_tls_descriptor))
9544 {
9545 crtl->preferred_stack_boundary = 128;
9546 crtl->stack_alignment_needed = 128;
9547 }
9548 /* preferred_stack_boundary is never updated for calls
9549 expanded from a TLS descriptor. Update it here. We don't update it at
9550 expand time because, according to the comments before
9551 ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
9552 away. */
9553 else if (ix86_current_function_calls_tls_descriptor
9554 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9555 {
9556 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9557 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9558 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9559 }
9560
9561 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9562 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9563
9564 gcc_assert (!size || stack_alignment_needed);
9565 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9566 gcc_assert (preferred_alignment <= stack_alignment_needed);
9567
9568 /* For SEH we have to limit the amount of code movement into the prologue.
9569 At present we do this via a BLOCKAGE, at which point there's very little
9570 scheduling that can be done, which means that there's very little point
9571 in doing anything except PUSHs. */
9572 if (TARGET_SEH)
9573 cfun->machine->use_fast_prologue_epilogue = false;
9574
9575 /* During reload iterations the number of registers saved can change.
9576 Recompute the value as needed. Do not recompute when the number of registers
9577 didn't change, as reload calls this function multiple times and does not
9578 expect the decision to change within a single iteration. */
9579 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9580 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9581 {
9582 int count = frame->nregs;
9583 struct cgraph_node *node = cgraph_node::get (current_function_decl);
9584
9585 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9586
9587 /* The fast prologue uses move instead of push to save registers. This
9588 is significantly longer, but also executes faster as modern hardware
9589 can execute the moves in parallel, but can't do that for push/pop.
9590
9591 Be careful about choosing which prologue to emit: when the function
9592 takes many instructions to execute we may use the slow version, as well
9593 as when the function is known to be outside a hot spot (this is known
9594 with feedback only). Weight the size of the function by the number of
9595 registers to save, as it is cheap to use one or two push instructions
9596 but very slow to use many of them. */
9597 if (count)
9598 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9599 if (node->frequency < NODE_FREQUENCY_NORMAL
9600 || (flag_branch_probabilities
9601 && node->frequency < NODE_FREQUENCY_HOT))
9602 cfun->machine->use_fast_prologue_epilogue = false;
9603 else
9604 cfun->machine->use_fast_prologue_epilogue
9605 = !expensive_function_p (count);
9606 }
9607
9608 frame->save_regs_using_mov
9609 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9610 /* If static stack checking is enabled and done with probes,
9611 the registers need to be saved before allocating the frame. */
9612 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9613
9614 /* Skip return address. */
9615 offset = UNITS_PER_WORD;
9616
9617 /* Skip pushed static chain. */
9618 if (ix86_static_chain_on_stack)
9619 offset += UNITS_PER_WORD;
9620
9621 /* Skip saved base pointer. */
9622 if (frame_pointer_needed)
9623 offset += UNITS_PER_WORD;
9624 frame->hfp_save_offset = offset;
9625
9626 /* The traditional frame pointer location is at the top of the frame. */
9627 frame->hard_frame_pointer_offset = offset;
9628
9629 /* Register save area */
9630 offset += frame->nregs * UNITS_PER_WORD;
9631 frame->reg_save_offset = offset;
9632
9633 /* On SEH target, registers are pushed just before the frame pointer
9634 location. */
9635 if (TARGET_SEH)
9636 frame->hard_frame_pointer_offset = offset;
9637
9638 /* Align and set SSE register save area. */
9639 if (frame->nsseregs)
9640 {
9641 /* The only ABI that has saved SSE registers (Win64) also has a
9642 16-byte aligned default stack, and thus we don't need to be
9643 within the re-aligned local stack frame to save them. */
9644 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9645 offset = (offset + 16 - 1) & -16;
9646 offset += frame->nsseregs * 16;
9647 }
9648 frame->sse_reg_save_offset = offset;
9649
9650 /* The re-aligned stack starts here. Values before this point are not
9651 directly comparable with values below this point. In order to make
9652 sure that no value happens to be the same before and after, force
9653 the alignment computation below to add a non-zero value. */
9654 if (stack_realign_fp)
9655 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9656
9657 /* Va-arg area */
9658 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9659 offset += frame->va_arg_size;
9660
9661 /* Align start of frame for local function. */
9662 if (stack_realign_fp
9663 || offset != frame->sse_reg_save_offset
9664 || size != 0
9665 || !crtl->is_leaf
9666 || cfun->calls_alloca
9667 || ix86_current_function_calls_tls_descriptor)
9668 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9669
9670 /* Frame pointer points here. */
9671 frame->frame_pointer_offset = offset;
9672
9673 offset += size;
9674
9675 /* Add the outgoing arguments area. It can be skipped if we eliminated
9676 all the function calls as dead code.
9677 Skipping is however impossible when the function calls alloca, as the
9678 alloca expander assumes that the last crtl->outgoing_args_size bytes
9679 of the stack frame are unused. */
9680 if (ACCUMULATE_OUTGOING_ARGS
9681 && (!crtl->is_leaf || cfun->calls_alloca
9682 || ix86_current_function_calls_tls_descriptor))
9683 {
9684 offset += crtl->outgoing_args_size;
9685 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9686 }
9687 else
9688 frame->outgoing_arguments_size = 0;
9689
9690 /* Align stack boundary. Only needed if we're calling another function
9691 or using alloca. */
9692 if (!crtl->is_leaf || cfun->calls_alloca
9693 || ix86_current_function_calls_tls_descriptor)
9694 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9695
9696 /* We've reached end of stack frame. */
9697 frame->stack_pointer_offset = offset;
9698
9699 /* Size prologue needs to allocate. */
9700 to_allocate = offset - frame->sse_reg_save_offset;
9701
9702 if ((!to_allocate && frame->nregs <= 1)
9703 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9704 frame->save_regs_using_mov = false;
9705
9706 if (ix86_using_red_zone ()
9707 && crtl->sp_is_unchanging
9708 && crtl->is_leaf
9709 && !ix86_current_function_calls_tls_descriptor)
9710 {
9711 frame->red_zone_size = to_allocate;
9712 if (frame->save_regs_using_mov)
9713 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9714 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9715 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9716 }
9717 else
9718 frame->red_zone_size = 0;
9719 frame->stack_pointer_offset -= frame->red_zone_size;
9720
9721 /* The SEH frame pointer location is near the bottom of the frame.
9722 This is enforced by the fact that the difference between the
9723 stack pointer and the frame pointer is limited to 240 bytes in
9724 the unwind data structure. */
9725 if (TARGET_SEH)
9726 {
9727 HOST_WIDE_INT diff;
9728
9729 /* If we can leave the frame pointer where it is, do so. This also
9730 returns the establisher frame for __builtin_frame_address (0). */
9731 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9732 if (diff <= SEH_MAX_FRAME_SIZE
9733 && (diff > 240 || (diff & 15) != 0)
9734 && !crtl->accesses_prior_frames)
9735 {
9736 /* Ideally we'd determine what portion of the local stack frame
9737 (within the constraint of the lowest 240) is most heavily used.
9738 But without that complication, simply bias the frame pointer
9739 by 128 bytes so as to maximize the amount of the local stack
9740 frame that is addressable with 8-bit offsets. */
9741 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9742 }
9743 }
9744 }
9745
9746 /* This is semi-inlined memory_address_length, but simplified
9747 since we know that we're always dealing with reg+offset, and
9748 to avoid having to create and discard all that rtl. */
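
/* A few examples of the length computed below (displacement plus SIB
   byte only, not the opcode or modrm byte):

       (%eax)       -> 0              0(%ebp)    -> 1  (disp8 required)
       (%esp)       -> 1  (SIB)       16(%ecx)   -> 1  (disp8)
       1024(%edx)   -> 4  (disp32)    1024(%r12) -> 5  (SIB + disp32)  */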
9749
9750 static inline int
9751 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9752 {
9753 int len = 4;
9754
9755 if (offset == 0)
9756 {
9757 /* EBP and R13 cannot be encoded without an offset. */
9758 len = (regno == BP_REG || regno == R13_REG);
9759 }
9760 else if (IN_RANGE (offset, -128, 127))
9761 len = 1;
9762
9763 /* ESP and R12 must be encoded with a SIB byte. */
9764 if (regno == SP_REG || regno == R12_REG)
9765 len++;
9766
9767 return len;
9768 }
9769
9770 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9771 The valid base registers are taken from CFUN->MACHINE->FS. */
9772
9773 static rtx
9774 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9775 {
9776 const struct machine_function *m = cfun->machine;
9777 rtx base_reg = NULL;
9778 HOST_WIDE_INT base_offset = 0;
9779
9780 if (m->use_fast_prologue_epilogue)
9781 {
9782 /* Choose the base register most likely to allow the most scheduling
9783 opportunities. Generally FP is valid throughout the function,
9784 while DRAP must be reloaded within the epilogue. But choose either
9785 over the SP due to increased encoding size. */
9786
9787 if (m->fs.fp_valid)
9788 {
9789 base_reg = hard_frame_pointer_rtx;
9790 base_offset = m->fs.fp_offset - cfa_offset;
9791 }
9792 else if (m->fs.drap_valid)
9793 {
9794 base_reg = crtl->drap_reg;
9795 base_offset = 0 - cfa_offset;
9796 }
9797 else if (m->fs.sp_valid)
9798 {
9799 base_reg = stack_pointer_rtx;
9800 base_offset = m->fs.sp_offset - cfa_offset;
9801 }
9802 }
9803 else
9804 {
9805 HOST_WIDE_INT toffset;
9806 int len = 16, tlen;
9807
9808 /* Choose the base register with the smallest address encoding.
9809 With a tie, choose FP > DRAP > SP. */
9810 if (m->fs.sp_valid)
9811 {
9812 base_reg = stack_pointer_rtx;
9813 base_offset = m->fs.sp_offset - cfa_offset;
9814 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9815 }
9816 if (m->fs.drap_valid)
9817 {
9818 toffset = 0 - cfa_offset;
9819 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9820 if (tlen <= len)
9821 {
9822 base_reg = crtl->drap_reg;
9823 base_offset = toffset;
9824 len = tlen;
9825 }
9826 }
9827 if (m->fs.fp_valid)
9828 {
9829 toffset = m->fs.fp_offset - cfa_offset;
9830 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9831 if (tlen <= len)
9832 {
9833 base_reg = hard_frame_pointer_rtx;
9834 base_offset = toffset;
9835 len = tlen;
9836 }
9837 }
9838 }
9839 gcc_assert (base_reg != NULL);
9840
9841 return plus_constant (Pmode, base_reg, base_offset);
9842 }
9843
9844 /* Emit code to save registers in the prologue. */
9845
9846 static void
9847 ix86_emit_save_regs (void)
9848 {
9849 unsigned int regno;
9850 rtx insn;
9851
9852 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9853 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9854 {
9855 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9856 RTX_FRAME_RELATED_P (insn) = 1;
9857 }
9858 }
9859
9860 /* Emit a single register save at CFA - CFA_OFFSET. */
9861
9862 static void
9863 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9864 HOST_WIDE_INT cfa_offset)
9865 {
9866 struct machine_function *m = cfun->machine;
9867 rtx reg = gen_rtx_REG (mode, regno);
9868 rtx mem, addr, base, insn;
9869
9870 addr = choose_baseaddr (cfa_offset);
9871 mem = gen_frame_mem (mode, addr);
9872
9873 /* For SSE saves, we need to indicate the 128-bit alignment. */
9874 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9875
9876 insn = emit_move_insn (mem, reg);
9877 RTX_FRAME_RELATED_P (insn) = 1;
9878
9879 base = addr;
9880 if (GET_CODE (base) == PLUS)
9881 base = XEXP (base, 0);
9882 gcc_checking_assert (REG_P (base));
9883
9884 /* When saving registers into a re-aligned local stack frame, avoid
9885 any tricky guessing by dwarf2out. */
9886 if (m->fs.realigned)
9887 {
9888 gcc_checking_assert (stack_realign_drap);
9889
9890 if (regno == REGNO (crtl->drap_reg))
9891 {
9892 /* A bit of a hack. We force the DRAP register to be saved in
9893 the re-aligned stack frame, which provides us with a copy
9894 of the CFA that will last past the prologue. Install it. */
9895 gcc_checking_assert (cfun->machine->fs.fp_valid);
9896 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9897 cfun->machine->fs.fp_offset - cfa_offset);
9898 mem = gen_rtx_MEM (mode, addr);
9899 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9900 }
9901 else
9902 {
9903 /* The frame pointer is a stable reference within the
9904 aligned frame. Use it. */
9905 gcc_checking_assert (cfun->machine->fs.fp_valid);
9906 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9907 cfun->machine->fs.fp_offset - cfa_offset);
9908 mem = gen_rtx_MEM (mode, addr);
9909 add_reg_note (insn, REG_CFA_EXPRESSION,
9910 gen_rtx_SET (VOIDmode, mem, reg));
9911 }
9912 }
9913
9914 /* The memory may not be relative to the current CFA register,
9915 which means that we may need to generate a new pattern for
9916 use by the unwind info. */
9917 else if (base != m->fs.cfa_reg)
9918 {
9919 addr = plus_constant (Pmode, m->fs.cfa_reg,
9920 m->fs.cfa_offset - cfa_offset);
9921 mem = gen_rtx_MEM (mode, addr);
9922 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9923 }
9924 }
9925
9926 /* Emit code to save registers using MOV insns.
9927 First register is stored at CFA - CFA_OFFSET. */
9928 static void
9929 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9930 {
9931 unsigned int regno;
9932
9933 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9934 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9935 {
9936 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9937 cfa_offset -= UNITS_PER_WORD;
9938 }
9939 }
9940
9941 /* Emit code to save SSE registers using MOV insns.
9942 First register is stored at CFA - CFA_OFFSET. */
9943 static void
9944 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9945 {
9946 unsigned int regno;
9947
9948 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9949 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9950 {
9951 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9952 cfa_offset -= 16;
9953 }
9954 }
9955
9956 static GTY(()) rtx queued_cfa_restores;
9957
9958 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9959 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9960 Don't add the note if the previously saved value will be left untouched
9961 within the stack red zone until return, as unwinders can find the same value
9962 in the register and on the stack. */
9963
9964 static void
9965 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9966 {
9967 if (!crtl->shrink_wrapped
9968 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9969 return;
9970
9971 if (insn)
9972 {
9973 add_reg_note (insn, REG_CFA_RESTORE, reg);
9974 RTX_FRAME_RELATED_P (insn) = 1;
9975 }
9976 else
9977 queued_cfa_restores
9978 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9979 }
9980
9981 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9982
9983 static void
9984 ix86_add_queued_cfa_restore_notes (rtx insn)
9985 {
9986 rtx last;
9987 if (!queued_cfa_restores)
9988 return;
9989 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9990 ;
9991 XEXP (last, 1) = REG_NOTES (insn);
9992 REG_NOTES (insn) = queued_cfa_restores;
9993 queued_cfa_restores = NULL_RTX;
9994 RTX_FRAME_RELATED_P (insn) = 1;
9995 }
9996
9997 /* Expand prologue or epilogue stack adjustment.
9998 The pattern exists to put a dependency on all ebp-based memory accesses.
9999 STYLE should be negative if instructions should be marked as frame related,
10000 zero if the %r11 register is live and cannot be freely used, and positive
10001 otherwise. */
10002
10003 static void
10004 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10005 int style, bool set_cfa)
10006 {
10007 struct machine_function *m = cfun->machine;
10008 rtx insn;
10009 bool add_frame_related_expr = false;
10010
10011 if (Pmode == SImode)
10012 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10013 else if (x86_64_immediate_operand (offset, DImode))
10014 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10015 else
10016 {
10017 rtx tmp;
10018 /* r11 is used by indirect sibcall returns as well: it is set before
10019 the epilogue and used after it. */
10020 if (style)
10021 tmp = gen_rtx_REG (DImode, R11_REG);
10022 else
10023 {
10024 gcc_assert (src != hard_frame_pointer_rtx
10025 && dest != hard_frame_pointer_rtx);
10026 tmp = hard_frame_pointer_rtx;
10027 }
10028 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10029 if (style < 0)
10030 add_frame_related_expr = true;
10031
10032 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10033 }
10034
10035 insn = emit_insn (insn);
10036 if (style >= 0)
10037 ix86_add_queued_cfa_restore_notes (insn);
10038
10039 if (set_cfa)
10040 {
10041 rtx r;
10042
10043 gcc_assert (m->fs.cfa_reg == src);
10044 m->fs.cfa_offset += INTVAL (offset);
10045 m->fs.cfa_reg = dest;
10046
10047 r = gen_rtx_PLUS (Pmode, src, offset);
10048 r = gen_rtx_SET (VOIDmode, dest, r);
10049 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10050 RTX_FRAME_RELATED_P (insn) = 1;
10051 }
10052 else if (style < 0)
10053 {
10054 RTX_FRAME_RELATED_P (insn) = 1;
10055 if (add_frame_related_expr)
10056 {
10057 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10058 r = gen_rtx_SET (VOIDmode, dest, r);
10059 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10060 }
10061 }
10062
10063 if (dest == stack_pointer_rtx)
10064 {
10065 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10066 bool valid = m->fs.sp_valid;
10067
10068 if (src == hard_frame_pointer_rtx)
10069 {
10070 valid = m->fs.fp_valid;
10071 ooffset = m->fs.fp_offset;
10072 }
10073 else if (src == crtl->drap_reg)
10074 {
10075 valid = m->fs.drap_valid;
10076 ooffset = 0;
10077 }
10078 else
10079 {
10080 /* Else there are two possibilities: SP itself, which we set
10081 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
10082 taken care of by hand along the eh_return path. */
10083 gcc_checking_assert (src == stack_pointer_rtx
10084 || offset == const0_rtx);
10085 }
10086
10087 m->fs.sp_offset = ooffset - INTVAL (offset);
10088 m->fs.sp_valid = valid;
10089 }
10090 }
10091
10092 /* Find an available register to be used as dynamic realign argument
10093 pointer register. Such a register will be written in the prologue and
10094 used at the beginning of the body, so it must not be
10095 1. parameter passing register.
10096 2. GOT pointer.
10097 We reuse static-chain register if it is available. Otherwise, we
10098 use DI for i386 and R13 for x86-64. We chose R13 since it has
10099 shorter encoding.
10100
10101 Return: the regno of chosen register. */
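/* For example (an illustration of the logic below, not an additional rule):
   a plain 32-bit function with no static chain, no tail call and at most two
   register parameters typically ends up with CX_REG, while a 64-bit function
   that needs the static chain or makes a tail call gets R13_REG.  */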
10102
10103 static unsigned int
10104 find_drap_reg (void)
10105 {
10106 tree decl = cfun->decl;
10107
10108 if (TARGET_64BIT)
10109 {
10110 /* Use R13 for a nested function or a function that needs a static
10111 chain. Since a function with a tail call may use any caller-saved
10112 register in the epilogue, DRAP must not use a caller-saved
10113 register in such a case. */
10114 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10115 return R13_REG;
10116
10117 return R10_REG;
10118 }
10119 else
10120 {
10121 /* Use DI for a nested function or a function that needs a static
10122 chain. Since a function with a tail call may use any caller-saved
10123 register in the epilogue, DRAP must not use a caller-saved
10124 register in such a case. */
10125 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10126 return DI_REG;
10127
10128 /* Reuse static chain register if it isn't used for parameter
10129 passing. */
10130 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10131 {
10132 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10133 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10134 return CX_REG;
10135 }
10136 return DI_REG;
10137 }
10138 }
10139
10140 /* Return minimum incoming stack alignment. */
10141
10142 static unsigned int
10143 ix86_minimum_incoming_stack_boundary (bool sibcall)
10144 {
10145 unsigned int incoming_stack_boundary;
10146
10147 /* Prefer the one specified at command line. */
10148 if (ix86_user_incoming_stack_boundary)
10149 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10150 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10151 when -mstackrealign is used, this isn't a sibcall check, and the
10152 estimated stack alignment is 128 bits. */
10153 else if (!sibcall
10154 && !TARGET_64BIT
10155 && ix86_force_align_arg_pointer
10156 && crtl->stack_alignment_estimated == 128)
10157 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10158 else
10159 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10160
10161 /* Incoming stack alignment can be changed on individual functions
10162 via force_align_arg_pointer attribute. We use the smallest
10163 incoming stack boundary. */
10164 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10165 && lookup_attribute (ix86_force_align_arg_pointer_string,
10166 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10167 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10168
10169 /* The incoming stack frame has to be aligned at least at
10170 parm_stack_boundary. */
10171 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10172 incoming_stack_boundary = crtl->parm_stack_boundary;
10173
10174 /* Stack at entrance of main is aligned by runtime. We use the
10175 smallest incoming stack boundary. */
10176 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10177 && DECL_NAME (current_function_decl)
10178 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10179 && DECL_FILE_SCOPE_P (current_function_decl))
10180 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10181
10182 return incoming_stack_boundary;
10183 }
10184
10185 /* Update incoming stack boundary and estimated stack alignment. */
10186
10187 static void
10188 ix86_update_stack_boundary (void)
10189 {
10190 ix86_incoming_stack_boundary
10191 = ix86_minimum_incoming_stack_boundary (false);
10192
10193 /* x86_64 vararg needs 16byte stack alignment for register save
10194 area. */
10195 if (TARGET_64BIT
10196 && cfun->stdarg
10197 && crtl->stack_alignment_estimated < 128)
10198 crtl->stack_alignment_estimated = 128;
10199 }
10200
10201 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10202 needed or an rtx for DRAP otherwise. */
10203
10204 static rtx
10205 ix86_get_drap_rtx (void)
10206 {
10207 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10208 crtl->need_drap = true;
10209
10210 if (stack_realign_drap)
10211 {
10212 /* Assign DRAP to vDRAP and return vDRAP. */
10213 unsigned int regno = find_drap_reg ();
10214 rtx drap_vreg;
10215 rtx arg_ptr;
10216 rtx_insn *seq, *insn;
10217
10218 arg_ptr = gen_rtx_REG (Pmode, regno);
10219 crtl->drap_reg = arg_ptr;
10220
10221 start_sequence ();
10222 drap_vreg = copy_to_reg (arg_ptr);
10223 seq = get_insns ();
10224 end_sequence ();
10225
10226 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10227 if (!optimize)
10228 {
10229 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10230 RTX_FRAME_RELATED_P (insn) = 1;
10231 }
10232 return drap_vreg;
10233 }
10234 else
10235 return NULL;
10236 }
10237
10238 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10239
10240 static rtx
10241 ix86_internal_arg_pointer (void)
10242 {
10243 return virtual_incoming_args_rtx;
10244 }
10245
10246 struct scratch_reg {
10247 rtx reg;
10248 bool saved;
10249 };
10250
10251 /* Return a short-lived scratch register for use on function entry.
10252 In 32-bit mode, it is valid only after the registers are saved
10253 in the prologue. This register must be released by means of
10254 release_scratch_register_on_entry once it is dead. */
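/* Typical usage, as in ix86_adjust_stack_and_probe and
   ix86_emit_probe_stack_range below:

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ... use sr.reg ...
     release_scratch_register_on_entry (&sr);
 */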
10255
10256 static void
10257 get_scratch_register_on_entry (struct scratch_reg *sr)
10258 {
10259 int regno;
10260
10261 sr->saved = false;
10262
10263 if (TARGET_64BIT)
10264 {
10265 /* We always use R11 in 64-bit mode. */
10266 regno = R11_REG;
10267 }
10268 else
10269 {
10270 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10271 bool fastcall_p
10272 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10273 bool thiscall_p
10274 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10275 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10276 int regparm = ix86_function_regparm (fntype, decl);
10277 int drap_regno
10278 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10279
10280 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10281 for the static chain register. */
10282 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10283 && drap_regno != AX_REG)
10284 regno = AX_REG;
10285 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10286 for the static chain register. */
10287 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10288 regno = AX_REG;
10289 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10290 regno = DX_REG;
10291 /* ecx is the static chain register. */
10292 else if (regparm < 3 && !fastcall_p && !thiscall_p
10293 && !static_chain_p
10294 && drap_regno != CX_REG)
10295 regno = CX_REG;
10296 else if (ix86_save_reg (BX_REG, true))
10297 regno = BX_REG;
10298 /* esi is the static chain register. */
10299 else if (!(regparm == 3 && static_chain_p)
10300 && ix86_save_reg (SI_REG, true))
10301 regno = SI_REG;
10302 else if (ix86_save_reg (DI_REG, true))
10303 regno = DI_REG;
10304 else
10305 {
10306 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10307 sr->saved = true;
10308 }
10309 }
10310
10311 sr->reg = gen_rtx_REG (Pmode, regno);
10312 if (sr->saved)
10313 {
10314 rtx insn = emit_insn (gen_push (sr->reg));
10315 RTX_FRAME_RELATED_P (insn) = 1;
10316 }
10317 }
10318
10319 /* Release a scratch register obtained from the preceding function. */
10320
10321 static void
10322 release_scratch_register_on_entry (struct scratch_reg *sr)
10323 {
10324 if (sr->saved)
10325 {
10326 struct machine_function *m = cfun->machine;
10327 rtx x, insn = emit_insn (gen_pop (sr->reg));
10328
10329 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10330 RTX_FRAME_RELATED_P (insn) = 1;
10331 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10332 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10333 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10334 m->fs.sp_offset -= UNITS_PER_WORD;
10335 }
10336 }
10337
10338 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
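/* With the default STACK_CHECK_PROBE_INTERVAL_EXP of 12 (an assumption; a
   target may override it), PROBE_INTERVAL is 1 << 12 == 4096 bytes, i.e.
   one probe per 4 KiB page.  */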
10339
10340 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10341
10342 static void
10343 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10344 {
10345 /* We skip the probe for the first interval + a small dope of 4 words and
10346 probe that many bytes past the specified size to maintain a protection
10347 area at the bottom of the stack. */
10348 const int dope = 4 * UNITS_PER_WORD;
10349 rtx size_rtx = GEN_INT (size), last;
10350
10351 /* See if we have a constant small number of probes to generate. If so,
10352 that's the easy case. The run-time loop is made up of 11 insns in the
10353 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10354 for n # of intervals. */
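/* A worked example under the 4096-byte interval assumed above: for
   SIZE == 3 * PROBE_INTERVAL the unrolled form below is chosen (3 <= 5)
   and costs 3 + 2*(3-1) == 7 insns, beating the 11-insn run-time loop;
   for SIZE == 6 * PROBE_INTERVAL the loop form is used instead.  */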
10355 if (size <= 5 * PROBE_INTERVAL)
10356 {
10357 HOST_WIDE_INT i, adjust;
10358 bool first_probe = true;
10359
10360 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10361 values of N from 1 until it exceeds SIZE. If only one probe is
10362 needed, this will not generate any code. Then adjust and probe
10363 to PROBE_INTERVAL + SIZE. */
10364 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10365 {
10366 if (first_probe)
10367 {
10368 adjust = 2 * PROBE_INTERVAL + dope;
10369 first_probe = false;
10370 }
10371 else
10372 adjust = PROBE_INTERVAL;
10373
10374 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10375 plus_constant (Pmode, stack_pointer_rtx,
10376 -adjust)));
10377 emit_stack_probe (stack_pointer_rtx);
10378 }
10379
10380 if (first_probe)
10381 adjust = size + PROBE_INTERVAL + dope;
10382 else
10383 adjust = size + PROBE_INTERVAL - i;
10384
10385 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10386 plus_constant (Pmode, stack_pointer_rtx,
10387 -adjust)));
10388 emit_stack_probe (stack_pointer_rtx);
10389
10390 /* Adjust back to account for the additional first interval. */
10391 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10392 plus_constant (Pmode, stack_pointer_rtx,
10393 PROBE_INTERVAL + dope)));
10394 }
10395
10396 /* Otherwise, do the same as above, but in a loop. Note that we must be
10397 extra careful with variables wrapping around because we might be at
10398 the very top (or the very bottom) of the address space and we have
10399 to be able to handle this case properly; in particular, we use an
10400 equality test for the loop condition. */
10401 else
10402 {
10403 HOST_WIDE_INT rounded_size;
10404 struct scratch_reg sr;
10405
10406 get_scratch_register_on_entry (&sr);
10407
10408
10409 /* Step 1: round SIZE to the previous multiple of the interval. */
10410
10411 rounded_size = size & -PROBE_INTERVAL;
10412
10413
10414 /* Step 2: compute initial and final value of the loop counter. */
10415
10416 /* SP = SP_0 + PROBE_INTERVAL. */
10417 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10418 plus_constant (Pmode, stack_pointer_rtx,
10419 - (PROBE_INTERVAL + dope))));
10420
10421 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10422 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10423 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10424 gen_rtx_PLUS (Pmode, sr.reg,
10425 stack_pointer_rtx)));
10426
10427
10428 /* Step 3: the loop
10429
10430 while (SP != LAST_ADDR)
10431 {
10432 SP = SP + PROBE_INTERVAL
10433 probe at SP
10434 }
10435
10436 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10437 values of N from 1 until it is equal to ROUNDED_SIZE. */
10438
10439 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10440
10441
10442 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10443 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10444
10445 if (size != rounded_size)
10446 {
10447 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10448 plus_constant (Pmode, stack_pointer_rtx,
10449 rounded_size - size)));
10450 emit_stack_probe (stack_pointer_rtx);
10451 }
10452
10453 /* Adjust back to account for the additional first interval. */
10454 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10455 plus_constant (Pmode, stack_pointer_rtx,
10456 PROBE_INTERVAL + dope)));
10457
10458 release_scratch_register_on_entry (&sr);
10459 }
10460
10461 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10462
10463 /* Even if the stack pointer isn't the CFA register, we need to correctly
10464 describe the adjustments made to it, in particular differentiate the
10465 frame-related ones from the frame-unrelated ones. */
10466 if (size > 0)
10467 {
10468 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10469 XVECEXP (expr, 0, 0)
10470 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10471 plus_constant (Pmode, stack_pointer_rtx, -size));
10472 XVECEXP (expr, 0, 1)
10473 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10474 plus_constant (Pmode, stack_pointer_rtx,
10475 PROBE_INTERVAL + dope + size));
10476 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10477 RTX_FRAME_RELATED_P (last) = 1;
10478
10479 cfun->machine->fs.sp_offset += size;
10480 }
10481
10482 /* Make sure nothing is scheduled before we are done. */
10483 emit_insn (gen_blockage ());
10484 }
10485
10486 /* Adjust the stack pointer up to REG while probing it. */
10487
10488 const char *
10489 output_adjust_stack_and_probe (rtx reg)
10490 {
10491 static int labelno = 0;
10492 char loop_lab[32], end_lab[32];
10493 rtx xops[2];
10494
10495 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10496 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10497
10498 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10499
10500 /* Jump to END_LAB if SP == LAST_ADDR. */
10501 xops[0] = stack_pointer_rtx;
10502 xops[1] = reg;
10503 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10504 fputs ("\tje\t", asm_out_file);
10505 assemble_name_raw (asm_out_file, end_lab);
10506 fputc ('\n', asm_out_file);
10507
10508 /* SP = SP + PROBE_INTERVAL. */
10509 xops[1] = GEN_INT (PROBE_INTERVAL);
10510 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10511
10512 /* Probe at SP. */
10513 xops[1] = const0_rtx;
10514 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10515
10516 fprintf (asm_out_file, "\tjmp\t");
10517 assemble_name_raw (asm_out_file, loop_lab);
10518 fputc ('\n', asm_out_file);
10519
10520 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10521
10522 return "";
10523 }
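/* On x86-64 the loop printed above comes out roughly as (AT&T syntax,
   illustrative label names, 4096-byte probe interval assumed):

   .LPSRL0:	cmpq	%r11, %rsp
		je	.LPSRE0
		subq	$4096, %rsp
		orq	$0, (%rsp)
		jmp	.LPSRL0
   .LPSRE0:
 */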
10524
10525 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10526 inclusive. These are offsets from the current stack pointer. */
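/* A small worked example, again assuming a 4096-byte PROBE_INTERVAL: for
   FIRST == 4096 and SIZE == 10000 the unrolled branch below probes at
   sp - 8192, sp - 12288 and finally sp - 14096 (FIRST + SIZE).  */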
10527
10528 static void
10529 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10530 {
10531 /* See if we have a constant small number of probes to generate. If so,
10532 that's the easy case. The run-time loop is made up of 7 insns in the
10533 generic case while the compile-time loop is made up of n insns for n #
10534 of intervals. */
10535 if (size <= 7 * PROBE_INTERVAL)
10536 {
10537 HOST_WIDE_INT i;
10538
10539 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10540 it exceeds SIZE. If only one probe is needed, this will not
10541 generate any code. Then probe at FIRST + SIZE. */
10542 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10543 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10544 -(first + i)));
10545
10546 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10547 -(first + size)));
10548 }
10549
10550 /* Otherwise, do the same as above, but in a loop. Note that we must be
10551 extra careful with variables wrapping around because we might be at
10552 the very top (or the very bottom) of the address space and we have
10553 to be able to handle this case properly; in particular, we use an
10554 equality test for the loop condition. */
10555 else
10556 {
10557 HOST_WIDE_INT rounded_size, last;
10558 struct scratch_reg sr;
10559
10560 get_scratch_register_on_entry (&sr);
10561
10562
10563 /* Step 1: round SIZE to the previous multiple of the interval. */
10564
10565 rounded_size = size & -PROBE_INTERVAL;
10566
10567
10568 /* Step 2: compute initial and final value of the loop counter. */
10569
10570 /* TEST_OFFSET = FIRST. */
10571 emit_move_insn (sr.reg, GEN_INT (-first));
10572
10573 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10574 last = first + rounded_size;
10575
10576
10577 /* Step 3: the loop
10578
10579 while (TEST_ADDR != LAST_ADDR)
10580 {
10581 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10582 probe at TEST_ADDR
10583 }
10584
10585 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10586 until it is equal to ROUNDED_SIZE. */
10587
10588 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10589
10590
10591 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10592 that SIZE is equal to ROUNDED_SIZE. */
10593
10594 if (size != rounded_size)
10595 emit_stack_probe (plus_constant (Pmode,
10596 gen_rtx_PLUS (Pmode,
10597 stack_pointer_rtx,
10598 sr.reg),
10599 rounded_size - size));
10600
10601 release_scratch_register_on_entry (&sr);
10602 }
10603
10604 /* Make sure nothing is scheduled before we are done. */
10605 emit_insn (gen_blockage ());
10606 }
10607
10608 /* Probe a range of stack addresses from REG to END, inclusive. These are
10609 offsets from the current stack pointer. */
10610
10611 const char *
10612 output_probe_stack_range (rtx reg, rtx end)
10613 {
10614 static int labelno = 0;
10615 char loop_lab[32], end_lab[32];
10616 rtx xops[3];
10617
10618 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10619 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10620
10621 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10622
10623 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10624 xops[0] = reg;
10625 xops[1] = end;
10626 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10627 fputs ("\tje\t", asm_out_file);
10628 assemble_name_raw (asm_out_file, end_lab);
10629 fputc ('\n', asm_out_file);
10630
10631 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10632 xops[1] = GEN_INT (PROBE_INTERVAL);
10633 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10634
10635 /* Probe at TEST_ADDR. */
10636 xops[0] = stack_pointer_rtx;
10637 xops[1] = reg;
10638 xops[2] = const0_rtx;
10639 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10640
10641 fprintf (asm_out_file, "\tjmp\t");
10642 assemble_name_raw (asm_out_file, loop_lab);
10643 fputc ('\n', asm_out_file);
10644
10645 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10646
10647 return "";
10648 }
10649
10650 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10651 to be generated in correct form. */
10652 static void
10653 ix86_finalize_stack_realign_flags (void)
10654 {
10655 /* Check if stack realignment is really needed after reload, and
10656 store the result in cfun. */
10657 unsigned int incoming_stack_boundary
10658 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10659 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10660 unsigned int stack_realign = (incoming_stack_boundary
10661 < (crtl->is_leaf
10662 ? crtl->max_used_stack_slot_alignment
10663 : crtl->stack_alignment_needed));
10664
10665 if (crtl->stack_realign_finalized)
10666 {
10667 /* After stack_realign_needed is finalized, we can no longer
10668 change it. */
10669 gcc_assert (crtl->stack_realign_needed == stack_realign);
10670 return;
10671 }
10672
10673 /* If the only reason for frame_pointer_needed is that we conservatively
10674 assumed stack realignment might be needed, but in the end nothing that
10675 needed the stack alignment had been spilled, clear frame_pointer_needed
10676 and say we don't need stack realignment. */
10677 if (stack_realign
10678 && frame_pointer_needed
10679 && crtl->is_leaf
10680 && flag_omit_frame_pointer
10681 && crtl->sp_is_unchanging
10682 && !ix86_current_function_calls_tls_descriptor
10683 && !crtl->accesses_prior_frames
10684 && !cfun->calls_alloca
10685 && !crtl->calls_eh_return
10686 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10687 && !ix86_frame_pointer_required ()
10688 && get_frame_size () == 0
10689 && ix86_nsaved_sseregs () == 0
10690 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10691 {
10692 HARD_REG_SET set_up_by_prologue, prologue_used;
10693 basic_block bb;
10694
10695 CLEAR_HARD_REG_SET (prologue_used);
10696 CLEAR_HARD_REG_SET (set_up_by_prologue);
10697 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10698 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10699 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10700 HARD_FRAME_POINTER_REGNUM);
10701 FOR_EACH_BB_FN (bb, cfun)
10702 {
10703 rtx_insn *insn;
10704 FOR_BB_INSNS (bb, insn)
10705 if (NONDEBUG_INSN_P (insn)
10706 && requires_stack_frame_p (insn, prologue_used,
10707 set_up_by_prologue))
10708 {
10709 crtl->stack_realign_needed = stack_realign;
10710 crtl->stack_realign_finalized = true;
10711 return;
10712 }
10713 }
10714
10715 /* If drap has been set, but it actually isn't live at the start
10716 of the function, there is no reason to set it up. */
10717 if (crtl->drap_reg)
10718 {
10719 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10720 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10721 {
10722 crtl->drap_reg = NULL_RTX;
10723 crtl->need_drap = false;
10724 }
10725 }
10726 else
10727 cfun->machine->no_drap_save_restore = true;
10728
10729 frame_pointer_needed = false;
10730 stack_realign = false;
10731 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10732 crtl->stack_alignment_needed = incoming_stack_boundary;
10733 crtl->stack_alignment_estimated = incoming_stack_boundary;
10734 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10735 crtl->preferred_stack_boundary = incoming_stack_boundary;
10736 df_finish_pass (true);
10737 df_scan_alloc (NULL);
10738 df_scan_blocks ();
10739 df_compute_regs_ever_live (true);
10740 df_analyze ();
10741 }
10742
10743 crtl->stack_realign_needed = stack_realign;
10744 crtl->stack_realign_finalized = true;
10745 }
10746
10747 /* Expand the prologue into a bunch of separate insns. */
10748
10749 void
10750 ix86_expand_prologue (void)
10751 {
10752 struct machine_function *m = cfun->machine;
10753 rtx insn, t;
10754 bool pic_reg_used;
10755 struct ix86_frame frame;
10756 HOST_WIDE_INT allocate;
10757 bool int_registers_saved;
10758 bool sse_registers_saved;
10759
10760 ix86_finalize_stack_realign_flags ();
10761
10762 /* DRAP should not coexist with stack_realign_fp */
10763 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10764
10765 memset (&m->fs, 0, sizeof (m->fs));
10766
10767 /* Initialize CFA state for before the prologue. */
10768 m->fs.cfa_reg = stack_pointer_rtx;
10769 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10770
10771 /* Track SP offset to the CFA. We continue tracking this after we've
10772 swapped the CFA register away from SP. In the case of re-alignment
10773 this is fudged; we're interested in offsets within the local frame. */
10774 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10775 m->fs.sp_valid = true;
10776
10777 ix86_compute_frame_layout (&frame);
10778
10779 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10780 {
10781 /* We should have already generated an error for any use of
10782 ms_hook on a nested function. */
10783 gcc_checking_assert (!ix86_static_chain_on_stack);
10784
10785 /* Check if profiling is active and we shall use the profiling-before-
10786 prologue variant. If so, sorry. */
10787 if (crtl->profile && flag_fentry != 0)
10788 sorry ("ms_hook_prologue attribute isn%'t compatible "
10789 "with -mfentry for 32-bit");
10790
10791 /* In ix86_asm_output_function_label we emitted:
10792 8b ff movl.s %edi,%edi
10793 55 push %ebp
10794 8b ec movl.s %esp,%ebp
10795
10796 This matches the hookable function prologue in Win32 API
10797 functions in Microsoft Windows XP Service Pack 2 and newer.
10798 Wine uses this to enable Windows apps to hook the Win32 API
10799 functions provided by Wine.
10800
10801 What that means is that we've already set up the frame pointer. */
10802
10803 if (frame_pointer_needed
10804 && !(crtl->drap_reg && crtl->stack_realign_needed))
10805 {
10806 rtx push, mov;
10807
10808 /* We've decided to use the frame pointer already set up.
10809 Describe this to the unwinder by pretending that both
10810 push and mov insns happen right here.
10811
10812 Putting the unwind info here at the end of the ms_hook
10813 is done so that we can make absolutely certain we get
10814 the required byte sequence at the start of the function,
10815 rather than relying on an assembler that can produce
10816 the exact encoding required.
10817
10818 However it does mean (in the unpatched case) that we have
10819 a 1 insn window where the asynchronous unwind info is
10820 incorrect. However, if we placed the unwind info at
10821 its correct location we would have incorrect unwind info
10822 in the patched case. Which is probably all moot since
10823 I don't expect Wine generates dwarf2 unwind info for the
10824 system libraries that use this feature. */
10825
10826 insn = emit_insn (gen_blockage ());
10827
10828 push = gen_push (hard_frame_pointer_rtx);
10829 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10830 stack_pointer_rtx);
10831 RTX_FRAME_RELATED_P (push) = 1;
10832 RTX_FRAME_RELATED_P (mov) = 1;
10833
10834 RTX_FRAME_RELATED_P (insn) = 1;
10835 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10836 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10837
10838 /* Note that gen_push incremented m->fs.cfa_offset, even
10839 though we didn't emit the push insn here. */
10840 m->fs.cfa_reg = hard_frame_pointer_rtx;
10841 m->fs.fp_offset = m->fs.cfa_offset;
10842 m->fs.fp_valid = true;
10843 }
10844 else
10845 {
10846 /* The frame pointer is not needed so pop %ebp again.
10847 This leaves us with a pristine state. */
10848 emit_insn (gen_pop (hard_frame_pointer_rtx));
10849 }
10850 }
10851
10852 /* The first insn of a function that accepts its static chain on the
10853 stack is to push the register that would be filled in by a direct
10854 call. This insn will be skipped by the trampoline. */
10855 else if (ix86_static_chain_on_stack)
10856 {
10857 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10858 emit_insn (gen_blockage ());
10859
10860 /* We don't want to interpret this push insn as a register save,
10861 only as a stack adjustment. The real copy of the register as
10862 a save will be done later, if needed. */
10863 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10864 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10865 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10866 RTX_FRAME_RELATED_P (insn) = 1;
10867 }
10868
10869 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10870 DRAP is needed and stack realignment is really needed after reload. */
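/* Schematically (a 32-bit illustration only, assuming 16-byte alignment and
   the DRAP register in %ecx), the code below emits something like

     lea	N(%esp), %ecx	; N = offset of the incoming argument pointer
     and	$-16, %esp	; align the stack
     push	-4(%ecx)	; replicate the return address

   preceded by a push of the DRAP register when it has to be preserved.  */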
10871 if (stack_realign_drap)
10872 {
10873 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10874
10875 /* Only need to push the parameter pointer reg if it is call-saved. */
10876 if (!call_used_regs[REGNO (crtl->drap_reg)])
10877 {
10878 /* Push arg pointer reg */
10879 insn = emit_insn (gen_push (crtl->drap_reg));
10880 RTX_FRAME_RELATED_P (insn) = 1;
10881 }
10882
10883 /* Grab the argument pointer. */
10884 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10885 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10886 RTX_FRAME_RELATED_P (insn) = 1;
10887 m->fs.cfa_reg = crtl->drap_reg;
10888 m->fs.cfa_offset = 0;
10889
10890 /* Align the stack. */
10891 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10892 stack_pointer_rtx,
10893 GEN_INT (-align_bytes)));
10894 RTX_FRAME_RELATED_P (insn) = 1;
10895
10896 /* Replicate the return address on the stack so that the return
10897 address can be reached via the (argp - 1) slot. This is needed
10898 to implement macro RETURN_ADDR_RTX and intrinsic function
10899 expand_builtin_return_addr etc. */
10900 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10901 t = gen_frame_mem (word_mode, t);
10902 insn = emit_insn (gen_push (t));
10903 RTX_FRAME_RELATED_P (insn) = 1;
10904
10905 /* For the purposes of frame and register save area addressing,
10906 we've started over with a new frame. */
10907 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10908 m->fs.realigned = true;
10909 }
10910
10911 int_registers_saved = (frame.nregs == 0);
10912 sse_registers_saved = (frame.nsseregs == 0);
10913
10914 if (frame_pointer_needed && !m->fs.fp_valid)
10915 {
10916 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10917 slower on all targets. Also sdb doesn't like it. */
10918 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10919 RTX_FRAME_RELATED_P (insn) = 1;
10920
10921 /* Push registers now, before setting the frame pointer
10922 on SEH target. */
10923 if (!int_registers_saved
10924 && TARGET_SEH
10925 && !frame.save_regs_using_mov)
10926 {
10927 ix86_emit_save_regs ();
10928 int_registers_saved = true;
10929 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10930 }
10931
10932 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10933 {
10934 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10935 RTX_FRAME_RELATED_P (insn) = 1;
10936
10937 if (m->fs.cfa_reg == stack_pointer_rtx)
10938 m->fs.cfa_reg = hard_frame_pointer_rtx;
10939 m->fs.fp_offset = m->fs.sp_offset;
10940 m->fs.fp_valid = true;
10941 }
10942 }
10943
10944 if (!int_registers_saved)
10945 {
10946 /* If saving registers via PUSH, do so now. */
10947 if (!frame.save_regs_using_mov)
10948 {
10949 ix86_emit_save_regs ();
10950 int_registers_saved = true;
10951 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10952 }
10953
10954 /* When using the red zone we may start register saving before allocating
10955 the stack frame, saving one cycle of the prologue. However, avoid
10956 doing this if we have to probe the stack; at least on x86_64 the
10957 stack probe can turn into a call that clobbers a red zone location. */
10958 else if (ix86_using_red_zone ()
10959 && (! TARGET_STACK_PROBE
10960 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10961 {
10962 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10963 int_registers_saved = true;
10964 }
10965 }
10966
10967 if (stack_realign_fp)
10968 {
10969 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10970 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10971
10972 /* The computation of the size of the re-aligned stack frame means
10973 that we must allocate the size of the register save area before
10974 performing the actual alignment. Otherwise we cannot guarantee
10975 that there's enough storage above the realignment point. */
10976 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10977 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10978 GEN_INT (m->fs.sp_offset
10979 - frame.sse_reg_save_offset),
10980 -1, false);
10981
10982 /* Align the stack. */
10983 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10984 stack_pointer_rtx,
10985 GEN_INT (-align_bytes)));
10986
10987 /* For the purposes of register save area addressing, the stack
10988 pointer is no longer valid. As for the value of sp_offset,
10989 see ix86_compute_frame_layout, which we need to match in order
10990 to pass verification of stack_pointer_offset at the end. */
10991 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10992 m->fs.sp_valid = false;
10993 }
10994
10995 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10996
10997 if (flag_stack_usage_info)
10998 {
10999 /* We start to count from ARG_POINTER. */
11000 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11001
11002 /* If it was realigned, take into account the fake frame. */
11003 if (stack_realign_drap)
11004 {
11005 if (ix86_static_chain_on_stack)
11006 stack_size += UNITS_PER_WORD;
11007
11008 if (!call_used_regs[REGNO (crtl->drap_reg)])
11009 stack_size += UNITS_PER_WORD;
11010
11011 /* This over-estimates by 1 minimal-stack-alignment-unit but
11012 mitigates that by counting in the new return address slot. */
11013 current_function_dynamic_stack_size
11014 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11015 }
11016
11017 current_function_static_stack_size = stack_size;
11018 }
11019
11020 /* On SEH target with very large frame size, allocate an area to save
11021 SSE registers (as the very large allocation won't be described). */
11022 if (TARGET_SEH
11023 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11024 && !sse_registers_saved)
11025 {
11026 HOST_WIDE_INT sse_size =
11027 frame.sse_reg_save_offset - frame.reg_save_offset;
11028
11029 gcc_assert (int_registers_saved);
11030
11031 /* No need to do stack checking as the area will be immediately
11032 written. */
11033 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11034 GEN_INT (-sse_size), -1,
11035 m->fs.cfa_reg == stack_pointer_rtx);
11036 allocate -= sse_size;
11037 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11038 sse_registers_saved = true;
11039 }
11040
11041 /* The stack has already been decremented by the instruction calling us
11042 so probe if the size is non-negative to preserve the protection area. */
11043 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11044 {
11045 /* We expect the registers to be saved when probes are used. */
11046 gcc_assert (int_registers_saved);
11047
11048 if (STACK_CHECK_MOVING_SP)
11049 {
11050 if (!(crtl->is_leaf && !cfun->calls_alloca
11051 && allocate <= PROBE_INTERVAL))
11052 {
11053 ix86_adjust_stack_and_probe (allocate);
11054 allocate = 0;
11055 }
11056 }
11057 else
11058 {
11059 HOST_WIDE_INT size = allocate;
11060
11061 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11062 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11063
11064 if (TARGET_STACK_PROBE)
11065 {
11066 if (crtl->is_leaf && !cfun->calls_alloca)
11067 {
11068 if (size > PROBE_INTERVAL)
11069 ix86_emit_probe_stack_range (0, size);
11070 }
11071 else
11072 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11073 }
11074 else
11075 {
11076 if (crtl->is_leaf && !cfun->calls_alloca)
11077 {
11078 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11079 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11080 size - STACK_CHECK_PROTECT);
11081 }
11082 else
11083 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11084 }
11085 }
11086 }
11087
11088 if (allocate == 0)
11089 ;
11090 else if (!ix86_target_stack_probe ()
11091 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11092 {
11093 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11094 GEN_INT (-allocate), -1,
11095 m->fs.cfa_reg == stack_pointer_rtx);
11096 }
11097 else
11098 {
11099 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11100 rtx r10 = NULL;
11101 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11102 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11103 bool eax_live = ix86_eax_live_at_start_p ();
11104 bool r10_live = false;
11105
11106 if (TARGET_64BIT)
11107 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11108
11109 if (eax_live)
11110 {
11111 insn = emit_insn (gen_push (eax));
11112 allocate -= UNITS_PER_WORD;
11113 /* Note that SEH directives need to continue tracking the stack
11114 pointer even after the frame pointer has been set up. */
11115 if (sp_is_cfa_reg || TARGET_SEH)
11116 {
11117 if (sp_is_cfa_reg)
11118 m->fs.cfa_offset += UNITS_PER_WORD;
11119 RTX_FRAME_RELATED_P (insn) = 1;
11120 }
11121 }
11122
11123 if (r10_live)
11124 {
11125 r10 = gen_rtx_REG (Pmode, R10_REG);
11126 insn = emit_insn (gen_push (r10));
11127 allocate -= UNITS_PER_WORD;
11128 if (sp_is_cfa_reg || TARGET_SEH)
11129 {
11130 if (sp_is_cfa_reg)
11131 m->fs.cfa_offset += UNITS_PER_WORD;
11132 RTX_FRAME_RELATED_P (insn) = 1;
11133 }
11134 }
11135
11136 emit_move_insn (eax, GEN_INT (allocate));
11137 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11138
11139 /* Use the fact that AX still contains ALLOCATE. */
11140 adjust_stack_insn = (Pmode == DImode
11141 ? gen_pro_epilogue_adjust_stack_di_sub
11142 : gen_pro_epilogue_adjust_stack_si_sub);
11143
11144 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11145 stack_pointer_rtx, eax));
11146
11147 if (sp_is_cfa_reg || TARGET_SEH)
11148 {
11149 if (sp_is_cfa_reg)
11150 m->fs.cfa_offset += allocate;
11151 RTX_FRAME_RELATED_P (insn) = 1;
11152 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11153 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11154 plus_constant (Pmode, stack_pointer_rtx,
11155 -allocate)));
11156 }
11157 m->fs.sp_offset += allocate;
11158
11159 /* Use stack_pointer_rtx for relative addressing so that code
11160 works for realigned stack, too. */
11161 if (r10_live && eax_live)
11162 {
11163 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11164 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11165 gen_frame_mem (word_mode, t));
11166 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11167 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11168 gen_frame_mem (word_mode, t));
11169 }
11170 else if (eax_live || r10_live)
11171 {
11172 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11173 emit_move_insn (gen_rtx_REG (word_mode,
11174 (eax_live ? AX_REG : R10_REG)),
11175 gen_frame_mem (word_mode, t));
11176 }
11177 }
11178 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11179
11180 /* If we haven't already set up the frame pointer, do so now. */
11181 if (frame_pointer_needed && !m->fs.fp_valid)
11182 {
11183 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11184 GEN_INT (frame.stack_pointer_offset
11185 - frame.hard_frame_pointer_offset));
11186 insn = emit_insn (insn);
11187 RTX_FRAME_RELATED_P (insn) = 1;
11188 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11189
11190 if (m->fs.cfa_reg == stack_pointer_rtx)
11191 m->fs.cfa_reg = hard_frame_pointer_rtx;
11192 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11193 m->fs.fp_valid = true;
11194 }
11195
11196 if (!int_registers_saved)
11197 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11198 if (!sse_registers_saved)
11199 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11200
11201 pic_reg_used = false;
11202 /* We don't use a PIC register for the pe-coff target. */
11203 if (pic_offset_table_rtx
11204 && !TARGET_PECOFF
11205 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11206 || crtl->profile))
11207 {
11208 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
11209
11210 if (alt_pic_reg_used != INVALID_REGNUM)
11211 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
11212
11213 pic_reg_used = true;
11214 }
11215
11216 if (pic_reg_used)
11217 {
11218 if (TARGET_64BIT)
11219 {
11220 if (ix86_cmodel == CM_LARGE_PIC)
11221 {
11222 rtx_code_label *label;
11223 rtx tmp_reg;
11224
11225 gcc_assert (Pmode == DImode);
11226 label = gen_label_rtx ();
11227 emit_label (label);
11228 LABEL_PRESERVE_P (label) = 1;
11229 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
11230 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
11231 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
11232 label));
11233 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
11234 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
11235 pic_offset_table_rtx, tmp_reg));
11236 }
11237 else
11238 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
11239 }
11240 else
11241 {
11242 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
11243 RTX_FRAME_RELATED_P (insn) = 1;
11244 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11245 }
11246 }
11247
11248 /* In the pic_reg_used case, make sure that the got load isn't deleted
11249 when mcount needs it. Blockage to avoid call movement across mcount
11250 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11251 note. */
11252 if (crtl->profile && !flag_fentry && pic_reg_used)
11253 emit_insn (gen_prologue_use (pic_offset_table_rtx));
11254
11255 if (crtl->drap_reg && !crtl->stack_realign_needed)
11256 {
11257 /* vDRAP has been set up, but after reload it turns out stack realignment
11258 isn't necessary; emit prologue code to set up DRAP without the
11259 stack realignment adjustment. */
11260 t = choose_baseaddr (0);
11261 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11262 }
11263
11264 /* Prevent instructions from being scheduled into register save push
11265 sequence when access to the redzone area is done through frame pointer.
11266 The offset between the frame pointer and the stack pointer is calculated
11267 relative to the value of the stack pointer at the end of the function
11268 prologue, and moving instructions that access redzone area via frame
11269 pointer inside push sequence violates this assumption. */
11270 if (frame_pointer_needed && frame.red_zone_size)
11271 emit_insn (gen_memory_blockage ());
11272
11273 /* Emit cld instruction if stringops are used in the function. */
11274 if (TARGET_CLD && ix86_current_function_needs_cld)
11275 emit_insn (gen_cld ());
11276
11277 /* SEH requires that the prologue end within 256 bytes of the start of
11278 the function. Prevent instruction schedules that would extend that.
11279 Further, prevent alloca modifications to the stack pointer from being
11280 combined with prologue modifications. */
11281 if (TARGET_SEH)
11282 emit_insn (gen_prologue_use (stack_pointer_rtx));
11283 }
11284
11285 /* Emit code to restore REG using a POP insn. */
11286
11287 static void
11288 ix86_emit_restore_reg_using_pop (rtx reg)
11289 {
11290 struct machine_function *m = cfun->machine;
11291 rtx insn = emit_insn (gen_pop (reg));
11292
11293 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11294 m->fs.sp_offset -= UNITS_PER_WORD;
11295
11296 if (m->fs.cfa_reg == crtl->drap_reg
11297 && REGNO (reg) == REGNO (crtl->drap_reg))
11298 {
11299 /* Previously we'd represented the CFA as an expression
11300 like *(%ebp - 8). We've just popped that value from
11301 the stack, which means we need to reset the CFA to
11302 the drap register. This will remain until we restore
11303 the stack pointer. */
11304 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11305 RTX_FRAME_RELATED_P (insn) = 1;
11306
11307 /* This means that the DRAP register is valid for addressing too. */
11308 m->fs.drap_valid = true;
11309 return;
11310 }
11311
11312 if (m->fs.cfa_reg == stack_pointer_rtx)
11313 {
11314 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11315 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11316 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11317 RTX_FRAME_RELATED_P (insn) = 1;
11318
11319 m->fs.cfa_offset -= UNITS_PER_WORD;
11320 }
11321
11322 /* When the frame pointer is the CFA, and we pop it, we are
11323 swapping back to the stack pointer as the CFA. This happens
11324 for stack frames that don't allocate other data, so we assume
11325 the stack pointer is now pointing at the return address, i.e.
11326 the function entry state, which makes the offset be 1 word. */
11327 if (reg == hard_frame_pointer_rtx)
11328 {
11329 m->fs.fp_valid = false;
11330 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11331 {
11332 m->fs.cfa_reg = stack_pointer_rtx;
11333 m->fs.cfa_offset -= UNITS_PER_WORD;
11334
11335 add_reg_note (insn, REG_CFA_DEF_CFA,
11336 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11337 GEN_INT (m->fs.cfa_offset)));
11338 RTX_FRAME_RELATED_P (insn) = 1;
11339 }
11340 }
11341 }
11342
11343 /* Emit code to restore saved registers using POP insns. */
11344
11345 static void
11346 ix86_emit_restore_regs_using_pop (void)
11347 {
11348 unsigned int regno;
11349
11350 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11351 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11352 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11353 }
11354
11355 /* Emit code and notes for the LEAVE instruction. */
11356
11357 static void
11358 ix86_emit_leave (void)
11359 {
11360 struct machine_function *m = cfun->machine;
11361 rtx insn = emit_insn (ix86_gen_leave ());
11362
11363 ix86_add_queued_cfa_restore_notes (insn);
11364
11365 gcc_assert (m->fs.fp_valid);
11366 m->fs.sp_valid = true;
11367 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11368 m->fs.fp_valid = false;
11369
11370 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11371 {
11372 m->fs.cfa_reg = stack_pointer_rtx;
11373 m->fs.cfa_offset = m->fs.sp_offset;
11374
11375 add_reg_note (insn, REG_CFA_DEF_CFA,
11376 plus_constant (Pmode, stack_pointer_rtx,
11377 m->fs.sp_offset));
11378 RTX_FRAME_RELATED_P (insn) = 1;
11379 }
11380 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11381 m->fs.fp_offset);
11382 }
11383
11384 /* Emit code to restore saved registers using MOV insns.
11385 First register is restored from CFA - CFA_OFFSET. */
11386 static void
11387 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11388 bool maybe_eh_return)
11389 {
11390 struct machine_function *m = cfun->machine;
11391 unsigned int regno;
11392
11393 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11394 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11395 {
11396 rtx reg = gen_rtx_REG (word_mode, regno);
11397 rtx insn, mem;
11398
11399 mem = choose_baseaddr (cfa_offset);
11400 mem = gen_frame_mem (word_mode, mem);
11401 insn = emit_move_insn (reg, mem);
11402
11403 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11404 {
11405 /* Previously we'd represented the CFA as an expression
11406 like *(%ebp - 8). We've just popped that value from
11407 the stack, which means we need to reset the CFA to
11408 the drap register. This will remain until we restore
11409 the stack pointer. */
11410 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11411 RTX_FRAME_RELATED_P (insn) = 1;
11412
11413 /* This means that the DRAP register is valid for addressing. */
11414 m->fs.drap_valid = true;
11415 }
11416 else
11417 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11418
11419 cfa_offset -= UNITS_PER_WORD;
11420 }
11421 }
11422
11423 /* Emit code to restore saved SSE registers using MOV insns.
11424 First register is restored from CFA - CFA_OFFSET. */
11425 static void
11426 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11427 bool maybe_eh_return)
11428 {
11429 unsigned int regno;
11430
11431 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11432 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11433 {
11434 rtx reg = gen_rtx_REG (V4SFmode, regno);
11435 rtx mem;
11436
11437 mem = choose_baseaddr (cfa_offset);
11438 mem = gen_rtx_MEM (V4SFmode, mem);
11439 set_mem_align (mem, 128);
11440 emit_move_insn (reg, mem);
11441
11442 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11443
11444 cfa_offset -= 16;
11445 }
11446 }
11447
11448 /* Restore function stack, frame, and registers. */
11449
11450 void
11451 ix86_expand_epilogue (int style)
11452 {
11453 struct machine_function *m = cfun->machine;
11454 struct machine_frame_state frame_state_save = m->fs;
11455 struct ix86_frame frame;
11456 bool restore_regs_via_mov;
11457 bool using_drap;
11458
11459 ix86_finalize_stack_realign_flags ();
11460 ix86_compute_frame_layout (&frame);
11461
11462 m->fs.sp_valid = (!frame_pointer_needed
11463 || (crtl->sp_is_unchanging
11464 && !stack_realign_fp));
11465 gcc_assert (!m->fs.sp_valid
11466 || m->fs.sp_offset == frame.stack_pointer_offset);
11467
11468 /* The FP must be valid if the frame pointer is present. */
11469 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11470 gcc_assert (!m->fs.fp_valid
11471 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11472
11473 /* We must have *some* valid pointer to the stack frame. */
11474 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11475
11476 /* The DRAP is never valid at this point. */
11477 gcc_assert (!m->fs.drap_valid);
11478
11479 /* See the comment about red zone and frame
11480 pointer usage in ix86_expand_prologue. */
11481 if (frame_pointer_needed && frame.red_zone_size)
11482 emit_insn (gen_memory_blockage ());
11483
11484 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11485 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11486
11487 /* Determine the CFA offset of the end of the red-zone. */
11488 m->fs.red_zone_offset = 0;
11489 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11490 {
11491 /* The red-zone begins below the return address. */
11492 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11493
11494 /* When the register save area is in the aligned portion of
11495 the stack, determine the maximum runtime displacement that
11496 matches up with the aligned frame. */
11497 if (stack_realign_drap)
11498 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11499 + UNITS_PER_WORD);
11500 }
11501
11502 /* Special care must be taken for the normal return case of a function
11503 using eh_return: the eax and edx registers are marked as saved, but
11504 not restored along this path. Adjust the save location to match. */
11505 if (crtl->calls_eh_return && style != 2)
11506 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11507
11508 /* EH_RETURN requires the use of moves to function properly. */
11509 if (crtl->calls_eh_return)
11510 restore_regs_via_mov = true;
11511 /* SEH requires the use of pops to identify the epilogue. */
11512 else if (TARGET_SEH)
11513 restore_regs_via_mov = false;
11514 /* If we're only restoring one register and sp is not valid, then
11515 use a move instruction to restore the register, since it's
11516 less work than reloading sp and popping the register. */
11517 else if (!m->fs.sp_valid && frame.nregs <= 1)
11518 restore_regs_via_mov = true;
11519 else if (TARGET_EPILOGUE_USING_MOVE
11520 && cfun->machine->use_fast_prologue_epilogue
11521 && (frame.nregs > 1
11522 || m->fs.sp_offset != frame.reg_save_offset))
11523 restore_regs_via_mov = true;
11524 else if (frame_pointer_needed
11525 && !frame.nregs
11526 && m->fs.sp_offset != frame.reg_save_offset)
11527 restore_regs_via_mov = true;
11528 else if (frame_pointer_needed
11529 && TARGET_USE_LEAVE
11530 && cfun->machine->use_fast_prologue_epilogue
11531 && frame.nregs == 1)
11532 restore_regs_via_mov = true;
11533 else
11534 restore_regs_via_mov = false;
11535
11536 if (restore_regs_via_mov || frame.nsseregs)
11537 {
11538 /* Ensure that the entire register save area is addressable via
11539 the stack pointer, if we will restore via sp. */
11540 if (TARGET_64BIT
11541 && m->fs.sp_offset > 0x7fffffff
11542 && !(m->fs.fp_valid || m->fs.drap_valid)
11543 && (frame.nsseregs + frame.nregs) != 0)
11544 {
11545 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11546 GEN_INT (m->fs.sp_offset
11547 - frame.sse_reg_save_offset),
11548 style,
11549 m->fs.cfa_reg == stack_pointer_rtx);
11550 }
11551 }
11552
11553 /* If there are any SSE registers to restore, then we have to do it
11554 via moves, since there's obviously no pop for SSE regs. */
11555 if (frame.nsseregs)
11556 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11557 style == 2);
11558
11559 if (restore_regs_via_mov)
11560 {
11561 rtx t;
11562
11563 if (frame.nregs)
11564 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11565
11566 /* eh_return epilogues need %ecx added to the stack pointer. */
11567 if (style == 2)
11568 {
11569 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11570
11571 /* Stack align doesn't work with eh_return. */
11572 gcc_assert (!stack_realign_drap);
11573 /* Neither do regparm nested functions. */
11574 gcc_assert (!ix86_static_chain_on_stack);
11575
11576 if (frame_pointer_needed)
11577 {
11578 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11579 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11580 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11581
11582 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11583 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11584
11585 /* Note that we use SA as a temporary CFA, as the return
11586 address is at the proper place relative to it. We
11587 pretend this happens at the FP restore insn because
11588 prior to this insn the FP would be stored at the wrong
11589 offset relative to SA, and after this insn we have no
11590 other reasonable register to use for the CFA. We don't
11591 bother resetting the CFA to the SP for the duration of
11592 the return insn. */
11593 add_reg_note (insn, REG_CFA_DEF_CFA,
11594 plus_constant (Pmode, sa, UNITS_PER_WORD));
11595 ix86_add_queued_cfa_restore_notes (insn);
11596 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11597 RTX_FRAME_RELATED_P (insn) = 1;
11598
11599 m->fs.cfa_reg = sa;
11600 m->fs.cfa_offset = UNITS_PER_WORD;
11601 m->fs.fp_valid = false;
11602
11603 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11604 const0_rtx, style, false);
11605 }
11606 else
11607 {
11608 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11609 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11610 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11611 ix86_add_queued_cfa_restore_notes (insn);
11612
11613 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11614 if (m->fs.cfa_offset != UNITS_PER_WORD)
11615 {
11616 m->fs.cfa_offset = UNITS_PER_WORD;
11617 add_reg_note (insn, REG_CFA_DEF_CFA,
11618 plus_constant (Pmode, stack_pointer_rtx,
11619 UNITS_PER_WORD));
11620 RTX_FRAME_RELATED_P (insn) = 1;
11621 }
11622 }
11623 m->fs.sp_offset = UNITS_PER_WORD;
11624 m->fs.sp_valid = true;
11625 }
11626 }
11627 else
11628 {
11629 /* SEH requires that the function end with (1) a stack adjustment
11630 if necessary, (2) a sequence of pops, and (3) a return or
11631 jump instruction. Prevent insns from the function body from
11632 being scheduled into this sequence. */
11633 if (TARGET_SEH)
11634 {
11635 /* Prevent a catch region from being adjacent to the standard
11636 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11637 several other flags that would be interesting to test are
11638 set up yet. */
11639 if (flag_non_call_exceptions)
11640 emit_insn (gen_nops (const1_rtx));
11641 else
11642 emit_insn (gen_blockage ());
11643 }
11644
11645 /* First step is to deallocate the stack frame so that we can
11646 pop the registers. Also do it on the SEH target for very large
11647 frames, as the emitted instructions aren't allowed by the ABI in
11648 epilogues. */
11649 if (!m->fs.sp_valid
11650 || (TARGET_SEH
11651 && (m->fs.sp_offset - frame.reg_save_offset
11652 >= SEH_MAX_FRAME_SIZE)))
11653 {
11654 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11655 GEN_INT (m->fs.fp_offset
11656 - frame.reg_save_offset),
11657 style, false);
11658 }
11659 else if (m->fs.sp_offset != frame.reg_save_offset)
11660 {
11661 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11662 GEN_INT (m->fs.sp_offset
11663 - frame.reg_save_offset),
11664 style,
11665 m->fs.cfa_reg == stack_pointer_rtx);
11666 }
11667
11668 ix86_emit_restore_regs_using_pop ();
11669 }
11670
11671 /* If we used a frame pointer and haven't already got rid of it,
11672 then do so now. */
11673 if (m->fs.fp_valid)
11674 {
11675 /* If the stack pointer is valid and pointing at the frame
11676 pointer store address, then we only need a pop. */
11677 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11678 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11679 /* Using "leave" results in shorter dependency chains on CPUs that
11680 are able to execute it fast. */
11681 else if (TARGET_USE_LEAVE
11682 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11683 || !cfun->machine->use_fast_prologue_epilogue)
11684 ix86_emit_leave ();
11685 else
11686 {
11687 pro_epilogue_adjust_stack (stack_pointer_rtx,
11688 hard_frame_pointer_rtx,
11689 const0_rtx, style, !using_drap);
11690 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11691 }
11692 }
11693
11694 if (using_drap)
11695 {
11696 int param_ptr_offset = UNITS_PER_WORD;
11697 rtx insn;
11698
11699 gcc_assert (stack_realign_drap);
11700
11701 if (ix86_static_chain_on_stack)
11702 param_ptr_offset += UNITS_PER_WORD;
11703 if (!call_used_regs[REGNO (crtl->drap_reg)])
11704 param_ptr_offset += UNITS_PER_WORD;
11705
11706 insn = emit_insn (gen_rtx_SET
11707 (VOIDmode, stack_pointer_rtx,
11708 gen_rtx_PLUS (Pmode,
11709 crtl->drap_reg,
11710 GEN_INT (-param_ptr_offset))));
11711 m->fs.cfa_reg = stack_pointer_rtx;
11712 m->fs.cfa_offset = param_ptr_offset;
11713 m->fs.sp_offset = param_ptr_offset;
11714 m->fs.realigned = false;
11715
11716 add_reg_note (insn, REG_CFA_DEF_CFA,
11717 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11718 GEN_INT (param_ptr_offset)));
11719 RTX_FRAME_RELATED_P (insn) = 1;
11720
11721 if (!call_used_regs[REGNO (crtl->drap_reg)])
11722 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11723 }
11724
11725 /* At this point the stack pointer must be valid, and we must have
11726 restored all of the registers. We may not have deallocated the
11727 entire stack frame. We've delayed this until now because it may
11728 be possible to merge the local stack deallocation with the
11729 deallocation forced by ix86_static_chain_on_stack. */
11730 gcc_assert (m->fs.sp_valid);
11731 gcc_assert (!m->fs.fp_valid);
11732 gcc_assert (!m->fs.realigned);
11733 if (m->fs.sp_offset != UNITS_PER_WORD)
11734 {
11735 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11736 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11737 style, true);
11738 }
11739 else
11740 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11741
11742 /* Sibcall epilogues don't want a return instruction. */
11743 if (style == 0)
11744 {
11745 m->fs = frame_state_save;
11746 return;
11747 }
11748
11749 if (crtl->args.pops_args && crtl->args.size)
11750 {
11751 rtx popc = GEN_INT (crtl->args.pops_args);
11752
11753 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
11754 address, do an explicit add, and jump indirectly to the caller. */
11755
11756 if (crtl->args.pops_args >= 65536)
11757 {
11758 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11759 rtx insn;
11760
11761 /* There is no "pascal" calling convention in any 64bit ABI. */
11762 gcc_assert (!TARGET_64BIT);
11763
11764 insn = emit_insn (gen_pop (ecx));
11765 m->fs.cfa_offset -= UNITS_PER_WORD;
11766 m->fs.sp_offset -= UNITS_PER_WORD;
11767
11768 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11769 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11770 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11771 add_reg_note (insn, REG_CFA_REGISTER,
11772 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11773 RTX_FRAME_RELATED_P (insn) = 1;
11774
11775 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11776 popc, -1, true);
11777 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11778 }
11779 else
11780 emit_jump_insn (gen_simple_return_pop_internal (popc));
11781 }
11782 else
11783 emit_jump_insn (gen_simple_return_internal ());
11784
11785 /* Restore the state to what it was after the prologue,
11786 so that it's correct for the next epilogue. */
11787 m->fs = frame_state_save;
11788 }
11789
11790 /* Reset from the function's potential modifications. */
11791
11792 static void
11793 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
11794 {
11795 if (pic_offset_table_rtx)
11796 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11797 #if TARGET_MACHO
11798 /* Mach-O doesn't support labels at the end of objects, so if
11799 it looks like we might want one, insert a NOP. */
11800 {
11801 rtx_insn *insn = get_last_insn ();
11802 rtx_insn *deleted_debug_label = NULL;
11803 while (insn
11804 && NOTE_P (insn)
11805 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11806 {
11807 /* Don't insert a nop when only NOTE_INSN_DELETED_DEBUG_LABEL
11808 notes are found; instead set their CODE_LABEL_NUMBER to -1,
11809 otherwise there would be code generation differences
11810 between -g and -g0. */
11811 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11812 deleted_debug_label = insn;
11813 insn = PREV_INSN (insn);
11814 }
11815 if (insn
11816 && (LABEL_P (insn)
11817 || (NOTE_P (insn)
11818 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11819 fputs ("\tnop\n", file);
11820 else if (deleted_debug_label)
11821 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11822 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11823 CODE_LABEL_NUMBER (insn) = -1;
11824 }
11825 #endif
11826
11827 }
11828
11829 /* Return a scratch register to use in the split stack prologue. The
11830 split stack prologue is used for -fsplit-stack. It consists of the
11831 first instructions in the function, emitted even before the regular
11832 prologue. The scratch register can be any caller-saved register which
11833 is not used for parameters or for the static chain. */
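/* Illustrative summary of the choices made below:
     64-bit:               %r11
     32-bit fastcall:      %eax (nested functions unsupported)
     32-bit thiscall:      %edx, or %eax for nested functions
     32-bit regparm < 3:   %ecx, or %edx for nested functions
                           (unsupported when regparm >= 2)
     32-bit regparm == 3:  unsupported.  */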
11834
11835 static unsigned int
11836 split_stack_prologue_scratch_regno (void)
11837 {
11838 if (TARGET_64BIT)
11839 return R11_REG;
11840 else
11841 {
11842 bool is_fastcall, is_thiscall;
11843 int regparm;
11844
11845 is_fastcall = (lookup_attribute ("fastcall",
11846 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11847 != NULL);
11848 is_thiscall = (lookup_attribute ("thiscall",
11849 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11850 != NULL);
11851 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11852
11853 if (is_fastcall)
11854 {
11855 if (DECL_STATIC_CHAIN (cfun->decl))
11856 {
11857 sorry ("-fsplit-stack does not support fastcall with "
11858 "nested function");
11859 return INVALID_REGNUM;
11860 }
11861 return AX_REG;
11862 }
11863 else if (is_thiscall)
11864 {
11865 if (!DECL_STATIC_CHAIN (cfun->decl))
11866 return DX_REG;
11867 return AX_REG;
11868 }
11869 else if (regparm < 3)
11870 {
11871 if (!DECL_STATIC_CHAIN (cfun->decl))
11872 return CX_REG;
11873 else
11874 {
11875 if (regparm >= 2)
11876 {
11877 sorry ("-fsplit-stack does not support 2 register "
11878 "parameters for a nested function");
11879 return INVALID_REGNUM;
11880 }
11881 return DX_REG;
11882 }
11883 }
11884 else
11885 {
11886 /* FIXME: We could make this work by pushing a register
11887 around the addition and comparison. */
11888 sorry ("-fsplit-stack does not support 3 register parameters");
11889 return INVALID_REGNUM;
11890 }
11891 }
11892 }
11893
11894 /* A SYMBOL_REF for the function which allocates new stack space for
11895 -fsplit-stack. */
11896
11897 static GTY(()) rtx split_stack_fn;
11898
11899 /* A SYMBOL_REF for the __morestack function to use when using the
11900 large code model. */
11901
11902 static GTY(()) rtx split_stack_fn_large;
11903
11904 /* Handle -fsplit-stack. These are the first instructions in the
11905 function, even before the regular prologue. */
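/* Roughly, the sequence emitted below is:
       lea     -FRAME_SIZE(%sp), %scratch    (only for large frames)
       cmp     %fs/%gs:<stack boundary>, ... (via UNSPEC_STACK_CHECK)
       jae     .Lenough
       call    __morestack                   (frame size and incoming arg size
                                              passed on the stack in 32-bit
                                              mode, in %r10/%r11 in 64-bit mode)
       ret                                   (split_stack_return unspec)
     .Lenough:
   plus an extra jump over the varargs-pointer setup when va_start is used.  */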
11906
11907 void
11908 ix86_expand_split_stack_prologue (void)
11909 {
11910 struct ix86_frame frame;
11911 HOST_WIDE_INT allocate;
11912 unsigned HOST_WIDE_INT args_size;
11913 rtx_code_label *label;
11914 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11915 rtx scratch_reg = NULL_RTX;
11916 rtx_code_label *varargs_label = NULL;
11917 rtx fn;
11918
11919 gcc_assert (flag_split_stack && reload_completed);
11920
11921 ix86_finalize_stack_realign_flags ();
11922 ix86_compute_frame_layout (&frame);
11923 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11924
11925 /* This is the label we will branch to if we have enough stack
11926 space. We expect the basic block reordering pass to reverse this
11927 branch if optimizing, so that we branch in the unlikely case. */
11928 label = gen_label_rtx ();
11929
11930 /* We need to compare the stack pointer minus the frame size with
11931 the stack boundary in the TCB. The stack boundary always gives
11932 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11933 can compare directly. Otherwise we need to do an addition. */
11934
11935 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11936 UNSPEC_STACK_CHECK);
11937 limit = gen_rtx_CONST (Pmode, limit);
11938 limit = gen_rtx_MEM (Pmode, limit);
11939 if (allocate < SPLIT_STACK_AVAILABLE)
11940 current = stack_pointer_rtx;
11941 else
11942 {
11943 unsigned int scratch_regno;
11944 rtx offset;
11945
11946 /* We need a scratch register to hold the stack pointer minus
11947 the required frame size. Since this is the very start of the
11948 function, the scratch register can be any caller-saved
11949 register which is not used for parameters. */
11950 offset = GEN_INT (- allocate);
11951 scratch_regno = split_stack_prologue_scratch_regno ();
11952 if (scratch_regno == INVALID_REGNUM)
11953 return;
11954 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11955 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11956 {
11957 /* We don't use ix86_gen_add3 in this case because it will
11958 want to split to lea, but when not optimizing the insn
11959 will not be split after this point. */
11960 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11961 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11962 offset)));
11963 }
11964 else
11965 {
11966 emit_move_insn (scratch_reg, offset);
11967 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11968 stack_pointer_rtx));
11969 }
11970 current = scratch_reg;
11971 }
11972
11973 ix86_expand_branch (GEU, current, limit, label);
11974 jump_insn = get_last_insn ();
11975 JUMP_LABEL (jump_insn) = label;
11976
11977 /* Mark the jump as very likely to be taken. */
11978 add_int_reg_note (jump_insn, REG_BR_PROB,
11979 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
11980
11981 if (split_stack_fn == NULL_RTX)
11982 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11983 fn = split_stack_fn;
11984
11985 /* Get more stack space. We pass in the desired stack space and the
11986 size of the arguments to copy to the new stack. In 32-bit mode
11987 we push the parameters; __morestack will return on a new stack
11988 anyhow. In 64-bit mode we pass the parameters in r10 and
11989 r11. */
11990 allocate_rtx = GEN_INT (allocate);
11991 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11992 call_fusage = NULL_RTX;
11993 if (TARGET_64BIT)
11994 {
11995 rtx reg10, reg11;
11996
11997 reg10 = gen_rtx_REG (Pmode, R10_REG);
11998 reg11 = gen_rtx_REG (Pmode, R11_REG);
11999
12000 /* If this function uses a static chain, it will be in %r10.
12001 Preserve it across the call to __morestack. */
12002 if (DECL_STATIC_CHAIN (cfun->decl))
12003 {
12004 rtx rax;
12005
12006 rax = gen_rtx_REG (word_mode, AX_REG);
12007 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12008 use_reg (&call_fusage, rax);
12009 }
12010
12011 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12012 && !TARGET_PECOFF)
12013 {
12014 HOST_WIDE_INT argval;
12015
12016 gcc_assert (Pmode == DImode);
12017 /* When using the large model we need to load the address
12018 into a register, and we've run out of registers. So we
12019 switch to a different calling convention, and we call a
12020 different function: __morestack_large_model. We pass the
12021 argument size in the upper 32 bits of r10 and pass the
12022 frame size in the lower 32 bits. */
12023 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12024 gcc_assert ((args_size & 0xffffffff) == args_size);
12025
12026 if (split_stack_fn_large == NULL_RTX)
12027 split_stack_fn_large =
12028 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12029
12030 if (ix86_cmodel == CM_LARGE_PIC)
12031 {
12032 rtx_code_label *label;
12033 rtx x;
12034
12035 label = gen_label_rtx ();
12036 emit_label (label);
12037 LABEL_PRESERVE_P (label) = 1;
12038 emit_insn (gen_set_rip_rex64 (reg10, label));
12039 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12040 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12041 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12042 UNSPEC_GOT);
12043 x = gen_rtx_CONST (Pmode, x);
12044 emit_move_insn (reg11, x);
12045 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12046 x = gen_const_mem (Pmode, x);
12047 emit_move_insn (reg11, x);
12048 }
12049 else
12050 emit_move_insn (reg11, split_stack_fn_large);
12051
12052 fn = reg11;
12053
12054 argval = ((args_size << 16) << 16) + allocate;
12055 emit_move_insn (reg10, GEN_INT (argval));
12056 }
12057 else
12058 {
12059 emit_move_insn (reg10, allocate_rtx);
12060 emit_move_insn (reg11, GEN_INT (args_size));
12061 use_reg (&call_fusage, reg11);
12062 }
12063
12064 use_reg (&call_fusage, reg10);
12065 }
12066 else
12067 {
12068 emit_insn (gen_push (GEN_INT (args_size)));
12069 emit_insn (gen_push (allocate_rtx));
12070 }
12071 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12072 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12073 NULL_RTX, false);
12074 add_function_usage_to (call_insn, call_fusage);
12075
12076 /* In order to make call/return prediction work right, we now need
12077 to execute a return instruction. See
12078 libgcc/config/i386/morestack.S for the details on how this works.
12079
12080 For flow purposes gcc must not see this as a return
12081 instruction--we need control flow to continue at the subsequent
12082 label. Therefore, we use an unspec. */
12083 gcc_assert (crtl->args.pops_args < 65536);
12084 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12085
12086 /* If we are in 64-bit mode and this function uses a static chain,
12087 we saved %r10 in %rax before calling __morestack. */
12088 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12089 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12090 gen_rtx_REG (word_mode, AX_REG));
12091
12092 /* If this function calls va_start, we need to store a pointer to
12093 the arguments on the old stack, because they may not have been
12094 all copied to the new stack. At this point the old stack can be
12095 found at the frame pointer value used by __morestack, because
12096 __morestack has set that up before calling back to us. Here we
12097 store that pointer in a scratch register, and in
12098 ix86_expand_prologue we store the scratch register in a stack
12099 slot. */
12100 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12101 {
12102 unsigned int scratch_regno;
12103 rtx frame_reg;
12104 int words;
12105
12106 scratch_regno = split_stack_prologue_scratch_regno ();
12107 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12108 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12109
12110 /* 64-bit:
12111 fp -> old fp value
12112 return address within this function
12113 return address of caller of this function
12114 stack arguments
12115 So we add three words to get to the stack arguments.
12116
12117 32-bit:
12118 fp -> old fp value
12119 return address within this function
12120 first argument to __morestack
12121 second argument to __morestack
12122 return address of caller of this function
12123 stack arguments
12124 So we add five words to get to the stack arguments.
12125 */
12126 words = TARGET_64BIT ? 3 : 5;
12127 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12128 gen_rtx_PLUS (Pmode, frame_reg,
12129 GEN_INT (words * UNITS_PER_WORD))));
12130
12131 varargs_label = gen_label_rtx ();
12132 emit_jump_insn (gen_jump (varargs_label));
12133 JUMP_LABEL (get_last_insn ()) = varargs_label;
12134
12135 emit_barrier ();
12136 }
12137
12138 emit_label (label);
12139 LABEL_NUSES (label) = 1;
12140
12141 /* If this function calls va_start, we now have to set the scratch
12142 register for the case where we do not call __morestack. In this
12143 case we need to set it based on the stack pointer. */
12144 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12145 {
12146 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12147 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12148 GEN_INT (UNITS_PER_WORD))));
12149
12150 emit_label (varargs_label);
12151 LABEL_NUSES (varargs_label) = 1;
12152 }
12153 }
12154
12155 /* We may have to tell the dataflow pass that the split stack prologue
12156 is initializing a scratch register. */
12157
12158 static void
12159 ix86_live_on_entry (bitmap regs)
12160 {
12161 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12162 {
12163 gcc_assert (flag_split_stack);
12164 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12165 }
12166 }
12167 \f
12168 /* Extract the parts of an RTL expression that is a valid memory address
12169 for an instruction. Return 0 if the structure of the address is
12170 grossly off. Return -1 if the address contains ASHIFT, so it is not
12171 strictly valid but is still used to compute the length of an lea insn. */
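/* For example, the address
     (plus:SI (plus:SI (reg:SI bx) (mult:SI (reg:SI cx) (const_int 4)))
              (const_int 12))
   i.e. the operand of "movl 12(%ebx,%ecx,4), ...", decomposes into
   base = %ebx, index = %ecx, scale = 4, disp = 12, seg = SEG_DEFAULT.  */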
12172
12173 int
12174 ix86_decompose_address (rtx addr, struct ix86_address *out)
12175 {
12176 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12177 rtx base_reg, index_reg;
12178 HOST_WIDE_INT scale = 1;
12179 rtx scale_rtx = NULL_RTX;
12180 rtx tmp;
12181 int retval = 1;
12182 enum ix86_address_seg seg = SEG_DEFAULT;
12183
12184 /* Allow zero-extended SImode addresses;
12185 they will be emitted with an addr32 prefix. */
12186 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12187 {
12188 if (GET_CODE (addr) == ZERO_EXTEND
12189 && GET_MODE (XEXP (addr, 0)) == SImode)
12190 {
12191 addr = XEXP (addr, 0);
12192 if (CONST_INT_P (addr))
12193 return 0;
12194 }
12195 else if (GET_CODE (addr) == AND
12196 && const_32bit_mask (XEXP (addr, 1), DImode))
12197 {
12198 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12199 if (addr == NULL_RTX)
12200 return 0;
12201
12202 if (CONST_INT_P (addr))
12203 return 0;
12204 }
12205 }
12206
12207 /* Allow SImode subregs of DImode addresses;
12208 they will be emitted with an addr32 prefix. */
12209 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12210 {
12211 if (GET_CODE (addr) == SUBREG
12212 && GET_MODE (SUBREG_REG (addr)) == DImode)
12213 {
12214 addr = SUBREG_REG (addr);
12215 if (CONST_INT_P (addr))
12216 return 0;
12217 }
12218 }
12219
12220 if (REG_P (addr))
12221 base = addr;
12222 else if (GET_CODE (addr) == SUBREG)
12223 {
12224 if (REG_P (SUBREG_REG (addr)))
12225 base = addr;
12226 else
12227 return 0;
12228 }
12229 else if (GET_CODE (addr) == PLUS)
12230 {
12231 rtx addends[4], op;
12232 int n = 0, i;
12233
12234 op = addr;
12235 do
12236 {
12237 if (n >= 4)
12238 return 0;
12239 addends[n++] = XEXP (op, 1);
12240 op = XEXP (op, 0);
12241 }
12242 while (GET_CODE (op) == PLUS);
12243 if (n >= 4)
12244 return 0;
12245 addends[n] = op;
12246
12247 for (i = n; i >= 0; --i)
12248 {
12249 op = addends[i];
12250 switch (GET_CODE (op))
12251 {
12252 case MULT:
12253 if (index)
12254 return 0;
12255 index = XEXP (op, 0);
12256 scale_rtx = XEXP (op, 1);
12257 break;
12258
12259 case ASHIFT:
12260 if (index)
12261 return 0;
12262 index = XEXP (op, 0);
12263 tmp = XEXP (op, 1);
12264 if (!CONST_INT_P (tmp))
12265 return 0;
12266 scale = INTVAL (tmp);
12267 if ((unsigned HOST_WIDE_INT) scale > 3)
12268 return 0;
12269 scale = 1 << scale;
12270 break;
12271
12272 case ZERO_EXTEND:
12273 op = XEXP (op, 0);
12274 if (GET_CODE (op) != UNSPEC)
12275 return 0;
12276 /* FALLTHRU */
12277
12278 case UNSPEC:
12279 if (XINT (op, 1) == UNSPEC_TP
12280 && TARGET_TLS_DIRECT_SEG_REFS
12281 && seg == SEG_DEFAULT)
12282 seg = DEFAULT_TLS_SEG_REG;
12283 else
12284 return 0;
12285 break;
12286
12287 case SUBREG:
12288 if (!REG_P (SUBREG_REG (op)))
12289 return 0;
12290 /* FALLTHRU */
12291
12292 case REG:
12293 if (!base)
12294 base = op;
12295 else if (!index)
12296 index = op;
12297 else
12298 return 0;
12299 break;
12300
12301 case CONST:
12302 case CONST_INT:
12303 case SYMBOL_REF:
12304 case LABEL_REF:
12305 if (disp)
12306 return 0;
12307 disp = op;
12308 break;
12309
12310 default:
12311 return 0;
12312 }
12313 }
12314 }
12315 else if (GET_CODE (addr) == MULT)
12316 {
12317 index = XEXP (addr, 0); /* index*scale */
12318 scale_rtx = XEXP (addr, 1);
12319 }
12320 else if (GET_CODE (addr) == ASHIFT)
12321 {
12322 /* We're called for lea too, which implements ashift on occasion. */
12323 index = XEXP (addr, 0);
12324 tmp = XEXP (addr, 1);
12325 if (!CONST_INT_P (tmp))
12326 return 0;
12327 scale = INTVAL (tmp);
12328 if ((unsigned HOST_WIDE_INT) scale > 3)
12329 return 0;
12330 scale = 1 << scale;
12331 retval = -1;
12332 }
12333 else
12334 disp = addr; /* displacement */
12335
12336 if (index)
12337 {
12338 if (REG_P (index))
12339 ;
12340 else if (GET_CODE (index) == SUBREG
12341 && REG_P (SUBREG_REG (index)))
12342 ;
12343 else
12344 return 0;
12345 }
12346
12347 /* Extract the integral value of scale. */
12348 if (scale_rtx)
12349 {
12350 if (!CONST_INT_P (scale_rtx))
12351 return 0;
12352 scale = INTVAL (scale_rtx);
12353 }
12354
12355 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12356 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12357
12358 /* Avoid useless 0 displacement. */
12359 if (disp == const0_rtx && (base || index))
12360 disp = NULL_RTX;
12361
12362 /* Allow arg pointer, frame pointer and stack pointer as index if there is no scaling. */
12363 if (base_reg && index_reg && scale == 1
12364 && (index_reg == arg_pointer_rtx
12365 || index_reg == frame_pointer_rtx
12366 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12367 {
12368 rtx tmp;
12369 tmp = base, base = index, index = tmp;
12370 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12371 }
12372
12373 /* Special case: %ebp cannot be encoded as a base without a displacement.
12374 Similarly %r13. */
12375 if (!disp
12376 && base_reg
12377 && (base_reg == hard_frame_pointer_rtx
12378 || base_reg == frame_pointer_rtx
12379 || base_reg == arg_pointer_rtx
12380 || (REG_P (base_reg)
12381 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12382 || REGNO (base_reg) == R13_REG))))
12383 disp = const0_rtx;
12384
12385 /* Special case: on K6, [%esi] causes the instruction to be vector
12386 decoded. Avoid this by transforming it to [%esi+0].
12387 Reload calls address legitimization without cfun defined, so we need
12388 to test cfun for being non-NULL. */
12389 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12390 && base_reg && !index_reg && !disp
12391 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12392 disp = const0_rtx;
12393
12394 /* Special case: encode reg+reg instead of reg*2. */
12395 if (!base && index && scale == 2)
12396 base = index, base_reg = index_reg, scale = 1;
12397
12398 /* Special case: scaling cannot be encoded without base or displacement. */
12399 if (!base && !disp && index && scale != 1)
12400 disp = const0_rtx;
12401
12402 out->base = base;
12403 out->index = index;
12404 out->disp = disp;
12405 out->scale = scale;
12406 out->seg = seg;
12407
12408 return retval;
12409 }
12410 \f
12411 /* Return the cost of the memory address x.
12412 For i386, it is better to use a complex address than to let gcc copy
12413 the address into a reg and make a new pseudo. But not if the address
12414 requires two regs - that would mean more pseudos with longer
12415 lifetimes. */
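/* Roughly: the cost is 1 for a simple address, plus 1 if the base or
   index is not a hard register, plus 1 more if base and index are two
   distinct non-hard registers, plus a large AMD-K6 decode penalty below.  */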
12416 static int
12417 ix86_address_cost (rtx x, enum machine_mode, addr_space_t, bool)
12418 {
12419 struct ix86_address parts;
12420 int cost = 1;
12421 int ok = ix86_decompose_address (x, &parts);
12422
12423 gcc_assert (ok);
12424
12425 if (parts.base && GET_CODE (parts.base) == SUBREG)
12426 parts.base = SUBREG_REG (parts.base);
12427 if (parts.index && GET_CODE (parts.index) == SUBREG)
12428 parts.index = SUBREG_REG (parts.index);
12429
12430 /* Attempt to minimize number of registers in the address. */
12431 if ((parts.base
12432 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12433 || (parts.index
12434 && (!REG_P (parts.index)
12435 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12436 cost++;
12437
12438 if (parts.base
12439 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12440 && parts.index
12441 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12442 && parts.base != parts.index)
12443 cost++;
12444
12445 /* AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
12446 since its predecode logic can't detect the length of instructions
12447 and decoding degenerates to vector decode. Increase the cost of such
12448 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12449 to split such addresses or even to refuse them entirely.
12450
12451 The following addressing modes are affected:
12452 [base+scale*index]
12453 [scale*index+disp]
12454 [base+index]
12455
12456 The first and last cases may be avoidable by explicitly coding the zero
12457 displacement in the memory address, but I don't have an AMD-K6 machine
12458 handy to check this theory. */
12459
12460 if (TARGET_K6
12461 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12462 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12463 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12464 cost += 10;
12465
12466 return cost;
12467 }
12468 \f
12469 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
12470 this is used to form addresses to local data when -fPIC is in
12471 use. */
12472
12473 static bool
12474 darwin_local_data_pic (rtx disp)
12475 {
12476 return (GET_CODE (disp) == UNSPEC
12477 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12478 }
12479
12480 /* Determine if a given RTX is a valid constant. We already know this
12481 satisfies CONSTANT_P. */
12482
12483 static bool
12484 ix86_legitimate_constant_p (enum machine_mode, rtx x)
12485 {
12486 switch (GET_CODE (x))
12487 {
12488 case CONST:
12489 x = XEXP (x, 0);
12490
12491 if (GET_CODE (x) == PLUS)
12492 {
12493 if (!CONST_INT_P (XEXP (x, 1)))
12494 return false;
12495 x = XEXP (x, 0);
12496 }
12497
12498 if (TARGET_MACHO && darwin_local_data_pic (x))
12499 return true;
12500
12501 /* Only some unspecs are valid as "constants". */
12502 if (GET_CODE (x) == UNSPEC)
12503 switch (XINT (x, 1))
12504 {
12505 case UNSPEC_GOT:
12506 case UNSPEC_GOTOFF:
12507 case UNSPEC_PLTOFF:
12508 return TARGET_64BIT;
12509 case UNSPEC_TPOFF:
12510 case UNSPEC_NTPOFF:
12511 x = XVECEXP (x, 0, 0);
12512 return (GET_CODE (x) == SYMBOL_REF
12513 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12514 case UNSPEC_DTPOFF:
12515 x = XVECEXP (x, 0, 0);
12516 return (GET_CODE (x) == SYMBOL_REF
12517 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12518 default:
12519 return false;
12520 }
12521
12522 /* We must have drilled down to a symbol. */
12523 if (GET_CODE (x) == LABEL_REF)
12524 return true;
12525 if (GET_CODE (x) != SYMBOL_REF)
12526 return false;
12527 /* FALLTHRU */
12528
12529 case SYMBOL_REF:
12530 /* TLS symbols are never valid. */
12531 if (SYMBOL_REF_TLS_MODEL (x))
12532 return false;
12533
12534 /* DLLIMPORT symbols are never valid. */
12535 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12536 && SYMBOL_REF_DLLIMPORT_P (x))
12537 return false;
12538
12539 #if TARGET_MACHO
12540 /* mdynamic-no-pic */
12541 if (MACHO_DYNAMIC_NO_PIC_P)
12542 return machopic_symbol_defined_p (x);
12543 #endif
12544 break;
12545
12546 case CONST_DOUBLE:
12547 if (GET_MODE (x) == TImode
12548 && x != CONST0_RTX (TImode)
12549 && !TARGET_64BIT)
12550 return false;
12551 break;
12552
12553 case CONST_VECTOR:
12554 if (!standard_sse_constant_p (x))
12555 return false;
12556
12557 default:
12558 break;
12559 }
12560
12561 /* Otherwise we handle everything else in the move patterns. */
12562 return true;
12563 }
12564
12565 /* Determine if it's legal to put X into the constant pool. This
12566 is not possible for the address of thread-local symbols, which
12567 is checked above. */
12568
12569 static bool
12570 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12571 {
12572 /* We can always put integral constants and vectors in memory. */
12573 switch (GET_CODE (x))
12574 {
12575 case CONST_INT:
12576 case CONST_DOUBLE:
12577 case CONST_VECTOR:
12578 return false;
12579
12580 default:
12581 break;
12582 }
12583 return !ix86_legitimate_constant_p (mode, x);
12584 }
12585
12586 /* Return true if the symbol is marked as dllimport, or as a
12587 stub-variable, otherwise return false. */
12588
12589 static bool
12590 is_imported_p (rtx x)
12591 {
12592 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12593 || GET_CODE (x) != SYMBOL_REF)
12594 return false;
12595
12596 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12597 }
12598
12599
12600 /* Nonzero if the constant value X is a legitimate general operand
12601 when generating PIC code. It is given that flag_pic is on and
12602 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12603
12604 bool
12605 legitimate_pic_operand_p (rtx x)
12606 {
12607 rtx inner;
12608
12609 switch (GET_CODE (x))
12610 {
12611 case CONST:
12612 inner = XEXP (x, 0);
12613 if (GET_CODE (inner) == PLUS
12614 && CONST_INT_P (XEXP (inner, 1)))
12615 inner = XEXP (inner, 0);
12616
12617 /* Only some unspecs are valid as "constants". */
12618 if (GET_CODE (inner) == UNSPEC)
12619 switch (XINT (inner, 1))
12620 {
12621 case UNSPEC_GOT:
12622 case UNSPEC_GOTOFF:
12623 case UNSPEC_PLTOFF:
12624 return TARGET_64BIT;
12625 case UNSPEC_TPOFF:
12626 x = XVECEXP (inner, 0, 0);
12627 return (GET_CODE (x) == SYMBOL_REF
12628 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12629 case UNSPEC_MACHOPIC_OFFSET:
12630 return legitimate_pic_address_disp_p (x);
12631 default:
12632 return false;
12633 }
12634 /* FALLTHRU */
12635
12636 case SYMBOL_REF:
12637 case LABEL_REF:
12638 return legitimate_pic_address_disp_p (x);
12639
12640 default:
12641 return true;
12642 }
12643 }
12644
12645 /* Determine if a given CONST RTX is a valid memory displacement
12646 in PIC mode. */
12647
12648 bool
12649 legitimate_pic_address_disp_p (rtx disp)
12650 {
12651 bool saw_plus;
12652
12653 /* In 64bit mode we can allow direct addresses of symbols and labels
12654 when they are not dynamic symbols. */
12655 if (TARGET_64BIT)
12656 {
12657 rtx op0 = disp, op1;
12658
12659 switch (GET_CODE (disp))
12660 {
12661 case LABEL_REF:
12662 return true;
12663
12664 case CONST:
12665 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12666 break;
12667 op0 = XEXP (XEXP (disp, 0), 0);
12668 op1 = XEXP (XEXP (disp, 0), 1);
12669 if (!CONST_INT_P (op1)
12670 || INTVAL (op1) >= 16*1024*1024
12671 || INTVAL (op1) < -16*1024*1024)
12672 break;
12673 if (GET_CODE (op0) == LABEL_REF)
12674 return true;
12675 if (GET_CODE (op0) == CONST
12676 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12677 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12678 return true;
12679 if (GET_CODE (op0) == UNSPEC
12680 && XINT (op0, 1) == UNSPEC_PCREL)
12681 return true;
12682 if (GET_CODE (op0) != SYMBOL_REF)
12683 break;
12684 /* FALLTHRU */
12685
12686 case SYMBOL_REF:
12687 /* TLS references should always be enclosed in UNSPEC.
12688 A dllimported symbol always needs to be resolved. */
12689 if (SYMBOL_REF_TLS_MODEL (op0)
12690 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12691 return false;
12692
12693 if (TARGET_PECOFF)
12694 {
12695 if (is_imported_p (op0))
12696 return true;
12697
12698 if (SYMBOL_REF_FAR_ADDR_P (op0)
12699 || !SYMBOL_REF_LOCAL_P (op0))
12700 break;
12701
12702 /* Function symbols need to be resolved only for
12703 the large model.
12704 For the small model we don't need to resolve anything
12705 here. */
12706 if ((ix86_cmodel != CM_LARGE_PIC
12707 && SYMBOL_REF_FUNCTION_P (op0))
12708 || ix86_cmodel == CM_SMALL_PIC)
12709 return true;
12710 /* Non-external symbols don't need to be resolved for
12711 the large and medium models. */
12712 if ((ix86_cmodel == CM_LARGE_PIC
12713 || ix86_cmodel == CM_MEDIUM_PIC)
12714 && !SYMBOL_REF_EXTERNAL_P (op0))
12715 return true;
12716 }
12717 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12718 && SYMBOL_REF_LOCAL_P (op0)
12719 && ix86_cmodel != CM_LARGE_PIC)
12720 return true;
12721 break;
12722
12723 default:
12724 break;
12725 }
12726 }
12727 if (GET_CODE (disp) != CONST)
12728 return false;
12729 disp = XEXP (disp, 0);
12730
12731 if (TARGET_64BIT)
12732 {
12733 /* It is unsafe to allow PLUS expressions. This limits the allowed
12734 distance of GOT tables. We should not need these anyway. */
12735 if (GET_CODE (disp) != UNSPEC
12736 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12737 && XINT (disp, 1) != UNSPEC_GOTOFF
12738 && XINT (disp, 1) != UNSPEC_PCREL
12739 && XINT (disp, 1) != UNSPEC_PLTOFF))
12740 return false;
12741
12742 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12743 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12744 return false;
12745 return true;
12746 }
12747
12748 saw_plus = false;
12749 if (GET_CODE (disp) == PLUS)
12750 {
12751 if (!CONST_INT_P (XEXP (disp, 1)))
12752 return false;
12753 disp = XEXP (disp, 0);
12754 saw_plus = true;
12755 }
12756
12757 if (TARGET_MACHO && darwin_local_data_pic (disp))
12758 return true;
12759
12760 if (GET_CODE (disp) != UNSPEC)
12761 return false;
12762
12763 switch (XINT (disp, 1))
12764 {
12765 case UNSPEC_GOT:
12766 if (saw_plus)
12767 return false;
12768 /* We need to check for both symbols and labels because VxWorks loads
12769 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12770 details. */
12771 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12772 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12773 case UNSPEC_GOTOFF:
12774 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12775 While the ABI also specifies a 32bit relocation, we don't produce
12776 it in the small PIC model at all. */
12777 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12778 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12779 && !TARGET_64BIT)
12780 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12781 return false;
12782 case UNSPEC_GOTTPOFF:
12783 case UNSPEC_GOTNTPOFF:
12784 case UNSPEC_INDNTPOFF:
12785 if (saw_plus)
12786 return false;
12787 disp = XVECEXP (disp, 0, 0);
12788 return (GET_CODE (disp) == SYMBOL_REF
12789 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12790 case UNSPEC_NTPOFF:
12791 disp = XVECEXP (disp, 0, 0);
12792 return (GET_CODE (disp) == SYMBOL_REF
12793 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12794 case UNSPEC_DTPOFF:
12795 disp = XVECEXP (disp, 0, 0);
12796 return (GET_CODE (disp) == SYMBOL_REF
12797 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12798 }
12799
12800 return false;
12801 }
12802
12803 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Return true if we
12804 pushed reloads for the invalid parts of the address X, in which case
12805 the calling macro should goto WIN; return false if X is left
12806 unchanged for the caller to handle. */
12807
12808 bool
12809 ix86_legitimize_reload_address (rtx x, enum machine_mode, int opnum, int type,
12810 int)
12811 {
12812 /* Reload can generate:
12813
12814 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12815 (reg:DI 97))
12816 (reg:DI 2 cx))
12817
12818 This RTX is rejected by ix86_legitimate_address_p due to
12819 non-strictness of base register 97. Following this rejection,
12820 reload pushes all three components into separate registers,
12821 creating an invalid memory address RTX.
12822
12823 The following code reloads only the invalid part of the
12824 memory address RTX. */
12825
12826 if (GET_CODE (x) == PLUS
12827 && REG_P (XEXP (x, 1))
12828 && GET_CODE (XEXP (x, 0)) == PLUS
12829 && REG_P (XEXP (XEXP (x, 0), 1)))
12830 {
12831 rtx base, index;
12832 bool something_reloaded = false;
12833
12834 base = XEXP (XEXP (x, 0), 1);
12835 if (!REG_OK_FOR_BASE_STRICT_P (base))
12836 {
12837 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12838 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12839 opnum, (enum reload_type) type);
12840 something_reloaded = true;
12841 }
12842
12843 index = XEXP (x, 1);
12844 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12845 {
12846 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12847 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12848 opnum, (enum reload_type) type);
12849 something_reloaded = true;
12850 }
12851
12852 gcc_assert (something_reloaded);
12853 return true;
12854 }
12855
12856 return false;
12857 }
12858
12859 /* Determine if OP is a suitable RTX for an address register.
12860 Return the naked register if a register or a register subreg is
12861 found, otherwise return NULL_RTX. */
12862
12863 static rtx
12864 ix86_validate_address_register (rtx op)
12865 {
12866 enum machine_mode mode = GET_MODE (op);
12867
12868 /* Only SImode or DImode registers can form the address. */
12869 if (mode != SImode && mode != DImode)
12870 return NULL_RTX;
12871
12872 if (REG_P (op))
12873 return op;
12874 else if (GET_CODE (op) == SUBREG)
12875 {
12876 rtx reg = SUBREG_REG (op);
12877
12878 if (!REG_P (reg))
12879 return NULL_RTX;
12880
12881 mode = GET_MODE (reg);
12882
12883 /* Don't allow SUBREGs that span more than a word. It can
12884 lead to spill failures when the register is one word out
12885 of a two word structure. */
12886 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12887 return NULL_RTX;
12888
12889 /* Allow only SUBREGs of non-eliminable hard registers. */
12890 if (register_no_elim_operand (reg, mode))
12891 return reg;
12892 }
12893
12894 /* Op is not a register. */
12895 return NULL_RTX;
12896 }
12897
12898 /* Recognize RTL expressions that are valid memory addresses for an
12899 instruction. The MODE argument is the machine mode for the MEM
12900 expression that wants to use this address.
12901
12902 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12903 convert common non-canonical forms to canonical form so that they will
12904 be recognized. */
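/* In other words, only addresses of the canonical
     %seg:disp(base, index, scale)
   form are accepted, subject to the register, scale and displacement
   checks below.  */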
12905
12906 static bool
12907 ix86_legitimate_address_p (enum machine_mode, rtx addr, bool strict)
12908 {
12909 struct ix86_address parts;
12910 rtx base, index, disp;
12911 HOST_WIDE_INT scale;
12912 enum ix86_address_seg seg;
12913
12914 if (ix86_decompose_address (addr, &parts) <= 0)
12915 /* Decomposition failed. */
12916 return false;
12917
12918 base = parts.base;
12919 index = parts.index;
12920 disp = parts.disp;
12921 scale = parts.scale;
12922 seg = parts.seg;
12923
12924 /* Validate base register. */
12925 if (base)
12926 {
12927 rtx reg = ix86_validate_address_register (base);
12928
12929 if (reg == NULL_RTX)
12930 return false;
12931
12932 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12933 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12934 /* Base is not valid. */
12935 return false;
12936 }
12937
12938 /* Validate index register. */
12939 if (index)
12940 {
12941 rtx reg = ix86_validate_address_register (index);
12942
12943 if (reg == NULL_RTX)
12944 return false;
12945
12946 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12947 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12948 /* Index is not valid. */
12949 return false;
12950 }
12951
12952 /* Index and base should have the same mode. */
12953 if (base && index
12954 && GET_MODE (base) != GET_MODE (index))
12955 return false;
12956
12957 /* Address override works only on the (%reg) part of %fs:(%reg). */
12958 if (seg != SEG_DEFAULT
12959 && ((base && GET_MODE (base) != word_mode)
12960 || (index && GET_MODE (index) != word_mode)))
12961 return false;
12962
12963 /* Validate scale factor. */
12964 if (scale != 1)
12965 {
12966 if (!index)
12967 /* Scale without index. */
12968 return false;
12969
12970 if (scale != 2 && scale != 4 && scale != 8)
12971 /* Scale is not a valid multiplier. */
12972 return false;
12973 }
12974
12975 /* Validate displacement. */
12976 if (disp)
12977 {
12978 if (GET_CODE (disp) == CONST
12979 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12980 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12981 switch (XINT (XEXP (disp, 0), 1))
12982 {
12983 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12984 used. While the ABI also specifies 32bit relocations, we don't produce
12985 them at all and use IP-relative addressing instead. */
12986 case UNSPEC_GOT:
12987 case UNSPEC_GOTOFF:
12988 gcc_assert (flag_pic);
12989 if (!TARGET_64BIT)
12990 goto is_legitimate_pic;
12991
12992 /* 64bit address unspec. */
12993 return false;
12994
12995 case UNSPEC_GOTPCREL:
12996 case UNSPEC_PCREL:
12997 gcc_assert (flag_pic);
12998 goto is_legitimate_pic;
12999
13000 case UNSPEC_GOTTPOFF:
13001 case UNSPEC_GOTNTPOFF:
13002 case UNSPEC_INDNTPOFF:
13003 case UNSPEC_NTPOFF:
13004 case UNSPEC_DTPOFF:
13005 break;
13006
13007 case UNSPEC_STACK_CHECK:
13008 gcc_assert (flag_split_stack);
13009 break;
13010
13011 default:
13012 /* Invalid address unspec. */
13013 return false;
13014 }
13015
13016 else if (SYMBOLIC_CONST (disp)
13017 && (flag_pic
13018 || (TARGET_MACHO
13019 #if TARGET_MACHO
13020 && MACHOPIC_INDIRECT
13021 && !machopic_operand_p (disp)
13022 #endif
13023 )))
13024 {
13025
13026 is_legitimate_pic:
13027 if (TARGET_64BIT && (index || base))
13028 {
13029 /* foo@dtpoff(%rX) is ok. */
13030 if (GET_CODE (disp) != CONST
13031 || GET_CODE (XEXP (disp, 0)) != PLUS
13032 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13033 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13034 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13035 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13036 /* Non-constant pic memory reference. */
13037 return false;
13038 }
13039 else if ((!TARGET_MACHO || flag_pic)
13040 && ! legitimate_pic_address_disp_p (disp))
13041 /* Displacement is an invalid pic construct. */
13042 return false;
13043 #if TARGET_MACHO
13044 else if (MACHO_DYNAMIC_NO_PIC_P
13045 && !ix86_legitimate_constant_p (Pmode, disp))
13046 /* displacement must be referenced via non_lazy_pointer */
13047 return false;
13048 #endif
13049
13050 /* This code used to verify that a symbolic pic displacement
13051 includes the pic_offset_table_rtx register.
13052
13053 While this is a good idea, unfortunately these constructs may
13054 be created by the "adds using lea" optimization for incorrect
13055 code like:
13056
13057 int a;
13058 int foo(int i)
13059 {
13060 return *(&a+i);
13061 }
13062
13063 This code is nonsensical, but results in addressing the
13064 GOT table with pic_offset_table_rtx as the base. We can't
13065 easily refuse it, since it gets matched by the
13066 "addsi3" pattern, which later gets split to lea when the
13067 output register differs from the input. While this
13068 could be handled by a separate addsi pattern for this case
13069 that never results in lea, disabling this test seems to be
13070 the easier and correct fix for the crash. */
13071 }
13072 else if (GET_CODE (disp) != LABEL_REF
13073 && !CONST_INT_P (disp)
13074 && (GET_CODE (disp) != CONST
13075 || !ix86_legitimate_constant_p (Pmode, disp))
13076 && (GET_CODE (disp) != SYMBOL_REF
13077 || !ix86_legitimate_constant_p (Pmode, disp)))
13078 /* Displacement is not constant. */
13079 return false;
13080 else if (TARGET_64BIT
13081 && !x86_64_immediate_operand (disp, VOIDmode))
13082 /* Displacement is out of range. */
13083 return false;
13084 /* In x32 mode, constant addresses are sign extended to 64bit, so
13085 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13086 else if (TARGET_X32 && !(index || base)
13087 && CONST_INT_P (disp)
13088 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13089 return false;
13090 }
13091
13092 /* Everything looks valid. */
13093 return true;
13094 }
13095
13096 /* Determine if a given RTX is a valid constant address. */
13097
13098 bool
13099 constant_address_p (rtx x)
13100 {
13101 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13102 }
13103 \f
13104 /* Return a unique alias set for the GOT. */
13105
13106 static alias_set_type
13107 ix86_GOT_alias_set (void)
13108 {
13109 static alias_set_type set = -1;
13110 if (set == -1)
13111 set = new_alias_set ();
13112 return set;
13113 }
13114
13115 /* Return a legitimate reference for ORIG (an address) using the
13116 register REG. If REG is 0, a new pseudo is generated.
13117
13118 There are two types of references that must be handled:
13119
13120 1. Global data references must load the address from the GOT, via
13121 the PIC reg. An insn is emitted to do this load, and the reg is
13122 returned.
13123
13124 2. Static data references, constant pool addresses, and code labels
13125 compute the address as an offset from the GOT, whose base is in
13126 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13127 differentiate them from global data objects. The returned
13128 address is the PIC reg + an unspec constant.
13129
13130 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13131 reg also appears in the address. */
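/* Illustration of the RTL this produces (ELF):
     32-bit global:  reg = mem (pic_reg + const (unspec [sym] UNSPEC_GOT))
     32-bit local:   pic_reg + const (unspec [sym] UNSPEC_GOTOFF)
     64-bit global:  reg = mem (const (unspec [sym] UNSPEC_GOTPCREL))  */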
13132
13133 static rtx
13134 legitimize_pic_address (rtx orig, rtx reg)
13135 {
13136 rtx addr = orig;
13137 rtx new_rtx = orig;
13138
13139 #if TARGET_MACHO
13140 if (TARGET_MACHO && !TARGET_64BIT)
13141 {
13142 if (reg == 0)
13143 reg = gen_reg_rtx (Pmode);
13144 /* Use the generic Mach-O PIC machinery. */
13145 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13146 }
13147 #endif
13148
13149 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13150 {
13151 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13152 if (tmp)
13153 return tmp;
13154 }
13155
13156 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13157 new_rtx = addr;
13158 else if (TARGET_64BIT && !TARGET_PECOFF
13159 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13160 {
13161 rtx tmpreg;
13162 /* This symbol may be referenced via a displacement from the PIC
13163 base address (@GOTOFF). */
13164
13165 if (reload_in_progress)
13166 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13167 if (GET_CODE (addr) == CONST)
13168 addr = XEXP (addr, 0);
13169 if (GET_CODE (addr) == PLUS)
13170 {
13171 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13172 UNSPEC_GOTOFF);
13173 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13174 }
13175 else
13176 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13177 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13178 if (!reg)
13179 tmpreg = gen_reg_rtx (Pmode);
13180 else
13181 tmpreg = reg;
13182 emit_move_insn (tmpreg, new_rtx);
13183
13184 if (reg != 0)
13185 {
13186 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13187 tmpreg, 1, OPTAB_DIRECT);
13188 new_rtx = reg;
13189 }
13190 else
13191 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13192 }
13193 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13194 {
13195 /* This symbol may be referenced via a displacement from the PIC
13196 base address (@GOTOFF). */
13197
13198 if (reload_in_progress)
13199 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13200 if (GET_CODE (addr) == CONST)
13201 addr = XEXP (addr, 0);
13202 if (GET_CODE (addr) == PLUS)
13203 {
13204 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13205 UNSPEC_GOTOFF);
13206 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13207 }
13208 else
13209 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13210 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13211 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13212
13213 if (reg != 0)
13214 {
13215 emit_move_insn (reg, new_rtx);
13216 new_rtx = reg;
13217 }
13218 }
13219 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13220 /* We can't use @GOTOFF for text labels on VxWorks;
13221 see gotoff_operand. */
13222 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13223 {
13224 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13225 if (tmp)
13226 return tmp;
13227
13228 /* For x64 PE-COFF there is no GOT table,
13229 so we use the address directly. */
13230 if (TARGET_64BIT && TARGET_PECOFF)
13231 {
13232 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13233 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13234
13235 if (reg == 0)
13236 reg = gen_reg_rtx (Pmode);
13237 emit_move_insn (reg, new_rtx);
13238 new_rtx = reg;
13239 }
13240 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13241 {
13242 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13243 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13244 new_rtx = gen_const_mem (Pmode, new_rtx);
13245 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13246
13247 if (reg == 0)
13248 reg = gen_reg_rtx (Pmode);
13249 /* Use gen_movsi directly; otherwise the address is loaded
13250 into a register for CSE. We don't want to CSE these addresses;
13251 instead we CSE addresses from the GOT table, so skip this. */
13252 emit_insn (gen_movsi (reg, new_rtx));
13253 new_rtx = reg;
13254 }
13255 else
13256 {
13257 /* This symbol must be referenced via a load from the
13258 Global Offset Table (@GOT). */
13259
13260 if (reload_in_progress)
13261 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13262 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13263 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13264 if (TARGET_64BIT)
13265 new_rtx = force_reg (Pmode, new_rtx);
13266 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13267 new_rtx = gen_const_mem (Pmode, new_rtx);
13268 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13269
13270 if (reg == 0)
13271 reg = gen_reg_rtx (Pmode);
13272 emit_move_insn (reg, new_rtx);
13273 new_rtx = reg;
13274 }
13275 }
13276 else
13277 {
13278 if (CONST_INT_P (addr)
13279 && !x86_64_immediate_operand (addr, VOIDmode))
13280 {
13281 if (reg)
13282 {
13283 emit_move_insn (reg, addr);
13284 new_rtx = reg;
13285 }
13286 else
13287 new_rtx = force_reg (Pmode, addr);
13288 }
13289 else if (GET_CODE (addr) == CONST)
13290 {
13291 addr = XEXP (addr, 0);
13292
13293 /* We must match stuff we generate before. Assume the only
13294 unspecs that can get here are ours. Not that we could do
13295 anything with them anyway.... */
13296 if (GET_CODE (addr) == UNSPEC
13297 || (GET_CODE (addr) == PLUS
13298 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13299 return orig;
13300 gcc_assert (GET_CODE (addr) == PLUS);
13301 }
13302 if (GET_CODE (addr) == PLUS)
13303 {
13304 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13305
13306 /* Check first to see if this is a constant offset from a @GOTOFF
13307 symbol reference. */
13308 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13309 && CONST_INT_P (op1))
13310 {
13311 if (!TARGET_64BIT)
13312 {
13313 if (reload_in_progress)
13314 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13315 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13316 UNSPEC_GOTOFF);
13317 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13318 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13319 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13320
13321 if (reg != 0)
13322 {
13323 emit_move_insn (reg, new_rtx);
13324 new_rtx = reg;
13325 }
13326 }
13327 else
13328 {
13329 if (INTVAL (op1) < -16*1024*1024
13330 || INTVAL (op1) >= 16*1024*1024)
13331 {
13332 if (!x86_64_immediate_operand (op1, Pmode))
13333 op1 = force_reg (Pmode, op1);
13334 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13335 }
13336 }
13337 }
13338 else
13339 {
13340 rtx base = legitimize_pic_address (op0, reg);
13341 enum machine_mode mode = GET_MODE (base);
13342 new_rtx
13343 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13344
13345 if (CONST_INT_P (new_rtx))
13346 {
13347 if (INTVAL (new_rtx) < -16*1024*1024
13348 || INTVAL (new_rtx) >= 16*1024*1024)
13349 {
13350 if (!x86_64_immediate_operand (new_rtx, mode))
13351 new_rtx = force_reg (mode, new_rtx);
13352 new_rtx
13353 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13354 }
13355 else
13356 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13357 }
13358 else
13359 {
13360 if (GET_CODE (new_rtx) == PLUS
13361 && CONSTANT_P (XEXP (new_rtx, 1)))
13362 {
13363 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13364 new_rtx = XEXP (new_rtx, 1);
13365 }
13366 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13367 }
13368 }
13369 }
13370 }
13371 return new_rtx;
13372 }
13373 \f
13374 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13375
13376 static rtx
13377 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13378 {
13379 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13380
13381 if (GET_MODE (tp) != tp_mode)
13382 {
13383 gcc_assert (GET_MODE (tp) == SImode);
13384 gcc_assert (tp_mode == DImode);
13385
13386 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13387 }
13388
13389 if (to_reg)
13390 tp = copy_to_mode_reg (tp_mode, tp);
13391
13392 return tp;
13393 }
13394
13395 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13396
13397 static GTY(()) rtx ix86_tls_symbol;
13398
13399 static rtx
13400 ix86_tls_get_addr (void)
13401 {
13402 if (!ix86_tls_symbol)
13403 {
13404 const char *sym
13405 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13406 ? "___tls_get_addr" : "__tls_get_addr");
13407
13408 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13409 }
13410
13411 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13412 {
13413 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13414 UNSPEC_PLTOFF);
13415 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13416 gen_rtx_CONST (Pmode, unspec));
13417 }
13418
13419 return ix86_tls_symbol;
13420 }
13421
13422 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13423
13424 static GTY(()) rtx ix86_tls_module_base_symbol;
13425
13426 rtx
13427 ix86_tls_module_base (void)
13428 {
13429 if (!ix86_tls_module_base_symbol)
13430 {
13431 ix86_tls_module_base_symbol
13432 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13433
13434 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13435 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13436 }
13437
13438 return ix86_tls_module_base_symbol;
13439 }
13440
13441 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13442 false if we expect this to be used for a memory address and true if
13443 we expect to load the address into a register. */
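/* The TLS access models handled below correspond roughly to:
     GLOBAL_DYNAMIC: call __tls_get_addr for the symbol (sym@tlsgd)
     LOCAL_DYNAMIC:  call __tls_get_addr for _TLS_MODULE_BASE_, then
                     add sym@dtpoff
     INITIAL_EXEC:   load sym@gottpoff from the GOT and add the thread
                     pointer (%fs/%gs)
     LOCAL_EXEC:     add sym@tpoff to the thread pointer directly.  */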
13444
13445 static rtx
13446 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13447 {
13448 rtx dest, base, off;
13449 rtx pic = NULL_RTX, tp = NULL_RTX;
13450 enum machine_mode tp_mode = Pmode;
13451 int type;
13452
13453 /* Fall back to the global dynamic model if the toolchain cannot support
13454 local dynamic. */
13455 if (TARGET_SUN_TLS && !TARGET_64BIT
13456 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13457 && model == TLS_MODEL_LOCAL_DYNAMIC)
13458 model = TLS_MODEL_GLOBAL_DYNAMIC;
13459
13460 switch (model)
13461 {
13462 case TLS_MODEL_GLOBAL_DYNAMIC:
13463 dest = gen_reg_rtx (Pmode);
13464
13465 if (!TARGET_64BIT)
13466 {
13467 if (flag_pic && !TARGET_PECOFF)
13468 pic = pic_offset_table_rtx;
13469 else
13470 {
13471 pic = gen_reg_rtx (Pmode);
13472 emit_insn (gen_set_got (pic));
13473 }
13474 }
13475
13476 if (TARGET_GNU2_TLS)
13477 {
13478 if (TARGET_64BIT)
13479 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13480 else
13481 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13482
13483 tp = get_thread_pointer (Pmode, true);
13484 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13485
13486 if (GET_MODE (x) != Pmode)
13487 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13488
13489 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13490 }
13491 else
13492 {
13493 rtx caddr = ix86_tls_get_addr ();
13494
13495 if (TARGET_64BIT)
13496 {
13497 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13498 rtx_insn *insns;
13499
13500 start_sequence ();
13501 emit_call_insn
13502 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13503 insns = get_insns ();
13504 end_sequence ();
13505
13506 if (GET_MODE (x) != Pmode)
13507 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13508
13509 RTL_CONST_CALL_P (insns) = 1;
13510 emit_libcall_block (insns, dest, rax, x);
13511 }
13512 else
13513 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13514 }
13515 break;
13516
13517 case TLS_MODEL_LOCAL_DYNAMIC:
13518 base = gen_reg_rtx (Pmode);
13519
13520 if (!TARGET_64BIT)
13521 {
13522 if (flag_pic)
13523 pic = pic_offset_table_rtx;
13524 else
13525 {
13526 pic = gen_reg_rtx (Pmode);
13527 emit_insn (gen_set_got (pic));
13528 }
13529 }
13530
13531 if (TARGET_GNU2_TLS)
13532 {
13533 rtx tmp = ix86_tls_module_base ();
13534
13535 if (TARGET_64BIT)
13536 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13537 else
13538 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13539
13540 tp = get_thread_pointer (Pmode, true);
13541 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13542 gen_rtx_MINUS (Pmode, tmp, tp));
13543 }
13544 else
13545 {
13546 rtx caddr = ix86_tls_get_addr ();
13547
13548 if (TARGET_64BIT)
13549 {
13550 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13551 rtx_insn *insns;
13552 rtx eqv;
13553
13554 start_sequence ();
13555 emit_call_insn
13556 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13557 insns = get_insns ();
13558 end_sequence ();
13559
13560 	      /* Attach a unique REG_EQUAL to allow the RTL optimizers to
13561 share the LD_BASE result with other LD model accesses. */
13562 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13563 UNSPEC_TLS_LD_BASE);
13564
13565 RTL_CONST_CALL_P (insns) = 1;
13566 emit_libcall_block (insns, base, rax, eqv);
13567 }
13568 else
13569 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13570 }
13571
13572 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13573 off = gen_rtx_CONST (Pmode, off);
13574
13575 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13576
13577 if (TARGET_GNU2_TLS)
13578 {
13579 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13580
13581 if (GET_MODE (x) != Pmode)
13582 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13583
13584 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13585 }
13586 break;
13587
13588 case TLS_MODEL_INITIAL_EXEC:
13589 if (TARGET_64BIT)
13590 {
13591 if (TARGET_SUN_TLS && !TARGET_X32)
13592 {
13593 	      /* The Sun linker took the AMD64 TLS spec literally
13594 		 and can only handle %rax as the destination of the
13595 		 initial-exec code sequence.  */
13596
13597 dest = gen_reg_rtx (DImode);
13598 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13599 return dest;
13600 }
13601
13602 /* Generate DImode references to avoid %fs:(%reg32)
13603 	     problems and the linker IE->LE relaxation bug.  */
13604 tp_mode = DImode;
13605 pic = NULL;
13606 type = UNSPEC_GOTNTPOFF;
13607 }
13608 else if (flag_pic)
13609 {
13610 if (reload_in_progress)
13611 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13612 pic = pic_offset_table_rtx;
13613 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13614 }
13615 else if (!TARGET_ANY_GNU_TLS)
13616 {
13617 pic = gen_reg_rtx (Pmode);
13618 emit_insn (gen_set_got (pic));
13619 type = UNSPEC_GOTTPOFF;
13620 }
13621 else
13622 {
13623 pic = NULL;
13624 type = UNSPEC_INDNTPOFF;
13625 }
13626
13627 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13628 off = gen_rtx_CONST (tp_mode, off);
13629 if (pic)
13630 off = gen_rtx_PLUS (tp_mode, pic, off);
13631 off = gen_const_mem (tp_mode, off);
13632 set_mem_alias_set (off, ix86_GOT_alias_set ());
13633
13634 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13635 {
13636 base = get_thread_pointer (tp_mode,
13637 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13638 off = force_reg (tp_mode, off);
13639 return gen_rtx_PLUS (tp_mode, base, off);
13640 }
13641 else
13642 {
13643 base = get_thread_pointer (Pmode, true);
13644 dest = gen_reg_rtx (Pmode);
13645 emit_insn (ix86_gen_sub3 (dest, base, off));
13646 }
13647 break;
13648
13649 case TLS_MODEL_LOCAL_EXEC:
13650 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13651 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13652 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13653 off = gen_rtx_CONST (Pmode, off);
13654
13655 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13656 {
13657 base = get_thread_pointer (Pmode,
13658 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13659 return gen_rtx_PLUS (Pmode, base, off);
13660 }
13661 else
13662 {
13663 base = get_thread_pointer (Pmode, true);
13664 dest = gen_reg_rtx (Pmode);
13665 emit_insn (ix86_gen_sub3 (dest, base, off));
13666 }
13667 break;
13668
13669 default:
13670 gcc_unreachable ();
13671 }
13672
13673 return dest;
13674 }
13675
13676 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13677 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13678 unique refptr-DECL symbol corresponding to symbol DECL. */
13679
13680 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13681 htab_t dllimport_map;
13682
13683 static tree
13684 get_dllimport_decl (tree decl, bool beimport)
13685 {
13686 struct tree_map *h, in;
13687 void **loc;
13688 const char *name;
13689 const char *prefix;
13690 size_t namelen, prefixlen;
13691 char *imp_name;
13692 tree to;
13693 rtx rtl;
13694
13695 if (!dllimport_map)
13696 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13697
13698 in.hash = htab_hash_pointer (decl);
13699 in.base.from = decl;
13700 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13701 h = (struct tree_map *) *loc;
13702 if (h)
13703 return h->to;
13704
13705 *loc = h = ggc_alloc<tree_map> ();
13706 h->hash = in.hash;
13707 h->base.from = decl;
13708 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13709 VAR_DECL, NULL, ptr_type_node);
13710 DECL_ARTIFICIAL (to) = 1;
13711 DECL_IGNORED_P (to) = 1;
13712 DECL_EXTERNAL (to) = 1;
13713 TREE_READONLY (to) = 1;
13714
13715 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13716 name = targetm.strip_name_encoding (name);
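  /* Compose the stub symbol name; the leading '*' keeps the assembler
     output machinery from prepending the user label prefix again.  */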
13717 if (beimport)
13718 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13719 ? "*__imp_" : "*__imp__";
13720 else
13721 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13722 namelen = strlen (name);
13723 prefixlen = strlen (prefix);
13724 imp_name = (char *) alloca (namelen + prefixlen + 1);
13725 memcpy (imp_name, prefix, prefixlen);
13726 memcpy (imp_name + prefixlen, name, namelen + 1);
13727
13728 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13729 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13730 SET_SYMBOL_REF_DECL (rtl, to);
13731 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13732 if (!beimport)
13733 {
13734 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13735 #ifdef SUB_TARGET_RECORD_STUB
13736 SUB_TARGET_RECORD_STUB (name);
13737 #endif
13738 }
13739
13740 rtl = gen_const_mem (Pmode, rtl);
13741 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13742
13743 SET_DECL_RTL (to, rtl);
13744 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13745
13746 return to;
13747 }
13748
13749 /* Expand SYMBOL into its corresponding far-address symbol.
13750 WANT_REG is true if we require the result be a register. */
13751
13752 static rtx
13753 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13754 {
13755 tree imp_decl;
13756 rtx x;
13757
13758 gcc_assert (SYMBOL_REF_DECL (symbol));
13759 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13760
13761 x = DECL_RTL (imp_decl);
13762 if (want_reg)
13763 x = force_reg (Pmode, x);
13764 return x;
13765 }
13766
13767 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13768 true if we require the result be a register. */
13769
13770 static rtx
13771 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13772 {
13773 tree imp_decl;
13774 rtx x;
13775
13776 gcc_assert (SYMBOL_REF_DECL (symbol));
13777 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13778
13779 x = DECL_RTL (imp_decl);
13780 if (want_reg)
13781 x = force_reg (Pmode, x);
13782 return x;
13783 }
13784
13785 /* Expand ADDR into its corresponding dllimport or refptr symbol.  INREG
13786    is true if we require the result be a register.  */
13787
13788 static rtx
13789 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13790 {
13791 if (!TARGET_PECOFF)
13792 return NULL_RTX;
13793
13794 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13795 {
13796 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13797 return legitimize_dllimport_symbol (addr, inreg);
13798 if (GET_CODE (addr) == CONST
13799 && GET_CODE (XEXP (addr, 0)) == PLUS
13800 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13801 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13802 {
13803 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13804 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13805 }
13806 }
13807
13808 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13809 return NULL_RTX;
13810 if (GET_CODE (addr) == SYMBOL_REF
13811 && !is_imported_p (addr)
13812 && SYMBOL_REF_EXTERNAL_P (addr)
13813 && SYMBOL_REF_DECL (addr))
13814 return legitimize_pe_coff_extern_decl (addr, inreg);
13815
13816 if (GET_CODE (addr) == CONST
13817 && GET_CODE (XEXP (addr, 0)) == PLUS
13818 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13819 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13820 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13821 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13822 {
13823 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13824 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13825 }
13826 return NULL_RTX;
13827 }
13828
13829 /* Try machine-dependent ways of modifying an illegitimate address
13830 to be legitimate. If we find one, return the new, valid address.
13831    This hook is used in only one place: `memory_address' in explow.c.
13832
13833 OLDX is the address as it was before break_out_memory_refs was called.
13834 In some cases it is useful to look at this to decide what needs to be done.
13835
13836 It is always safe for this macro to do nothing. It exists to recognize
13837 opportunities to optimize the output.
13838
13839 For the 80386, we handle X+REG by loading X into a register R and
13840 using R+REG. R will go in a general reg and indexing will be used.
13841 However, if REG is a broken-out memory address or multiplication,
13842 nothing needs to be done because REG can certainly go in a general reg.
13843
13844 When -fpic is used, special handling is needed for symbolic references.
13845 See comments by legitimize_pic_address in i386.c for details. */
13846
13847 static rtx
13848 ix86_legitimize_address (rtx x, rtx, enum machine_mode mode)
13849 {
13850 int changed = 0;
13851 unsigned log;
13852
13853 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13854 if (log)
13855 return legitimize_tls_address (x, (enum tls_model) log, false);
13856 if (GET_CODE (x) == CONST
13857 && GET_CODE (XEXP (x, 0)) == PLUS
13858 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13859 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13860 {
13861 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13862 (enum tls_model) log, false);
13863 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13864 }
13865
13866 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13867 {
13868 rtx tmp = legitimize_pe_coff_symbol (x, true);
13869 if (tmp)
13870 return tmp;
13871 }
13872
13873 if (flag_pic && SYMBOLIC_CONST (x))
13874 return legitimize_pic_address (x, 0);
13875
13876 #if TARGET_MACHO
13877 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13878 return machopic_indirect_data_reference (x, 0);
13879 #endif
13880
13881   /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
13882 if (GET_CODE (x) == ASHIFT
13883 && CONST_INT_P (XEXP (x, 1))
13884 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13885 {
13886 changed = 1;
13887 log = INTVAL (XEXP (x, 1));
13888 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13889 GEN_INT (1 << log));
13890 }
13891
13892 if (GET_CODE (x) == PLUS)
13893 {
13894 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13895
13896 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13897 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13898 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13899 {
13900 changed = 1;
13901 log = INTVAL (XEXP (XEXP (x, 0), 1));
13902 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13903 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13904 GEN_INT (1 << log));
13905 }
13906
13907 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13908 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13909 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13910 {
13911 changed = 1;
13912 log = INTVAL (XEXP (XEXP (x, 1), 1));
13913 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13914 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13915 GEN_INT (1 << log));
13916 }
13917
13918 /* Put multiply first if it isn't already. */
13919 if (GET_CODE (XEXP (x, 1)) == MULT)
13920 {
13921 rtx tmp = XEXP (x, 0);
13922 XEXP (x, 0) = XEXP (x, 1);
13923 XEXP (x, 1) = tmp;
13924 changed = 1;
13925 }
13926
13927 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13928 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13929 created by virtual register instantiation, register elimination, and
13930 similar optimizations. */
13931 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13932 {
13933 changed = 1;
13934 x = gen_rtx_PLUS (Pmode,
13935 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13936 XEXP (XEXP (x, 1), 0)),
13937 XEXP (XEXP (x, 1), 1));
13938 }
13939
13940 /* Canonicalize
13941 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13942 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13943 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13944 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13945 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13946 && CONSTANT_P (XEXP (x, 1)))
13947 {
13948 rtx constant;
13949 rtx other = NULL_RTX;
13950
13951 if (CONST_INT_P (XEXP (x, 1)))
13952 {
13953 constant = XEXP (x, 1);
13954 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13955 }
13956 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13957 {
13958 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13959 other = XEXP (x, 1);
13960 }
13961 else
13962 constant = 0;
13963
13964 if (constant)
13965 {
13966 changed = 1;
13967 x = gen_rtx_PLUS (Pmode,
13968 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13969 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13970 plus_constant (Pmode, other,
13971 INTVAL (constant)));
13972 }
13973 }
13974
13975 if (changed && ix86_legitimate_address_p (mode, x, false))
13976 return x;
13977
13978 if (GET_CODE (XEXP (x, 0)) == MULT)
13979 {
13980 changed = 1;
13981 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
13982 }
13983
13984 if (GET_CODE (XEXP (x, 1)) == MULT)
13985 {
13986 changed = 1;
13987 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
13988 }
13989
13990 if (changed
13991 && REG_P (XEXP (x, 1))
13992 && REG_P (XEXP (x, 0)))
13993 return x;
13994
13995 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13996 {
13997 changed = 1;
13998 x = legitimize_pic_address (x, 0);
13999 }
14000
14001 if (changed && ix86_legitimate_address_p (mode, x, false))
14002 return x;
14003
14004 if (REG_P (XEXP (x, 0)))
14005 {
14006 rtx temp = gen_reg_rtx (Pmode);
14007 rtx val = force_operand (XEXP (x, 1), temp);
14008 if (val != temp)
14009 {
14010 val = convert_to_mode (Pmode, val, 1);
14011 emit_move_insn (temp, val);
14012 }
14013
14014 XEXP (x, 1) = temp;
14015 return x;
14016 }
14017
14018 else if (REG_P (XEXP (x, 1)))
14019 {
14020 rtx temp = gen_reg_rtx (Pmode);
14021 rtx val = force_operand (XEXP (x, 0), temp);
14022 if (val != temp)
14023 {
14024 val = convert_to_mode (Pmode, val, 1);
14025 emit_move_insn (temp, val);
14026 }
14027
14028 XEXP (x, 0) = temp;
14029 return x;
14030 }
14031 }
14032
14033 return x;
14034 }
14035 \f
14036 /* Print an integer constant expression in assembler syntax. Addition
14037 and subtraction are the only arithmetic that may appear in these
14038 expressions. FILE is the stdio stream to write to, X is the rtx, and
14039 CODE is the operand print code from the output string. */
14040
14041 static void
14042 output_pic_addr_const (FILE *file, rtx x, int code)
14043 {
14044 char buf[256];
14045
14046 switch (GET_CODE (x))
14047 {
14048 case PC:
14049 gcc_assert (flag_pic);
14050 putc ('.', file);
14051 break;
14052
14053 case SYMBOL_REF:
14054 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14055 output_addr_const (file, x);
14056 else
14057 {
14058 const char *name = XSTR (x, 0);
14059
14060 /* Mark the decl as referenced so that cgraph will
14061 output the function. */
14062 if (SYMBOL_REF_DECL (x))
14063 mark_decl_referenced (SYMBOL_REF_DECL (x));
14064
14065 #if TARGET_MACHO
14066 if (MACHOPIC_INDIRECT
14067 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14068 name = machopic_indirection_name (x, /*stub_p=*/true);
14069 #endif
14070 assemble_name (file, name);
14071 }
14072 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14073 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14074 fputs ("@PLT", file);
14075 break;
14076
14077 case LABEL_REF:
14078 x = XEXP (x, 0);
14079 /* FALLTHRU */
14080 case CODE_LABEL:
14081 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14082 assemble_name (asm_out_file, buf);
14083 break;
14084
14085 case CONST_INT:
14086 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14087 break;
14088
14089 case CONST:
14090 /* This used to output parentheses around the expression,
14091 but that does not work on the 386 (either ATT or BSD assembler). */
14092 output_pic_addr_const (file, XEXP (x, 0), code);
14093 break;
14094
14095 case CONST_DOUBLE:
14096 if (GET_MODE (x) == VOIDmode)
14097 {
14098 /* We can use %d if the number is <32 bits and positive. */
14099 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14100 fprintf (file, "0x%lx%08lx",
14101 (unsigned long) CONST_DOUBLE_HIGH (x),
14102 (unsigned long) CONST_DOUBLE_LOW (x));
14103 else
14104 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14105 }
14106 else
14107 /* We can't handle floating point constants;
14108 TARGET_PRINT_OPERAND must handle them. */
14109 output_operand_lossage ("floating constant misused");
14110 break;
14111
14112 case PLUS:
14113 /* Some assemblers need integer constants to appear first. */
14114 if (CONST_INT_P (XEXP (x, 0)))
14115 {
14116 output_pic_addr_const (file, XEXP (x, 0), code);
14117 putc ('+', file);
14118 output_pic_addr_const (file, XEXP (x, 1), code);
14119 }
14120 else
14121 {
14122 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14123 output_pic_addr_const (file, XEXP (x, 1), code);
14124 putc ('+', file);
14125 output_pic_addr_const (file, XEXP (x, 0), code);
14126 }
14127 break;
14128
14129 case MINUS:
14130 if (!TARGET_MACHO)
14131 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14132 output_pic_addr_const (file, XEXP (x, 0), code);
14133 putc ('-', file);
14134 output_pic_addr_const (file, XEXP (x, 1), code);
14135 if (!TARGET_MACHO)
14136 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14137 break;
14138
14139 case UNSPEC:
14140 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14141 {
14142 bool f = i386_asm_output_addr_const_extra (file, x);
14143 gcc_assert (f);
14144 break;
14145 }
14146
14147 gcc_assert (XVECLEN (x, 0) == 1);
14148 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14149 switch (XINT (x, 1))
14150 {
14151 case UNSPEC_GOT:
14152 fputs ("@GOT", file);
14153 break;
14154 case UNSPEC_GOTOFF:
14155 fputs ("@GOTOFF", file);
14156 break;
14157 case UNSPEC_PLTOFF:
14158 fputs ("@PLTOFF", file);
14159 break;
14160 case UNSPEC_PCREL:
14161 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14162 "(%rip)" : "[rip]", file);
14163 break;
14164 case UNSPEC_GOTPCREL:
14165 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14166 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14167 break;
14168 case UNSPEC_GOTTPOFF:
14169 /* FIXME: This might be @TPOFF in Sun ld too. */
14170 fputs ("@gottpoff", file);
14171 break;
14172 case UNSPEC_TPOFF:
14173 fputs ("@tpoff", file);
14174 break;
14175 case UNSPEC_NTPOFF:
14176 if (TARGET_64BIT)
14177 fputs ("@tpoff", file);
14178 else
14179 fputs ("@ntpoff", file);
14180 break;
14181 case UNSPEC_DTPOFF:
14182 fputs ("@dtpoff", file);
14183 break;
14184 case UNSPEC_GOTNTPOFF:
14185 if (TARGET_64BIT)
14186 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14187 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14188 else
14189 fputs ("@gotntpoff", file);
14190 break;
14191 case UNSPEC_INDNTPOFF:
14192 fputs ("@indntpoff", file);
14193 break;
14194 #if TARGET_MACHO
14195 case UNSPEC_MACHOPIC_OFFSET:
14196 putc ('-', file);
14197 machopic_output_function_base_name (file);
14198 break;
14199 #endif
14200 default:
14201 output_operand_lossage ("invalid UNSPEC as operand");
14202 break;
14203 }
14204 break;
14205
14206 default:
14207 output_operand_lossage ("invalid expression as operand");
14208 }
14209 }
14210
14211 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14212 We need to emit DTP-relative relocations. */
14213
14214 static void ATTRIBUTE_UNUSED
14215 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14216 {
14217 fputs (ASM_LONG, file);
14218 output_addr_const (file, x);
14219 fputs ("@dtpoff", file);
14220 switch (size)
14221 {
14222 case 4:
14223 break;
14224 case 8:
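      /* Pad the 8-byte case with a zero upper half, e.g. ".long sym@dtpoff, 0".  */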
14225 fputs (", 0", file);
14226 break;
14227 default:
14228 gcc_unreachable ();
14229 }
14230 }
14231
14232 /* Return true if X is a representation of the PIC register. This copes
14233 with calls from ix86_find_base_term, where the register might have
14234 been replaced by a cselib value. */
14235
14236 static bool
14237 ix86_pic_register_p (rtx x)
14238 {
14239 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14240 return (pic_offset_table_rtx
14241 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14242 else
14243 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14244 }
14245
14246 /* Helper function for ix86_delegitimize_address.
14247 Attempt to delegitimize TLS local-exec accesses. */
14248
14249 static rtx
14250 ix86_delegitimize_tls_address (rtx orig_x)
14251 {
14252 rtx x = orig_x, unspec;
14253 struct ix86_address addr;
14254
14255 if (!TARGET_TLS_DIRECT_SEG_REFS)
14256 return orig_x;
14257 if (MEM_P (x))
14258 x = XEXP (x, 0);
14259 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14260 return orig_x;
14261 if (ix86_decompose_address (x, &addr) == 0
14262 || addr.seg != DEFAULT_TLS_SEG_REG
14263 || addr.disp == NULL_RTX
14264 || GET_CODE (addr.disp) != CONST)
14265 return orig_x;
14266 unspec = XEXP (addr.disp, 0);
14267 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14268 unspec = XEXP (unspec, 0);
14269 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14270 return orig_x;
14271 x = XVECEXP (unspec, 0, 0);
14272 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14273 if (unspec != XEXP (addr.disp, 0))
14274 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14275 if (addr.index)
14276 {
14277 rtx idx = addr.index;
14278 if (addr.scale != 1)
14279 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14280 x = gen_rtx_PLUS (Pmode, idx, x);
14281 }
14282 if (addr.base)
14283 x = gen_rtx_PLUS (Pmode, addr.base, x);
14284 if (MEM_P (orig_x))
14285 x = replace_equiv_address_nv (orig_x, x);
14286 return x;
14287 }
14288
14289 /* In the name of slightly smaller debug output, and to cater to
14290 general assembler lossage, recognize PIC+GOTOFF and turn it back
14291 into a direct symbol reference.
14292
14293 On Darwin, this is necessary to avoid a crash, because Darwin
14294 has a different PIC label for each routine but the DWARF debugging
14295 information is not associated with any particular routine, so it's
14296 necessary to remove references to the PIC label from RTL stored by
14297 the DWARF output code. */
14298
14299 static rtx
14300 ix86_delegitimize_address (rtx x)
14301 {
14302 rtx orig_x = delegitimize_mem_from_attrs (x);
14303 /* addend is NULL or some rtx if x is something+GOTOFF where
14304 something doesn't include the PIC register. */
14305 rtx addend = NULL_RTX;
14306 /* reg_addend is NULL or a multiple of some register. */
14307 rtx reg_addend = NULL_RTX;
14308 /* const_addend is NULL or a const_int. */
14309 rtx const_addend = NULL_RTX;
14310 /* This is the result, or NULL. */
14311 rtx result = NULL_RTX;
14312
14313 x = orig_x;
14314
14315 if (MEM_P (x))
14316 x = XEXP (x, 0);
14317
14318 if (TARGET_64BIT)
14319 {
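      /* Rewrite (const (plus (unspec [sym] UNSPEC_PCREL) offset)) back into
	 sym plus offset.  */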
14320 if (GET_CODE (x) == CONST
14321 && GET_CODE (XEXP (x, 0)) == PLUS
14322 && GET_MODE (XEXP (x, 0)) == Pmode
14323 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14324 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14325 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14326 {
14327 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14328 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14329 if (MEM_P (orig_x))
14330 x = replace_equiv_address_nv (orig_x, x);
14331 return x;
14332 }
14333
14334 if (GET_CODE (x) == CONST
14335 && GET_CODE (XEXP (x, 0)) == UNSPEC
14336 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14337 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14338 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14339 {
14340 x = XVECEXP (XEXP (x, 0), 0, 0);
14341 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14342 {
14343 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14344 GET_MODE (x), 0);
14345 if (x == NULL_RTX)
14346 return orig_x;
14347 }
14348 return x;
14349 }
14350
14351 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14352 return ix86_delegitimize_tls_address (orig_x);
14353
14354 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14355 and -mcmodel=medium -fpic. */
14356 }
14357
14358 if (GET_CODE (x) != PLUS
14359 || GET_CODE (XEXP (x, 1)) != CONST)
14360 return ix86_delegitimize_tls_address (orig_x);
14361
14362 if (ix86_pic_register_p (XEXP (x, 0)))
14363 /* %ebx + GOT/GOTOFF */
14364 ;
14365 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14366 {
14367 /* %ebx + %reg * scale + GOT/GOTOFF */
14368 reg_addend = XEXP (x, 0);
14369 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14370 reg_addend = XEXP (reg_addend, 1);
14371 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14372 reg_addend = XEXP (reg_addend, 0);
14373 else
14374 {
14375 reg_addend = NULL_RTX;
14376 addend = XEXP (x, 0);
14377 }
14378 }
14379 else
14380 addend = XEXP (x, 0);
14381
14382 x = XEXP (XEXP (x, 1), 0);
14383 if (GET_CODE (x) == PLUS
14384 && CONST_INT_P (XEXP (x, 1)))
14385 {
14386 const_addend = XEXP (x, 1);
14387 x = XEXP (x, 0);
14388 }
14389
14390 if (GET_CODE (x) == UNSPEC
14391 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14392 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14393 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14394 && !MEM_P (orig_x) && !addend)))
14395 result = XVECEXP (x, 0, 0);
14396
14397 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14398 && !MEM_P (orig_x))
14399 result = XVECEXP (x, 0, 0);
14400
14401 if (! result)
14402 return ix86_delegitimize_tls_address (orig_x);
14403
14404 if (const_addend)
14405 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14406 if (reg_addend)
14407 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14408 if (addend)
14409 {
14410 /* If the rest of original X doesn't involve the PIC register, add
14411 addend and subtract pic_offset_table_rtx. This can happen e.g.
14412 for code like:
14413 leal (%ebx, %ecx, 4), %ecx
14414 ...
14415 movl foo@GOTOFF(%ecx), %edx
14416 in which case we return (%ecx - %ebx) + foo. */
14417 if (pic_offset_table_rtx)
14418 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14419 pic_offset_table_rtx),
14420 result);
14421 else
14422 return orig_x;
14423 }
14424 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14425 {
14426 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14427 if (result == NULL_RTX)
14428 return orig_x;
14429 }
14430 return result;
14431 }
14432
14433 /* If X is a machine specific address (i.e. a symbol or label being
14434 referenced as a displacement from the GOT implemented using an
14435 UNSPEC), then return the base term. Otherwise return X. */
14436
14437 rtx
14438 ix86_find_base_term (rtx x)
14439 {
14440 rtx term;
14441
14442 if (TARGET_64BIT)
14443 {
14444 if (GET_CODE (x) != CONST)
14445 return x;
14446 term = XEXP (x, 0);
14447 if (GET_CODE (term) == PLUS
14448 && (CONST_INT_P (XEXP (term, 1))
14449 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14450 term = XEXP (term, 0);
14451 if (GET_CODE (term) != UNSPEC
14452 || (XINT (term, 1) != UNSPEC_GOTPCREL
14453 && XINT (term, 1) != UNSPEC_PCREL))
14454 return x;
14455
14456 return XVECEXP (term, 0, 0);
14457 }
14458
14459 return ix86_delegitimize_address (x);
14460 }
14461 \f
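/* Print to FILE the assembler condition suffix for comparison CODE done
   in MODE.  If REVERSE is true, print the suffix for the reversed
   condition.  FP selects the spellings used by fcmov, e.g. "nbe" rather
   than "a" for GTU.  */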
14462 static void
14463 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14464 bool fp, FILE *file)
14465 {
14466 const char *suffix;
14467
14468 if (mode == CCFPmode || mode == CCFPUmode)
14469 {
14470 code = ix86_fp_compare_code_to_integer (code);
14471 mode = CCmode;
14472 }
14473 if (reverse)
14474 code = reverse_condition (code);
14475
14476 switch (code)
14477 {
14478 case EQ:
14479 switch (mode)
14480 {
14481 case CCAmode:
14482 suffix = "a";
14483 break;
14484
14485 case CCCmode:
14486 suffix = "c";
14487 break;
14488
14489 case CCOmode:
14490 suffix = "o";
14491 break;
14492
14493 case CCSmode:
14494 suffix = "s";
14495 break;
14496
14497 default:
14498 suffix = "e";
14499 }
14500 break;
14501 case NE:
14502 switch (mode)
14503 {
14504 case CCAmode:
14505 suffix = "na";
14506 break;
14507
14508 case CCCmode:
14509 suffix = "nc";
14510 break;
14511
14512 case CCOmode:
14513 suffix = "no";
14514 break;
14515
14516 case CCSmode:
14517 suffix = "ns";
14518 break;
14519
14520 default:
14521 suffix = "ne";
14522 }
14523 break;
14524 case GT:
14525 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14526 suffix = "g";
14527 break;
14528 case GTU:
14529 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14530 Those same assemblers have the same but opposite lossage on cmov. */
14531 if (mode == CCmode)
14532 suffix = fp ? "nbe" : "a";
14533 else
14534 gcc_unreachable ();
14535 break;
14536 case LT:
14537 switch (mode)
14538 {
14539 case CCNOmode:
14540 case CCGOCmode:
14541 suffix = "s";
14542 break;
14543
14544 case CCmode:
14545 case CCGCmode:
14546 suffix = "l";
14547 break;
14548
14549 default:
14550 gcc_unreachable ();
14551 }
14552 break;
14553 case LTU:
14554 if (mode == CCmode)
14555 suffix = "b";
14556 else if (mode == CCCmode)
14557 suffix = "c";
14558 else
14559 gcc_unreachable ();
14560 break;
14561 case GE:
14562 switch (mode)
14563 {
14564 case CCNOmode:
14565 case CCGOCmode:
14566 suffix = "ns";
14567 break;
14568
14569 case CCmode:
14570 case CCGCmode:
14571 suffix = "ge";
14572 break;
14573
14574 default:
14575 gcc_unreachable ();
14576 }
14577 break;
14578 case GEU:
14579 if (mode == CCmode)
14580 suffix = fp ? "nb" : "ae";
14581 else if (mode == CCCmode)
14582 suffix = "nc";
14583 else
14584 gcc_unreachable ();
14585 break;
14586 case LE:
14587 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14588 suffix = "le";
14589 break;
14590 case LEU:
14591 if (mode == CCmode)
14592 suffix = "be";
14593 else
14594 gcc_unreachable ();
14595 break;
14596 case UNORDERED:
14597 suffix = fp ? "u" : "p";
14598 break;
14599 case ORDERED:
14600 suffix = fp ? "nu" : "np";
14601 break;
14602 default:
14603 gcc_unreachable ();
14604 }
14605 fputs (suffix, file);
14606 }
14607
14608 /* Print the name of register X to FILE based on its machine mode and number.
14609 If CODE is 'w', pretend the mode is HImode.
14610 If CODE is 'b', pretend the mode is QImode.
14611 If CODE is 'k', pretend the mode is SImode.
14612 If CODE is 'q', pretend the mode is DImode.
14613 If CODE is 'x', pretend the mode is V4SFmode.
14614 If CODE is 't', pretend the mode is V8SFmode.
14615 If CODE is 'g', pretend the mode is V16SFmode.
14616 If CODE is 'h', pretend the reg is the 'high' byte register.
14617    If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
14618    If CODE is 'd', duplicate the operand for an AVX instruction.
14619 */
14620
14621 void
14622 print_reg (rtx x, int code, FILE *file)
14623 {
14624 const char *reg;
14625 unsigned int regno;
14626 bool duplicated = code == 'd' && TARGET_AVX;
14627
14628 if (ASSEMBLER_DIALECT == ASM_ATT)
14629 putc ('%', file);
14630
14631 if (x == pc_rtx)
14632 {
14633 gcc_assert (TARGET_64BIT);
14634 fputs ("rip", file);
14635 return;
14636 }
14637
14638 regno = true_regnum (x);
14639 gcc_assert (regno != ARG_POINTER_REGNUM
14640 && regno != FRAME_POINTER_REGNUM
14641 && regno != FLAGS_REG
14642 && regno != FPSR_REG
14643 && regno != FPCR_REG);
14644
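  /* Translate the operand-print code into an operand size in bytes;
     0 means the high QImode register and 3 means an x87 stack register.  */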
14645 if (code == 'w' || MMX_REG_P (x))
14646 code = 2;
14647 else if (code == 'b')
14648 code = 1;
14649 else if (code == 'k')
14650 code = 4;
14651 else if (code == 'q')
14652 code = 8;
14653 else if (code == 'y')
14654 code = 3;
14655 else if (code == 'h')
14656 code = 0;
14657 else if (code == 'x')
14658 code = 16;
14659 else if (code == 't')
14660 code = 32;
14661 else if (code == 'g')
14662 code = 64;
14663 else
14664 code = GET_MODE_SIZE (GET_MODE (x));
14665
14666   /* Irritatingly, AMD extended registers use a different naming convention
14667      from the normal registers: "r%d[bwd]".  */
14668 if (REX_INT_REGNO_P (regno))
14669 {
14670 gcc_assert (TARGET_64BIT);
14671 putc ('r', file);
14672 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14673 switch (code)
14674 {
14675 case 0:
14676 error ("extended registers have no high halves");
14677 break;
14678 case 1:
14679 putc ('b', file);
14680 break;
14681 case 2:
14682 putc ('w', file);
14683 break;
14684 case 4:
14685 putc ('d', file);
14686 break;
14687 case 8:
14688 /* no suffix */
14689 break;
14690 default:
14691 error ("unsupported operand size for extended register");
14692 break;
14693 }
14694 return;
14695 }
14696
14697 reg = NULL;
14698 switch (code)
14699 {
14700 case 3:
14701 if (STACK_TOP_P (x))
14702 {
14703 reg = "st(0)";
14704 break;
14705 }
14706 /* FALLTHRU */
14707 case 8:
14708 case 4:
14709 case 12:
14710 if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x))
14711 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14712 /* FALLTHRU */
14713 case 16:
14714 case 2:
14715 normal:
14716 reg = hi_reg_name[regno];
14717 break;
14718 case 1:
14719 if (regno >= ARRAY_SIZE (qi_reg_name))
14720 goto normal;
14721 reg = qi_reg_name[regno];
14722 break;
14723 case 0:
14724 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14725 goto normal;
14726 reg = qi_high_reg_name[regno];
14727 break;
14728 case 32:
14729 if (SSE_REG_P (x))
14730 {
14731 gcc_assert (!duplicated);
14732 putc ('y', file);
14733 fputs (hi_reg_name[regno] + 1, file);
14734 return;
14735 }
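      /* FALLTHRU */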
14736 case 64:
14737 if (SSE_REG_P (x))
14738 {
14739 gcc_assert (!duplicated);
14740 putc ('z', file);
14741 fputs (hi_reg_name[REGNO (x)] + 1, file);
14742 return;
14743 }
14744 break;
14745 default:
14746 gcc_unreachable ();
14747 }
14748
14749 fputs (reg, file);
14750 if (duplicated)
14751 {
14752 if (ASSEMBLER_DIALECT == ASM_ATT)
14753 fprintf (file, ", %%%s", reg);
14754 else
14755 fprintf (file, ", %s", reg);
14756 }
14757 }
14758
14759 /* Meaning of CODE:
14760 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14761 C -- print opcode suffix for set/cmov insn.
14762 c -- like C, but print reversed condition
14763 F,f -- likewise, but for floating-point.
14764 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14765 otherwise nothing
14766    R -- print embedded rounding and sae.
14767 r -- print only sae.
14768 z -- print the opcode suffix for the size of the current operand.
14769 Z -- likewise, with special suffixes for x87 instructions.
14770 * -- print a star (in certain assembler syntax)
14771 A -- print an absolute memory reference.
14772 E -- print address with DImode register names if TARGET_64BIT.
14773 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14774    s -- print a shift double count, followed by the assembler's argument
14775 delimiter.
14776 b -- print the QImode name of the register for the indicated operand.
14777 %b0 would print %al if operands[0] is reg 0.
14778 w -- likewise, print the HImode name of the register.
14779 k -- likewise, print the SImode name of the register.
14780 q -- likewise, print the DImode name of the register.
14781 x -- likewise, print the V4SFmode name of the register.
14782 t -- likewise, print the V8SFmode name of the register.
14783 g -- likewise, print the V16SFmode name of the register.
14784 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14785 y -- print "st(0)" instead of "st" as a register.
14786 d -- print duplicated register operand for AVX instruction.
14787 D -- print condition for SSE cmp instruction.
14788 P -- if PIC, print an @PLT suffix.
14789 p -- print raw symbol name.
14790 X -- don't print any sort of PIC '@' suffix for a symbol.
14791 & -- print some in-use local-dynamic symbol name.
14792    H -- print a memory address offset by 8; used for SSE high-parts.
14793    Y -- print condition for XOP pcom* instruction.
14794    + -- print a branch hint as 'cs' or 'ds' prefix.
14795 ; -- print a semicolon (after prefixes due to bug in older gas).
14796 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14797 @ -- print a segment register of thread base pointer load
14798 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
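   K -- print the HLE prefix (xacquire/xrelease) encoded in the constant operand.
   N -- print {z} (AVX-512 zero masking) if the operand is a zero constant.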
14799 */
14800
14801 void
14802 ix86_print_operand (FILE *file, rtx x, int code)
14803 {
14804 if (code)
14805 {
14806 switch (code)
14807 {
14808 case 'A':
14809 switch (ASSEMBLER_DIALECT)
14810 {
14811 case ASM_ATT:
14812 putc ('*', file);
14813 break;
14814
14815 case ASM_INTEL:
14816 /* Intel syntax. For absolute addresses, registers should not
14817 	     be surrounded by brackets.  */
14818 if (!REG_P (x))
14819 {
14820 putc ('[', file);
14821 ix86_print_operand (file, x, 0);
14822 putc (']', file);
14823 return;
14824 }
14825 break;
14826
14827 default:
14828 gcc_unreachable ();
14829 }
14830
14831 ix86_print_operand (file, x, 0);
14832 return;
14833
14834 case 'E':
14835 /* Wrap address in an UNSPEC to declare special handling. */
14836 if (TARGET_64BIT)
14837 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14838
14839 output_address (x);
14840 return;
14841
14842 case 'L':
14843 if (ASSEMBLER_DIALECT == ASM_ATT)
14844 putc ('l', file);
14845 return;
14846
14847 case 'W':
14848 if (ASSEMBLER_DIALECT == ASM_ATT)
14849 putc ('w', file);
14850 return;
14851
14852 case 'B':
14853 if (ASSEMBLER_DIALECT == ASM_ATT)
14854 putc ('b', file);
14855 return;
14856
14857 case 'Q':
14858 if (ASSEMBLER_DIALECT == ASM_ATT)
14859 putc ('l', file);
14860 return;
14861
14862 case 'S':
14863 if (ASSEMBLER_DIALECT == ASM_ATT)
14864 putc ('s', file);
14865 return;
14866
14867 case 'T':
14868 if (ASSEMBLER_DIALECT == ASM_ATT)
14869 putc ('t', file);
14870 return;
14871
14872 case 'O':
14873 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14874 if (ASSEMBLER_DIALECT != ASM_ATT)
14875 return;
14876
14877 switch (GET_MODE_SIZE (GET_MODE (x)))
14878 {
14879 case 2:
14880 putc ('w', file);
14881 break;
14882
14883 case 4:
14884 putc ('l', file);
14885 break;
14886
14887 case 8:
14888 putc ('q', file);
14889 break;
14890
14891 default:
14892 output_operand_lossage
14893 ("invalid operand size for operand code 'O'");
14894 return;
14895 }
14896
14897 putc ('.', file);
14898 #endif
14899 return;
14900
14901 case 'z':
14902 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14903 {
14904 	      /* Opcodes don't get size suffixes if using Intel syntax.  */
14905 if (ASSEMBLER_DIALECT == ASM_INTEL)
14906 return;
14907
14908 switch (GET_MODE_SIZE (GET_MODE (x)))
14909 {
14910 case 1:
14911 putc ('b', file);
14912 return;
14913
14914 case 2:
14915 putc ('w', file);
14916 return;
14917
14918 case 4:
14919 putc ('l', file);
14920 return;
14921
14922 case 8:
14923 putc ('q', file);
14924 return;
14925
14926 default:
14927 output_operand_lossage
14928 ("invalid operand size for operand code 'z'");
14929 return;
14930 }
14931 }
14932
14933 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14934 warning
14935 (0, "non-integer operand used with operand code 'z'");
14936 /* FALLTHRU */
14937
14938 case 'Z':
14939 	  /* 387 opcodes don't get size suffixes if using Intel syntax.  */
14940 if (ASSEMBLER_DIALECT == ASM_INTEL)
14941 return;
14942
14943 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14944 {
14945 switch (GET_MODE_SIZE (GET_MODE (x)))
14946 {
14947 case 2:
14948 #ifdef HAVE_AS_IX86_FILDS
14949 putc ('s', file);
14950 #endif
14951 return;
14952
14953 case 4:
14954 putc ('l', file);
14955 return;
14956
14957 case 8:
14958 #ifdef HAVE_AS_IX86_FILDQ
14959 putc ('q', file);
14960 #else
14961 fputs ("ll", file);
14962 #endif
14963 return;
14964
14965 default:
14966 break;
14967 }
14968 }
14969 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14970 {
14971 /* 387 opcodes don't get size suffixes
14972 if the operands are registers. */
14973 if (STACK_REG_P (x))
14974 return;
14975
14976 switch (GET_MODE_SIZE (GET_MODE (x)))
14977 {
14978 case 4:
14979 putc ('s', file);
14980 return;
14981
14982 case 8:
14983 putc ('l', file);
14984 return;
14985
14986 case 12:
14987 case 16:
14988 putc ('t', file);
14989 return;
14990
14991 default:
14992 break;
14993 }
14994 }
14995 else
14996 {
14997 output_operand_lossage
14998 ("invalid operand type used with operand code 'Z'");
14999 return;
15000 }
15001
15002 output_operand_lossage
15003 ("invalid operand size for operand code 'Z'");
15004 return;
15005
15006 case 'd':
15007 case 'b':
15008 case 'w':
15009 case 'k':
15010 case 'q':
15011 case 'h':
15012 case 't':
15013 case 'g':
15014 case 'y':
15015 case 'x':
15016 case 'X':
15017 case 'P':
15018 case 'p':
15019 break;
15020
15021 case 's':
15022 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15023 {
15024 ix86_print_operand (file, x, 0);
15025 fputs (", ", file);
15026 }
15027 return;
15028
15029 case 'Y':
15030 switch (GET_CODE (x))
15031 {
15032 case NE:
15033 fputs ("neq", file);
15034 break;
15035 case EQ:
15036 fputs ("eq", file);
15037 break;
15038 case GE:
15039 case GEU:
15040 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15041 break;
15042 case GT:
15043 case GTU:
15044 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15045 break;
15046 case LE:
15047 case LEU:
15048 fputs ("le", file);
15049 break;
15050 case LT:
15051 case LTU:
15052 fputs ("lt", file);
15053 break;
15054 case UNORDERED:
15055 fputs ("unord", file);
15056 break;
15057 case ORDERED:
15058 fputs ("ord", file);
15059 break;
15060 case UNEQ:
15061 fputs ("ueq", file);
15062 break;
15063 case UNGE:
15064 fputs ("nlt", file);
15065 break;
15066 case UNGT:
15067 fputs ("nle", file);
15068 break;
15069 case UNLE:
15070 fputs ("ule", file);
15071 break;
15072 case UNLT:
15073 fputs ("ult", file);
15074 break;
15075 case LTGT:
15076 fputs ("une", file);
15077 break;
15078 default:
15079 output_operand_lossage ("operand is not a condition code, "
15080 "invalid operand code 'Y'");
15081 return;
15082 }
15083 return;
15084
15085 case 'D':
15086 	  /* A little bit of brain damage here.  The SSE compare instructions
15087 	     use completely different names for the comparisons than the
15088 	     fp conditional moves do.  */
15089 switch (GET_CODE (x))
15090 {
15091 case UNEQ:
15092 if (TARGET_AVX)
15093 {
15094 fputs ("eq_us", file);
15095 break;
15096 }
15097 case EQ:
15098 fputs ("eq", file);
15099 break;
15100 case UNLT:
15101 if (TARGET_AVX)
15102 {
15103 fputs ("nge", file);
15104 break;
15105 }
15106 case LT:
15107 fputs ("lt", file);
15108 break;
15109 case UNLE:
15110 if (TARGET_AVX)
15111 {
15112 fputs ("ngt", file);
15113 break;
15114 }
15115 case LE:
15116 fputs ("le", file);
15117 break;
15118 case UNORDERED:
15119 fputs ("unord", file);
15120 break;
15121 case LTGT:
15122 if (TARGET_AVX)
15123 {
15124 fputs ("neq_oq", file);
15125 break;
15126 }
15127 case NE:
15128 fputs ("neq", file);
15129 break;
15130 case GE:
15131 if (TARGET_AVX)
15132 {
15133 fputs ("ge", file);
15134 break;
15135 }
15136 case UNGE:
15137 fputs ("nlt", file);
15138 break;
15139 case GT:
15140 if (TARGET_AVX)
15141 {
15142 fputs ("gt", file);
15143 break;
15144 }
15145 case UNGT:
15146 fputs ("nle", file);
15147 break;
15148 case ORDERED:
15149 fputs ("ord", file);
15150 break;
15151 default:
15152 output_operand_lossage ("operand is not a condition code, "
15153 "invalid operand code 'D'");
15154 return;
15155 }
15156 return;
15157
15158 case 'F':
15159 case 'f':
15160 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15161 if (ASSEMBLER_DIALECT == ASM_ATT)
15162 putc ('.', file);
15163 #endif
15164
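	  /* FALLTHRU */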
15165 case 'C':
15166 case 'c':
15167 if (!COMPARISON_P (x))
15168 {
15169 output_operand_lossage ("operand is not a condition code, "
15170 "invalid operand code '%c'", code);
15171 return;
15172 }
15173 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15174 code == 'c' || code == 'f',
15175 code == 'F' || code == 'f',
15176 file);
15177 return;
15178
15179 case 'H':
15180 if (!offsettable_memref_p (x))
15181 {
15182 output_operand_lossage ("operand is not an offsettable memory "
15183 "reference, invalid operand code 'H'");
15184 return;
15185 }
15186 /* It doesn't actually matter what mode we use here, as we're
15187 only going to use this for printing. */
15188 x = adjust_address_nv (x, DImode, 8);
15189 /* Output 'qword ptr' for intel assembler dialect. */
15190 if (ASSEMBLER_DIALECT == ASM_INTEL)
15191 code = 'q';
15192 break;
15193
15194 case 'K':
15195 gcc_assert (CONST_INT_P (x));
15196
15197 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15198 #ifdef HAVE_AS_IX86_HLE
15199 fputs ("xacquire ", file);
15200 #else
15201 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15202 #endif
15203 else if (INTVAL (x) & IX86_HLE_RELEASE)
15204 #ifdef HAVE_AS_IX86_HLE
15205 fputs ("xrelease ", file);
15206 #else
15207 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15208 #endif
15209 	  /* We do not want to print the value of the operand.  */
15210 return;
15211
15212 case 'N':
15213 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15214 fputs ("{z}", file);
15215 return;
15216
15217 case 'r':
15218 gcc_assert (CONST_INT_P (x));
15219 gcc_assert (INTVAL (x) == ROUND_SAE);
15220
15221 if (ASSEMBLER_DIALECT == ASM_INTEL)
15222 fputs (", ", file);
15223
15224 fputs ("{sae}", file);
15225
15226 if (ASSEMBLER_DIALECT == ASM_ATT)
15227 fputs (", ", file);
15228
15229 return;
15230
15231 case 'R':
15232 gcc_assert (CONST_INT_P (x));
15233
15234 if (ASSEMBLER_DIALECT == ASM_INTEL)
15235 fputs (", ", file);
15236
15237 switch (INTVAL (x))
15238 {
15239 case ROUND_NEAREST_INT | ROUND_SAE:
15240 fputs ("{rn-sae}", file);
15241 break;
15242 case ROUND_NEG_INF | ROUND_SAE:
15243 fputs ("{rd-sae}", file);
15244 break;
15245 case ROUND_POS_INF | ROUND_SAE:
15246 fputs ("{ru-sae}", file);
15247 break;
15248 case ROUND_ZERO | ROUND_SAE:
15249 fputs ("{rz-sae}", file);
15250 break;
15251 default:
15252 gcc_unreachable ();
15253 }
15254
15255 if (ASSEMBLER_DIALECT == ASM_ATT)
15256 fputs (", ", file);
15257
15258 return;
15259
15260 case '*':
15261 if (ASSEMBLER_DIALECT == ASM_ATT)
15262 putc ('*', file);
15263 return;
15264
15265 case '&':
15266 {
15267 const char *name = get_some_local_dynamic_name ();
15268 if (name == NULL)
15269 output_operand_lossage ("'%%&' used without any "
15270 "local dynamic TLS references");
15271 else
15272 assemble_name (file, name);
15273 return;
15274 }
15275
15276 case '+':
15277 {
15278 rtx x;
15279
15280 if (!optimize
15281 || optimize_function_for_size_p (cfun)
15282 || !TARGET_BRANCH_PREDICTION_HINTS)
15283 return;
15284
15285 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15286 if (x)
15287 {
15288 int pred_val = XINT (x, 0);
15289
15290 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15291 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15292 {
15293 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15294 bool cputaken
15295 = final_forward_branch_p (current_output_insn) == 0;
15296
15297 /* Emit hints only in the case default branch prediction
15298 heuristics would fail. */
15299 if (taken != cputaken)
15300 {
15301 /* We use 3e (DS) prefix for taken branches and
15302 2e (CS) prefix for not taken branches. */
15303 if (taken)
15304 fputs ("ds ; ", file);
15305 else
15306 fputs ("cs ; ", file);
15307 }
15308 }
15309 }
15310 return;
15311 }
15312
15313 case ';':
15314 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15315 putc (';', file);
15316 #endif
15317 return;
15318
15319 case '@':
15320 if (ASSEMBLER_DIALECT == ASM_ATT)
15321 putc ('%', file);
15322
15323 /* The kernel uses a different segment register for performance
15324 reasons; a system call would not have to trash the userspace
15325 segment register, which would be expensive. */
15326 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15327 fputs ("fs", file);
15328 else
15329 fputs ("gs", file);
15330 return;
15331
15332 case '~':
15333 putc (TARGET_AVX2 ? 'i' : 'f', file);
15334 return;
15335
15336 case '^':
15337 if (TARGET_64BIT && Pmode != word_mode)
15338 fputs ("addr32 ", file);
15339 return;
15340
15341 default:
15342 output_operand_lossage ("invalid operand code '%c'", code);
15343 }
15344 }
15345
15346 if (REG_P (x))
15347 print_reg (x, code, file);
15348
15349 else if (MEM_P (x))
15350 {
15351 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15352 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15353 && GET_MODE (x) != BLKmode)
15354 {
15355 const char * size;
15356 switch (GET_MODE_SIZE (GET_MODE (x)))
15357 {
15358 case 1: size = "BYTE"; break;
15359 case 2: size = "WORD"; break;
15360 case 4: size = "DWORD"; break;
15361 case 8: size = "QWORD"; break;
15362 case 12: size = "TBYTE"; break;
15363 case 16:
15364 if (GET_MODE (x) == XFmode)
15365 size = "TBYTE";
15366 else
15367 size = "XMMWORD";
15368 break;
15369 case 32: size = "YMMWORD"; break;
15370 case 64: size = "ZMMWORD"; break;
15371 default:
15372 gcc_unreachable ();
15373 }
15374
15375 /* Check for explicit size override (codes 'b', 'w', 'k',
15376 'q' and 'x') */
15377 if (code == 'b')
15378 size = "BYTE";
15379 else if (code == 'w')
15380 size = "WORD";
15381 else if (code == 'k')
15382 size = "DWORD";
15383 else if (code == 'q')
15384 size = "QWORD";
15385 else if (code == 'x')
15386 size = "XMMWORD";
15387
15388 fputs (size, file);
15389 fputs (" PTR ", file);
15390 }
15391
15392 x = XEXP (x, 0);
15393 /* Avoid (%rip) for call operands. */
15394 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15395 && !CONST_INT_P (x))
15396 output_addr_const (file, x);
15397 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15398 output_operand_lossage ("invalid constraints for operand");
15399 else
15400 output_address (x);
15401 }
15402
15403 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15404 {
15405 REAL_VALUE_TYPE r;
15406 long l;
15407
15408 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15409 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15410
15411 if (ASSEMBLER_DIALECT == ASM_ATT)
15412 putc ('$', file);
15413       /* Sign-extend the 32-bit SFmode immediate to 8 bytes.  */
15414 if (code == 'q')
15415 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15416 (unsigned long long) (int) l);
15417 else
15418 fprintf (file, "0x%08x", (unsigned int) l);
15419 }
15420
15421 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15422 {
15423 REAL_VALUE_TYPE r;
15424 long l[2];
15425
15426 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15427 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15428
15429 if (ASSEMBLER_DIALECT == ASM_ATT)
15430 putc ('$', file);
15431 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15432 }
15433
15434 /* These float cases don't actually occur as immediate operands. */
15435 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15436 {
15437 char dstr[30];
15438
15439 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15440 fputs (dstr, file);
15441 }
15442
15443 else
15444 {
15445 /* We have patterns that allow zero sets of memory, for instance.
15446 In 64-bit mode, we should probably support all 8-byte vectors,
15447 since we can in fact encode that into an immediate. */
15448 if (GET_CODE (x) == CONST_VECTOR)
15449 {
15450 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15451 x = const0_rtx;
15452 }
15453
15454 if (code != 'P' && code != 'p')
15455 {
15456 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15457 {
15458 if (ASSEMBLER_DIALECT == ASM_ATT)
15459 putc ('$', file);
15460 }
15461 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15462 || GET_CODE (x) == LABEL_REF)
15463 {
15464 if (ASSEMBLER_DIALECT == ASM_ATT)
15465 putc ('$', file);
15466 else
15467 fputs ("OFFSET FLAT:", file);
15468 }
15469 }
15470 if (CONST_INT_P (x))
15471 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15472 else if (flag_pic || MACHOPIC_INDIRECT)
15473 output_pic_addr_const (file, x, code);
15474 else
15475 output_addr_const (file, x);
15476 }
15477 }
15478
15479 static bool
15480 ix86_print_operand_punct_valid_p (unsigned char code)
15481 {
15482 return (code == '@' || code == '*' || code == '+' || code == '&'
15483 || code == ';' || code == '~' || code == '^');
15484 }
15485 \f
15486 /* Print a memory operand whose address is ADDR. */
15487
15488 static void
15489 ix86_print_operand_address (FILE *file, rtx addr)
15490 {
15491 struct ix86_address parts;
15492 rtx base, index, disp;
15493 int scale;
15494 int ok;
15495 bool vsib = false;
15496 int code = 0;
15497
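  /* For a VSIB address the UNSPEC carries the base address as operand 0 and
     the vector index register and scale as operands 1 and 2.  */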
15498 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15499 {
15500 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15501 gcc_assert (parts.index == NULL_RTX);
15502 parts.index = XVECEXP (addr, 0, 1);
15503 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15504 addr = XVECEXP (addr, 0, 0);
15505 vsib = true;
15506 }
15507 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15508 {
15509 gcc_assert (TARGET_64BIT);
15510 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15511 code = 'q';
15512 }
15513 else
15514 ok = ix86_decompose_address (addr, &parts);
15515
15516 gcc_assert (ok);
15517
15518 base = parts.base;
15519 index = parts.index;
15520 disp = parts.disp;
15521 scale = parts.scale;
15522
15523 switch (parts.seg)
15524 {
15525 case SEG_DEFAULT:
15526 break;
15527 case SEG_FS:
15528 case SEG_GS:
15529 if (ASSEMBLER_DIALECT == ASM_ATT)
15530 putc ('%', file);
15531 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15532 break;
15533 default:
15534 gcc_unreachable ();
15535 }
15536
15537   /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode.  */
15538 if (TARGET_64BIT && !base && !index)
15539 {
15540 rtx symbol = disp;
15541
15542 if (GET_CODE (disp) == CONST
15543 && GET_CODE (XEXP (disp, 0)) == PLUS
15544 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15545 symbol = XEXP (XEXP (disp, 0), 0);
15546
15547 if (GET_CODE (symbol) == LABEL_REF
15548 || (GET_CODE (symbol) == SYMBOL_REF
15549 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15550 base = pc_rtx;
15551 }
15552 if (!base && !index)
15553 {
15554       /* A displacement-only address requires special attention.  */
15555
15556 if (CONST_INT_P (disp))
15557 {
15558 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15559 fputs ("ds:", file);
15560 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15561 }
15562 else if (flag_pic)
15563 output_pic_addr_const (file, disp, 0);
15564 else
15565 output_addr_const (file, disp);
15566 }
15567 else
15568 {
15569 /* Print SImode register names to force addr32 prefix. */
15570 if (SImode_address_operand (addr, VOIDmode))
15571 {
15572 #ifdef ENABLE_CHECKING
15573 gcc_assert (TARGET_64BIT);
15574 switch (GET_CODE (addr))
15575 {
15576 case SUBREG:
15577 gcc_assert (GET_MODE (addr) == SImode);
15578 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15579 break;
15580 case ZERO_EXTEND:
15581 case AND:
15582 gcc_assert (GET_MODE (addr) == DImode);
15583 break;
15584 default:
15585 gcc_unreachable ();
15586 }
15587 #endif
15588 gcc_assert (!code);
15589 code = 'k';
15590 }
15591 else if (code == 0
15592 && TARGET_X32
15593 && disp
15594 && CONST_INT_P (disp)
15595 && INTVAL (disp) < -16*1024*1024)
15596 {
15597 /* X32 runs in 64-bit mode, where displacement, DISP, in
15598 address DISP(%r64), is encoded as 32-bit immediate sign-
15599 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15600 address is %r64 + 0xffffffffbffffd00. When %r64 <
15601 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15602 which is invalid for x32. The correct address is %r64
15603 - 0x40000300 == 0xf7ffdd64. To properly encode
15604 -0x40000300(%r64) for x32, we zero-extend negative
15605 displacement by forcing addr32 prefix which truncates
15606 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15607 zero-extend all negative displacements, including -1(%rsp).
15608 However, for small negative displacements, sign-extension
15609 won't cause overflow. We only zero-extend negative
15610 	     displacements if they are < -16*1024*1024, which is also the bound
15611 	     used to check legitimate address displacements for PIC.  */
15612 code = 'k';
15613 }
15614
15615 if (ASSEMBLER_DIALECT == ASM_ATT)
15616 {
15617 if (disp)
15618 {
15619 if (flag_pic)
15620 output_pic_addr_const (file, disp, 0);
15621 else if (GET_CODE (disp) == LABEL_REF)
15622 output_asm_label (disp);
15623 else
15624 output_addr_const (file, disp);
15625 }
15626
15627 putc ('(', file);
15628 if (base)
15629 print_reg (base, code, file);
15630 if (index)
15631 {
15632 putc (',', file);
15633 print_reg (index, vsib ? 0 : code, file);
15634 if (scale != 1 || vsib)
15635 fprintf (file, ",%d", scale);
15636 }
15637 putc (')', file);
15638 }
15639 else
15640 {
15641 rtx offset = NULL_RTX;
15642
15643 if (disp)
15644 {
15645 /* Pull out the offset of a symbol; print any symbol itself. */
15646 if (GET_CODE (disp) == CONST
15647 && GET_CODE (XEXP (disp, 0)) == PLUS
15648 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15649 {
15650 offset = XEXP (XEXP (disp, 0), 1);
15651 disp = gen_rtx_CONST (VOIDmode,
15652 XEXP (XEXP (disp, 0), 0));
15653 }
15654
15655 if (flag_pic)
15656 output_pic_addr_const (file, disp, 0);
15657 else if (GET_CODE (disp) == LABEL_REF)
15658 output_asm_label (disp);
15659 else if (CONST_INT_P (disp))
15660 offset = disp;
15661 else
15662 output_addr_const (file, disp);
15663 }
15664
15665 putc ('[', file);
15666 if (base)
15667 {
15668 print_reg (base, code, file);
15669 if (offset)
15670 {
15671 if (INTVAL (offset) >= 0)
15672 putc ('+', file);
15673 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15674 }
15675 }
15676 else if (offset)
15677 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15678 else
15679 putc ('0', file);
15680
15681 if (index)
15682 {
15683 putc ('+', file);
15684 print_reg (index, vsib ? 0 : code, file);
15685 if (scale != 1 || vsib)
15686 fprintf (file, "*%d", scale);
15687 }
15688 putc (']', file);
15689 }
15690 }
15691 }
15692
15693 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15694
15695 static bool
15696 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15697 {
15698 rtx op;
15699
15700 if (GET_CODE (x) != UNSPEC)
15701 return false;
15702
15703 op = XVECEXP (x, 0, 0);
15704 switch (XINT (x, 1))
15705 {
15706 case UNSPEC_GOTTPOFF:
15707 output_addr_const (file, op);
15708 /* FIXME: This might be @TPOFF in Sun ld. */
15709 fputs ("@gottpoff", file);
15710 break;
15711 case UNSPEC_TPOFF:
15712 output_addr_const (file, op);
15713 fputs ("@tpoff", file);
15714 break;
15715 case UNSPEC_NTPOFF:
15716 output_addr_const (file, op);
15717 if (TARGET_64BIT)
15718 fputs ("@tpoff", file);
15719 else
15720 fputs ("@ntpoff", file);
15721 break;
15722 case UNSPEC_DTPOFF:
15723 output_addr_const (file, op);
15724 fputs ("@dtpoff", file);
15725 break;
15726 case UNSPEC_GOTNTPOFF:
15727 output_addr_const (file, op);
15728 if (TARGET_64BIT)
15729 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15730 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15731 else
15732 fputs ("@gotntpoff", file);
15733 break;
15734 case UNSPEC_INDNTPOFF:
15735 output_addr_const (file, op);
15736 fputs ("@indntpoff", file);
15737 break;
15738 #if TARGET_MACHO
15739 case UNSPEC_MACHOPIC_OFFSET:
15740 output_addr_const (file, op);
15741 putc ('-', file);
15742 machopic_output_function_base_name (file);
15743 break;
15744 #endif
15745
15746 case UNSPEC_STACK_CHECK:
15747 {
15748 int offset;
15749
15750 gcc_assert (flag_split_stack);
15751
15752 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15753 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15754 #else
15755 gcc_unreachable ();
15756 #endif
15757
15758 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15759 }
15760 break;
15761
15762 default:
15763 return false;
15764 }
15765
15766 return true;
15767 }
15768 \f
15769 /* Split one or more double-mode RTL references into pairs of half-mode
15770 references. The RTL can be REG, offsettable MEM, integer constant, or
15771 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15772 split and "num" is its length. lo_half and hi_half are output arrays
15773 that parallel "operands". */
15774
15775 void
15776 split_double_mode (enum machine_mode mode, rtx operands[],
15777 int num, rtx lo_half[], rtx hi_half[])
15778 {
15779 enum machine_mode half_mode;
15780 unsigned int byte;
15781
15782 switch (mode)
15783 {
15784 case TImode:
15785 half_mode = DImode;
15786 break;
15787 case DImode:
15788 half_mode = SImode;
15789 break;
15790 default:
15791 gcc_unreachable ();
15792 }
15793
15794 byte = GET_MODE_SIZE (half_mode);
15795
15796 while (num--)
15797 {
15798 rtx op = operands[num];
15799
15800 /* simplify_subreg refuses to split volatile memory addresses,
15801 but we still have to handle them. */
15802 if (MEM_P (op))
15803 {
15804 lo_half[num] = adjust_address (op, half_mode, 0);
15805 hi_half[num] = adjust_address (op, half_mode, byte);
15806 }
15807 else
15808 {
15809 lo_half[num] = simplify_gen_subreg (half_mode, op,
15810 GET_MODE (op) == VOIDmode
15811 ? mode : GET_MODE (op), 0);
15812 hi_half[num] = simplify_gen_subreg (half_mode, op,
15813 GET_MODE (op) == VOIDmode
15814 ? mode : GET_MODE (op), byte);
15815 }
15816 }
15817 }
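/* For example, on 32-bit code a DImode constant such as
   (const_int 0x100000002) should split into SImode halves
   (const_int 0x2) for lo_half and (const_int 0x1) for hi_half,
   following the little-endian layout where byte offset 0 holds the
   low subword. */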
15818 \f
15819 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15820 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15821 is the expression of the binary operation. The output may either be
15822 emitted here, or returned to the caller, like all output_* functions.
15823
15824 There is no guarantee that the operands are the same mode, as they
15825 might be within FLOAT or FLOAT_EXTEND expressions. */
15826
15827 #ifndef SYSV386_COMPAT
15828 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15829 wants to fix the assemblers because that causes incompatibility
15830 with gcc. No-one wants to fix gcc because that causes
15831 incompatibility with assemblers... You can use the option of
15832 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15833 #define SYSV386_COMPAT 1
15834 #endif
15835
15836 const char *
15837 output_387_binary_op (rtx insn, rtx *operands)
15838 {
15839 static char buf[40];
15840 const char *p;
15841 const char *ssep;
15842 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15843
15844 #ifdef ENABLE_CHECKING
15845 /* Even if we do not want to check the inputs, this documents input
15846 constraints, which helps in understanding the following code. */
15847 if (STACK_REG_P (operands[0])
15848 && ((REG_P (operands[1])
15849 && REGNO (operands[0]) == REGNO (operands[1])
15850 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15851 || (REG_P (operands[2])
15852 && REGNO (operands[0]) == REGNO (operands[2])
15853 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15854 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15855 ; /* ok */
15856 else
15857 gcc_assert (is_sse);
15858 #endif
15859
15860 switch (GET_CODE (operands[3]))
15861 {
15862 case PLUS:
15863 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15864 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15865 p = "fiadd";
15866 else
15867 p = "fadd";
15868 ssep = "vadd";
15869 break;
15870
15871 case MINUS:
15872 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15873 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15874 p = "fisub";
15875 else
15876 p = "fsub";
15877 ssep = "vsub";
15878 break;
15879
15880 case MULT:
15881 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15882 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15883 p = "fimul";
15884 else
15885 p = "fmul";
15886 ssep = "vmul";
15887 break;
15888
15889 case DIV:
15890 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15891 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15892 p = "fidiv";
15893 else
15894 p = "fdiv";
15895 ssep = "vdiv";
15896 break;
15897
15898 default:
15899 gcc_unreachable ();
15900 }
15901
15902 if (is_sse)
15903 {
15904 if (TARGET_AVX)
15905 {
15906 strcpy (buf, ssep);
15907 if (GET_MODE (operands[0]) == SFmode)
15908 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15909 else
15910 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15911 }
15912 else
15913 {
15914 strcpy (buf, ssep + 1);
15915 if (GET_MODE (operands[0]) == SFmode)
15916 strcat (buf, "ss\t{%2, %0|%0, %2}");
15917 else
15918 strcat (buf, "sd\t{%2, %0|%0, %2}");
15919 }
15920 return buf;
15921 }
15922 strcpy (buf, p);
15923
15924 switch (GET_CODE (operands[3]))
15925 {
15926 case MULT:
15927 case PLUS:
15928 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15929 {
15930 rtx temp = operands[2];
15931 operands[2] = operands[1];
15932 operands[1] = temp;
15933 }
15934
15935 /* We know operands[0] == operands[1]. */
15936
15937 if (MEM_P (operands[2]))
15938 {
15939 p = "%Z2\t%2";
15940 break;
15941 }
15942
15943 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15944 {
15945 if (STACK_TOP_P (operands[0]))
15946 /* How is it that we are storing to a dead operand[2]?
15947 Well, presumably operands[1] is dead too. We can't
15948 store the result to st(0) as st(0) gets popped on this
15949 instruction. Instead store to operands[2] (which I
15950 think has to be st(1)). st(1) will be popped later.
15951 gcc <= 2.8.1 didn't have this check and generated
15952 assembly code that the Unixware assembler rejected. */
15953 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15954 else
15955 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15956 break;
15957 }
15958
15959 if (STACK_TOP_P (operands[0]))
15960 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15961 else
15962 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15963 break;
15964
15965 case MINUS:
15966 case DIV:
15967 if (MEM_P (operands[1]))
15968 {
15969 p = "r%Z1\t%1";
15970 break;
15971 }
15972
15973 if (MEM_P (operands[2]))
15974 {
15975 p = "%Z2\t%2";
15976 break;
15977 }
15978
15979 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15980 {
15981 #if SYSV386_COMPAT
15982 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15983 derived assemblers, confusingly reverse the direction of
15984 the operation for fsub{r} and fdiv{r} when the
15985 destination register is not st(0). The Intel assembler
15986 doesn't have this brain damage. Read !SYSV386_COMPAT to
15987 figure out what the hardware really does. */
15988 if (STACK_TOP_P (operands[0]))
15989 p = "{p\t%0, %2|rp\t%2, %0}";
15990 else
15991 p = "{rp\t%2, %0|p\t%0, %2}";
15992 #else
15993 if (STACK_TOP_P (operands[0]))
15994 /* As above for fmul/fadd, we can't store to st(0). */
15995 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15996 else
15997 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15998 #endif
15999 break;
16000 }
16001
16002 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16003 {
16004 #if SYSV386_COMPAT
16005 if (STACK_TOP_P (operands[0]))
16006 p = "{rp\t%0, %1|p\t%1, %0}";
16007 else
16008 p = "{p\t%1, %0|rp\t%0, %1}";
16009 #else
16010 if (STACK_TOP_P (operands[0]))
16011 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16012 else
16013 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16014 #endif
16015 break;
16016 }
16017
16018 if (STACK_TOP_P (operands[0]))
16019 {
16020 if (STACK_TOP_P (operands[1]))
16021 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16022 else
16023 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16024 break;
16025 }
16026 else if (STACK_TOP_P (operands[1]))
16027 {
16028 #if SYSV386_COMPAT
16029 p = "{\t%1, %0|r\t%0, %1}";
16030 #else
16031 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16032 #endif
16033 }
16034 else
16035 {
16036 #if SYSV386_COMPAT
16037 p = "{r\t%2, %0|\t%0, %2}";
16038 #else
16039 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16040 #endif
16041 }
16042 break;
16043
16044 default:
16045 gcc_unreachable ();
16046 }
16047
16048 strcat (buf, p);
16049 return buf;
16050 }
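/* For example, an SFmode PLUS with AVX enabled produces the template
   "vaddss\t{%2, %1, %0|%0, %1, %2}", while the non-AVX SSE form drops
   the 'v' prefix and the third operand: "addss\t{%2, %0|%0, %2}". */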
16051
16052 /* Check if a 256bit AVX register is referenced inside of EXP. */
16053
16054 static int
16055 ix86_check_avx256_register (rtx *pexp, void *)
16056 {
16057 rtx exp = *pexp;
16058
16059 if (GET_CODE (exp) == SUBREG)
16060 exp = SUBREG_REG (exp);
16061
16062 if (REG_P (exp)
16063 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16064 return 1;
16065
16066 return 0;
16067 }
16068
16069 /* Return needed mode for entity in optimize_mode_switching pass. */
16070
16071 static int
16072 ix86_avx_u128_mode_needed (rtx_insn *insn)
16073 {
16074 if (CALL_P (insn))
16075 {
16076 rtx link;
16077
16078 /* Needed mode is set to AVX_U128_CLEAN if there are
16079 no 256bit modes used in function arguments. */
16080 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16081 link;
16082 link = XEXP (link, 1))
16083 {
16084 if (GET_CODE (XEXP (link, 0)) == USE)
16085 {
16086 rtx arg = XEXP (XEXP (link, 0), 0);
16087
16088 if (ix86_check_avx256_register (&arg, NULL))
16089 return AVX_U128_DIRTY;
16090 }
16091 }
16092
16093 return AVX_U128_CLEAN;
16094 }
16095
16096 /* Require DIRTY mode if a 256bit AVX register is referenced. The hardware
16097 changes state only when a 256bit register is written to, but we need
16098 to prevent the compiler from moving the optimal insertion point above
16099 an eventual read from a 256bit register. */
16100 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16101 return AVX_U128_DIRTY;
16102
16103 return AVX_U128_ANY;
16104 }
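/* Here AVX_U128_DIRTY means the upper 128 bits of some ymm register may be
   live, AVX_U128_CLEAN means they are known to be zeroed (e.g. after a
   vzeroupper), and AVX_U128_ANY imposes no requirement; the mode switching
   pass uses these values to decide where ix86_emit_mode_set has to insert
   a vzeroupper. */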
16105
16106 /* Return mode that i387 must be switched into
16107 prior to the execution of insn. */
16108
16109 static int
16110 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16111 {
16112 enum attr_i387_cw mode;
16113
16114 /* The mode UNINITIALIZED is used to store the control word after a
16115 function call or ASM pattern. The mode ANY specifies that the function
16116 has no requirements on the control word and makes no changes to the
16117 bits we are interested in. */
16118
16119 if (CALL_P (insn)
16120 || (NONJUMP_INSN_P (insn)
16121 && (asm_noperands (PATTERN (insn)) >= 0
16122 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16123 return I387_CW_UNINITIALIZED;
16124
16125 if (recog_memoized (insn) < 0)
16126 return I387_CW_ANY;
16127
16128 mode = get_attr_i387_cw (insn);
16129
16130 switch (entity)
16131 {
16132 case I387_TRUNC:
16133 if (mode == I387_CW_TRUNC)
16134 return mode;
16135 break;
16136
16137 case I387_FLOOR:
16138 if (mode == I387_CW_FLOOR)
16139 return mode;
16140 break;
16141
16142 case I387_CEIL:
16143 if (mode == I387_CW_CEIL)
16144 return mode;
16145 break;
16146
16147 case I387_MASK_PM:
16148 if (mode == I387_CW_MASK_PM)
16149 return mode;
16150 break;
16151
16152 default:
16153 gcc_unreachable ();
16154 }
16155
16156 return I387_CW_ANY;
16157 }
16158
16159 /* Return mode that entity must be switched into
16160 prior to the execution of insn. */
16161
16162 static int
16163 ix86_mode_needed (int entity, rtx_insn *insn)
16164 {
16165 switch (entity)
16166 {
16167 case AVX_U128:
16168 return ix86_avx_u128_mode_needed (insn);
16169 case I387_TRUNC:
16170 case I387_FLOOR:
16171 case I387_CEIL:
16172 case I387_MASK_PM:
16173 return ix86_i387_mode_needed (entity, insn);
16174 default:
16175 gcc_unreachable ();
16176 }
16177 return 0;
16178 }
16179
16180 /* Check if a 256bit AVX register is referenced in stores. */
16181
16182 static void
16183 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16184 {
16185 if (ix86_check_avx256_register (&dest, NULL))
16186 {
16187 bool *used = (bool *) data;
16188 *used = true;
16189 }
16190 }
16191
16192 /* Calculate mode of upper 128bit AVX registers after the insn. */
16193
16194 static int
16195 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16196 {
16197 rtx pat = PATTERN (insn);
16198
16199 if (vzeroupper_operation (pat, VOIDmode)
16200 || vzeroall_operation (pat, VOIDmode))
16201 return AVX_U128_CLEAN;
16202
16203 /* We know that the state is clean after a CALL insn if no 256bit
16204 register is used for the function return value. */
16205 if (CALL_P (insn))
16206 {
16207 bool avx_reg256_found = false;
16208 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16209
16210 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16211 }
16212
16213 /* Otherwise, return current mode. Remember that if insn
16214 references AVX 256bit registers, the mode was already changed
16215 to DIRTY from MODE_NEEDED. */
16216 return mode;
16217 }
16218
16219 /* Return the mode that an insn results in. */
16220
16221 int
16222 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16223 {
16224 switch (entity)
16225 {
16226 case AVX_U128:
16227 return ix86_avx_u128_mode_after (mode, insn);
16228 case I387_TRUNC:
16229 case I387_FLOOR:
16230 case I387_CEIL:
16231 case I387_MASK_PM:
16232 return mode;
16233 default:
16234 gcc_unreachable ();
16235 }
16236 }
16237
16238 static int
16239 ix86_avx_u128_mode_entry (void)
16240 {
16241 tree arg;
16242
16243 /* Entry mode is set to AVX_U128_DIRTY if there are
16244 256bit modes used in function arguments. */
16245 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16246 arg = TREE_CHAIN (arg))
16247 {
16248 rtx incoming = DECL_INCOMING_RTL (arg);
16249
16250 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16251 return AVX_U128_DIRTY;
16252 }
16253
16254 return AVX_U128_CLEAN;
16255 }
16256
16257 /* Return a mode that ENTITY is assumed to be
16258 switched to at function entry. */
16259
16260 static int
16261 ix86_mode_entry (int entity)
16262 {
16263 switch (entity)
16264 {
16265 case AVX_U128:
16266 return ix86_avx_u128_mode_entry ();
16267 case I387_TRUNC:
16268 case I387_FLOOR:
16269 case I387_CEIL:
16270 case I387_MASK_PM:
16271 return I387_CW_ANY;
16272 default:
16273 gcc_unreachable ();
16274 }
16275 }
16276
16277 static int
16278 ix86_avx_u128_mode_exit (void)
16279 {
16280 rtx reg = crtl->return_rtx;
16281
16282 /* Exit mode is set to AVX_U128_DIRTY if there are
16283 256bit modes used in the function return register. */
16284 if (reg && ix86_check_avx256_register (&reg, NULL))
16285 return AVX_U128_DIRTY;
16286
16287 return AVX_U128_CLEAN;
16288 }
16289
16290 /* Return a mode that ENTITY is assumed to be
16291 switched to at function exit. */
16292
16293 static int
16294 ix86_mode_exit (int entity)
16295 {
16296 switch (entity)
16297 {
16298 case AVX_U128:
16299 return ix86_avx_u128_mode_exit ();
16300 case I387_TRUNC:
16301 case I387_FLOOR:
16302 case I387_CEIL:
16303 case I387_MASK_PM:
16304 return I387_CW_ANY;
16305 default:
16306 gcc_unreachable ();
16307 }
16308 }
16309
16310 static int
16311 ix86_mode_priority (int, int n)
16312 {
16313 return n;
16314 }
16315
16316 /* Output code to initialize control word copies used by trunc?f?i and
16317 rounding patterns. The current control word is saved to a stack slot,
16318 and a copy modified for MODE is stored to the slot belonging to MODE. */
16319
16320 static void
16321 emit_i387_cw_initialization (int mode)
16322 {
16323 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16324 rtx new_mode;
16325
16326 enum ix86_stack_slot slot;
16327
16328 rtx reg = gen_reg_rtx (HImode);
16329
16330 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16331 emit_move_insn (reg, copy_rtx (stored_mode));
16332
16333 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16334 || optimize_insn_for_size_p ())
16335 {
16336 switch (mode)
16337 {
16338 case I387_CW_TRUNC:
16339 /* round toward zero (truncate) */
16340 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16341 slot = SLOT_CW_TRUNC;
16342 break;
16343
16344 case I387_CW_FLOOR:
16345 /* round down toward -oo */
16346 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16347 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16348 slot = SLOT_CW_FLOOR;
16349 break;
16350
16351 case I387_CW_CEIL:
16352 /* round up toward +oo */
16353 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16354 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16355 slot = SLOT_CW_CEIL;
16356 break;
16357
16358 case I387_CW_MASK_PM:
16359 /* mask precision exception for nearbyint() */
16360 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16361 slot = SLOT_CW_MASK_PM;
16362 break;
16363
16364 default:
16365 gcc_unreachable ();
16366 }
16367 }
16368 else
16369 {
16370 switch (mode)
16371 {
16372 case I387_CW_TRUNC:
16373 /* round toward zero (truncate) */
16374 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16375 slot = SLOT_CW_TRUNC;
16376 break;
16377
16378 case I387_CW_FLOOR:
16379 /* round down toward -oo */
16380 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16381 slot = SLOT_CW_FLOOR;
16382 break;
16383
16384 case I387_CW_CEIL:
16385 /* round up toward +oo */
16386 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16387 slot = SLOT_CW_CEIL;
16388 break;
16389
16390 case I387_CW_MASK_PM:
16391 /* mask precision exception for nearbyint() */
16392 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16393 slot = SLOT_CW_MASK_PM;
16394 break;
16395
16396 default:
16397 gcc_unreachable ();
16398 }
16399 }
16400
16401 gcc_assert (slot < MAX_386_STACK_LOCALS);
16402
16403 new_mode = assign_386_stack_local (HImode, slot);
16404 emit_move_insn (new_mode, reg);
16405 }
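/* The bit patterns above follow the x87 control word layout: bits 10-11
   form the rounding control field (0x0400 rounds toward -inf, 0x0800
   toward +inf, 0x0c00 truncates toward zero), and bit 5 (0x0020) masks
   the precision exception used for nearbyint(). */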
16406
16407 /* Emit vzeroupper. */
16408
16409 void
16410 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16411 {
16412 int i;
16413
16414 /* Cancel automatic vzeroupper insertion if there are
16415 live call-saved SSE registers at the insertion point. */
16416
16417 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16418 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16419 return;
16420
16421 if (TARGET_64BIT)
16422 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16423 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16424 return;
16425
16426 emit_insn (gen_avx_vzeroupper ());
16427 }
16428
16431 /* Generate one or more insns to set ENTITY to MODE. REGS_LIVE
16432 is the set of hard registers live at the point where the insn(s)
16433 are to be inserted. */
16434
16435 static void
16436 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16437 HARD_REG_SET regs_live)
16438 {
16439 switch (entity)
16440 {
16441 case AVX_U128:
16442 if (mode == AVX_U128_CLEAN)
16443 ix86_avx_emit_vzeroupper (regs_live);
16444 break;
16445 case I387_TRUNC:
16446 case I387_FLOOR:
16447 case I387_CEIL:
16448 case I387_MASK_PM:
16449 if (mode != I387_CW_ANY
16450 && mode != I387_CW_UNINITIALIZED)
16451 emit_i387_cw_initialization (mode);
16452 break;
16453 default:
16454 gcc_unreachable ();
16455 }
16456 }
16457
16458 /* Output code for INSN to convert a float to a signed int. OPERANDS
16459 are the insn operands. The output may be [HSD]Imode and the input
16460 operand may be [SDX]Fmode. */
16461
16462 const char *
16463 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
16464 {
16465 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16466 int dimode_p = GET_MODE (operands[0]) == DImode;
16467 int round_mode = get_attr_i387_cw (insn);
16468
16469 /* Jump through a hoop or two for DImode, since the hardware has no
16470 non-popping instruction. We used to do this a different way, but
16471 that was somewhat fragile and broke with post-reload splitters. */
16472 if ((dimode_p || fisttp) && !stack_top_dies)
16473 output_asm_insn ("fld\t%y1", operands);
16474
16475 gcc_assert (STACK_TOP_P (operands[1]));
16476 gcc_assert (MEM_P (operands[0]));
16477 gcc_assert (GET_MODE (operands[1]) != TFmode);
16478
16479 if (fisttp)
16480 output_asm_insn ("fisttp%Z0\t%0", operands);
16481 else
16482 {
16483 if (round_mode != I387_CW_ANY)
16484 output_asm_insn ("fldcw\t%3", operands);
16485 if (stack_top_dies || dimode_p)
16486 output_asm_insn ("fistp%Z0\t%0", operands);
16487 else
16488 output_asm_insn ("fist%Z0\t%0", operands);
16489 if (round_mode != I387_CW_ANY)
16490 output_asm_insn ("fldcw\t%2", operands);
16491 }
16492
16493 return "";
16494 }
16495
16496 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16497 have the values zero or one, indicates the ffreep insn's operand
16498 from the OPERANDS array. */
16499
16500 static const char *
16501 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16502 {
16503 if (TARGET_USE_FFREEP)
16504 #ifdef HAVE_AS_IX86_FFREEP
16505 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16506 #else
16507 {
16508 static char retval[32];
16509 int regno = REGNO (operands[opno]);
16510
16511 gcc_assert (STACK_REGNO_P (regno));
16512
16513 regno -= FIRST_STACK_REG;
16514
16515 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16516 return retval;
16517 }
16518 #endif
16519
16520 return opno ? "fstp\t%y1" : "fstp\t%y0";
16521 }
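/* In the #else branch above the two-byte ffreep opcode is emitted directly:
   the 16-bit value 0xc<regno>df is stored little-endian, so for %st(3) it
   produces the bytes 0xdf 0xc3, which should encode "ffreep %st(3)". */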
16522
16523
16524 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16525 should be used. UNORDERED_P is true when fucom should be used. */
16526
16527 const char *
16528 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16529 {
16530 int stack_top_dies;
16531 rtx cmp_op0, cmp_op1;
16532 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16533
16534 if (eflags_p)
16535 {
16536 cmp_op0 = operands[0];
16537 cmp_op1 = operands[1];
16538 }
16539 else
16540 {
16541 cmp_op0 = operands[1];
16542 cmp_op1 = operands[2];
16543 }
16544
16545 if (is_sse)
16546 {
16547 if (GET_MODE (operands[0]) == SFmode)
16548 if (unordered_p)
16549 return "%vucomiss\t{%1, %0|%0, %1}";
16550 else
16551 return "%vcomiss\t{%1, %0|%0, %1}";
16552 else
16553 if (unordered_p)
16554 return "%vucomisd\t{%1, %0|%0, %1}";
16555 else
16556 return "%vcomisd\t{%1, %0|%0, %1}";
16557 }
16558
16559 gcc_assert (STACK_TOP_P (cmp_op0));
16560
16561 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16562
16563 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16564 {
16565 if (stack_top_dies)
16566 {
16567 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16568 return output_387_ffreep (operands, 1);
16569 }
16570 else
16571 return "ftst\n\tfnstsw\t%0";
16572 }
16573
16574 if (STACK_REG_P (cmp_op1)
16575 && stack_top_dies
16576 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16577 && REGNO (cmp_op1) != FIRST_STACK_REG)
16578 {
16579 /* If both the top of the 387 stack and the other operand (also a
16580 stack register) die, then this must be a `fcompp' float
16581 compare. */
16582
16583 if (eflags_p)
16584 {
16585 /* There is no double popping fcomi variant. Fortunately,
16586 eflags is immune from the fstp's cc clobbering. */
16587 if (unordered_p)
16588 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16589 else
16590 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16591 return output_387_ffreep (operands, 0);
16592 }
16593 else
16594 {
16595 if (unordered_p)
16596 return "fucompp\n\tfnstsw\t%0";
16597 else
16598 return "fcompp\n\tfnstsw\t%0";
16599 }
16600 }
16601 else
16602 {
16603 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16604
16605 static const char * const alt[16] =
16606 {
16607 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16608 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16609 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16610 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16611
16612 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16613 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16614 NULL,
16615 NULL,
16616
16617 "fcomi\t{%y1, %0|%0, %y1}",
16618 "fcomip\t{%y1, %0|%0, %y1}",
16619 "fucomi\t{%y1, %0|%0, %y1}",
16620 "fucomip\t{%y1, %0|%0, %y1}",
16621
16622 NULL,
16623 NULL,
16624 NULL,
16625 NULL
16626 };
16627
16628 int mask;
16629 const char *ret;
16630
16631 mask = eflags_p << 3;
16632 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16633 mask |= unordered_p << 1;
16634 mask |= stack_top_dies;
16635
16636 gcc_assert (mask < 16);
16637 ret = alt[mask];
16638 gcc_assert (ret);
16639
16640 return ret;
16641 }
16642 }
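/* For example, a register-register unordered compare producing eflags
   (eflags_p = 1) where the stack top dies gives
   mask = (1 << 3) | (1 << 1) | 1 = 11, which selects
   "fucomip\t{%y1, %0|%0, %y1}" from the table above. */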
16643
16644 void
16645 ix86_output_addr_vec_elt (FILE *file, int value)
16646 {
16647 const char *directive = ASM_LONG;
16648
16649 #ifdef ASM_QUAD
16650 if (TARGET_LP64)
16651 directive = ASM_QUAD;
16652 #else
16653 gcc_assert (!TARGET_64BIT);
16654 #endif
16655
16656 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16657 }
16658
16659 void
16660 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16661 {
16662 const char *directive = ASM_LONG;
16663
16664 #ifdef ASM_QUAD
16665 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16666 directive = ASM_QUAD;
16667 #else
16668 gcc_assert (!TARGET_64BIT);
16669 #endif
16670 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16671 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16672 fprintf (file, "%s%s%d-%s%d\n",
16673 directive, LPREFIX, value, LPREFIX, rel);
16674 else if (HAVE_AS_GOTOFF_IN_DATA)
16675 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16676 #if TARGET_MACHO
16677 else if (TARGET_MACHO)
16678 {
16679 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16680 machopic_output_function_base_name (file);
16681 putc ('\n', file);
16682 }
16683 #endif
16684 else
16685 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16686 GOT_SYMBOL_NAME, LPREFIX, value);
16687 }
16688 \f
16689 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16690 for the target. */
16691
16692 void
16693 ix86_expand_clear (rtx dest)
16694 {
16695 rtx tmp;
16696
16697 /* We play register width games, which are only valid after reload. */
16698 gcc_assert (reload_completed);
16699
16700 /* Avoid HImode and its attendant prefix byte. */
16701 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16702 dest = gen_rtx_REG (SImode, REGNO (dest));
16703 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16704
16705 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
16706 {
16707 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16708 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16709 }
16710
16711 emit_insn (tmp);
16712 }
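/* For example, clearing a QImode hard register after reload widens it to
   SImode and, unless TARGET_USE_MOV0 is set and we are not optimizing for
   size, wraps the SET in a PARALLEL with a FLAGS_REG clobber so that it
   can be emitted as "xor". */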
16713
16714 /* X is an unchanging MEM. If it is a constant pool reference, return
16715 the constant pool rtx, else NULL. */
16716
16717 rtx
16718 maybe_get_pool_constant (rtx x)
16719 {
16720 x = ix86_delegitimize_address (XEXP (x, 0));
16721
16722 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16723 return get_pool_constant (x);
16724
16725 return NULL_RTX;
16726 }
16727
16728 void
16729 ix86_expand_move (enum machine_mode mode, rtx operands[])
16730 {
16731 rtx op0, op1;
16732 enum tls_model model;
16733
16734 op0 = operands[0];
16735 op1 = operands[1];
16736
16737 if (GET_CODE (op1) == SYMBOL_REF)
16738 {
16739 rtx tmp;
16740
16741 model = SYMBOL_REF_TLS_MODEL (op1);
16742 if (model)
16743 {
16744 op1 = legitimize_tls_address (op1, model, true);
16745 op1 = force_operand (op1, op0);
16746 if (op1 == op0)
16747 return;
16748 op1 = convert_to_mode (mode, op1, 1);
16749 }
16750 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16751 op1 = tmp;
16752 }
16753 else if (GET_CODE (op1) == CONST
16754 && GET_CODE (XEXP (op1, 0)) == PLUS
16755 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16756 {
16757 rtx addend = XEXP (XEXP (op1, 0), 1);
16758 rtx symbol = XEXP (XEXP (op1, 0), 0);
16759 rtx tmp;
16760
16761 model = SYMBOL_REF_TLS_MODEL (symbol);
16762 if (model)
16763 tmp = legitimize_tls_address (symbol, model, true);
16764 else
16765 tmp = legitimize_pe_coff_symbol (symbol, true);
16766
16767 if (tmp)
16768 {
16769 tmp = force_operand (tmp, NULL);
16770 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16771 op0, 1, OPTAB_DIRECT);
16772 if (tmp == op0)
16773 return;
16774 op1 = convert_to_mode (mode, tmp, 1);
16775 }
16776 }
16777
16778 if ((flag_pic || MACHOPIC_INDIRECT)
16779 && symbolic_operand (op1, mode))
16780 {
16781 if (TARGET_MACHO && !TARGET_64BIT)
16782 {
16783 #if TARGET_MACHO
16784 /* dynamic-no-pic */
16785 if (MACHOPIC_INDIRECT)
16786 {
16787 rtx temp = ((reload_in_progress
16788 || ((op0 && REG_P (op0))
16789 && mode == Pmode))
16790 ? op0 : gen_reg_rtx (Pmode));
16791 op1 = machopic_indirect_data_reference (op1, temp);
16792 if (MACHOPIC_PURE)
16793 op1 = machopic_legitimize_pic_address (op1, mode,
16794 temp == op1 ? 0 : temp);
16795 }
16796 if (op0 != op1 && GET_CODE (op0) != MEM)
16797 {
16798 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16799 emit_insn (insn);
16800 return;
16801 }
16802 if (GET_CODE (op0) == MEM)
16803 op1 = force_reg (Pmode, op1);
16804 else
16805 {
16806 rtx temp = op0;
16807 if (GET_CODE (temp) != REG)
16808 temp = gen_reg_rtx (Pmode);
16809 temp = legitimize_pic_address (op1, temp);
16810 if (temp == op0)
16811 return;
16812 op1 = temp;
16813 }
16814 /* dynamic-no-pic */
16815 #endif
16816 }
16817 else
16818 {
16819 if (MEM_P (op0))
16820 op1 = force_reg (mode, op1);
16821 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16822 {
16823 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16824 op1 = legitimize_pic_address (op1, reg);
16825 if (op0 == op1)
16826 return;
16827 op1 = convert_to_mode (mode, op1, 1);
16828 }
16829 }
16830 }
16831 else
16832 {
16833 if (MEM_P (op0)
16834 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16835 || !push_operand (op0, mode))
16836 && MEM_P (op1))
16837 op1 = force_reg (mode, op1);
16838
16839 if (push_operand (op0, mode)
16840 && ! general_no_elim_operand (op1, mode))
16841 op1 = copy_to_mode_reg (mode, op1);
16842
16843 /* Force large constants in 64bit compilation into registers
16844 to get them CSEed. */
16845 if (can_create_pseudo_p ()
16846 && (mode == DImode) && TARGET_64BIT
16847 && immediate_operand (op1, mode)
16848 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16849 && !register_operand (op0, mode)
16850 && optimize)
16851 op1 = copy_to_mode_reg (mode, op1);
16852
16853 if (can_create_pseudo_p ()
16854 && FLOAT_MODE_P (mode)
16855 && GET_CODE (op1) == CONST_DOUBLE)
16856 {
16857 /* If we are loading a floating point constant to a register,
16858 force the value to memory now, since we'll get better code
16859 out the back end. */
16860
16861 op1 = validize_mem (force_const_mem (mode, op1));
16862 if (!register_operand (op0, mode))
16863 {
16864 rtx temp = gen_reg_rtx (mode);
16865 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16866 emit_move_insn (op0, temp);
16867 return;
16868 }
16869 }
16870 }
16871
16872 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16873 }
16874
16875 void
16876 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16877 {
16878 rtx op0 = operands[0], op1 = operands[1];
16879 unsigned int align = GET_MODE_ALIGNMENT (mode);
16880
16881 if (push_operand (op0, VOIDmode))
16882 op0 = emit_move_resolve_push (mode, op0);
16883
16884 /* Force constants other than zero into memory. We do not know how
16885 the instructions used to build constants modify the upper 64 bits
16886 of the register; once we have that information we may be able
16887 to handle some of them more efficiently. */
16888 if (can_create_pseudo_p ()
16889 && register_operand (op0, mode)
16890 && (CONSTANT_P (op1)
16891 || (GET_CODE (op1) == SUBREG
16892 && CONSTANT_P (SUBREG_REG (op1))))
16893 && !standard_sse_constant_p (op1))
16894 op1 = validize_mem (force_const_mem (mode, op1));
16895
16896 /* We need to check memory alignment for SSE mode since an attribute
16897 can make operands unaligned. */
16898 if (can_create_pseudo_p ()
16899 && SSE_REG_MODE_P (mode)
16900 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16901 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16902 {
16903 rtx tmp[2];
16904
16905 /* ix86_expand_vector_move_misalign() does not like constants ... */
16906 if (CONSTANT_P (op1)
16907 || (GET_CODE (op1) == SUBREG
16908 && CONSTANT_P (SUBREG_REG (op1))))
16909 op1 = validize_mem (force_const_mem (mode, op1));
16910
16911 /* ... nor both arguments in memory. */
16912 if (!register_operand (op0, mode)
16913 && !register_operand (op1, mode))
16914 op1 = force_reg (mode, op1);
16915
16916 tmp[0] = op0; tmp[1] = op1;
16917 ix86_expand_vector_move_misalign (mode, tmp);
16918 return;
16919 }
16920
16921 /* Make operand1 a register if it isn't already. */
16922 if (can_create_pseudo_p ()
16923 && !register_operand (op0, mode)
16924 && !register_operand (op1, mode))
16925 {
16926 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16927 return;
16928 }
16929
16930 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16931 }
16932
16933 /* Split 32-byte AVX unaligned load and store if needed. */
16934
16935 static void
16936 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16937 {
16938 rtx m;
16939 rtx (*extract) (rtx, rtx, rtx);
16940 rtx (*load_unaligned) (rtx, rtx);
16941 rtx (*store_unaligned) (rtx, rtx);
16942 enum machine_mode mode;
16943
16944 switch (GET_MODE (op0))
16945 {
16946 default:
16947 gcc_unreachable ();
16948 case V32QImode:
16949 extract = gen_avx_vextractf128v32qi;
16950 load_unaligned = gen_avx_loaddquv32qi;
16951 store_unaligned = gen_avx_storedquv32qi;
16952 mode = V16QImode;
16953 break;
16954 case V8SFmode:
16955 extract = gen_avx_vextractf128v8sf;
16956 load_unaligned = gen_avx_loadups256;
16957 store_unaligned = gen_avx_storeups256;
16958 mode = V4SFmode;
16959 break;
16960 case V4DFmode:
16961 extract = gen_avx_vextractf128v4df;
16962 load_unaligned = gen_avx_loadupd256;
16963 store_unaligned = gen_avx_storeupd256;
16964 mode = V2DFmode;
16965 break;
16966 }
16967
16968 if (MEM_P (op1))
16969 {
16970 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
16971 {
16972 rtx r = gen_reg_rtx (mode);
16973 m = adjust_address (op1, mode, 0);
16974 emit_move_insn (r, m);
16975 m = adjust_address (op1, mode, 16);
16976 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
16977 emit_move_insn (op0, r);
16978 }
16979 /* Normal *mov<mode>_internal pattern will handle
16980 unaligned loads just fine if misaligned_operand
16981 is true, and without the UNSPEC it can be combined
16982 with arithmetic instructions. */
16983 else if (misaligned_operand (op1, GET_MODE (op1)))
16984 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16985 else
16986 emit_insn (load_unaligned (op0, op1));
16987 }
16988 else if (MEM_P (op0))
16989 {
16990 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
16991 {
16992 m = adjust_address (op0, mode, 0);
16993 emit_insn (extract (m, op1, const0_rtx));
16994 m = adjust_address (op0, mode, 16);
16995 emit_insn (extract (m, op1, const1_rtx));
16996 }
16997 else
16998 emit_insn (store_unaligned (op0, op1));
16999 }
17000 else
17001 gcc_unreachable ();
17002 }
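/* For example, with TARGET_AVX256_SPLIT_UNALIGNED_STORE an unaligned V8SF
   store is emitted as two vextractf128 stores of the low and high 128-bit
   halves at offsets 0 and 16, instead of a single unsplit 256-bit store. */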
17003
17004 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17005 straight to ix86_expand_vector_move. */
17006 /* Code generation for scalar reg-reg moves of single and double precision data:
17007 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17008 movaps reg, reg
17009 else
17010 movss reg, reg
17011 if (x86_sse_partial_reg_dependency == true)
17012 movapd reg, reg
17013 else
17014 movsd reg, reg
17015
17016 Code generation for scalar loads of double precision data:
17017 if (x86_sse_split_regs == true)
17018 movlpd mem, reg (gas syntax)
17019 else
17020 movsd mem, reg
17021
17022 Code generation for unaligned packed loads of single precision data
17023 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17024 if (x86_sse_unaligned_move_optimal)
17025 movups mem, reg
17026
17027 if (x86_sse_partial_reg_dependency == true)
17028 {
17029 xorps reg, reg
17030 movlps mem, reg
17031 movhps mem+8, reg
17032 }
17033 else
17034 {
17035 movlps mem, reg
17036 movhps mem+8, reg
17037 }
17038
17039 Code generation for unaligned packed loads of double precision data
17040 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17041 if (x86_sse_unaligned_move_optimal)
17042 movupd mem, reg
17043
17044 if (x86_sse_split_regs == true)
17045 {
17046 movlpd mem, reg
17047 movhpd mem+8, reg
17048 }
17049 else
17050 {
17051 movsd mem, reg
17052 movhpd mem+8, reg
17053 }
17054 */
17055
17056 void
17057 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17058 {
17059 rtx op0, op1, orig_op0 = NULL_RTX, m;
17060 rtx (*load_unaligned) (rtx, rtx);
17061 rtx (*store_unaligned) (rtx, rtx);
17062
17063 op0 = operands[0];
17064 op1 = operands[1];
17065
17066 if (GET_MODE_SIZE (mode) == 64)
17067 {
17068 switch (GET_MODE_CLASS (mode))
17069 {
17070 case MODE_VECTOR_INT:
17071 case MODE_INT:
17072 if (GET_MODE (op0) != V16SImode)
17073 {
17074 if (!MEM_P (op0))
17075 {
17076 orig_op0 = op0;
17077 op0 = gen_reg_rtx (V16SImode);
17078 }
17079 else
17080 op0 = gen_lowpart (V16SImode, op0);
17081 }
17082 op1 = gen_lowpart (V16SImode, op1);
17083 /* FALLTHRU */
17084
17085 case MODE_VECTOR_FLOAT:
17086 switch (GET_MODE (op0))
17087 {
17088 default:
17089 gcc_unreachable ();
17090 case V16SImode:
17091 load_unaligned = gen_avx512f_loaddquv16si;
17092 store_unaligned = gen_avx512f_storedquv16si;
17093 break;
17094 case V16SFmode:
17095 load_unaligned = gen_avx512f_loadups512;
17096 store_unaligned = gen_avx512f_storeups512;
17097 break;
17098 case V8DFmode:
17099 load_unaligned = gen_avx512f_loadupd512;
17100 store_unaligned = gen_avx512f_storeupd512;
17101 break;
17102 }
17103
17104 if (MEM_P (op1))
17105 emit_insn (load_unaligned (op0, op1));
17106 else if (MEM_P (op0))
17107 emit_insn (store_unaligned (op0, op1));
17108 else
17109 gcc_unreachable ();
17110 if (orig_op0)
17111 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17112 break;
17113
17114 default:
17115 gcc_unreachable ();
17116 }
17117
17118 return;
17119 }
17120
17121 if (TARGET_AVX
17122 && GET_MODE_SIZE (mode) == 32)
17123 {
17124 switch (GET_MODE_CLASS (mode))
17125 {
17126 case MODE_VECTOR_INT:
17127 case MODE_INT:
17128 if (GET_MODE (op0) != V32QImode)
17129 {
17130 if (!MEM_P (op0))
17131 {
17132 orig_op0 = op0;
17133 op0 = gen_reg_rtx (V32QImode);
17134 }
17135 else
17136 op0 = gen_lowpart (V32QImode, op0);
17137 }
17138 op1 = gen_lowpart (V32QImode, op1);
17139 /* FALLTHRU */
17140
17141 case MODE_VECTOR_FLOAT:
17142 ix86_avx256_split_vector_move_misalign (op0, op1);
17143 if (orig_op0)
17144 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17145 break;
17146
17147 default:
17148 gcc_unreachable ();
17149 }
17150
17151 return;
17152 }
17153
17154 if (MEM_P (op1))
17155 {
17156 /* Normal *mov<mode>_internal pattern will handle
17157 unaligned loads just fine if misaligned_operand
17158 is true, and without the UNSPEC it can be combined
17159 with arithmetic instructions. */
17160 if (TARGET_AVX
17161 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17162 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17163 && misaligned_operand (op1, GET_MODE (op1)))
17164 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17165 /* ??? If we have typed data, then it would appear that using
17166 movdqu is the only way to get unaligned data loaded with
17167 integer type. */
17168 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17169 {
17170 if (GET_MODE (op0) != V16QImode)
17171 {
17172 orig_op0 = op0;
17173 op0 = gen_reg_rtx (V16QImode);
17174 }
17175 op1 = gen_lowpart (V16QImode, op1);
17176 /* We will eventually emit movups based on insn attributes. */
17177 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17178 if (orig_op0)
17179 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17180 }
17181 else if (TARGET_SSE2 && mode == V2DFmode)
17182 {
17183 rtx zero;
17184
17185 if (TARGET_AVX
17186 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17187 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17188 || optimize_insn_for_size_p ())
17189 {
17190 /* We will eventually emit movups based on insn attributes. */
17191 emit_insn (gen_sse2_loadupd (op0, op1));
17192 return;
17193 }
17194
17195 /* When SSE registers are split into halves, we can avoid
17196 writing to the top half twice. */
17197 if (TARGET_SSE_SPLIT_REGS)
17198 {
17199 emit_clobber (op0);
17200 zero = op0;
17201 }
17202 else
17203 {
17204 /* ??? Not sure about the best option for the Intel chips.
17205 The following would seem to satisfy; the register is
17206 entirely cleared, breaking the dependency chain. We
17207 then store to the upper half, with a dependency depth
17208 of one. A rumor has it that Intel recommends two movsd
17209 followed by an unpacklpd, but this is unconfirmed. And
17210 given that the dependency depth of the unpacklpd would
17211 still be one, I'm not sure why this would be better. */
17212 zero = CONST0_RTX (V2DFmode);
17213 }
17214
17215 m = adjust_address (op1, DFmode, 0);
17216 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17217 m = adjust_address (op1, DFmode, 8);
17218 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17219 }
17220 else
17221 {
17222 rtx t;
17223
17224 if (TARGET_AVX
17225 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17226 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17227 || optimize_insn_for_size_p ())
17228 {
17229 if (GET_MODE (op0) != V4SFmode)
17230 {
17231 orig_op0 = op0;
17232 op0 = gen_reg_rtx (V4SFmode);
17233 }
17234 op1 = gen_lowpart (V4SFmode, op1);
17235 emit_insn (gen_sse_loadups (op0, op1));
17236 if (orig_op0)
17237 emit_move_insn (orig_op0,
17238 gen_lowpart (GET_MODE (orig_op0), op0));
17239 return;
17240 }
17241
17242 if (mode != V4SFmode)
17243 t = gen_reg_rtx (V4SFmode);
17244 else
17245 t = op0;
17246
17247 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17248 emit_move_insn (t, CONST0_RTX (V4SFmode));
17249 else
17250 emit_clobber (t);
17251
17252 m = adjust_address (op1, V2SFmode, 0);
17253 emit_insn (gen_sse_loadlps (t, t, m));
17254 m = adjust_address (op1, V2SFmode, 8);
17255 emit_insn (gen_sse_loadhps (t, t, m));
17256 if (mode != V4SFmode)
17257 emit_move_insn (op0, gen_lowpart (mode, t));
17258 }
17259 }
17260 else if (MEM_P (op0))
17261 {
17262 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17263 {
17264 op0 = gen_lowpart (V16QImode, op0);
17265 op1 = gen_lowpart (V16QImode, op1);
17266 /* We will eventually emit movups based on insn attributes. */
17267 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17268 }
17269 else if (TARGET_SSE2 && mode == V2DFmode)
17270 {
17271 if (TARGET_AVX
17272 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17273 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17274 || optimize_insn_for_size_p ())
17275 /* We will eventually emit movups based on insn attributes. */
17276 emit_insn (gen_sse2_storeupd (op0, op1));
17277 else
17278 {
17279 m = adjust_address (op0, DFmode, 0);
17280 emit_insn (gen_sse2_storelpd (m, op1));
17281 m = adjust_address (op0, DFmode, 8);
17282 emit_insn (gen_sse2_storehpd (m, op1));
17283 }
17284 }
17285 else
17286 {
17287 if (mode != V4SFmode)
17288 op1 = gen_lowpart (V4SFmode, op1);
17289
17290 if (TARGET_AVX
17291 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17292 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17293 || optimize_insn_for_size_p ())
17294 {
17295 op0 = gen_lowpart (V4SFmode, op0);
17296 emit_insn (gen_sse_storeups (op0, op1));
17297 }
17298 else
17299 {
17300 m = adjust_address (op0, V2SFmode, 0);
17301 emit_insn (gen_sse_storelps (m, op1));
17302 m = adjust_address (op0, V2SFmode, 8);
17303 emit_insn (gen_sse_storehps (m, op1));
17304 }
17305 }
17306 }
17307 else
17308 gcc_unreachable ();
17309 }
17310
17311 /* Helper function of ix86_fixup_binary_operands to canonicalize
17312 operand order. Returns true if the operands should be swapped. */
17313
17314 static bool
17315 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17316 rtx operands[])
17317 {
17318 rtx dst = operands[0];
17319 rtx src1 = operands[1];
17320 rtx src2 = operands[2];
17321
17322 /* If the operation is not commutative, we can't do anything. */
17323 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17324 return false;
17325
17326 /* Highest priority is that src1 should match dst. */
17327 if (rtx_equal_p (dst, src1))
17328 return false;
17329 if (rtx_equal_p (dst, src2))
17330 return true;
17331
17332 /* Next highest priority is that immediate constants come second. */
17333 if (immediate_operand (src2, mode))
17334 return false;
17335 if (immediate_operand (src1, mode))
17336 return true;
17337
17338 /* Lowest priority is that memory references should come second. */
17339 if (MEM_P (src2))
17340 return false;
17341 if (MEM_P (src1))
17342 return true;
17343
17344 return false;
17345 }
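/* For example, for a commutative PLUS where only operands[2] matches the
   destination, this returns true so that the caller swaps the sources and
   the two-address "dst = dst op src" form required by most integer insns
   can still be used. */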
17346
17347
17348 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17349 destination to use for the operation. If different from the true
17350 destination in operands[0], a copy operation will be required. */
17351
17352 rtx
17353 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17354 rtx operands[])
17355 {
17356 rtx dst = operands[0];
17357 rtx src1 = operands[1];
17358 rtx src2 = operands[2];
17359
17360 /* Canonicalize operand order. */
17361 if (ix86_swap_binary_operands_p (code, mode, operands))
17362 {
17363 rtx temp;
17364
17365 /* It is invalid to swap operands of different modes. */
17366 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17367
17368 temp = src1;
17369 src1 = src2;
17370 src2 = temp;
17371 }
17372
17373 /* Both source operands cannot be in memory. */
17374 if (MEM_P (src1) && MEM_P (src2))
17375 {
17376 /* Optimization: Only read from memory once. */
17377 if (rtx_equal_p (src1, src2))
17378 {
17379 src2 = force_reg (mode, src2);
17380 src1 = src2;
17381 }
17382 else if (rtx_equal_p (dst, src1))
17383 src2 = force_reg (mode, src2);
17384 else
17385 src1 = force_reg (mode, src1);
17386 }
17387
17388 /* If the destination is memory, and we do not have matching source
17389 operands, do things in registers. */
17390 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17391 dst = gen_reg_rtx (mode);
17392
17393 /* Source 1 cannot be a constant. */
17394 if (CONSTANT_P (src1))
17395 src1 = force_reg (mode, src1);
17396
17397 /* Source 1 cannot be a non-matching memory. */
17398 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17399 src1 = force_reg (mode, src1);
17400
17401 /* Improve address combine. */
17402 if (code == PLUS
17403 && GET_MODE_CLASS (mode) == MODE_INT
17404 && MEM_P (src2))
17405 src2 = force_reg (mode, src2);
17406
17407 operands[1] = src1;
17408 operands[2] = src2;
17409 return dst;
17410 }
17411
17412 /* Similarly, but assume that the destination has already been
17413 set up properly. */
17414
17415 void
17416 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17417 enum machine_mode mode, rtx operands[])
17418 {
17419 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17420 gcc_assert (dst == operands[0]);
17421 }
17422
17423 /* Attempt to expand a binary operator. Make the expansion closer to the
17424 actual machine than just general_operand, which would allow 3 separate
17425 memory references (one output, two input) in a single insn. */
17426
17427 void
17428 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17429 rtx operands[])
17430 {
17431 rtx src1, src2, dst, op, clob;
17432
17433 dst = ix86_fixup_binary_operands (code, mode, operands);
17434 src1 = operands[1];
17435 src2 = operands[2];
17436
17437 /* Emit the instruction. */
17438
17439 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17440 if (reload_in_progress)
17441 {
17442 /* Reload doesn't know about the flags register, and doesn't know that
17443 it doesn't want to clobber it. We can only do this with PLUS. */
17444 gcc_assert (code == PLUS);
17445 emit_insn (op);
17446 }
17447 else if (reload_completed
17448 && code == PLUS
17449 && !rtx_equal_p (dst, src1))
17450 {
17451 /* This is going to be an LEA; avoid splitting it later. */
17452 emit_insn (op);
17453 }
17454 else
17455 {
17456 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17457 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17458 }
17459
17460 /* Fix up the destination if needed. */
17461 if (dst != operands[0])
17462 emit_move_insn (operands[0], dst);
17463 }
17464
17465 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17466 the given OPERANDS. */
17467
17468 void
17469 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17470 rtx operands[])
17471 {
17472 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17473 if (GET_CODE (operands[1]) == SUBREG)
17474 {
17475 op1 = operands[1];
17476 op2 = operands[2];
17477 }
17478 else if (GET_CODE (operands[2]) == SUBREG)
17479 {
17480 op1 = operands[2];
17481 op2 = operands[1];
17482 }
17483 /* Optimize (__m128i) d | (__m128i) e and similar code
17484 when d and e are float vectors into float vector logical
17485 insn. In C/C++ without using intrinsics there is no other way
17486 to express vector logical operation on float vectors than
17487 to cast them temporarily to integer vectors. */
17488 if (op1
17489 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17490 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17491 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17492 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17493 && SUBREG_BYTE (op1) == 0
17494 && (GET_CODE (op2) == CONST_VECTOR
17495 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17496 && SUBREG_BYTE (op2) == 0))
17497 && can_create_pseudo_p ())
17498 {
17499 rtx dst;
17500 switch (GET_MODE (SUBREG_REG (op1)))
17501 {
17502 case V4SFmode:
17503 case V8SFmode:
17504 case V2DFmode:
17505 case V4DFmode:
17506 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17507 if (GET_CODE (op2) == CONST_VECTOR)
17508 {
17509 op2 = gen_lowpart (GET_MODE (dst), op2);
17510 op2 = force_reg (GET_MODE (dst), op2);
17511 }
17512 else
17513 {
17514 op1 = operands[1];
17515 op2 = SUBREG_REG (operands[2]);
17516 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17517 op2 = force_reg (GET_MODE (dst), op2);
17518 }
17519 op1 = SUBREG_REG (op1);
17520 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17521 op1 = force_reg (GET_MODE (dst), op1);
17522 emit_insn (gen_rtx_SET (VOIDmode, dst,
17523 gen_rtx_fmt_ee (code, GET_MODE (dst),
17524 op1, op2)));
17525 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17526 return;
17527 default:
17528 break;
17529 }
17530 }
17531 if (!nonimmediate_operand (operands[1], mode))
17532 operands[1] = force_reg (mode, operands[1]);
17533 if (!nonimmediate_operand (operands[2], mode))
17534 operands[2] = force_reg (mode, operands[2]);
17535 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17536 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17537 gen_rtx_fmt_ee (code, mode, operands[1],
17538 operands[2])));
17539 }
17540
17541 /* Return TRUE or FALSE depending on whether the binary operator meets the
17542 appropriate constraints. */
17543
17544 bool
17545 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17546 rtx operands[3])
17547 {
17548 rtx dst = operands[0];
17549 rtx src1 = operands[1];
17550 rtx src2 = operands[2];
17551
17552 /* Both source operands cannot be in memory. */
17553 if (MEM_P (src1) && MEM_P (src2))
17554 return false;
17555
17556 /* Canonicalize operand order for commutative operators. */
17557 if (ix86_swap_binary_operands_p (code, mode, operands))
17558 {
17559 rtx temp = src1;
17560 src1 = src2;
17561 src2 = temp;
17562 }
17563
17564 /* If the destination is memory, we must have a matching source operand. */
17565 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17566 return false;
17567
17568 /* Source 1 cannot be a constant. */
17569 if (CONSTANT_P (src1))
17570 return false;
17571
17572 /* Source 1 cannot be a non-matching memory. */
17573 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17574 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17575 return (code == AND
17576 && (mode == HImode
17577 || mode == SImode
17578 || (TARGET_64BIT && mode == DImode))
17579 && satisfies_constraint_L (src2));
17580
17581 return true;
17582 }
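/* Constraint L above accepts zero-extension masks such as 0xff and 0xffff,
   so an AND of a non-matching memory operand with such a mask is still
   accepted; it can later be emitted as a zero-extending load (e.g. movzbl)
   rather than as a read-modify-write AND. */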
17583
17584 /* Attempt to expand a unary operator. Make the expansion closer to the
17585 actual machine than just general_operand, which would allow 2 separate
17586 memory references (one output, one input) in a single insn. */
17587
17588 void
17589 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17590 rtx operands[])
17591 {
17592 int matching_memory;
17593 rtx src, dst, op, clob;
17594
17595 dst = operands[0];
17596 src = operands[1];
17597
17598 /* If the destination is memory, and we do not have matching source
17599 operands, do things in registers. */
17600 matching_memory = 0;
17601 if (MEM_P (dst))
17602 {
17603 if (rtx_equal_p (dst, src))
17604 matching_memory = 1;
17605 else
17606 dst = gen_reg_rtx (mode);
17607 }
17608
17609 /* When source operand is memory, destination must match. */
17610 if (MEM_P (src) && !matching_memory)
17611 src = force_reg (mode, src);
17612
17613 /* Emit the instruction. */
17614
17615 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17616 if (reload_in_progress || code == NOT)
17617 {
17618 /* Reload doesn't know about the flags register, and doesn't know that
17619 it doesn't want to clobber it. */
17620 gcc_assert (code == NOT);
17621 emit_insn (op);
17622 }
17623 else
17624 {
17625 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17626 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17627 }
17628
17629 /* Fix up the destination if needed. */
17630 if (dst != operands[0])
17631 emit_move_insn (operands[0], dst);
17632 }
17633
17634 /* Split a 32-bit/64-bit divmod with an 8-bit unsigned divmod if the dividend
17635 and divisor are both within the range [0-255]. */
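/* A rough illustration of the emitted shape (register names and exact
   insn selection are illustrative only, not the precise RTL below):

       mov    divisor, scratch
       or     dividend, scratch
       test   $-0x100, scratch      ; any bit above bit 7 set?
       je     .Lqimode
       ...full-width signed/unsigned divmod...
       jmp    .Lend
   .Lqimode:
       ...divb-based 8-bit unsigned divide, quotient in AL, remainder in AH...
   .Lend:  */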
17636
17637 void
17638 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17639 bool signed_p)
17640 {
17641 rtx_code_label *end_label, *qimode_label;
17642 rtx insn, div, mod;
17643 rtx scratch, tmp0, tmp1, tmp2;
17644 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17645 rtx (*gen_zero_extend) (rtx, rtx);
17646 rtx (*gen_test_ccno_1) (rtx, rtx);
17647
17648 switch (mode)
17649 {
17650 case SImode:
17651 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17652 gen_test_ccno_1 = gen_testsi_ccno_1;
17653 gen_zero_extend = gen_zero_extendqisi2;
17654 break;
17655 case DImode:
17656 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17657 gen_test_ccno_1 = gen_testdi_ccno_1;
17658 gen_zero_extend = gen_zero_extendqidi2;
17659 break;
17660 default:
17661 gcc_unreachable ();
17662 }
17663
17664 end_label = gen_label_rtx ();
17665 qimode_label = gen_label_rtx ();
17666
17667 scratch = gen_reg_rtx (mode);
17668
17669 /* Use 8-bit unsigned divmod if the dividend and divisor are within
17670 the range [0-255]. */
17671 emit_move_insn (scratch, operands[2]);
17672 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17673 scratch, 1, OPTAB_DIRECT);
17674 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17675 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17676 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17677 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17678 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17679 pc_rtx);
17680 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17681 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17682 JUMP_LABEL (insn) = qimode_label;
17683
17684 /* Generate the original signed/unsigned divmod. */
17685 div = gen_divmod4_1 (operands[0], operands[1],
17686 operands[2], operands[3]);
17687 emit_insn (div);
17688
17689 /* Branch to the end. */
17690 emit_jump_insn (gen_jump (end_label));
17691 emit_barrier ();
17692
17693 /* Generate the 8-bit unsigned divide. */
17694 emit_label (qimode_label);
17695 /* Don't use operands[0] for result of 8bit divide since not all
17696 registers support QImode ZERO_EXTRACT. */
17697 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17698 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17699 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17700 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17701
17702 if (signed_p)
17703 {
17704 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17705 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17706 }
17707 else
17708 {
17709 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17710 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17711 }
17712
17713 /* Extract remainder from AH. */
17714 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17715 if (REG_P (operands[1]))
17716 insn = emit_move_insn (operands[1], tmp1);
17717 else
17718 {
17719 /* Need a new scratch register since the old one holds the result
17720 of the 8-bit divide. */
17721 scratch = gen_reg_rtx (mode);
17722 emit_move_insn (scratch, tmp1);
17723 insn = emit_move_insn (operands[1], scratch);
17724 }
17725 set_unique_reg_note (insn, REG_EQUAL, mod);
17726
17727 /* Zero extend quotient from AL. */
17728 tmp1 = gen_lowpart (QImode, tmp0);
17729 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17730 set_unique_reg_note (insn, REG_EQUAL, div);
17731
17732 emit_label (end_label);
17733 }
17734
17735 /* Whether it is OK to emit CFI directives when emitting asm code. */
17736
17737 bool
17738 ix86_emit_cfi ()
17739 {
17740 return dwarf2out_do_cfi_asm ();
17741 }
17742
17743 #define LEA_MAX_STALL (3)
17744 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17745
17746 /* Increase given DISTANCE in half-cycles according to
17747 dependencies between PREV and NEXT instructions.
17748 Add 1 half-cycle if there is no dependency and
17749 go to the next cycle if there is some dependency. */
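/* For example, with DISTANCE == 3 half-cycles a dependency between PREV
   and NEXT yields 3 + (3 & 1) + 2 == 6, i.e. the distance is rounded up
   to the next full cycle and one further cycle is added, while no
   dependency simply yields 3 + 1 == 4. */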
17750
17751 static unsigned int
17752 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
17753 {
17754 df_ref def, use;
17755
17756 if (!prev || !next)
17757 return distance + (distance & 1) + 2;
17758
17759 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17760 return distance + 1;
17761
17762 FOR_EACH_INSN_USE (use, next)
17763 FOR_EACH_INSN_DEF (def, prev)
17764 if (!DF_REF_IS_ARTIFICIAL (def)
17765 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
17766 return distance + (distance & 1) + 2;
17767
17768 return distance + 1;
17769 }
17770
17771 /* Return true if instruction INSN defines register number
17772 REGNO1 or REGNO2. */
17773
17774 static bool
17775 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17776 rtx insn)
17777 {
17778 df_ref def;
17779
17780 FOR_EACH_INSN_DEF (def, insn)
17781 if (DF_REF_REG_DEF_P (def)
17782 && !DF_REF_IS_ARTIFICIAL (def)
17783 && (regno1 == DF_REF_REGNO (def)
17784 || regno2 == DF_REF_REGNO (def)))
17785 return true;
17786
17787 return false;
17788 }
17789
17790 /* Return true if instruction INSN uses register number
17791 REGNO as part of a memory address expression. */
17792
17793 static bool
17794 insn_uses_reg_mem (unsigned int regno, rtx insn)
17795 {
17796 df_ref use;
17797
17798 FOR_EACH_INSN_USE (use, insn)
17799 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
17800 return true;
17801
17802 return false;
17803 }
17804
17805 /* Search backward for non-agu definition of register number REGNO1
17806 or register number REGNO2 in basic block starting from instruction
17807 START up to head of basic block or instruction INSN.
17808
17809 Put true into *FOUND if a definition was found and false
17810 otherwise.
17811
17812 The distance in half-cycles between START and the found instruction
17813 or the head of the BB is added to DISTANCE and returned. */
17814
17815 static int
17816 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17817 rtx_insn *insn, int distance,
17818 rtx_insn *start, bool *found)
17819 {
17820 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17821 rtx_insn *prev = start;
17822 rtx_insn *next = NULL;
17823
17824 *found = false;
17825
17826 while (prev
17827 && prev != insn
17828 && distance < LEA_SEARCH_THRESHOLD)
17829 {
17830 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17831 {
17832 distance = increase_distance (prev, next, distance);
17833 if (insn_defines_reg (regno1, regno2, prev))
17834 {
17835 if (recog_memoized (prev) < 0
17836 || get_attr_type (prev) != TYPE_LEA)
17837 {
17838 *found = true;
17839 return distance;
17840 }
17841 }
17842
17843 next = prev;
17844 }
17845 if (prev == BB_HEAD (bb))
17846 break;
17847
17848 prev = PREV_INSN (prev);
17849 }
17850
17851 return distance;
17852 }
17853
17854 /* Search backward for non-agu definition of register number REGNO1
17855 or register number REGNO2 in INSN's basic block until
17856 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17857 2. Reach neighbour BBs boundary, or
17858 3. Reach agu definition.
17859 Returns the distance between the non-agu definition point and INSN.
17860 If no definition point is found, returns -1. */
17861
17862 static int
17863 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17864 rtx_insn *insn)
17865 {
17866 basic_block bb = BLOCK_FOR_INSN (insn);
17867 int distance = 0;
17868 bool found = false;
17869
17870 if (insn != BB_HEAD (bb))
17871 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17872 distance, PREV_INSN (insn),
17873 &found);
17874
17875 if (!found && distance < LEA_SEARCH_THRESHOLD)
17876 {
17877 edge e;
17878 edge_iterator ei;
17879 bool simple_loop = false;
17880
17881 FOR_EACH_EDGE (e, ei, bb->preds)
17882 if (e->src == bb)
17883 {
17884 simple_loop = true;
17885 break;
17886 }
17887
17888 if (simple_loop)
17889 distance = distance_non_agu_define_in_bb (regno1, regno2,
17890 insn, distance,
17891 BB_END (bb), &found);
17892 else
17893 {
17894 int shortest_dist = -1;
17895 bool found_in_bb = false;
17896
17897 FOR_EACH_EDGE (e, ei, bb->preds)
17898 {
17899 int bb_dist
17900 = distance_non_agu_define_in_bb (regno1, regno2,
17901 insn, distance,
17902 BB_END (e->src),
17903 &found_in_bb);
17904 if (found_in_bb)
17905 {
17906 if (shortest_dist < 0)
17907 shortest_dist = bb_dist;
17908 else if (bb_dist > 0)
17909 shortest_dist = MIN (bb_dist, shortest_dist);
17910
17911 found = true;
17912 }
17913 }
17914
17915 distance = shortest_dist;
17916 }
17917 }
17918
17919 /* get_attr_type may modify recog data. We want to make sure
17920 that recog data is valid for instruction INSN, on which
17921 distance_non_agu_define is called. INSN is unchanged here. */
17922 extract_insn_cached (insn);
17923
17924 if (!found)
17925 return -1;
17926
17927 return distance >> 1;
17928 }
17929
17930 /* Return the distance in half-cycles between INSN and the next
17931 insn that uses register number REGNO in a memory address, added
17932 to DISTANCE. Return -1 if REGNO is set.
17933
17934 Put true value into *FOUND if register usage was found and
17935 false otherwise.
17936 Put true value into *REDEFINED if register redefinition was
17937 found and false otherwise. */
17938
17939 static int
17940 distance_agu_use_in_bb (unsigned int regno,
17941 rtx_insn *insn, int distance, rtx_insn *start,
17942 bool *found, bool *redefined)
17943 {
17944 basic_block bb = NULL;
17945 rtx_insn *next = start;
17946 rtx_insn *prev = NULL;
17947
17948 *found = false;
17949 *redefined = false;
17950
17951 if (start != NULL_RTX)
17952 {
17953 bb = BLOCK_FOR_INSN (start);
17954 if (start != BB_HEAD (bb))
17955 /* If insn and start belong to the same bb, set prev to insn,
17956 so the call to increase_distance will increase the distance
17957 between insns by 1. */
17958 prev = insn;
17959 }
17960
17961 while (next
17962 && next != insn
17963 && distance < LEA_SEARCH_THRESHOLD)
17964 {
17965 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
17966 {
17967 distance = increase_distance (prev, next, distance);
17968 if (insn_uses_reg_mem (regno, next))
17969 {
17970 /* Return DISTANCE if OP0 is used in memory
17971 address in NEXT. */
17972 *found = true;
17973 return distance;
17974 }
17975
17976 if (insn_defines_reg (regno, INVALID_REGNUM, next))
17977 {
17978 /* Return -1 if OP0 is set in NEXT. */
17979 *redefined = true;
17980 return -1;
17981 }
17982
17983 prev = next;
17984 }
17985
17986 if (next == BB_END (bb))
17987 break;
17988
17989 next = NEXT_INSN (next);
17990 }
17991
17992 return distance;
17993 }
17994
17995 /* Return the distance between INSN and the next insn that uses
17996 register number REGNO0 in a memory address. Return -1 if no such
17997 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
17998
17999 static int
18000 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18001 {
18002 basic_block bb = BLOCK_FOR_INSN (insn);
18003 int distance = 0;
18004 bool found = false;
18005 bool redefined = false;
18006
18007 if (insn != BB_END (bb))
18008 distance = distance_agu_use_in_bb (regno0, insn, distance,
18009 NEXT_INSN (insn),
18010 &found, &redefined);
18011
18012 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18013 {
18014 edge e;
18015 edge_iterator ei;
18016 bool simple_loop = false;
18017
18018 FOR_EACH_EDGE (e, ei, bb->succs)
18019 if (e->dest == bb)
18020 {
18021 simple_loop = true;
18022 break;
18023 }
18024
18025 if (simple_loop)
18026 distance = distance_agu_use_in_bb (regno0, insn,
18027 distance, BB_HEAD (bb),
18028 &found, &redefined);
18029 else
18030 {
18031 int shortest_dist = -1;
18032 bool found_in_bb = false;
18033 bool redefined_in_bb = false;
18034
18035 FOR_EACH_EDGE (e, ei, bb->succs)
18036 {
18037 int bb_dist
18038 = distance_agu_use_in_bb (regno0, insn,
18039 distance, BB_HEAD (e->dest),
18040 &found_in_bb, &redefined_in_bb);
18041 if (found_in_bb)
18042 {
18043 if (shortest_dist < 0)
18044 shortest_dist = bb_dist;
18045 else if (bb_dist > 0)
18046 shortest_dist = MIN (bb_dist, shortest_dist);
18047
18048 found = true;
18049 }
18050 }
18051
18052 distance = shortest_dist;
18053 }
18054 }
18055
18056 if (!found || redefined)
18057 return -1;
18058
18059 return distance >> 1;
18060 }
18061
18062 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18063 there is a dilemma of choosing LEA or ADD.
18064 Negative value: ADD is preferred over LEA
18065 Zero: Neutral
18066 Positive value: LEA is preferred over ADD. */
18067 #define IX86_LEA_PRIORITY 0
18068
18069 /* Return true if using the lea INSN has a performance advantage
18070 over a sequence of instructions. The instruction sequence has
18071 SPLIT_COST cycles higher latency than the lea itself. */
18072
18073 static bool
18074 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18075 unsigned int regno2, int split_cost, bool has_scale)
18076 {
18077 int dist_define, dist_use;
18078
18079 /* For Silvermont, the use of LEA is justified if it is a 2-source
18080 or 3-source LEA serving a non-destructive destination, or if the
18081 ability to use SCALE is wanted. */
18082 if (TARGET_SILVERMONT || TARGET_INTEL)
18083 {
18084 if (has_scale)
18085 return true;
18086 if (split_cost < 1)
18087 return false;
18088 if (regno0 == regno1 || regno0 == regno2)
18089 return false;
18090 return true;
18091 }
18092
18093 dist_define = distance_non_agu_define (regno1, regno2, insn);
18094 dist_use = distance_agu_use (regno0, insn);
18095
18096 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18097 {
18098 /* If there is no non-AGU operand definition, no AGU
18099 operand usage and the split cost is 0, then both the lea
18100 and non-lea variants have the same priority. Currently
18101 we prefer lea for 64-bit code and non-lea for 32-bit
18102 code. */
18103 if (dist_use < 0 && split_cost == 0)
18104 return TARGET_64BIT || IX86_LEA_PRIORITY;
18105 else
18106 return true;
18107 }
18108
18109 /* With a longer definition distance, lea is more preferable.
18110 Here we adjust the distance to take the splitting cost and
18111 lea priority into account. */
18112 dist_define += split_cost + IX86_LEA_PRIORITY;
18113
18114 /* If there is no use in a memory address then we just check
18115 that the split cost exceeds the AGU stall. */
18116 if (dist_use < 0)
18117 return dist_define > LEA_MAX_STALL;
18118
18119 /* If this insn has both backward non-agu dependence and forward
18120 agu dependence, the one with the shorter distance takes effect. */
18121 return dist_define >= dist_use;
18122 }
18123
18124 /* Return true if it is legal to clobber flags by INSN and
18125 false otherwise. */
18126
18127 static bool
18128 ix86_ok_to_clobber_flags (rtx_insn *insn)
18129 {
18130 basic_block bb = BLOCK_FOR_INSN (insn);
18131 df_ref use;
18132 bitmap live;
18133
18134 while (insn)
18135 {
18136 if (NONDEBUG_INSN_P (insn))
18137 {
18138 FOR_EACH_INSN_USE (use, insn)
18139 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18140 return false;
18141
18142 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18143 return true;
18144 }
18145
18146 if (insn == BB_END (bb))
18147 break;
18148
18149 insn = NEXT_INSN (insn);
18150 }
18151
18152 live = df_get_live_out (bb);
18153 return !REGNO_REG_SET_P (live, FLAGS_REG);
18154 }
18155
18156 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18157 move and add to avoid AGU stalls. */
18158
18159 bool
18160 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18161 {
18162 unsigned int regno0, regno1, regno2;
18163
18164 /* Check if we need to optimize. */
18165 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18166 return false;
18167
18168 /* Check it is correct to split here. */
18169 if (!ix86_ok_to_clobber_flags (insn))
18170 return false;
18171
18172 regno0 = true_regnum (operands[0]);
18173 regno1 = true_regnum (operands[1]);
18174 regno2 = true_regnum (operands[2]);
18175
18176 /* We need to split only adds with a non-destructive
18177 destination operand. */
18178 if (regno0 == regno1 || regno0 == regno2)
18179 return false;
18180 else
18181 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18182 }
18183
18184 /* Return true if we should emit an lea instruction instead of a mov
18185 instruction. */
18186
18187 bool
18188 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18189 {
18190 unsigned int regno0, regno1;
18191
18192 /* Check if we need to optimize. */
18193 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18194 return false;
18195
18196 /* Use lea for reg to reg moves only. */
18197 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18198 return false;
18199
18200 regno0 = true_regnum (operands[0]);
18201 regno1 = true_regnum (operands[1]);
18202
18203 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18204 }
18205
18206 /* Return true if we need to split lea into a sequence of
18207 instructions to avoid AGU stalls. */
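/* A rough worked example of the cost computed below: splitting
   "lea 0x8(%rbx,%rcx,2), %rax" (destination distinct from base and index)
   needs a mov, an add of the index to the base, a shift for the scale and
   an add of the displacement, so split_cost = 1 + 1 + 1 + 1 - 1 = 3 once
   the lea itself is subtracted; the split is then done only if
   ix86_lea_outperforms judges that avoiding the AGU stall is worth those
   extra cycles. */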
18208
18209 bool
18210 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18211 {
18212 unsigned int regno0, regno1, regno2;
18213 int split_cost;
18214 struct ix86_address parts;
18215 int ok;
18216
18217 /* Check we need to optimize. */
18218 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18219 return false;
18220
18221 /* The "at least two components" test below might not catch simple
18222 move or zero extension insns if parts.base is non-NULL and parts.disp
18223 is const0_rtx as the only components in the address, e.g. if the
18224 register is %rbp or %r13. As this test is much cheaper and moves or
18225 zero extensions are the common case, do this check first. */
18226 if (REG_P (operands[1])
18227 || (SImode_address_operand (operands[1], VOIDmode)
18228 && REG_P (XEXP (operands[1], 0))))
18229 return false;
18230
18231 /* Check if it is OK to split here. */
18232 if (!ix86_ok_to_clobber_flags (insn))
18233 return false;
18234
18235 ok = ix86_decompose_address (operands[1], &parts);
18236 gcc_assert (ok);
18237
18238 /* There should be at least two components in the address. */
18239 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18240 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18241 return false;
18242
18243 /* We should not split into add if a non-legitimate PIC
18244 operand is used as the displacement. */
18245 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18246 return false;
18247
18248 regno0 = true_regnum (operands[0]);
18249 regno1 = INVALID_REGNUM;
18250 regno2 = INVALID_REGNUM;
18251
18252 if (parts.base)
18253 regno1 = true_regnum (parts.base);
18254 if (parts.index)
18255 regno2 = true_regnum (parts.index);
18256
18257 split_cost = 0;
18258
18259 /* Compute how many cycles we will add to execution time
18260 if we split the lea into a sequence of instructions. */
18261 if (parts.base || parts.index)
18262 {
18263 /* Have to use a mov instruction if the non-destructive
18264 destination form is used. */
18265 if (regno1 != regno0 && regno2 != regno0)
18266 split_cost += 1;
18267
18268 /* Have to add index to base if both exist. */
18269 if (parts.base && parts.index)
18270 split_cost += 1;
18271
18272 /* Have to use shift and adds if scale is 2 or greater. */
18273 if (parts.scale > 1)
18274 {
18275 if (regno0 != regno1)
18276 split_cost += 1;
18277 else if (regno2 == regno0)
18278 split_cost += 4;
18279 else
18280 split_cost += parts.scale;
18281 }
18282
18283 /* Have to use an add instruction with an immediate if
18284 disp is non-zero. */
18285 if (parts.disp && parts.disp != const0_rtx)
18286 split_cost += 1;
18287
18288 /* Subtract the price of lea. */
18289 split_cost -= 1;
18290 }
18291
18292 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18293 parts.scale > 1);
18294 }
18295
18296 /* Emit the x86 binary operation CODE in mode MODE, where the first operand
18297 matches the destination. The emitted RTX includes a clobber of FLAGS_REG. */
18298
18299 static void
18300 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18301 rtx dst, rtx src)
18302 {
18303 rtx op, clob;
18304
18305 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18306 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18307
18308 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18309 }
18310
18311 /* Return true if the definition of REGNO1 is nearer to INSN than that of REGNO2. */
18312
18313 static bool
18314 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18315 {
18316 rtx_insn *prev = insn;
18317 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18318
18319 if (insn == start)
18320 return false;
18321 while (prev && prev != start)
18322 {
18323 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18324 {
18325 prev = PREV_INSN (prev);
18326 continue;
18327 }
18328 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18329 return true;
18330 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18331 return false;
18332 prev = PREV_INSN (prev);
18333 }
18334
18335 /* None of the regs is defined in the bb. */
18336 return false;
18337 }
18338
18339 /* Split lea instructions into a sequence of instructions
18340 which are executed on the ALU to avoid AGU stalls.
18341 It is assumed that clobbering the flags register is allowed
18342 at the lea position. */
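/* For illustration only (a sketch, not the exact RTL emitted below):
   with DImode operands, "lea 0x8(%rbx,%rcx,4), %rax" may be decomposed
   into
       mov  %rcx, %rax
       shl  $2,   %rax
       add  %rbx, %rax
       add  $0x8, %rax
   while "lea (%rbx,%rcx), %rax" becomes just a mov followed by an add. */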
18343
18344 void
18345 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], enum machine_mode mode)
18346 {
18347 unsigned int regno0, regno1, regno2;
18348 struct ix86_address parts;
18349 rtx target, tmp;
18350 int ok, adds;
18351
18352 ok = ix86_decompose_address (operands[1], &parts);
18353 gcc_assert (ok);
18354
18355 target = gen_lowpart (mode, operands[0]);
18356
18357 regno0 = true_regnum (target);
18358 regno1 = INVALID_REGNUM;
18359 regno2 = INVALID_REGNUM;
18360
18361 if (parts.base)
18362 {
18363 parts.base = gen_lowpart (mode, parts.base);
18364 regno1 = true_regnum (parts.base);
18365 }
18366
18367 if (parts.index)
18368 {
18369 parts.index = gen_lowpart (mode, parts.index);
18370 regno2 = true_regnum (parts.index);
18371 }
18372
18373 if (parts.disp)
18374 parts.disp = gen_lowpart (mode, parts.disp);
18375
18376 if (parts.scale > 1)
18377 {
18378 /* Case r1 = r1 + ... */
18379 if (regno1 == regno0)
18380 {
18381 /* If we have the case r1 = r1 + C * r2 then we
18382 would have to use multiplication, which is very
18383 expensive. Assume the cost model is wrong if we
18384 have such a case here. */
18385 gcc_assert (regno2 != regno0);
18386
18387 for (adds = parts.scale; adds > 0; adds--)
18388 ix86_emit_binop (PLUS, mode, target, parts.index);
18389 }
18390 else
18391 {
18392 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18393 if (regno0 != regno2)
18394 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18395
18396 /* Use shift for scaling. */
18397 ix86_emit_binop (ASHIFT, mode, target,
18398 GEN_INT (exact_log2 (parts.scale)));
18399
18400 if (parts.base)
18401 ix86_emit_binop (PLUS, mode, target, parts.base);
18402
18403 if (parts.disp && parts.disp != const0_rtx)
18404 ix86_emit_binop (PLUS, mode, target, parts.disp);
18405 }
18406 }
18407 else if (!parts.base && !parts.index)
18408 {
18409 gcc_assert (parts.disp);
18410 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18411 }
18412 else
18413 {
18414 if (!parts.base)
18415 {
18416 if (regno0 != regno2)
18417 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18418 }
18419 else if (!parts.index)
18420 {
18421 if (regno0 != regno1)
18422 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18423 }
18424 else
18425 {
18426 if (regno0 == regno1)
18427 tmp = parts.index;
18428 else if (regno0 == regno2)
18429 tmp = parts.base;
18430 else
18431 {
18432 rtx tmp1;
18433
18434 /* Find better operand for SET instruction, depending
18435 on which definition is farther from the insn. */
18436 if (find_nearest_reg_def (insn, regno1, regno2))
18437 tmp = parts.index, tmp1 = parts.base;
18438 else
18439 tmp = parts.base, tmp1 = parts.index;
18440
18441 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18442
18443 if (parts.disp && parts.disp != const0_rtx)
18444 ix86_emit_binop (PLUS, mode, target, parts.disp);
18445
18446 ix86_emit_binop (PLUS, mode, target, tmp1);
18447 return;
18448 }
18449
18450 ix86_emit_binop (PLUS, mode, target, tmp);
18451 }
18452
18453 if (parts.disp && parts.disp != const0_rtx)
18454 ix86_emit_binop (PLUS, mode, target, parts.disp);
18455 }
18456 }
18457
18458 /* Return true if it is ok to optimize an ADD operation to an LEA
18459 operation to avoid flag register consumption. For most processors,
18460 ADD is faster than LEA. For processors like BONNELL, if the
18461 destination register of the LEA holds an actual address which will be
18462 used soon, LEA is better, otherwise ADD is better. */
18463
18464 bool
18465 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
18466 {
18467 unsigned int regno0 = true_regnum (operands[0]);
18468 unsigned int regno1 = true_regnum (operands[1]);
18469 unsigned int regno2 = true_regnum (operands[2]);
18470
18471 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18472 if (regno0 != regno1 && regno0 != regno2)
18473 return true;
18474
18475 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18476 return false;
18477
18478 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18479 }
18480
18481 /* Return true if the destination reg of SET_BODY is the shift count of
18482 USE_BODY. */
18483
18484 static bool
18485 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18486 {
18487 rtx set_dest;
18488 rtx shift_rtx;
18489 int i;
18490
18491 /* Retrieve destination of SET_BODY. */
18492 switch (GET_CODE (set_body))
18493 {
18494 case SET:
18495 set_dest = SET_DEST (set_body);
18496 if (!set_dest || !REG_P (set_dest))
18497 return false;
18498 break;
18499 case PARALLEL:
18500 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18501 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18502 use_body))
18503 return true;
18504 default:
18505 return false;
18506 break;
18507 }
18508
18509 /* Retrieve shift count of USE_BODY. */
18510 switch (GET_CODE (use_body))
18511 {
18512 case SET:
18513 shift_rtx = XEXP (use_body, 1);
18514 break;
18515 case PARALLEL:
18516 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18517 if (ix86_dep_by_shift_count_body (set_body,
18518 XVECEXP (use_body, 0, i)))
18519 return true;
18520 default:
18521 return false;
18522 break;
18523 }
18524
18525 if (shift_rtx
18526 && (GET_CODE (shift_rtx) == ASHIFT
18527 || GET_CODE (shift_rtx) == LSHIFTRT
18528 || GET_CODE (shift_rtx) == ASHIFTRT
18529 || GET_CODE (shift_rtx) == ROTATE
18530 || GET_CODE (shift_rtx) == ROTATERT))
18531 {
18532 rtx shift_count = XEXP (shift_rtx, 1);
18533
18534 /* Return true if shift count is dest of SET_BODY. */
18535 if (REG_P (shift_count))
18536 {
18537 /* Add this check since it can be invoked before register
18538 allocation in the pre-reload scheduler. */
18539 if (reload_completed
18540 && true_regnum (set_dest) == true_regnum (shift_count))
18541 return true;
18542 else if (REGNO (set_dest) == REGNO (shift_count))
18543 return true;
18544 }
18545 }
18546
18547 return false;
18548 }
18549
18550 /* Return true if the destination reg of SET_INSN is the shift count of
18551 USE_INSN. */
18552
18553 bool
18554 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18555 {
18556 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18557 PATTERN (use_insn));
18558 }
18559
18560 /* Return TRUE or FALSE depending on whether the unary operator meets the
18561 appropriate constraints. */
18562
18563 bool
18564 ix86_unary_operator_ok (enum rtx_code,
18565 enum machine_mode,
18566 rtx operands[2])
18567 {
18568 /* If one of operands is memory, source and destination must match. */
18569 if ((MEM_P (operands[0])
18570 || MEM_P (operands[1]))
18571 && ! rtx_equal_p (operands[0], operands[1]))
18572 return false;
18573 return true;
18574 }
18575
18576 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18577 are ok, keeping in mind the possible movddup alternative. */
18578
18579 bool
18580 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18581 {
18582 if (MEM_P (operands[0]))
18583 return rtx_equal_p (operands[0], operands[1 + high]);
18584 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18585 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18586 return true;
18587 }
18588
18589 /* Post-reload splitter for converting an SFmode or DFmode value in an
18590 SSE register into an unsigned SImode value. */
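/* The idea, sketched informally: values below 0x1p31 can use the plain
   signed truncating conversion directly; for values >= 0x1p31 a compare
   mask selects 0x1p31 to be subtracted first, and the same mask, shifted
   into bit 31, is xored back into the integer result afterwards. */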
18591
18592 void
18593 ix86_split_convert_uns_si_sse (rtx operands[])
18594 {
18595 enum machine_mode vecmode;
18596 rtx value, large, zero_or_two31, input, two31, x;
18597
18598 large = operands[1];
18599 zero_or_two31 = operands[2];
18600 input = operands[3];
18601 two31 = operands[4];
18602 vecmode = GET_MODE (large);
18603 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18604
18605 /* Load up the value into the low element. We must ensure that the other
18606 elements are valid floats -- zero is the easiest such value. */
18607 if (MEM_P (input))
18608 {
18609 if (vecmode == V4SFmode)
18610 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18611 else
18612 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18613 }
18614 else
18615 {
18616 input = gen_rtx_REG (vecmode, REGNO (input));
18617 emit_move_insn (value, CONST0_RTX (vecmode));
18618 if (vecmode == V4SFmode)
18619 emit_insn (gen_sse_movss (value, value, input));
18620 else
18621 emit_insn (gen_sse2_movsd (value, value, input));
18622 }
18623
18624 emit_move_insn (large, two31);
18625 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18626
18627 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18628 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18629
18630 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18631 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18632
18633 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18634 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18635
18636 large = gen_rtx_REG (V4SImode, REGNO (large));
18637 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18638
18639 x = gen_rtx_REG (V4SImode, REGNO (value));
18640 if (vecmode == V4SFmode)
18641 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18642 else
18643 emit_insn (gen_sse2_cvttpd2dq (x, value));
18644 value = x;
18645
18646 emit_insn (gen_xorv4si3 (value, value, large));
18647 }
18648
18649 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18650 Expects the 64-bit DImode to be supplied in a pair of integral
18651 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18652 -mfpmath=sse, !optimize_size only. */
18653
18654 void
18655 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18656 {
18657 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18658 rtx int_xmm, fp_xmm;
18659 rtx biases, exponents;
18660 rtx x;
18661
18662 int_xmm = gen_reg_rtx (V4SImode);
18663 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18664 emit_insn (gen_movdi_to_sse (int_xmm, input));
18665 else if (TARGET_SSE_SPLIT_REGS)
18666 {
18667 emit_clobber (int_xmm);
18668 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18669 }
18670 else
18671 {
18672 x = gen_reg_rtx (V2DImode);
18673 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18674 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18675 }
18676
18677 x = gen_rtx_CONST_VECTOR (V4SImode,
18678 gen_rtvec (4, GEN_INT (0x43300000UL),
18679 GEN_INT (0x45300000UL),
18680 const0_rtx, const0_rtx));
18681 exponents = validize_mem (force_const_mem (V4SImode, x));
18682
18683 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18684 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18685
18686 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18687 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18688 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18689 (0x1.0p84 + double(fp_value_hi_xmm)).
18690 Note these exponents differ by 32. */
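  /* A hedged numeric sketch of the above: for the input
     0x0000000100000002 the low word yields 0x1.0p52 + 2 and the high
     word yields 0x1.0p84 + 1 * 0x1.0p32; subtracting the two biases and
     adding the halves gives 2 + 4294967296 = 4294967298, which is the
     value of the original unsigned DImode input. */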
18691
18692 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18693
18694 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18695 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18696 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18697 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18698 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18699 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18700 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18701 biases = validize_mem (force_const_mem (V2DFmode, biases));
18702 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18703
18704 /* Add the upper and lower DFmode values together. */
18705 if (TARGET_SSE3)
18706 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18707 else
18708 {
18709 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18710 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18711 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18712 }
18713
18714 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18715 }
18716
18717 /* Not used, but eases macroization of patterns. */
18718 void
18719 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
18720 {
18721 gcc_unreachable ();
18722 }
18723
18724 /* Convert an unsigned SImode value into a DFmode. Only currently used
18725 for SSE, but applicable anywhere. */
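/* A brief sketch of the trick used below: the unsigned SImode input is
   biased by -2^31 with a wrapping integer add, converted with the
   ordinary signed SImode-to-DFmode conversion, and then 0x1p31 is added
   back in the DFmode domain, which is exact because DFmode represents
   every 32-bit integer exactly. */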
18726
18727 void
18728 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18729 {
18730 REAL_VALUE_TYPE TWO31r;
18731 rtx x, fp;
18732
18733 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18734 NULL, 1, OPTAB_DIRECT);
18735
18736 fp = gen_reg_rtx (DFmode);
18737 emit_insn (gen_floatsidf2 (fp, x));
18738
18739 real_ldexp (&TWO31r, &dconst1, 31);
18740 x = const_double_from_real_value (TWO31r, DFmode);
18741
18742 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18743 if (x != target)
18744 emit_move_insn (target, x);
18745 }
18746
18747 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18748 32-bit mode; otherwise we have a direct convert instruction. */
18749
18750 void
18751 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18752 {
18753 REAL_VALUE_TYPE TWO32r;
18754 rtx fp_lo, fp_hi, x;
18755
18756 fp_lo = gen_reg_rtx (DFmode);
18757 fp_hi = gen_reg_rtx (DFmode);
18758
18759 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18760
18761 real_ldexp (&TWO32r, &dconst1, 32);
18762 x = const_double_from_real_value (TWO32r, DFmode);
18763 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18764
18765 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18766
18767 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18768 0, OPTAB_DIRECT);
18769 if (x != target)
18770 emit_move_insn (target, x);
18771 }
18772
18773 /* Convert an unsigned SImode value into an SFmode, using only SSE.
18774 For x86_32, -mfpmath=sse, !optimize_size only. */
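/* A rough outline of the expansion below: SFmode cannot represent every
   32-bit integer exactly, so the input is split into its low and high
   16-bit halves, each half is converted exactly, and the result is
   recombined as fp_hi * 0x1p16 + fp_lo; e.g. 0x12345678 becomes
   0x1234 * 65536.0 + 0x5678.0. */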
18775 void
18776 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18777 {
18778 REAL_VALUE_TYPE ONE16r;
18779 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18780
18781 real_ldexp (&ONE16r, &dconst1, 16);
18782 x = const_double_from_real_value (ONE16r, SFmode);
18783 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
18784 NULL, 0, OPTAB_DIRECT);
18785 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
18786 NULL, 0, OPTAB_DIRECT);
18787 fp_hi = gen_reg_rtx (SFmode);
18788 fp_lo = gen_reg_rtx (SFmode);
18789 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18790 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18791 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18792 0, OPTAB_DIRECT);
18793 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18794 0, OPTAB_DIRECT);
18795 if (!rtx_equal_p (target, fp_hi))
18796 emit_move_insn (target, fp_hi);
18797 }
18798
18799 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18800 a vector of unsigned ints VAL to the vector of floats TARGET. */
18801
18802 void
18803 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18804 {
18805 rtx tmp[8];
18806 REAL_VALUE_TYPE TWO16r;
18807 enum machine_mode intmode = GET_MODE (val);
18808 enum machine_mode fltmode = GET_MODE (target);
18809 rtx (*cvt) (rtx, rtx);
18810
18811 if (intmode == V4SImode)
18812 cvt = gen_floatv4siv4sf2;
18813 else
18814 cvt = gen_floatv8siv8sf2;
18815 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18816 tmp[0] = force_reg (intmode, tmp[0]);
18817 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18818 OPTAB_DIRECT);
18819 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18820 NULL_RTX, 1, OPTAB_DIRECT);
18821 tmp[3] = gen_reg_rtx (fltmode);
18822 emit_insn (cvt (tmp[3], tmp[1]));
18823 tmp[4] = gen_reg_rtx (fltmode);
18824 emit_insn (cvt (tmp[4], tmp[2]));
18825 real_ldexp (&TWO16r, &dconst1, 16);
18826 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18827 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18828 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18829 OPTAB_DIRECT);
18830 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18831 OPTAB_DIRECT);
18832 if (tmp[7] != target)
18833 emit_move_insn (target, tmp[7]);
18834 }
18835
18836 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18837 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18838 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18839 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
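/* A small worked example (illustrative only): for an element holding
   3000000000.0 the compare mask selects 0x1p31, so the adjusted value is
   3000000000 - 2147483648 = 852516352; the signed fix_trunc then yields
   852516352 and the caller xors in 0x80000000 from *XORP, recovering the
   unsigned result 3000000000. */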
18840
18841 rtx
18842 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18843 {
18844 REAL_VALUE_TYPE TWO31r;
18845 rtx two31r, tmp[4];
18846 enum machine_mode mode = GET_MODE (val);
18847 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18848 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18849 rtx (*cmp) (rtx, rtx, rtx, rtx);
18850 int i;
18851
18852 for (i = 0; i < 3; i++)
18853 tmp[i] = gen_reg_rtx (mode);
18854 real_ldexp (&TWO31r, &dconst1, 31);
18855 two31r = const_double_from_real_value (TWO31r, scalarmode);
18856 two31r = ix86_build_const_vector (mode, 1, two31r);
18857 two31r = force_reg (mode, two31r);
18858 switch (mode)
18859 {
18860 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18861 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18862 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18863 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18864 default: gcc_unreachable ();
18865 }
18866 tmp[3] = gen_rtx_LE (mode, two31r, val);
18867 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18868 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18869 0, OPTAB_DIRECT);
18870 if (intmode == V4SImode || TARGET_AVX2)
18871 *xorp = expand_simple_binop (intmode, ASHIFT,
18872 gen_lowpart (intmode, tmp[0]),
18873 GEN_INT (31), NULL_RTX, 0,
18874 OPTAB_DIRECT);
18875 else
18876 {
18877 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18878 two31 = ix86_build_const_vector (intmode, 1, two31);
18879 *xorp = expand_simple_binop (intmode, AND,
18880 gen_lowpart (intmode, tmp[0]),
18881 two31, NULL_RTX, 0,
18882 OPTAB_DIRECT);
18883 }
18884 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18885 0, OPTAB_DIRECT);
18886 }
18887
18888 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18889 then replicate the value for all elements of the vector
18890 register. */
18891
18892 rtx
18893 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18894 {
18895 int i, n_elt;
18896 rtvec v;
18897 enum machine_mode scalar_mode;
18898
18899 switch (mode)
18900 {
18901 case V64QImode:
18902 case V32QImode:
18903 case V16QImode:
18904 case V32HImode:
18905 case V16HImode:
18906 case V8HImode:
18907 case V16SImode:
18908 case V8SImode:
18909 case V4SImode:
18910 case V8DImode:
18911 case V4DImode:
18912 case V2DImode:
18913 gcc_assert (vect);
18914 case V16SFmode:
18915 case V8SFmode:
18916 case V4SFmode:
18917 case V8DFmode:
18918 case V4DFmode:
18919 case V2DFmode:
18920 n_elt = GET_MODE_NUNITS (mode);
18921 v = rtvec_alloc (n_elt);
18922 scalar_mode = GET_MODE_INNER (mode);
18923
18924 RTVEC_ELT (v, 0) = value;
18925
18926 for (i = 1; i < n_elt; ++i)
18927 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18928
18929 return gen_rtx_CONST_VECTOR (mode, v);
18930
18931 default:
18932 gcc_unreachable ();
18933 }
18934 }
18935
18936 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18937 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18938 for an SSE register. If VECT is true, then replicate the mask for
18939 all elements of the vector register. If INVERT is true, then create
18940 a mask excluding the sign bit. */
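/* For example (a sketch of the typical uses): for V4SFmode with VECT
   true the mask is { 0x80000000, 0x80000000, 0x80000000, 0x80000000 },
   or { 0x7fffffff, ... } when INVERT is true; for a scalar DFmode
   operation only the low element of the V2DFmode constant carries the
   0x8000000000000000 pattern and the remaining element is zero. */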
18941
18942 rtx
18943 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
18944 {
18945 enum machine_mode vec_mode, imode;
18946 HOST_WIDE_INT hi, lo;
18947 int shift = 63;
18948 rtx v;
18949 rtx mask;
18950
18951 /* Find the sign bit, sign extended to 2*HWI. */
18952 switch (mode)
18953 {
18954 case V16SImode:
18955 case V16SFmode:
18956 case V8SImode:
18957 case V4SImode:
18958 case V8SFmode:
18959 case V4SFmode:
18960 vec_mode = mode;
18961 mode = GET_MODE_INNER (mode);
18962 imode = SImode;
18963 lo = 0x80000000, hi = lo < 0;
18964 break;
18965
18966 case V8DImode:
18967 case V4DImode:
18968 case V2DImode:
18969 case V8DFmode:
18970 case V4DFmode:
18971 case V2DFmode:
18972 vec_mode = mode;
18973 mode = GET_MODE_INNER (mode);
18974 imode = DImode;
18975 if (HOST_BITS_PER_WIDE_INT >= 64)
18976 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
18977 else
18978 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18979 break;
18980
18981 case TImode:
18982 case TFmode:
18983 vec_mode = VOIDmode;
18984 if (HOST_BITS_PER_WIDE_INT >= 64)
18985 {
18986 imode = TImode;
18987 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
18988 }
18989 else
18990 {
18991 rtvec vec;
18992
18993 imode = DImode;
18994 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18995
18996 if (invert)
18997 {
18998 lo = ~lo, hi = ~hi;
18999 v = constm1_rtx;
19000 }
19001 else
19002 v = const0_rtx;
19003
19004 mask = immed_double_const (lo, hi, imode);
19005
19006 vec = gen_rtvec (2, v, mask);
19007 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19008 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19009
19010 return v;
19011 }
19012 break;
19013
19014 default:
19015 gcc_unreachable ();
19016 }
19017
19018 if (invert)
19019 lo = ~lo, hi = ~hi;
19020
19021 /* Force this value into the low part of a fp vector constant. */
19022 mask = immed_double_const (lo, hi, imode);
19023 mask = gen_lowpart (mode, mask);
19024
19025 if (vec_mode == VOIDmode)
19026 return force_reg (mode, mask);
19027
19028 v = ix86_build_const_vector (vec_mode, vect, mask);
19029 return force_reg (vec_mode, v);
19030 }
19031
19032 /* Generate code for floating point ABS or NEG. */
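/* Informally (the exact insns are chosen by the patterns and splitters
   that consume this expansion): with SSE math, NEG amounts to an XOR
   with the sign-bit mask built by ix86_build_signbit_mask above (e.g.
   xorps/xorpd) and ABS to an AND with the inverted mask (e.g.
   andps/andpd). */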
19033
19034 void
19035 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19036 rtx operands[])
19037 {
19038 rtx mask, set, dst, src;
19039 bool use_sse = false;
19040 bool vector_mode = VECTOR_MODE_P (mode);
19041 enum machine_mode vmode = mode;
19042
19043 if (vector_mode)
19044 use_sse = true;
19045 else if (mode == TFmode)
19046 use_sse = true;
19047 else if (TARGET_SSE_MATH)
19048 {
19049 use_sse = SSE_FLOAT_MODE_P (mode);
19050 if (mode == SFmode)
19051 vmode = V4SFmode;
19052 else if (mode == DFmode)
19053 vmode = V2DFmode;
19054 }
19055
19056 /* NEG and ABS performed with SSE use bitwise mask operations.
19057 Create the appropriate mask now. */
19058 if (use_sse)
19059 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19060 else
19061 mask = NULL_RTX;
19062
19063 dst = operands[0];
19064 src = operands[1];
19065
19066 set = gen_rtx_fmt_e (code, mode, src);
19067 set = gen_rtx_SET (VOIDmode, dst, set);
19068
19069 if (mask)
19070 {
19071 rtx use, clob;
19072 rtvec par;
19073
19074 use = gen_rtx_USE (VOIDmode, mask);
19075 if (vector_mode)
19076 par = gen_rtvec (2, set, use);
19077 else
19078 {
19079 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19080 par = gen_rtvec (3, set, use, clob);
19081 }
19082 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19083 }
19084 else
19085 emit_insn (set);
19086 }
19087
19088 /* Expand a copysign operation. Special case operand 0 being a constant. */
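/* A quick sketch of the bit-level identity used by the expanders and
   splitters below: copysign (x, y) = (x & ~sign_mask) | (y & sign_mask),
   where sign_mask has only the sign bit set; the constant case below
   pre-computes the absolute value of X, so only the final AND/OR pair
   remains at run time. */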
19089
19090 void
19091 ix86_expand_copysign (rtx operands[])
19092 {
19093 enum machine_mode mode, vmode;
19094 rtx dest, op0, op1, mask, nmask;
19095
19096 dest = operands[0];
19097 op0 = operands[1];
19098 op1 = operands[2];
19099
19100 mode = GET_MODE (dest);
19101
19102 if (mode == SFmode)
19103 vmode = V4SFmode;
19104 else if (mode == DFmode)
19105 vmode = V2DFmode;
19106 else
19107 vmode = mode;
19108
19109 if (GET_CODE (op0) == CONST_DOUBLE)
19110 {
19111 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19112
19113 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19114 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19115
19116 if (mode == SFmode || mode == DFmode)
19117 {
19118 if (op0 == CONST0_RTX (mode))
19119 op0 = CONST0_RTX (vmode);
19120 else
19121 {
19122 rtx v = ix86_build_const_vector (vmode, false, op0);
19123
19124 op0 = force_reg (vmode, v);
19125 }
19126 }
19127 else if (op0 != CONST0_RTX (mode))
19128 op0 = force_reg (mode, op0);
19129
19130 mask = ix86_build_signbit_mask (vmode, 0, 0);
19131
19132 if (mode == SFmode)
19133 copysign_insn = gen_copysignsf3_const;
19134 else if (mode == DFmode)
19135 copysign_insn = gen_copysigndf3_const;
19136 else
19137 copysign_insn = gen_copysigntf3_const;
19138
19139 emit_insn (copysign_insn (dest, op0, op1, mask));
19140 }
19141 else
19142 {
19143 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19144
19145 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19146 mask = ix86_build_signbit_mask (vmode, 0, 0);
19147
19148 if (mode == SFmode)
19149 copysign_insn = gen_copysignsf3_var;
19150 else if (mode == DFmode)
19151 copysign_insn = gen_copysigndf3_var;
19152 else
19153 copysign_insn = gen_copysigntf3_var;
19154
19155 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19156 }
19157 }
19158
19159 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19160 be a constant, and so has already been expanded into a vector constant. */
19161
19162 void
19163 ix86_split_copysign_const (rtx operands[])
19164 {
19165 enum machine_mode mode, vmode;
19166 rtx dest, op0, mask, x;
19167
19168 dest = operands[0];
19169 op0 = operands[1];
19170 mask = operands[3];
19171
19172 mode = GET_MODE (dest);
19173 vmode = GET_MODE (mask);
19174
19175 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19176 x = gen_rtx_AND (vmode, dest, mask);
19177 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19178
19179 if (op0 != CONST0_RTX (vmode))
19180 {
19181 x = gen_rtx_IOR (vmode, dest, op0);
19182 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19183 }
19184 }
19185
19186 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19187 so we have to do two masks. */
19188
19189 void
19190 ix86_split_copysign_var (rtx operands[])
19191 {
19192 enum machine_mode mode, vmode;
19193 rtx dest, scratch, op0, op1, mask, nmask, x;
19194
19195 dest = operands[0];
19196 scratch = operands[1];
19197 op0 = operands[2];
19198 op1 = operands[3];
19199 nmask = operands[4];
19200 mask = operands[5];
19201
19202 mode = GET_MODE (dest);
19203 vmode = GET_MODE (mask);
19204
19205 if (rtx_equal_p (op0, op1))
19206 {
19207 /* Shouldn't happen often (it's useless, obviously), but when it does
19208 we'd generate incorrect code if we continue below. */
19209 emit_move_insn (dest, op0);
19210 return;
19211 }
19212
19213 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19214 {
19215 gcc_assert (REGNO (op1) == REGNO (scratch));
19216
19217 x = gen_rtx_AND (vmode, scratch, mask);
19218 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19219
19220 dest = mask;
19221 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19222 x = gen_rtx_NOT (vmode, dest);
19223 x = gen_rtx_AND (vmode, x, op0);
19224 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19225 }
19226 else
19227 {
19228 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19229 {
19230 x = gen_rtx_AND (vmode, scratch, mask);
19231 }
19232 else /* alternative 2,4 */
19233 {
19234 gcc_assert (REGNO (mask) == REGNO (scratch));
19235 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19236 x = gen_rtx_AND (vmode, scratch, op1);
19237 }
19238 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19239
19240 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19241 {
19242 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19243 x = gen_rtx_AND (vmode, dest, nmask);
19244 }
19245 else /* alternative 3,4 */
19246 {
19247 gcc_assert (REGNO (nmask) == REGNO (dest));
19248 dest = nmask;
19249 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19250 x = gen_rtx_AND (vmode, dest, op0);
19251 }
19252 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19253 }
19254
19255 x = gen_rtx_IOR (vmode, dest, scratch);
19256 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19257 }
19258
19259 /* Return TRUE or FALSE depending on whether the first SET in INSN
19260 has source and destination with matching CC modes, and that the
19261 CC mode is at least as constrained as REQ_MODE. */
19262
19263 bool
19264 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19265 {
19266 rtx set;
19267 enum machine_mode set_mode;
19268
19269 set = PATTERN (insn);
19270 if (GET_CODE (set) == PARALLEL)
19271 set = XVECEXP (set, 0, 0);
19272 gcc_assert (GET_CODE (set) == SET);
19273 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19274
19275 set_mode = GET_MODE (SET_DEST (set));
19276 switch (set_mode)
19277 {
19278 case CCNOmode:
19279 if (req_mode != CCNOmode
19280 && (req_mode != CCmode
19281 || XEXP (SET_SRC (set), 1) != const0_rtx))
19282 return false;
19283 break;
19284 case CCmode:
19285 if (req_mode == CCGCmode)
19286 return false;
19287 /* FALLTHRU */
19288 case CCGCmode:
19289 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19290 return false;
19291 /* FALLTHRU */
19292 case CCGOCmode:
19293 if (req_mode == CCZmode)
19294 return false;
19295 /* FALLTHRU */
19296 case CCZmode:
19297 break;
19298
19299 case CCAmode:
19300 case CCCmode:
19301 case CCOmode:
19302 case CCSmode:
19303 if (set_mode != req_mode)
19304 return false;
19305 break;
19306
19307 default:
19308 gcc_unreachable ();
19309 }
19310
19311 return GET_MODE (SET_SRC (set)) == set_mode;
19312 }
19313
19314 /* Generate insn patterns to do an integer compare of OPERANDS. */
19315
19316 static rtx
19317 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19318 {
19319 enum machine_mode cmpmode;
19320 rtx tmp, flags;
19321
19322 cmpmode = SELECT_CC_MODE (code, op0, op1);
19323 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19324
19325 /* This is very simple, but making the interface the same as in the
19326 FP case makes the rest of the code easier. */
19327 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19328 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19329
19330 /* Return the test that should be put into the flags user, i.e.
19331 the bcc, scc, or cmov instruction. */
19332 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19333 }
19334
19335 /* Figure out whether to use ordered or unordered fp comparisons.
19336 Return the appropriate mode to use. */
19337
19338 enum machine_mode
19339 ix86_fp_compare_mode (enum rtx_code)
19340 {
19341 /* ??? In order to make all comparisons reversible, we do all comparisons
19342 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19343 all forms of trapping and nontrapping comparisons, we can make inequality
19344 comparisons trapping again, since it results in better code when using
19345 FCOM based compares. */
19346 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19347 }
19348
19349 enum machine_mode
19350 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19351 {
19352 enum machine_mode mode = GET_MODE (op0);
19353
19354 if (SCALAR_FLOAT_MODE_P (mode))
19355 {
19356 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19357 return ix86_fp_compare_mode (code);
19358 }
19359
19360 switch (code)
19361 {
19362 /* Only zero flag is needed. */
19363 case EQ: /* ZF=0 */
19364 case NE: /* ZF!=0 */
19365 return CCZmode;
19366 /* Codes needing carry flag. */
19367 case GEU: /* CF=0 */
19368 case LTU: /* CF=1 */
19369 /* Detect overflow checks. They need just the carry flag. */
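    /* E.g. the unsigned idiom "a + b < a" compares the sum against one
       of the addends; that is true exactly when the addition carried, so
       the carry flag alone (CCCmode) is enough. */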
19370 if (GET_CODE (op0) == PLUS
19371 && rtx_equal_p (op1, XEXP (op0, 0)))
19372 return CCCmode;
19373 else
19374 return CCmode;
19375 case GTU: /* CF=0 & ZF=0 */
19376 case LEU: /* CF=1 | ZF=1 */
19377 return CCmode;
19378 /* Codes possibly doable only with sign flag when
19379 comparing against zero. */
19380 case GE: /* SF=OF or SF=0 */
19381 case LT: /* SF<>OF or SF=1 */
19382 if (op1 == const0_rtx)
19383 return CCGOCmode;
19384 else
19385 /* For other cases the Carry flag is not required. */
19386 return CCGCmode;
19387 /* Codes doable only with the sign flag when comparing
19388 against zero, but we miss the jump instruction for it,
19389 so we need to use relational tests against overflow,
19390 which thus needs to be zero. */
19391 case GT: /* ZF=0 & SF=OF */
19392 case LE: /* ZF=1 | SF<>OF */
19393 if (op1 == const0_rtx)
19394 return CCNOmode;
19395 else
19396 return CCGCmode;
19397 /* The strcmp pattern does (use flags) and combine may ask us for the
19398 proper mode. */
19399 case USE:
19400 return CCmode;
19401 default:
19402 gcc_unreachable ();
19403 }
19404 }
19405
19406 /* Return the fixed registers used for condition codes. */
19407
19408 static bool
19409 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19410 {
19411 *p1 = FLAGS_REG;
19412 *p2 = FPSR_REG;
19413 return true;
19414 }
19415
19416 /* If two condition code modes are compatible, return a condition code
19417 mode which is compatible with both. Otherwise, return
19418 VOIDmode. */
19419
19420 static enum machine_mode
19421 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19422 {
19423 if (m1 == m2)
19424 return m1;
19425
19426 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19427 return VOIDmode;
19428
19429 if ((m1 == CCGCmode && m2 == CCGOCmode)
19430 || (m1 == CCGOCmode && m2 == CCGCmode))
19431 return CCGCmode;
19432
19433 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19434 return m2;
19435 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19436 return m1;
19437
19438 switch (m1)
19439 {
19440 default:
19441 gcc_unreachable ();
19442
19443 case CCmode:
19444 case CCGCmode:
19445 case CCGOCmode:
19446 case CCNOmode:
19447 case CCAmode:
19448 case CCCmode:
19449 case CCOmode:
19450 case CCSmode:
19451 case CCZmode:
19452 switch (m2)
19453 {
19454 default:
19455 return VOIDmode;
19456
19457 case CCmode:
19458 case CCGCmode:
19459 case CCGOCmode:
19460 case CCNOmode:
19461 case CCAmode:
19462 case CCCmode:
19463 case CCOmode:
19464 case CCSmode:
19465 case CCZmode:
19466 return CCmode;
19467 }
19468
19469 case CCFPmode:
19470 case CCFPUmode:
19471 /* These are only compatible with themselves, which we already
19472 checked above. */
19473 return VOIDmode;
19474 }
19475 }
19476
19477
19478 /* Return a comparison we can do that is equivalent to
19479 swap_condition (code), apart possibly from orderedness.
19480 But never change orderedness if TARGET_IEEE_FP, returning
19481 UNKNOWN in that case if necessary. */
19482
19483 static enum rtx_code
19484 ix86_fp_swap_condition (enum rtx_code code)
19485 {
19486 switch (code)
19487 {
19488 case GT: /* GTU - CF=0 & ZF=0 */
19489 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19490 case GE: /* GEU - CF=0 */
19491 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19492 case UNLT: /* LTU - CF=1 */
19493 return TARGET_IEEE_FP ? UNKNOWN : GT;
19494 case UNLE: /* LEU - CF=1 | ZF=1 */
19495 return TARGET_IEEE_FP ? UNKNOWN : GE;
19496 default:
19497 return swap_condition (code);
19498 }
19499 }
19500
19501 /* Return cost of comparison CODE using the best strategy for performance.
19502 All following functions use the number of instructions as a cost metric.
19503 In the future this should be tweaked to compute bytes for optimize_size and
19504 take into account performance of various instructions on various CPUs. */
19505
19506 static int
19507 ix86_fp_comparison_cost (enum rtx_code code)
19508 {
19509 int arith_cost;
19510
19511 /* The cost of code using bit-twiddling on %ah. */
19512 switch (code)
19513 {
19514 case UNLE:
19515 case UNLT:
19516 case LTGT:
19517 case GT:
19518 case GE:
19519 case UNORDERED:
19520 case ORDERED:
19521 case UNEQ:
19522 arith_cost = 4;
19523 break;
19524 case LT:
19525 case NE:
19526 case EQ:
19527 case UNGE:
19528 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19529 break;
19530 case LE:
19531 case UNGT:
19532 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19533 break;
19534 default:
19535 gcc_unreachable ();
19536 }
19537
19538 switch (ix86_fp_comparison_strategy (code))
19539 {
19540 case IX86_FPCMP_COMI:
19541 return arith_cost > 4 ? 3 : 2;
19542 case IX86_FPCMP_SAHF:
19543 return arith_cost > 4 ? 4 : 3;
19544 default:
19545 return arith_cost;
19546 }
19547 }
19548
19549 /* Return the strategy to use for floating-point comparison. We assume fcomi
19550 is always preferable where available, since that is also true when looking
19551 at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19552
19553 enum ix86_fpcmp_strategy
19554 ix86_fp_comparison_strategy (enum rtx_code)
19555 {
19556 /* Do fcomi/sahf based test when profitable. */
19557
19558 if (TARGET_CMOVE)
19559 return IX86_FPCMP_COMI;
19560
19561 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19562 return IX86_FPCMP_SAHF;
19563
19564 return IX86_FPCMP_ARITH;
19565 }
19566
19567 /* Swap, force into registers, or otherwise massage the two operands
19568 to an fp comparison. The operands are updated in place; the new
19569 comparison code is returned. */
19570
19571 static enum rtx_code
19572 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19573 {
19574 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19575 rtx op0 = *pop0, op1 = *pop1;
19576 enum machine_mode op_mode = GET_MODE (op0);
19577 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19578
19579 /* All of the unordered compare instructions only work on registers.
19580 The same is true of the fcomi compare instructions. The XFmode
19581 compare instructions require registers except when comparing
19582 against zero or when converting operand 1 from fixed point to
19583 floating point. */
19584
19585 if (!is_sse
19586 && (fpcmp_mode == CCFPUmode
19587 || (op_mode == XFmode
19588 && ! (standard_80387_constant_p (op0) == 1
19589 || standard_80387_constant_p (op1) == 1)
19590 && GET_CODE (op1) != FLOAT)
19591 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19592 {
19593 op0 = force_reg (op_mode, op0);
19594 op1 = force_reg (op_mode, op1);
19595 }
19596 else
19597 {
19598 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19599 things around if they appear profitable, otherwise force op0
19600 into a register. */
19601
19602 if (standard_80387_constant_p (op0) == 0
19603 || (MEM_P (op0)
19604 && ! (standard_80387_constant_p (op1) == 0
19605 || MEM_P (op1))))
19606 {
19607 enum rtx_code new_code = ix86_fp_swap_condition (code);
19608 if (new_code != UNKNOWN)
19609 {
19610 rtx tmp;
19611 tmp = op0, op0 = op1, op1 = tmp;
19612 code = new_code;
19613 }
19614 }
19615
19616 if (!REG_P (op0))
19617 op0 = force_reg (op_mode, op0);
19618
19619 if (CONSTANT_P (op1))
19620 {
19621 int tmp = standard_80387_constant_p (op1);
19622 if (tmp == 0)
19623 op1 = validize_mem (force_const_mem (op_mode, op1));
19624 else if (tmp == 1)
19625 {
19626 if (TARGET_CMOVE)
19627 op1 = force_reg (op_mode, op1);
19628 }
19629 else
19630 op1 = force_reg (op_mode, op1);
19631 }
19632 }
19633
19634 /* Try to rearrange the comparison to make it cheaper. */
19635 if (ix86_fp_comparison_cost (code)
19636 > ix86_fp_comparison_cost (swap_condition (code))
19637 && (REG_P (op1) || can_create_pseudo_p ()))
19638 {
19639 rtx tmp;
19640 tmp = op0, op0 = op1, op1 = tmp;
19641 code = swap_condition (code);
19642 if (!REG_P (op0))
19643 op0 = force_reg (op_mode, op0);
19644 }
19645
19646 *pop0 = op0;
19647 *pop1 = op1;
19648 return code;
19649 }
19650
19651 /* Convert comparison codes we use to represent FP comparison to integer
19652 code that will result in proper branch. Return UNKNOWN if no such code
19653 is available. */
19654
19655 enum rtx_code
19656 ix86_fp_compare_code_to_integer (enum rtx_code code)
19657 {
19658 switch (code)
19659 {
19660 case GT:
19661 return GTU;
19662 case GE:
19663 return GEU;
19664 case ORDERED:
19665 case UNORDERED:
19666 return code;
19667 break;
19668 case UNEQ:
19669 return EQ;
19670 break;
19671 case UNLT:
19672 return LTU;
19673 break;
19674 case UNLE:
19675 return LEU;
19676 break;
19677 case LTGT:
19678 return NE;
19679 break;
19680 default:
19681 return UNKNOWN;
19682 }
19683 }
19684
19685 /* Generate insn patterns to do a floating point compare of OP0 and OP1. */
19686
19687 static rtx
19688 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19689 {
19690 enum machine_mode fpcmp_mode, intcmp_mode;
19691 rtx tmp, tmp2;
19692
19693 fpcmp_mode = ix86_fp_compare_mode (code);
19694 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19695
19696 /* Do fcomi/sahf based test when profitable. */
19697 switch (ix86_fp_comparison_strategy (code))
19698 {
19699 case IX86_FPCMP_COMI:
19700 intcmp_mode = fpcmp_mode;
19701 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19702 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19703 tmp);
19704 emit_insn (tmp);
19705 break;
19706
19707 case IX86_FPCMP_SAHF:
19708 intcmp_mode = fpcmp_mode;
19709 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19710 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19711 tmp);
19712
19713 if (!scratch)
19714 scratch = gen_reg_rtx (HImode);
19715 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19716 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19717 break;
19718
19719 case IX86_FPCMP_ARITH:
19720 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19721 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19722 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19723 if (!scratch)
19724 scratch = gen_reg_rtx (HImode);
19725 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19726
19727 /* In the unordered case, we have to check C2 for NaNs, which
19728 doesn't happen to work out to anything nice combination-wise.
19729 So do some bit twiddling on the value we've got in AH to come
19730 up with an appropriate set of condition codes. */
19731
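/* For reference: after fnstsw the FPU condition bits appear in AH as
   C0 = 0x01, C2 = 0x04 and C3 = 0x40, so the masks used below (0x45,
   0x05, 0x40, 0x04, 0x01) select combinations of those bits; C2 is
   the unordered (NaN) bit.  */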
19732 intcmp_mode = CCNOmode;
19733 switch (code)
19734 {
19735 case GT:
19736 case UNGT:
19737 if (code == GT || !TARGET_IEEE_FP)
19738 {
19739 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19740 code = EQ;
19741 }
19742 else
19743 {
19744 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19745 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19746 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19747 intcmp_mode = CCmode;
19748 code = GEU;
19749 }
19750 break;
19751 case LT:
19752 case UNLT:
19753 if (code == LT && TARGET_IEEE_FP)
19754 {
19755 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19756 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19757 intcmp_mode = CCmode;
19758 code = EQ;
19759 }
19760 else
19761 {
19762 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19763 code = NE;
19764 }
19765 break;
19766 case GE:
19767 case UNGE:
19768 if (code == GE || !TARGET_IEEE_FP)
19769 {
19770 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19771 code = EQ;
19772 }
19773 else
19774 {
19775 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19776 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19777 code = NE;
19778 }
19779 break;
19780 case LE:
19781 case UNLE:
19782 if (code == LE && TARGET_IEEE_FP)
19783 {
19784 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19785 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19786 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19787 intcmp_mode = CCmode;
19788 code = LTU;
19789 }
19790 else
19791 {
19792 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19793 code = NE;
19794 }
19795 break;
19796 case EQ:
19797 case UNEQ:
19798 if (code == EQ && TARGET_IEEE_FP)
19799 {
19800 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19801 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19802 intcmp_mode = CCmode;
19803 code = EQ;
19804 }
19805 else
19806 {
19807 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19808 code = NE;
19809 }
19810 break;
19811 case NE:
19812 case LTGT:
19813 if (code == NE && TARGET_IEEE_FP)
19814 {
19815 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19816 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19817 GEN_INT (0x40)));
19818 code = NE;
19819 }
19820 else
19821 {
19822 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19823 code = EQ;
19824 }
19825 break;
19826
19827 case UNORDERED:
19828 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19829 code = NE;
19830 break;
19831 case ORDERED:
19832 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19833 code = EQ;
19834 break;
19835
19836 default:
19837 gcc_unreachable ();
19838 }
19839 break;
19840
19841 default:
19842 gcc_unreachable();
19843 }
19844
19845 /* Return the test that should be put into the flags user, i.e.
19846 the bcc, scc, or cmov instruction. */
19847 return gen_rtx_fmt_ee (code, VOIDmode,
19848 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19849 const0_rtx);
19850 }
19851
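/* Expand a comparison of OP0 and OP1 with CODE, dispatching to the
   flags-register, floating-point or integer paths as appropriate, and
   return the rtx to be used by the flags consumer.  */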
19852 static rtx
19853 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19854 {
19855 rtx ret;
19856
19857 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19858 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19859
19860 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19861 {
19862 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19863 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19864 }
19865 else
19866 ret = ix86_expand_int_compare (code, op0, op1);
19867
19868 return ret;
19869 }
19870
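/* Expand a conditional branch: jump to LABEL when the comparison
   CODE of OP0 and OP1 is true.  */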
19871 void
19872 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19873 {
19874 enum machine_mode mode = GET_MODE (op0);
19875 rtx tmp;
19876
19877 switch (mode)
19878 {
19879 case SFmode:
19880 case DFmode:
19881 case XFmode:
19882 case QImode:
19883 case HImode:
19884 case SImode:
19885 simple:
19886 tmp = ix86_expand_compare (code, op0, op1);
19887 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19888 gen_rtx_LABEL_REF (VOIDmode, label),
19889 pc_rtx);
19890 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19891 return;
19892
19893 case DImode:
19894 if (TARGET_64BIT)
19895 goto simple;
19896 case TImode:
19897 /* Expand a DImode (or TImode on 64-bit) branch into multiple compare+branch. */
19898 {
19899 rtx lo[2], hi[2];
19900 rtx_code_label *label2;
19901 enum rtx_code code1, code2, code3;
19902 enum machine_mode submode;
19903
19904 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19905 {
19906 tmp = op0, op0 = op1, op1 = tmp;
19907 code = swap_condition (code);
19908 }
19909
19910 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19911 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19912
19913 submode = mode == DImode ? SImode : DImode;
19914
19915 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19916 avoid two branches. This costs one extra insn, so disable when
19917 optimizing for size. */
19918
19919 if ((code == EQ || code == NE)
19920 && (!optimize_insn_for_size_p ()
19921 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19922 {
19923 rtx xor0, xor1;
19924
19925 xor1 = hi[0];
19926 if (hi[1] != const0_rtx)
19927 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19928 NULL_RTX, 0, OPTAB_WIDEN);
19929
19930 xor0 = lo[0];
19931 if (lo[1] != const0_rtx)
19932 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19933 NULL_RTX, 0, OPTAB_WIDEN);
19934
19935 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19936 NULL_RTX, 0, OPTAB_WIDEN);
19937
19938 ix86_expand_branch (code, tmp, const0_rtx, label);
19939 return;
19940 }
19941
19942 /* Otherwise, if we are doing less-than or greater-or-equal-than,
19943 op1 is a constant and the low word is zero, then we can just
19944 examine the high word. Similarly for low word -1 and
19945 less-or-equal-than or greater-than. */
19946
19947 if (CONST_INT_P (hi[1]))
19948 switch (code)
19949 {
19950 case LT: case LTU: case GE: case GEU:
19951 if (lo[1] == const0_rtx)
19952 {
19953 ix86_expand_branch (code, hi[0], hi[1], label);
19954 return;
19955 }
19956 break;
19957 case LE: case LEU: case GT: case GTU:
19958 if (lo[1] == constm1_rtx)
19959 {
19960 ix86_expand_branch (code, hi[0], hi[1], label);
19961 return;
19962 }
19963 break;
19964 default:
19965 break;
19966 }
19967
19968 /* Otherwise, we need two or three jumps. */
19969
19970 label2 = gen_label_rtx ();
19971
19972 code1 = code;
19973 code2 = swap_condition (code);
19974 code3 = unsigned_condition (code);
19975
19976 switch (code)
19977 {
19978 case LT: case GT: case LTU: case GTU:
19979 break;
19980
19981 case LE: code1 = LT; code2 = GT; break;
19982 case GE: code1 = GT; code2 = LT; break;
19983 case LEU: code1 = LTU; code2 = GTU; break;
19984 case GEU: code1 = GTU; code2 = LTU; break;
19985
19986 case EQ: code1 = UNKNOWN; code2 = NE; break;
19987 case NE: code2 = UNKNOWN; break;
19988
19989 default:
19990 gcc_unreachable ();
19991 }
19992
19993 /*
19994 * a < b =>
19995 * if (hi(a) < hi(b)) goto true;
19996 * if (hi(a) > hi(b)) goto false;
19997 * if (lo(a) < lo(b)) goto true;
19998 * false:
19999 */
20000
20001 if (code1 != UNKNOWN)
20002 ix86_expand_branch (code1, hi[0], hi[1], label);
20003 if (code2 != UNKNOWN)
20004 ix86_expand_branch (code2, hi[0], hi[1], label2);
20005
20006 ix86_expand_branch (code3, lo[0], lo[1], label);
20007
20008 if (code2 != UNKNOWN)
20009 emit_label (label2);
20010 return;
20011 }
20012
20013 default:
20014 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20015 goto simple;
20016 }
20017 }
20018
20019 /* Split branch based on floating point condition. */
20020 void
20021 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20022 rtx target1, rtx target2, rtx tmp)
20023 {
20024 rtx condition;
20025 rtx i;
20026
20027 if (target2 != pc_rtx)
20028 {
20029 rtx tmp = target2;
20030 code = reverse_condition_maybe_unordered (code);
20031 target2 = target1;
20032 target1 = tmp;
20033 }
20034
20035 condition = ix86_expand_fp_compare (code, op1, op2,
20036 tmp);
20037
20038 i = emit_jump_insn (gen_rtx_SET
20039 (VOIDmode, pc_rtx,
20040 gen_rtx_IF_THEN_ELSE (VOIDmode,
20041 condition, target1, target2)));
20042 if (split_branch_probability >= 0)
20043 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20044 }
20045
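/* Expand a setcc: store in DEST, which must be QImode, the result of
   comparing OP0 and OP1 with condition CODE.  */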
20046 void
20047 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20048 {
20049 rtx ret;
20050
20051 gcc_assert (GET_MODE (dest) == QImode);
20052
20053 ret = ix86_expand_compare (code, op0, op1);
20054 PUT_MODE (ret, QImode);
20055 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20056 }
20057
20058 /* Expand a comparison setting or clearing the carry flag. Return true when
20059 successful and set *POP to the comparison operation. */
20060 static bool
20061 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20062 {
20063 enum machine_mode mode =
20064 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20065
20066 /* Do not handle double-mode compares; they go through a special path. */
20067 if (mode == (TARGET_64BIT ? TImode : DImode))
20068 return false;
20069
20070 if (SCALAR_FLOAT_MODE_P (mode))
20071 {
20072 rtx compare_op;
20073 rtx_insn *compare_seq;
20074
20075 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20076
20077 /* Shortcut: the following common codes never translate
20078 into carry flag compares. */
20079 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20080 || code == ORDERED || code == UNORDERED)
20081 return false;
20082
20083 /* These comparisons require the zero flag; swap the operands so they no longer do. */
20084 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20085 && !TARGET_IEEE_FP)
20086 {
20087 rtx tmp = op0;
20088 op0 = op1;
20089 op1 = tmp;
20090 code = swap_condition (code);
20091 }
20092
20093 /* Try to expand the comparison and verify that we end up with
20094 a carry-flag-based comparison. This fails only when we decide
20095 to expand the comparison using arithmetic, which is not a
20096 common scenario. */
20097 start_sequence ();
20098 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20099 compare_seq = get_insns ();
20100 end_sequence ();
20101
20102 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20103 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20104 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20105 else
20106 code = GET_CODE (compare_op);
20107
20108 if (code != LTU && code != GEU)
20109 return false;
20110
20111 emit_insn (compare_seq);
20112 *pop = compare_op;
20113 return true;
20114 }
20115
20116 if (!INTEGRAL_MODE_P (mode))
20117 return false;
20118
20119 switch (code)
20120 {
20121 case LTU:
20122 case GEU:
20123 break;
20124
20125 /* Convert a==0 into (unsigned)a<1. */
20126 case EQ:
20127 case NE:
20128 if (op1 != const0_rtx)
20129 return false;
20130 op1 = const1_rtx;
20131 code = (code == EQ ? LTU : GEU);
20132 break;
20133
20134 /* Convert a>b into b<a or a>=b+1. */
20135 case GTU:
20136 case LEU:
20137 if (CONST_INT_P (op1))
20138 {
20139 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20140 /* Bail out on overflow. We could still swap the operands, but
20141 that would force loading the constant into a register. */
20142 if (op1 == const0_rtx
20143 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20144 return false;
20145 code = (code == GTU ? GEU : LTU);
20146 }
20147 else
20148 {
20149 rtx tmp = op1;
20150 op1 = op0;
20151 op0 = tmp;
20152 code = (code == GTU ? LTU : GEU);
20153 }
20154 break;
20155
20156 /* Convert a>=0 into (unsigned)a<0x80000000. */
20157 case LT:
20158 case GE:
20159 if (mode == DImode || op1 != const0_rtx)
20160 return false;
20161 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20162 code = (code == LT ? GEU : LTU);
20163 break;
20164 case LE:
20165 case GT:
20166 if (mode == DImode || op1 != constm1_rtx)
20167 return false;
20168 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20169 code = (code == LE ? GEU : LTU);
20170 break;
20171
20172 default:
20173 return false;
20174 }
20175 /* Swapping operands may cause a constant to appear as the first operand. */
20176 if (!nonimmediate_operand (op0, VOIDmode))
20177 {
20178 if (!can_create_pseudo_p ())
20179 return false;
20180 op0 = force_reg (mode, op0);
20181 }
20182 *pop = ix86_expand_compare (code, op0, op1);
20183 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20184 return true;
20185 }
20186
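/* Expand an integer conditional move: operands[0] = operands[1]
   ? operands[2] : operands[3], where operands[1] is the comparison.
   Return true if the expansion succeeded.  */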
20187 bool
20188 ix86_expand_int_movcc (rtx operands[])
20189 {
20190 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20191 rtx_insn *compare_seq;
20192 rtx compare_op;
20193 enum machine_mode mode = GET_MODE (operands[0]);
20194 bool sign_bit_compare_p = false;
20195 rtx op0 = XEXP (operands[1], 0);
20196 rtx op1 = XEXP (operands[1], 1);
20197
20198 if (GET_MODE (op0) == TImode
20199 || (GET_MODE (op0) == DImode
20200 && !TARGET_64BIT))
20201 return false;
20202
20203 start_sequence ();
20204 compare_op = ix86_expand_compare (code, op0, op1);
20205 compare_seq = get_insns ();
20206 end_sequence ();
20207
20208 compare_code = GET_CODE (compare_op);
20209
20210 if ((op1 == const0_rtx && (code == GE || code == LT))
20211 || (op1 == constm1_rtx && (code == GT || code == LE)))
20212 sign_bit_compare_p = true;
20213
20214 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20215 HImode insns, we'd be swallowed in word prefix ops. */
20216
20217 if ((mode != HImode || TARGET_FAST_PREFIX)
20218 && (mode != (TARGET_64BIT ? TImode : DImode))
20219 && CONST_INT_P (operands[2])
20220 && CONST_INT_P (operands[3]))
20221 {
20222 rtx out = operands[0];
20223 HOST_WIDE_INT ct = INTVAL (operands[2]);
20224 HOST_WIDE_INT cf = INTVAL (operands[3]);
20225 HOST_WIDE_INT diff;
20226
20227 diff = ct - cf;
20228 /* Sign bit compares are better done using shifts than by using
20229 sbb. */
20230 if (sign_bit_compare_p
20231 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20232 {
20233 /* Detect overlap between destination and compare sources. */
20234 rtx tmp = out;
20235
20236 if (!sign_bit_compare_p)
20237 {
20238 rtx flags;
20239 bool fpcmp = false;
20240
20241 compare_code = GET_CODE (compare_op);
20242
20243 flags = XEXP (compare_op, 0);
20244
20245 if (GET_MODE (flags) == CCFPmode
20246 || GET_MODE (flags) == CCFPUmode)
20247 {
20248 fpcmp = true;
20249 compare_code
20250 = ix86_fp_compare_code_to_integer (compare_code);
20251 }
20252
20253 /* To simplify the rest of the code, restrict to the GEU case. */
20254 if (compare_code == LTU)
20255 {
20256 HOST_WIDE_INT tmp = ct;
20257 ct = cf;
20258 cf = tmp;
20259 compare_code = reverse_condition (compare_code);
20260 code = reverse_condition (code);
20261 }
20262 else
20263 {
20264 if (fpcmp)
20265 PUT_CODE (compare_op,
20266 reverse_condition_maybe_unordered
20267 (GET_CODE (compare_op)));
20268 else
20269 PUT_CODE (compare_op,
20270 reverse_condition (GET_CODE (compare_op)));
20271 }
20272 diff = ct - cf;
20273
20274 if (reg_overlap_mentioned_p (out, op0)
20275 || reg_overlap_mentioned_p (out, op1))
20276 tmp = gen_reg_rtx (mode);
20277
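/* The *_0_m1 patterns set TMP to -1 when the carry flag is set and
   to 0 otherwise, i.e. the classic "sbb reg,reg" idiom shown in the
   size comments below.  */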
20278 if (mode == DImode)
20279 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20280 else
20281 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20282 flags, compare_op));
20283 }
20284 else
20285 {
20286 if (code == GT || code == GE)
20287 code = reverse_condition (code);
20288 else
20289 {
20290 HOST_WIDE_INT tmp = ct;
20291 ct = cf;
20292 cf = tmp;
20293 diff = ct - cf;
20294 }
20295 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20296 }
20297
20298 if (diff == 1)
20299 {
20300 /*
20301 * cmpl op0,op1
20302 * sbbl dest,dest
20303 * [addl dest, ct]
20304 *
20305 * Size 5 - 8.
20306 */
20307 if (ct)
20308 tmp = expand_simple_binop (mode, PLUS,
20309 tmp, GEN_INT (ct),
20310 copy_rtx (tmp), 1, OPTAB_DIRECT);
20311 }
20312 else if (cf == -1)
20313 {
20314 /*
20315 * cmpl op0,op1
20316 * sbbl dest,dest
20317 * orl $ct, dest
20318 *
20319 * Size 8.
20320 */
20321 tmp = expand_simple_binop (mode, IOR,
20322 tmp, GEN_INT (ct),
20323 copy_rtx (tmp), 1, OPTAB_DIRECT);
20324 }
20325 else if (diff == -1 && ct)
20326 {
20327 /*
20328 * cmpl op0,op1
20329 * sbbl dest,dest
20330 * notl dest
20331 * [addl dest, cf]
20332 *
20333 * Size 8 - 11.
20334 */
20335 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20336 if (cf)
20337 tmp = expand_simple_binop (mode, PLUS,
20338 copy_rtx (tmp), GEN_INT (cf),
20339 copy_rtx (tmp), 1, OPTAB_DIRECT);
20340 }
20341 else
20342 {
20343 /*
20344 * cmpl op0,op1
20345 * sbbl dest,dest
20346 * [notl dest]
20347 * andl cf - ct, dest
20348 * [addl dest, ct]
20349 *
20350 * Size 8 - 11.
20351 */
20352
20353 if (cf == 0)
20354 {
20355 cf = ct;
20356 ct = 0;
20357 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20358 }
20359
20360 tmp = expand_simple_binop (mode, AND,
20361 copy_rtx (tmp),
20362 gen_int_mode (cf - ct, mode),
20363 copy_rtx (tmp), 1, OPTAB_DIRECT);
20364 if (ct)
20365 tmp = expand_simple_binop (mode, PLUS,
20366 copy_rtx (tmp), GEN_INT (ct),
20367 copy_rtx (tmp), 1, OPTAB_DIRECT);
20368 }
20369
20370 if (!rtx_equal_p (tmp, out))
20371 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20372
20373 return true;
20374 }
20375
20376 if (diff < 0)
20377 {
20378 enum machine_mode cmp_mode = GET_MODE (op0);
20379
20380 HOST_WIDE_INT tmp;
20381 tmp = ct, ct = cf, cf = tmp;
20382 diff = -diff;
20383
20384 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20385 {
20386 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20387
20388 /* We may be reversing an unordered compare to a normal compare, which
20389 is not valid in general (we may convert a non-trapping condition
20390 to a trapping one); however, on i386 we currently emit all
20391 comparisons unordered. */
20392 compare_code = reverse_condition_maybe_unordered (compare_code);
20393 code = reverse_condition_maybe_unordered (code);
20394 }
20395 else
20396 {
20397 compare_code = reverse_condition (compare_code);
20398 code = reverse_condition (code);
20399 }
20400 }
20401
20402 compare_code = UNKNOWN;
20403 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20404 && CONST_INT_P (op1))
20405 {
20406 if (op1 == const0_rtx
20407 && (code == LT || code == GE))
20408 compare_code = code;
20409 else if (op1 == constm1_rtx)
20410 {
20411 if (code == LE)
20412 compare_code = LT;
20413 else if (code == GT)
20414 compare_code = GE;
20415 }
20416 }
20417
20418 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20419 if (compare_code != UNKNOWN
20420 && GET_MODE (op0) == GET_MODE (out)
20421 && (cf == -1 || ct == -1))
20422 {
20423 /* If the lea code below could be used, only optimize
20424 if it results in a 2-insn sequence. */
20425
20426 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20427 || diff == 3 || diff == 5 || diff == 9)
20428 || (compare_code == LT && ct == -1)
20429 || (compare_code == GE && cf == -1))
20430 {
20431 /*
20432 * notl op1 (if necessary)
20433 * sarl $31, op1
20434 * orl cf, op1
20435 */
20436 if (ct != -1)
20437 {
20438 cf = ct;
20439 ct = -1;
20440 code = reverse_condition (code);
20441 }
20442
20443 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20444
20445 out = expand_simple_binop (mode, IOR,
20446 out, GEN_INT (cf),
20447 out, 1, OPTAB_DIRECT);
20448 if (out != operands[0])
20449 emit_move_insn (operands[0], out);
20450
20451 return true;
20452 }
20453 }
20454
20455
20456 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20457 || diff == 3 || diff == 5 || diff == 9)
20458 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20459 && (mode != DImode
20460 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20461 {
20462 /*
20463 * xorl dest,dest
20464 * cmpl op1,op2
20465 * setcc dest
20466 * lea cf(dest*(ct-cf)),dest
20467 *
20468 * Size 14.
20469 *
20470 * This also catches the degenerate setcc-only case.
20471 */
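/* For example, with diff == 5 the sequence below computes
   dest = cf + dest + dest*4, i.e. lea cf(dest,dest,4), dest:
   diff & ~1 supplies the scale and the odd bit adds dest back in.  */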
20472
20473 rtx tmp;
20474 int nops;
20475
20476 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20477
20478 nops = 0;
20479 /* On x86_64 the lea instruction operates on Pmode, so we need
20480 to get the arithmetic done in the proper mode to match. */
20481 if (diff == 1)
20482 tmp = copy_rtx (out);
20483 else
20484 {
20485 rtx out1;
20486 out1 = copy_rtx (out);
20487 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20488 nops++;
20489 if (diff & 1)
20490 {
20491 tmp = gen_rtx_PLUS (mode, tmp, out1);
20492 nops++;
20493 }
20494 }
20495 if (cf != 0)
20496 {
20497 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20498 nops++;
20499 }
20500 if (!rtx_equal_p (tmp, out))
20501 {
20502 if (nops == 1)
20503 out = force_operand (tmp, copy_rtx (out));
20504 else
20505 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20506 }
20507 if (!rtx_equal_p (out, operands[0]))
20508 emit_move_insn (operands[0], copy_rtx (out));
20509
20510 return true;
20511 }
20512
20513 /*
20514 * General case: Jumpful:
20515 * xorl dest,dest cmpl op1, op2
20516 * cmpl op1, op2 movl ct, dest
20517 * setcc dest jcc 1f
20518 * decl dest movl cf, dest
20519 * andl (cf-ct),dest 1:
20520 * addl ct,dest
20521 *
20522 * Size 20. Size 14.
20523 *
20524 * This is reasonably steep, but branch mispredict costs are
20525 * high on modern CPUs, so consider failing only if optimizing
20526 * for space.
20527 */
20528
20529 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20530 && BRANCH_COST (optimize_insn_for_speed_p (),
20531 false) >= 2)
20532 {
20533 if (cf == 0)
20534 {
20535 enum machine_mode cmp_mode = GET_MODE (op0);
20536
20537 cf = ct;
20538 ct = 0;
20539
20540 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20541 {
20542 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20543
20544 /* We may be reversing an unordered compare to a normal compare,
20545 which is not valid in general (we may convert a non-trapping
20546 condition to a trapping one); however, on i386 we currently
20547 emit all comparisons unordered. */
20548 code = reverse_condition_maybe_unordered (code);
20549 }
20550 else
20551 {
20552 code = reverse_condition (code);
20553 if (compare_code != UNKNOWN)
20554 compare_code = reverse_condition (compare_code);
20555 }
20556 }
20557
20558 if (compare_code != UNKNOWN)
20559 {
20560 /* notl op1 (if needed)
20561 sarl $31, op1
20562 andl (cf-ct), op1
20563 addl ct, op1
20564
20565 For x < 0 (resp. x <= -1) there will be no notl,
20566 so if possible swap the constants to get rid of the
20567 complement.
20568 True/false will be -1/0 while code below (store flag
20569 followed by decrement) is 0/-1, so the constants need
20570 to be exchanged once more. */
20571
20572 if (compare_code == GE || !cf)
20573 {
20574 code = reverse_condition (code);
20575 compare_code = LT;
20576 }
20577 else
20578 {
20579 HOST_WIDE_INT tmp = cf;
20580 cf = ct;
20581 ct = tmp;
20582 }
20583
20584 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20585 }
20586 else
20587 {
20588 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20589
20590 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20591 constm1_rtx,
20592 copy_rtx (out), 1, OPTAB_DIRECT);
20593 }
20594
20595 out = expand_simple_binop (mode, AND, copy_rtx (out),
20596 gen_int_mode (cf - ct, mode),
20597 copy_rtx (out), 1, OPTAB_DIRECT);
20598 if (ct)
20599 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20600 copy_rtx (out), 1, OPTAB_DIRECT);
20601 if (!rtx_equal_p (out, operands[0]))
20602 emit_move_insn (operands[0], copy_rtx (out));
20603
20604 return true;
20605 }
20606 }
20607
20608 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20609 {
20610 /* Try a few things more with specific constants and a variable. */
20611
20612 optab op;
20613 rtx var, orig_out, out, tmp;
20614
20615 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20616 return false;
20617
20618 /* If one of the two operands is an interesting constant (0 or -1), load
20619 a 0/-1 value with the code above and mask the variable in with AND/OR. */
20620
20621 if (CONST_INT_P (operands[2]))
20622 {
20623 var = operands[3];
20624 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20625 operands[3] = constm1_rtx, op = and_optab;
20626 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20627 operands[3] = const0_rtx, op = ior_optab;
20628 else
20629 return false;
20630 }
20631 else if (CONST_INT_P (operands[3]))
20632 {
20633 var = operands[2];
20634 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20635 operands[2] = constm1_rtx, op = and_optab;
20636 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
20637 operands[2] = const0_rtx, op = ior_optab;
20638 else
20639 return false;
20640 }
20641 else
20642 return false;
20643
20644 orig_out = operands[0];
20645 tmp = gen_reg_rtx (mode);
20646 operands[0] = tmp;
20647
20648 /* Recurse to get the constant loaded. */
20649 if (ix86_expand_int_movcc (operands) == 0)
20650 return false;
20651
20652 /* Mask in the interesting variable. */
20653 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20654 OPTAB_WIDEN);
20655 if (!rtx_equal_p (out, orig_out))
20656 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20657
20658 return true;
20659 }
20660
20661 /*
20662 * For comparison with above,
20663 *
20664 * movl cf,dest
20665 * movl ct,tmp
20666 * cmpl op1,op2
20667 * cmovcc tmp,dest
20668 *
20669 * Size 15.
20670 */
20671
20672 if (! nonimmediate_operand (operands[2], mode))
20673 operands[2] = force_reg (mode, operands[2]);
20674 if (! nonimmediate_operand (operands[3], mode))
20675 operands[3] = force_reg (mode, operands[3]);
20676
20677 if (! register_operand (operands[2], VOIDmode)
20678 && (mode == QImode
20679 || ! register_operand (operands[3], VOIDmode)))
20680 operands[2] = force_reg (mode, operands[2]);
20681
20682 if (mode == QImode
20683 && ! register_operand (operands[3], VOIDmode))
20684 operands[3] = force_reg (mode, operands[3]);
20685
20686 emit_insn (compare_seq);
20687 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20688 gen_rtx_IF_THEN_ELSE (mode,
20689 compare_op, operands[2],
20690 operands[3])));
20691 return true;
20692 }
20693
20694 /* Swap, force into registers, or otherwise massage the two operands
20695 to an sse comparison with a mask result. Thus we differ a bit from
20696 ix86_prepare_fp_compare_args which expects to produce a flags result.
20697
20698 The DEST operand exists to help determine whether to commute commutative
20699 operators. The POP0/POP1 operands are updated in place. The new
20700 comparison code is returned, or UNKNOWN if not implementable. */
20701
20702 static enum rtx_code
20703 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20704 rtx *pop0, rtx *pop1)
20705 {
20706 rtx tmp;
20707
20708 switch (code)
20709 {
20710 case LTGT:
20711 case UNEQ:
20712 /* AVX supports all the needed comparisons. */
20713 if (TARGET_AVX)
20714 break;
20715 /* We have no LTGT as an operator. We could implement it with
20716 NE & ORDERED, but this requires an extra temporary. It's
20717 not clear that it's worth it. */
20718 return UNKNOWN;
20719
20720 case LT:
20721 case LE:
20722 case UNGT:
20723 case UNGE:
20724 /* These are supported directly. */
20725 break;
20726
20727 case EQ:
20728 case NE:
20729 case UNORDERED:
20730 case ORDERED:
20731 /* AVX has 3 operand comparisons, no need to swap anything. */
20732 if (TARGET_AVX)
20733 break;
20734 /* For commutative operators, try to canonicalize the destination
20735 operand to be first in the comparison - this helps reload to
20736 avoid extra moves. */
20737 if (!dest || !rtx_equal_p (dest, *pop1))
20738 break;
20739 /* FALLTHRU */
20740
20741 case GE:
20742 case GT:
20743 case UNLE:
20744 case UNLT:
20745 /* These are not supported directly before AVX, and furthermore
20746 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20747 comparison operands to transform into something that is
20748 supported. */
20749 tmp = *pop0;
20750 *pop0 = *pop1;
20751 *pop1 = tmp;
20752 code = swap_condition (code);
20753 break;
20754
20755 default:
20756 gcc_unreachable ();
20757 }
20758
20759 return code;
20760 }
20761
20762 /* Detect conditional moves that exactly match min/max operational
20763 semantics. Note that this is IEEE safe, as long as we don't
20764 interchange the operands.
20765
20766 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20767 and TRUE if the operation is successful and instructions are emitted. */
20768
20769 static bool
20770 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20771 rtx cmp_op1, rtx if_true, rtx if_false)
20772 {
20773 enum machine_mode mode;
20774 bool is_min;
20775 rtx tmp;
20776
20777 if (code == LT)
20778 ;
20779 else if (code == UNGE)
20780 {
20781 tmp = if_true;
20782 if_true = if_false;
20783 if_false = tmp;
20784 }
20785 else
20786 return false;
20787
20788 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20789 is_min = true;
20790 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20791 is_min = false;
20792 else
20793 return false;
20794
20795 mode = GET_MODE (dest);
20796
20797 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20798 but MODE may be a vector mode and thus not appropriate. */
20799 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20800 {
20801 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20802 rtvec v;
20803
20804 if_true = force_reg (mode, if_true);
20805 v = gen_rtvec (2, if_true, if_false);
20806 tmp = gen_rtx_UNSPEC (mode, v, u);
20807 }
20808 else
20809 {
20810 code = is_min ? SMIN : SMAX;
20811 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20812 }
20813
20814 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20815 return true;
20816 }
20817
20818 /* Expand an sse vector comparison. Return the register with the result. */
20819
20820 static rtx
20821 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20822 rtx op_true, rtx op_false)
20823 {
20824 enum machine_mode mode = GET_MODE (dest);
20825 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20826
20827 /* In the general case the mode of the comparison result can differ from the operands' mode. */
20828 enum machine_mode cmp_mode;
20829
20830 /* In AVX512F the result of comparison is an integer mask. */
20831 bool maskcmp = false;
20832 rtx x;
20833
20834 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20835 {
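/* The mask has one bit per element, e.g. HImode for V16SImode or
   V16SFmode operands and QImode for V8DImode or V8DFmode operands.  */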
20836 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20837 gcc_assert (cmp_mode != BLKmode);
20838
20839 maskcmp = true;
20840 }
20841 else
20842 cmp_mode = cmp_ops_mode;
20843
20844
20845 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20846 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20847 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20848
20849 if (optimize
20850 || reg_overlap_mentioned_p (dest, op_true)
20851 || reg_overlap_mentioned_p (dest, op_false))
20852 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20853
20854 /* Compare patterns for int modes are unspec in AVX512F only. */
20855 if (maskcmp && (code == GT || code == EQ))
20856 {
20857 rtx (*gen)(rtx, rtx, rtx);
20858
20859 switch (cmp_ops_mode)
20860 {
20861 case V16SImode:
20862 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20863 break;
20864 case V8DImode:
20865 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20866 break;
20867 default:
20868 gen = NULL;
20869 }
20870
20871 if (gen)
20872 {
20873 emit_insn (gen (dest, cmp_op0, cmp_op1));
20874 return dest;
20875 }
20876 }
20877 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20878
20879 if (cmp_mode != mode && !maskcmp)
20880 {
20881 x = force_reg (cmp_ops_mode, x);
20882 convert_move (dest, x, false);
20883 }
20884 else
20885 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20886
20887 return dest;
20888 }
20889
20890 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20891 operations. This is used for both scalar and vector conditional moves. */
20892
20893 static void
20894 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20895 {
20896 enum machine_mode mode = GET_MODE (dest);
20897 enum machine_mode cmpmode = GET_MODE (cmp);
20898
20899 /* In AVX512F the result of comparison is an integer mask. */
20900 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20901
20902 rtx t2, t3, x;
20903
20904 if (vector_all_ones_operand (op_true, mode)
20905 && rtx_equal_p (op_false, CONST0_RTX (mode))
20906 && !maskcmp)
20907 {
20908 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20909 }
20910 else if (op_false == CONST0_RTX (mode)
20911 && !maskcmp)
20912 {
20913 op_true = force_reg (mode, op_true);
20914 x = gen_rtx_AND (mode, cmp, op_true);
20915 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20916 }
20917 else if (op_true == CONST0_RTX (mode)
20918 && !maskcmp)
20919 {
20920 op_false = force_reg (mode, op_false);
20921 x = gen_rtx_NOT (mode, cmp);
20922 x = gen_rtx_AND (mode, x, op_false);
20923 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20924 }
20925 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20926 && !maskcmp)
20927 {
20928 op_false = force_reg (mode, op_false);
20929 x = gen_rtx_IOR (mode, cmp, op_false);
20930 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20931 }
20932 else if (TARGET_XOP
20933 && !maskcmp)
20934 {
20935 op_true = force_reg (mode, op_true);
20936
20937 if (!nonimmediate_operand (op_false, mode))
20938 op_false = force_reg (mode, op_false);
20939
20940 emit_insn (gen_rtx_SET (mode, dest,
20941 gen_rtx_IF_THEN_ELSE (mode, cmp,
20942 op_true,
20943 op_false)));
20944 }
20945 else
20946 {
20947 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
20948 rtx d = dest;
20949
20950 if (!nonimmediate_operand (op_true, mode))
20951 op_true = force_reg (mode, op_true);
20952
20953 op_false = force_reg (mode, op_false);
20954
20955 switch (mode)
20956 {
20957 case V4SFmode:
20958 if (TARGET_SSE4_1)
20959 gen = gen_sse4_1_blendvps;
20960 break;
20961 case V2DFmode:
20962 if (TARGET_SSE4_1)
20963 gen = gen_sse4_1_blendvpd;
20964 break;
20965 case V16QImode:
20966 case V8HImode:
20967 case V4SImode:
20968 case V2DImode:
20969 if (TARGET_SSE4_1)
20970 {
20971 gen = gen_sse4_1_pblendvb;
20972 if (mode != V16QImode)
20973 d = gen_reg_rtx (V16QImode);
20974 op_false = gen_lowpart (V16QImode, op_false);
20975 op_true = gen_lowpart (V16QImode, op_true);
20976 cmp = gen_lowpart (V16QImode, cmp);
20977 }
20978 break;
20979 case V8SFmode:
20980 if (TARGET_AVX)
20981 gen = gen_avx_blendvps256;
20982 break;
20983 case V4DFmode:
20984 if (TARGET_AVX)
20985 gen = gen_avx_blendvpd256;
20986 break;
20987 case V32QImode:
20988 case V16HImode:
20989 case V8SImode:
20990 case V4DImode:
20991 if (TARGET_AVX2)
20992 {
20993 gen = gen_avx2_pblendvb;
20994 if (mode != V32QImode)
20995 d = gen_reg_rtx (V32QImode);
20996 op_false = gen_lowpart (V32QImode, op_false);
20997 op_true = gen_lowpart (V32QImode, op_true);
20998 cmp = gen_lowpart (V32QImode, cmp);
20999 }
21000 break;
21001
21002 case V16SImode:
21003 gen = gen_avx512f_blendmv16si;
21004 break;
21005 case V8DImode:
21006 gen = gen_avx512f_blendmv8di;
21007 break;
21008 case V8DFmode:
21009 gen = gen_avx512f_blendmv8df;
21010 break;
21011 case V16SFmode:
21012 gen = gen_avx512f_blendmv16sf;
21013 break;
21014
21015 default:
21016 break;
21017 }
21018
21019 if (gen != NULL)
21020 {
21021 emit_insn (gen (d, op_false, op_true, cmp));
21022 if (d != dest)
21023 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21024 }
21025 else
21026 {
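/* No blend-style instruction is available here; emit
   dest = (cmp & op_true) | (~cmp & op_false) with explicit
   logical operations.  */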
21027 op_true = force_reg (mode, op_true);
21028
21029 t2 = gen_reg_rtx (mode);
21030 if (optimize)
21031 t3 = gen_reg_rtx (mode);
21032 else
21033 t3 = dest;
21034
21035 x = gen_rtx_AND (mode, op_true, cmp);
21036 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21037
21038 x = gen_rtx_NOT (mode, cmp);
21039 x = gen_rtx_AND (mode, x, op_false);
21040 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21041
21042 x = gen_rtx_IOR (mode, t3, t2);
21043 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21044 }
21045 }
21046 }
21047
21048 /* Expand a floating-point conditional move. Return true if successful. */
21049
21050 bool
21051 ix86_expand_fp_movcc (rtx operands[])
21052 {
21053 enum machine_mode mode = GET_MODE (operands[0]);
21054 enum rtx_code code = GET_CODE (operands[1]);
21055 rtx tmp, compare_op;
21056 rtx op0 = XEXP (operands[1], 0);
21057 rtx op1 = XEXP (operands[1], 1);
21058
21059 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21060 {
21061 enum machine_mode cmode;
21062
21063 /* Since we have no cmove for SSE registers, don't force bad register
21064 allocation just to gain access to it. Deny movcc when the
21065 comparison mode doesn't match the move mode. */
21066 cmode = GET_MODE (op0);
21067 if (cmode == VOIDmode)
21068 cmode = GET_MODE (op1);
21069 if (cmode != mode)
21070 return false;
21071
21072 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21073 if (code == UNKNOWN)
21074 return false;
21075
21076 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21077 operands[2], operands[3]))
21078 return true;
21079
21080 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21081 operands[2], operands[3]);
21082 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21083 return true;
21084 }
21085
21086 if (GET_MODE (op0) == TImode
21087 || (GET_MODE (op0) == DImode
21088 && !TARGET_64BIT))
21089 return false;
21090
21091 /* The floating point conditional move instructions don't directly
21092 support conditions resulting from a signed integer comparison. */
21093
21094 compare_op = ix86_expand_compare (code, op0, op1);
21095 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21096 {
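/* Materialize the condition with setcc into a QImode temporary and
   use an NE test of that temporary for the conditional move instead.  */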
21097 tmp = gen_reg_rtx (QImode);
21098 ix86_expand_setcc (tmp, code, op0, op1);
21099
21100 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21101 }
21102
21103 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21104 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21105 operands[2], operands[3])));
21106
21107 return true;
21108 }
21109
21110 /* Expand a floating-point vector conditional move; a vcond operation
21111 rather than a movcc operation. */
21112
21113 bool
21114 ix86_expand_fp_vcond (rtx operands[])
21115 {
21116 enum rtx_code code = GET_CODE (operands[3]);
21117 rtx cmp;
21118
21119 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21120 &operands[4], &operands[5]);
21121 if (code == UNKNOWN)
21122 {
21123 rtx temp;
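/* LTGT and UNEQ have no direct encoding here; build them as
   ORDERED & NE and UNORDERED | EQ respectively.  */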
21124 switch (GET_CODE (operands[3]))
21125 {
21126 case LTGT:
21127 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21128 operands[5], operands[0], operands[0]);
21129 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21130 operands[5], operands[1], operands[2]);
21131 code = AND;
21132 break;
21133 case UNEQ:
21134 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21135 operands[5], operands[0], operands[0]);
21136 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21137 operands[5], operands[1], operands[2]);
21138 code = IOR;
21139 break;
21140 default:
21141 gcc_unreachable ();
21142 }
21143 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21144 OPTAB_DIRECT);
21145 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21146 return true;
21147 }
21148
21149 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21150 operands[5], operands[1], operands[2]))
21151 return true;
21152
21153 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21154 operands[1], operands[2]);
21155 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21156 return true;
21157 }
21158
21159 /* Expand a signed/unsigned integral vector conditional move. */
21160
21161 bool
21162 ix86_expand_int_vcond (rtx operands[])
21163 {
21164 enum machine_mode data_mode = GET_MODE (operands[0]);
21165 enum machine_mode mode = GET_MODE (operands[4]);
21166 enum rtx_code code = GET_CODE (operands[3]);
21167 bool negate = false;
21168 rtx x, cop0, cop1;
21169
21170 cop0 = operands[4];
21171 cop1 = operands[5];
21172
21173 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21174 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21175 if ((code == LT || code == GE)
21176 && data_mode == mode
21177 && cop1 == CONST0_RTX (mode)
21178 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21179 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21180 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21181 && (GET_MODE_SIZE (data_mode) == 16
21182 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21183 {
21184 rtx negop = operands[2 - (code == LT)];
21185 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21186 if (negop == CONST1_RTX (data_mode))
21187 {
21188 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21189 operands[0], 1, OPTAB_DIRECT);
21190 if (res != operands[0])
21191 emit_move_insn (operands[0], res);
21192 return true;
21193 }
21194 else if (GET_MODE_INNER (data_mode) != DImode
21195 && vector_all_ones_operand (negop, data_mode))
21196 {
21197 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21198 operands[0], 0, OPTAB_DIRECT);
21199 if (res != operands[0])
21200 emit_move_insn (operands[0], res);
21201 return true;
21202 }
21203 }
21204
21205 if (!nonimmediate_operand (cop1, mode))
21206 cop1 = force_reg (mode, cop1);
21207 if (!general_operand (operands[1], data_mode))
21208 operands[1] = force_reg (data_mode, operands[1]);
21209 if (!general_operand (operands[2], data_mode))
21210 operands[2] = force_reg (data_mode, operands[2]);
21211
21212 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21213 if (TARGET_XOP
21214 && (mode == V16QImode || mode == V8HImode
21215 || mode == V4SImode || mode == V2DImode))
21216 ;
21217 else
21218 {
21219 /* Canonicalize the comparison to EQ, GT, GTU. */
21220 switch (code)
21221 {
21222 case EQ:
21223 case GT:
21224 case GTU:
21225 break;
21226
21227 case NE:
21228 case LE:
21229 case LEU:
21230 code = reverse_condition (code);
21231 negate = true;
21232 break;
21233
21234 case GE:
21235 case GEU:
21236 code = reverse_condition (code);
21237 negate = true;
21238 /* FALLTHRU */
21239
21240 case LT:
21241 case LTU:
21242 code = swap_condition (code);
21243 x = cop0, cop0 = cop1, cop1 = x;
21244 break;
21245
21246 default:
21247 gcc_unreachable ();
21248 }
21249
21250 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21251 if (mode == V2DImode)
21252 {
21253 switch (code)
21254 {
21255 case EQ:
21256 /* SSE4.1 supports EQ. */
21257 if (!TARGET_SSE4_1)
21258 return false;
21259 break;
21260
21261 case GT:
21262 case GTU:
21263 /* SSE4.2 supports GT/GTU. */
21264 if (!TARGET_SSE4_2)
21265 return false;
21266 break;
21267
21268 default:
21269 gcc_unreachable ();
21270 }
21271 }
21272
21273 /* Unsigned parallel compare is not supported by the hardware.
21274 Play some tricks to turn this into a signed comparison
21275 against 0. */
21276 if (code == GTU)
21277 {
21278 cop0 = force_reg (mode, cop0);
21279
21280 switch (mode)
21281 {
21282 case V16SImode:
21283 case V8DImode:
21284 case V8SImode:
21285 case V4DImode:
21286 case V4SImode:
21287 case V2DImode:
21288 {
21289 rtx t1, t2, mask;
21290 rtx (*gen_sub3) (rtx, rtx, rtx);
21291
21292 switch (mode)
21293 {
21294 case V16SImode: gen_sub3 = gen_subv16si3; break;
21295 case V8DImode: gen_sub3 = gen_subv8di3; break;
21296 case V8SImode: gen_sub3 = gen_subv8si3; break;
21297 case V4DImode: gen_sub3 = gen_subv4di3; break;
21298 case V4SImode: gen_sub3 = gen_subv4si3; break;
21299 case V2DImode: gen_sub3 = gen_subv2di3; break;
21300 default:
21301 gcc_unreachable ();
21302 }
21303 /* Subtract (-(INT MAX) - 1) from both operands to make
21304 them signed. */
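/* E.g. for V4SImode this computes (x - 0x80000000) > (y - 0x80000000)
   as a signed compare, which holds exactly when x > y unsigned.  */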
21305 mask = ix86_build_signbit_mask (mode, true, false);
21306 t1 = gen_reg_rtx (mode);
21307 emit_insn (gen_sub3 (t1, cop0, mask));
21308
21309 t2 = gen_reg_rtx (mode);
21310 emit_insn (gen_sub3 (t2, cop1, mask));
21311
21312 cop0 = t1;
21313 cop1 = t2;
21314 code = GT;
21315 }
21316 break;
21317
21318 case V32QImode:
21319 case V16HImode:
21320 case V16QImode:
21321 case V8HImode:
21322 /* Perform a parallel unsigned saturating subtraction. */
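/* x >u y exactly when the saturating difference x - y is nonzero,
   so compare that difference against zero and flip NEGATE.  */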
21323 x = gen_reg_rtx (mode);
21324 emit_insn (gen_rtx_SET (VOIDmode, x,
21325 gen_rtx_US_MINUS (mode, cop0, cop1)));
21326
21327 cop0 = x;
21328 cop1 = CONST0_RTX (mode);
21329 code = EQ;
21330 negate = !negate;
21331 break;
21332
21333 default:
21334 gcc_unreachable ();
21335 }
21336 }
21337 }
21338
21339 /* Allow the comparison to be done in one mode, but the movcc to
21340 happen in another mode. */
21341 if (data_mode == mode)
21342 {
21343 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21344 operands[1+negate], operands[2-negate]);
21345 }
21346 else
21347 {
21348 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21349 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21350 operands[1+negate], operands[2-negate]);
21351 if (GET_MODE (x) == mode)
21352 x = gen_lowpart (data_mode, x);
21353 }
21354
21355 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21356 operands[2-negate]);
21357 return true;
21358 }
21359
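/* Try to expand a full variable permutation of OP0 and OP1 under MASK
   as a single AVX-512 vpermi2 instruction; return false if the mode is
   not handled here.  */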
21360 static bool
21361 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21362 {
21363 enum machine_mode mode = GET_MODE (op0);
21364 switch (mode)
21365 {
21366 case V16SImode:
21367 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21368 force_reg (V16SImode, mask),
21369 op1));
21370 return true;
21371 case V16SFmode:
21372 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21373 force_reg (V16SImode, mask),
21374 op1));
21375 return true;
21376 case V8DImode:
21377 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21378 force_reg (V8DImode, mask), op1));
21379 return true;
21380 case V8DFmode:
21381 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21382 force_reg (V8DImode, mask), op1));
21383 return true;
21384 default:
21385 return false;
21386 }
21387 }
21388
21389 /* Expand a variable vector permutation. */
21390
21391 void
21392 ix86_expand_vec_perm (rtx operands[])
21393 {
21394 rtx target = operands[0];
21395 rtx op0 = operands[1];
21396 rtx op1 = operands[2];
21397 rtx mask = operands[3];
21398 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21399 enum machine_mode mode = GET_MODE (op0);
21400 enum machine_mode maskmode = GET_MODE (mask);
21401 int w, e, i;
21402 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21403
21404 /* Number of elements in the vector. */
21405 w = GET_MODE_NUNITS (mode);
21406 e = GET_MODE_UNIT_SIZE (mode);
21407 gcc_assert (w <= 64);
21408
21409 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21410 return;
21411
21412 if (TARGET_AVX2)
21413 {
21414 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21415 {
21416 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21417 a constant shuffle operand. With a tiny bit of effort we can
21418 use VPERMD instead. A re-interpretation stall for V4DFmode is
21419 unfortunate but there's no avoiding it.
21420 Similarly, for V16HImode we don't have instructions for variable
21421 shuffling, while for V32QImode, after preparing suitable masks,
21422 we can use vpshufb; vpshufb; vpermq; vpor. */
21423
21424 if (mode == V16HImode)
21425 {
21426 maskmode = mode = V32QImode;
21427 w = 32;
21428 e = 1;
21429 }
21430 else
21431 {
21432 maskmode = mode = V8SImode;
21433 w = 8;
21434 e = 4;
21435 }
21436 t1 = gen_reg_rtx (maskmode);
21437
21438 /* Replicate the low bits of the V4DImode mask into V8SImode:
21439 mask = { A B C D }
21440 t1 = { A A B B C C D D }. */
21441 for (i = 0; i < w / 2; ++i)
21442 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21443 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21444 vt = force_reg (maskmode, vt);
21445 mask = gen_lowpart (maskmode, mask);
21446 if (maskmode == V8SImode)
21447 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21448 else
21449 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21450
21451 /* Multiply the shuffle indices by two. */
21452 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21453 OPTAB_DIRECT);
21454
21455 /* Add one to the odd shuffle indices:
21456 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21457 for (i = 0; i < w / 2; ++i)
21458 {
21459 vec[i * 2] = const0_rtx;
21460 vec[i * 2 + 1] = const1_rtx;
21461 }
21462 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21463 vt = validize_mem (force_const_mem (maskmode, vt));
21464 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21465 OPTAB_DIRECT);
21466
21467 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21468 operands[3] = mask = t1;
21469 target = gen_reg_rtx (mode);
21470 op0 = gen_lowpart (mode, op0);
21471 op1 = gen_lowpart (mode, op1);
21472 }
21473
21474 switch (mode)
21475 {
21476 case V8SImode:
21477 /* The VPERMD and VPERMPS instructions already properly ignore
21478 the high bits of the shuffle elements. No need for us to
21479 perform an AND ourselves. */
21480 if (one_operand_shuffle)
21481 {
21482 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21483 if (target != operands[0])
21484 emit_move_insn (operands[0],
21485 gen_lowpart (GET_MODE (operands[0]), target));
21486 }
21487 else
21488 {
21489 t1 = gen_reg_rtx (V8SImode);
21490 t2 = gen_reg_rtx (V8SImode);
21491 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21492 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21493 goto merge_two;
21494 }
21495 return;
21496
21497 case V8SFmode:
21498 mask = gen_lowpart (V8SImode, mask);
21499 if (one_operand_shuffle)
21500 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21501 else
21502 {
21503 t1 = gen_reg_rtx (V8SFmode);
21504 t2 = gen_reg_rtx (V8SFmode);
21505 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21506 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21507 goto merge_two;
21508 }
21509 return;
21510
21511 case V4SImode:
21512 /* By combining the two 128-bit input vectors into one 256-bit
21513 input vector, we can use VPERMD and VPERMPS for the full
21514 two-operand shuffle. */
21515 t1 = gen_reg_rtx (V8SImode);
21516 t2 = gen_reg_rtx (V8SImode);
21517 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21518 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21519 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21520 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21521 return;
21522
21523 case V4SFmode:
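/* Likewise: concatenate the two 128-bit inputs into one 256-bit
   vector and use the variable VPERMPS permute.  */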
21524 t1 = gen_reg_rtx (V8SFmode);
21525 t2 = gen_reg_rtx (V8SImode);
21526 mask = gen_lowpart (V4SImode, mask);
21527 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21528 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21529 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21530 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21531 return;
21532
21533 case V32QImode:
21534 t1 = gen_reg_rtx (V32QImode);
21535 t2 = gen_reg_rtx (V32QImode);
21536 t3 = gen_reg_rtx (V32QImode);
21537 vt2 = GEN_INT (-128);
21538 for (i = 0; i < 32; i++)
21539 vec[i] = vt2;
21540 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21541 vt = force_reg (V32QImode, vt);
21542 for (i = 0; i < 32; i++)
21543 vec[i] = i < 16 ? vt2 : const0_rtx;
21544 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21545 vt2 = force_reg (V32QImode, vt2);
21546 /* From mask create two adjusted masks, which contain the same
21547 bits as mask in the low 7 bits of each vector element.
21548 	     The first mask will have the most significant bit clear
21549 	     if it requests an element from the same 128-bit lane
21550 	     and the MSB set if it requests an element from the other 128-bit lane.
21551 	     The second mask will have the opposite value of the MSB,
21552 	     and additionally will have its 128-bit lanes swapped.
21553 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21554 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21555 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21556 	     stands for the other 12 bytes.  */
21557 	  /* The bit that tells whether an element comes from the same lane or from
21558 	     the other lane is bit 4, so shift it up by 3 to the MSB position.  */
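	  /* Worked example (illustrative only, looking just at the MSB that the
	     masking below keeps): a mask byte of 0x12 requests byte 18, which is
	     an element from the other 128-bit lane when filling the low lane; its
	     bit 4 is set, so after the shift the MSB of that byte is set.  A mask
	     byte of 0x07 requests a byte from the same lane; bit 4 is clear, so
	     the MSB stays clear.  */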
21559 t5 = gen_reg_rtx (V4DImode);
21560 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21561 GEN_INT (3)));
21562 /* Clear MSB bits from the mask just in case it had them set. */
21563 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21564 /* After this t1 will have MSB set for elements from other lane. */
21565 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21566 /* Clear bits other than MSB. */
21567 emit_insn (gen_andv32qi3 (t1, t1, vt));
21568 /* Or in the lower bits from mask into t3. */
21569 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21570 /* And invert MSB bits in t1, so MSB is set for elements from the same
21571 lane. */
21572 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21573 /* Swap 128-bit lanes in t3. */
21574 t6 = gen_reg_rtx (V4DImode);
21575 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21576 const2_rtx, GEN_INT (3),
21577 const0_rtx, const1_rtx));
21578 /* And or in the lower bits from mask into t1. */
21579 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21580 if (one_operand_shuffle)
21581 {
21582 /* Each of these shuffles will put 0s in places where
21583 element from the other 128-bit lane is needed, otherwise
21584 will shuffle in the requested value. */
21585 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21586 gen_lowpart (V32QImode, t6)));
21587 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21588 /* For t3 the 128-bit lanes are swapped again. */
21589 t7 = gen_reg_rtx (V4DImode);
21590 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21591 const2_rtx, GEN_INT (3),
21592 const0_rtx, const1_rtx));
21593 /* And oring both together leads to the result. */
21594 emit_insn (gen_iorv32qi3 (target, t1,
21595 gen_lowpart (V32QImode, t7)));
21596 if (target != operands[0])
21597 emit_move_insn (operands[0],
21598 gen_lowpart (GET_MODE (operands[0]), target));
21599 return;
21600 }
21601
21602 t4 = gen_reg_rtx (V32QImode);
21603 	  /* Similarly to the above one_operand_shuffle code,
21604 	     just repeated twice for each operand.  The merge_two:
21605 	     code will merge the two results together.  */
21606 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21607 gen_lowpart (V32QImode, t6)));
21608 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21609 gen_lowpart (V32QImode, t6)));
21610 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21611 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21612 t7 = gen_reg_rtx (V4DImode);
21613 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21614 const2_rtx, GEN_INT (3),
21615 const0_rtx, const1_rtx));
21616 t8 = gen_reg_rtx (V4DImode);
21617 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21618 const2_rtx, GEN_INT (3),
21619 const0_rtx, const1_rtx));
21620 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21621 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21622 t1 = t4;
21623 t2 = t3;
21624 goto merge_two;
21625
21626 default:
21627 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21628 break;
21629 }
21630 }
21631
21632 if (TARGET_XOP)
21633 {
21634 /* The XOP VPPERM insn supports three inputs. By ignoring the
21635 one_operand_shuffle special case, we avoid creating another
21636 set of constant vectors in memory. */
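      /* Background note (an assumption about the XOP ISA, not taken from the
	 code above): each VPPERM selector byte uses its low 5 bits to pick one
	 of the 32 bytes of the src1/src2 concatenation, which is why the mask
	 below is masked with 2*w-1 rather than w-1 and both operands can be
	 handled by the one insn.  */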
21637 one_operand_shuffle = false;
21638
21639 /* mask = mask & {2*w-1, ...} */
21640 vt = GEN_INT (2*w - 1);
21641 }
21642 else
21643 {
21644 /* mask = mask & {w-1, ...} */
21645 vt = GEN_INT (w - 1);
21646 }
21647
21648 for (i = 0; i < w; i++)
21649 vec[i] = vt;
21650 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21651 mask = expand_simple_binop (maskmode, AND, mask, vt,
21652 NULL_RTX, 0, OPTAB_DIRECT);
21653
21654 /* For non-QImode operations, convert the word permutation control
21655 into a byte permutation control. */
21656 if (mode != V16QImode)
21657 {
21658 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21659 GEN_INT (exact_log2 (e)),
21660 NULL_RTX, 0, OPTAB_DIRECT);
21661
21662 /* Convert mask to vector of chars. */
21663 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21664
21665 /* Replicate each of the input bytes into byte positions:
21666 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21667 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21668 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21669 for (i = 0; i < 16; ++i)
21670 vec[i] = GEN_INT (i/e * e);
21671 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21672 vt = validize_mem (force_const_mem (V16QImode, vt));
21673 if (TARGET_XOP)
21674 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21675 else
21676 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21677
21678 /* Convert it into the byte positions by doing
21679 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21680 for (i = 0; i < 16; ++i)
21681 vec[i] = GEN_INT (i % e);
21682 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21683 vt = validize_mem (force_const_mem (V16QImode, vt));
21684 emit_insn (gen_addv16qi3 (mask, mask, vt));
21685 }
21686
21687 /* The actual shuffle operations all operate on V16QImode. */
21688 op0 = gen_lowpart (V16QImode, op0);
21689 op1 = gen_lowpart (V16QImode, op1);
21690
21691 if (TARGET_XOP)
21692 {
21693 if (GET_MODE (target) != V16QImode)
21694 target = gen_reg_rtx (V16QImode);
21695 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21696 if (target != operands[0])
21697 emit_move_insn (operands[0],
21698 gen_lowpart (GET_MODE (operands[0]), target));
21699 }
21700 else if (one_operand_shuffle)
21701 {
21702 if (GET_MODE (target) != V16QImode)
21703 target = gen_reg_rtx (V16QImode);
21704 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21705 if (target != operands[0])
21706 emit_move_insn (operands[0],
21707 gen_lowpart (GET_MODE (operands[0]), target));
21708 }
21709 else
21710 {
21711 rtx xops[6];
21712 bool ok;
21713
21714 /* Shuffle the two input vectors independently. */
21715 t1 = gen_reg_rtx (V16QImode);
21716 t2 = gen_reg_rtx (V16QImode);
21717 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21718 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21719
21720 merge_two:
21721 /* Then merge them together. The key is whether any given control
21722 element contained a bit set that indicates the second word. */
21723 mask = operands[3];
21724 vt = GEN_INT (w);
21725 if (maskmode == V2DImode && !TARGET_SSE4_1)
21726 {
21727 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21728 more shuffle to convert the V2DI input mask into a V4SI
21729 	         input mask.  At that point the masking that expand_int_vcond
21730 	         performs will work as desired.  */
21731 rtx t3 = gen_reg_rtx (V4SImode);
21732 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21733 const0_rtx, const0_rtx,
21734 const2_rtx, const2_rtx));
21735 mask = t3;
21736 maskmode = V4SImode;
21737 e = w = 4;
21738 }
21739
21740 for (i = 0; i < w; i++)
21741 vec[i] = vt;
21742 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21743 vt = force_reg (maskmode, vt);
21744 mask = expand_simple_binop (maskmode, AND, mask, vt,
21745 NULL_RTX, 0, OPTAB_DIRECT);
21746
21747 if (GET_MODE (target) != mode)
21748 target = gen_reg_rtx (mode);
21749 xops[0] = target;
21750 xops[1] = gen_lowpart (mode, t2);
21751 xops[2] = gen_lowpart (mode, t1);
21752 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21753 xops[4] = mask;
21754 xops[5] = vt;
21755 ok = ix86_expand_int_vcond (xops);
21756 gcc_assert (ok);
21757 if (target != operands[0])
21758 emit_move_insn (operands[0],
21759 gen_lowpart (GET_MODE (operands[0]), target));
21760 }
21761 }
21762
21763 	/* Unpack SRC into the next wider integer vector type DEST.  UNSIGNED_P is
21764 	   true if we should do zero extension, else sign extension.  HIGH_P is
21765 	   true if we want the N/2 high elements, else the low elements.  */
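/* For example (illustrative only), unpacking a V8HImode SRC { a b c d e f g h }
   with HIGH_P false yields a V4SImode DEST { a b c d }, each element zero- or
   sign-extended according to UNSIGNED_P; with HIGH_P true it yields
   { e f g h } instead.  */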
21766
21767 void
21768 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21769 {
21770 enum machine_mode imode = GET_MODE (src);
21771 rtx tmp;
21772
21773 if (TARGET_SSE4_1)
21774 {
21775 rtx (*unpack)(rtx, rtx);
21776 rtx (*extract)(rtx, rtx) = NULL;
21777 enum machine_mode halfmode = BLKmode;
21778
21779 switch (imode)
21780 {
21781 case V32QImode:
21782 if (unsigned_p)
21783 unpack = gen_avx2_zero_extendv16qiv16hi2;
21784 else
21785 unpack = gen_avx2_sign_extendv16qiv16hi2;
21786 halfmode = V16QImode;
21787 extract
21788 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21789 break;
21790 case V32HImode:
21791 if (unsigned_p)
21792 unpack = gen_avx512f_zero_extendv16hiv16si2;
21793 else
21794 unpack = gen_avx512f_sign_extendv16hiv16si2;
21795 halfmode = V16HImode;
21796 extract
21797 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21798 break;
21799 case V16HImode:
21800 if (unsigned_p)
21801 unpack = gen_avx2_zero_extendv8hiv8si2;
21802 else
21803 unpack = gen_avx2_sign_extendv8hiv8si2;
21804 halfmode = V8HImode;
21805 extract
21806 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21807 break;
21808 case V16SImode:
21809 if (unsigned_p)
21810 unpack = gen_avx512f_zero_extendv8siv8di2;
21811 else
21812 unpack = gen_avx512f_sign_extendv8siv8di2;
21813 halfmode = V8SImode;
21814 extract
21815 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21816 break;
21817 case V8SImode:
21818 if (unsigned_p)
21819 unpack = gen_avx2_zero_extendv4siv4di2;
21820 else
21821 unpack = gen_avx2_sign_extendv4siv4di2;
21822 halfmode = V4SImode;
21823 extract
21824 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
21825 break;
21826 case V16QImode:
21827 if (unsigned_p)
21828 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
21829 else
21830 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
21831 break;
21832 case V8HImode:
21833 if (unsigned_p)
21834 unpack = gen_sse4_1_zero_extendv4hiv4si2;
21835 else
21836 unpack = gen_sse4_1_sign_extendv4hiv4si2;
21837 break;
21838 case V4SImode:
21839 if (unsigned_p)
21840 unpack = gen_sse4_1_zero_extendv2siv2di2;
21841 else
21842 unpack = gen_sse4_1_sign_extendv2siv2di2;
21843 break;
21844 default:
21845 gcc_unreachable ();
21846 }
21847
21848 if (GET_MODE_SIZE (imode) >= 32)
21849 {
21850 tmp = gen_reg_rtx (halfmode);
21851 emit_insn (extract (tmp, src));
21852 }
21853 else if (high_p)
21854 {
21855 /* Shift higher 8 bytes to lower 8 bytes. */
21856 tmp = gen_reg_rtx (V1TImode);
21857 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
21858 GEN_INT (64)));
21859 tmp = gen_lowpart (imode, tmp);
21860 }
21861 else
21862 tmp = src;
21863
21864 emit_insn (unpack (dest, tmp));
21865 }
21866 else
21867 {
21868 rtx (*unpack)(rtx, rtx, rtx);
21869
21870 switch (imode)
21871 {
21872 case V16QImode:
21873 if (high_p)
21874 unpack = gen_vec_interleave_highv16qi;
21875 else
21876 unpack = gen_vec_interleave_lowv16qi;
21877 break;
21878 case V8HImode:
21879 if (high_p)
21880 unpack = gen_vec_interleave_highv8hi;
21881 else
21882 unpack = gen_vec_interleave_lowv8hi;
21883 break;
21884 case V4SImode:
21885 if (high_p)
21886 unpack = gen_vec_interleave_highv4si;
21887 else
21888 unpack = gen_vec_interleave_lowv4si;
21889 break;
21890 default:
21891 gcc_unreachable ();
21892 }
21893
21894 if (unsigned_p)
21895 tmp = force_reg (imode, CONST0_RTX (imode));
21896 else
21897 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
21898 src, pc_rtx, pc_rtx);
21899
21900 rtx tmp2 = gen_reg_rtx (imode);
21901 emit_insn (unpack (tmp2, src, tmp));
21902 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
21903 }
21904 }
21905
21906 	/* Expand conditional increment or decrement using adc/sbb instructions.
21907 The default case using setcc followed by the conditional move can be
21908 done by generic code. */
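/* For instance (an illustrative sketch, not the only form handled), unsigned
   "x += (a < b)" can be expanded as "cmp a, b" followed by "adc x, 0", and
   "x -= (a < b)" as "cmp a, b" followed by "sbb x, 0", using the carry flag
   instead of a setcc plus conditional move.  */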
21909 bool
21910 ix86_expand_int_addcc (rtx operands[])
21911 {
21912 enum rtx_code code = GET_CODE (operands[1]);
21913 rtx flags;
21914 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21915 rtx compare_op;
21916 rtx val = const0_rtx;
21917 bool fpcmp = false;
21918 enum machine_mode mode;
21919 rtx op0 = XEXP (operands[1], 0);
21920 rtx op1 = XEXP (operands[1], 1);
21921
21922 if (operands[3] != const1_rtx
21923 && operands[3] != constm1_rtx)
21924 return false;
21925 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
21926 return false;
21927 code = GET_CODE (compare_op);
21928
21929 flags = XEXP (compare_op, 0);
21930
21931 if (GET_MODE (flags) == CCFPmode
21932 || GET_MODE (flags) == CCFPUmode)
21933 {
21934 fpcmp = true;
21935 code = ix86_fp_compare_code_to_integer (code);
21936 }
21937
21938 if (code != LTU)
21939 {
21940 val = constm1_rtx;
21941 if (fpcmp)
21942 PUT_CODE (compare_op,
21943 reverse_condition_maybe_unordered
21944 (GET_CODE (compare_op)));
21945 else
21946 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
21947 }
21948
21949 mode = GET_MODE (operands[0]);
21950
21951 /* Construct either adc or sbb insn. */
21952 if ((code == LTU) == (operands[3] == constm1_rtx))
21953 {
21954 switch (mode)
21955 {
21956 case QImode:
21957 insn = gen_subqi3_carry;
21958 break;
21959 case HImode:
21960 insn = gen_subhi3_carry;
21961 break;
21962 case SImode:
21963 insn = gen_subsi3_carry;
21964 break;
21965 case DImode:
21966 insn = gen_subdi3_carry;
21967 break;
21968 default:
21969 gcc_unreachable ();
21970 }
21971 }
21972 else
21973 {
21974 switch (mode)
21975 {
21976 case QImode:
21977 insn = gen_addqi3_carry;
21978 break;
21979 case HImode:
21980 insn = gen_addhi3_carry;
21981 break;
21982 case SImode:
21983 insn = gen_addsi3_carry;
21984 break;
21985 case DImode:
21986 insn = gen_adddi3_carry;
21987 break;
21988 default:
21989 gcc_unreachable ();
21990 }
21991 }
21992 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
21993
21994 return true;
21995 }
21996
21997
21998 	/* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
21999 	   but works for floating point parameters and non-offsettable memories.
22000 	   For pushes, it returns just stack offsets; the values will be saved
22001 	   in the right order.  At most four parts are generated.  */
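/* For instance (illustrative only), on a 32-bit target a DFmode value is split
   into two SImode parts and an XFmode value into three, while on a 64-bit
   target an XFmode or TFmode value is split into a DImode part plus an SImode
   or DImode upper part.  */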
22002
22003 static int
22004 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
22005 {
22006 int size;
22007
22008 if (!TARGET_64BIT)
22009 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22010 else
22011 size = (GET_MODE_SIZE (mode) + 4) / 8;
22012
22013 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22014 gcc_assert (size >= 2 && size <= 4);
22015
22016 /* Optimize constant pool reference to immediates. This is used by fp
22017 moves, that force all constants to memory to allow combining. */
22018 if (MEM_P (operand) && MEM_READONLY_P (operand))
22019 {
22020 rtx tmp = maybe_get_pool_constant (operand);
22021 if (tmp)
22022 operand = tmp;
22023 }
22024
22025 if (MEM_P (operand) && !offsettable_memref_p (operand))
22026 {
22027 	      /* The only non-offsettable memories we handle are pushes.  */
22028 int ok = push_operand (operand, VOIDmode);
22029
22030 gcc_assert (ok);
22031
22032 operand = copy_rtx (operand);
22033 PUT_MODE (operand, word_mode);
22034 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22035 return size;
22036 }
22037
22038 if (GET_CODE (operand) == CONST_VECTOR)
22039 {
22040 enum machine_mode imode = int_mode_for_mode (mode);
22041 /* Caution: if we looked through a constant pool memory above,
22042 the operand may actually have a different mode now. That's
22043 ok, since we want to pun this all the way back to an integer. */
22044 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22045 gcc_assert (operand != NULL);
22046 mode = imode;
22047 }
22048
22049 if (!TARGET_64BIT)
22050 {
22051 if (mode == DImode)
22052 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22053 else
22054 {
22055 int i;
22056
22057 if (REG_P (operand))
22058 {
22059 gcc_assert (reload_completed);
22060 for (i = 0; i < size; i++)
22061 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22062 }
22063 else if (offsettable_memref_p (operand))
22064 {
22065 operand = adjust_address (operand, SImode, 0);
22066 parts[0] = operand;
22067 for (i = 1; i < size; i++)
22068 parts[i] = adjust_address (operand, SImode, 4 * i);
22069 }
22070 else if (GET_CODE (operand) == CONST_DOUBLE)
22071 {
22072 REAL_VALUE_TYPE r;
22073 long l[4];
22074
22075 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22076 switch (mode)
22077 {
22078 case TFmode:
22079 real_to_target (l, &r, mode);
22080 parts[3] = gen_int_mode (l[3], SImode);
22081 parts[2] = gen_int_mode (l[2], SImode);
22082 break;
22083 case XFmode:
22084 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22085 long double may not be 80-bit. */
22086 real_to_target (l, &r, mode);
22087 parts[2] = gen_int_mode (l[2], SImode);
22088 break;
22089 case DFmode:
22090 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22091 break;
22092 default:
22093 gcc_unreachable ();
22094 }
22095 parts[1] = gen_int_mode (l[1], SImode);
22096 parts[0] = gen_int_mode (l[0], SImode);
22097 }
22098 else
22099 gcc_unreachable ();
22100 }
22101 }
22102 else
22103 {
22104 if (mode == TImode)
22105 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22106 if (mode == XFmode || mode == TFmode)
22107 {
22108 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22109 if (REG_P (operand))
22110 {
22111 gcc_assert (reload_completed);
22112 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22113 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22114 }
22115 else if (offsettable_memref_p (operand))
22116 {
22117 operand = adjust_address (operand, DImode, 0);
22118 parts[0] = operand;
22119 parts[1] = adjust_address (operand, upper_mode, 8);
22120 }
22121 else if (GET_CODE (operand) == CONST_DOUBLE)
22122 {
22123 REAL_VALUE_TYPE r;
22124 long l[4];
22125
22126 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22127 real_to_target (l, &r, mode);
22128
22129 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22130 if (HOST_BITS_PER_WIDE_INT >= 64)
22131 parts[0]
22132 = gen_int_mode
22133 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22134 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22135 DImode);
22136 else
22137 parts[0] = immed_double_const (l[0], l[1], DImode);
22138
22139 if (upper_mode == SImode)
22140 parts[1] = gen_int_mode (l[2], SImode);
22141 else if (HOST_BITS_PER_WIDE_INT >= 64)
22142 parts[1]
22143 = gen_int_mode
22144 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22145 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22146 DImode);
22147 else
22148 parts[1] = immed_double_const (l[2], l[3], DImode);
22149 }
22150 else
22151 gcc_unreachable ();
22152 }
22153 }
22154
22155 return size;
22156 }
22157
22158 	/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22159 	   Operands 2-4 contain the input values in the correct order;
22160 	   operands 5-7 contain the output values.  */
22162
22163 void
22164 ix86_split_long_move (rtx operands[])
22165 {
22166 rtx part[2][4];
22167 int nparts, i, j;
22168 int push = 0;
22169 int collisions = 0;
22170 enum machine_mode mode = GET_MODE (operands[0]);
22171 bool collisionparts[4];
22172
22173 	  /* The DFmode expanders may ask us to move a double.
22174 	     For a 64-bit target this is a single move.  By hiding the fact
22175 	     here we simplify the i386.md splitters.  */
22176 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22177 {
22178 /* Optimize constant pool reference to immediates. This is used by
22179 fp moves, that force all constants to memory to allow combining. */
22180
22181 if (MEM_P (operands[1])
22182 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22183 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22184 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22185 if (push_operand (operands[0], VOIDmode))
22186 {
22187 operands[0] = copy_rtx (operands[0]);
22188 PUT_MODE (operands[0], word_mode);
22189 }
22190 else
22191 operands[0] = gen_lowpart (DImode, operands[0]);
22192 operands[1] = gen_lowpart (DImode, operands[1]);
22193 emit_move_insn (operands[0], operands[1]);
22194 return;
22195 }
22196
22197 /* The only non-offsettable memory we handle is push. */
22198 if (push_operand (operands[0], VOIDmode))
22199 push = 1;
22200 else
22201 gcc_assert (!MEM_P (operands[0])
22202 || offsettable_memref_p (operands[0]));
22203
22204 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22205 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22206
22207 /* When emitting push, take care for source operands on the stack. */
22208 if (push && MEM_P (operands[1])
22209 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22210 {
22211 rtx src_base = XEXP (part[1][nparts - 1], 0);
22212
22213 /* Compensate for the stack decrement by 4. */
22214 if (!TARGET_64BIT && nparts == 3
22215 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22216 src_base = plus_constant (Pmode, src_base, 4);
22217
22218 /* src_base refers to the stack pointer and is
22219 automatically decreased by emitted push. */
22220 for (i = 0; i < nparts; i++)
22221 part[1][i] = change_address (part[1][i],
22222 GET_MODE (part[1][i]), src_base);
22223 }
22224
22225 	  /* We need to do the copy in the right order in case an address register
22226 	     of the source overlaps the destination.  */
22227 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22228 {
22229 rtx tmp;
22230
22231 for (i = 0; i < nparts; i++)
22232 {
22233 collisionparts[i]
22234 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22235 if (collisionparts[i])
22236 collisions++;
22237 }
22238
22239 /* Collision in the middle part can be handled by reordering. */
22240 if (collisions == 1 && nparts == 3 && collisionparts [1])
22241 {
22242 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22243 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22244 }
22245 else if (collisions == 1
22246 && nparts == 4
22247 && (collisionparts [1] || collisionparts [2]))
22248 {
22249 if (collisionparts [1])
22250 {
22251 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22252 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22253 }
22254 else
22255 {
22256 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22257 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22258 }
22259 }
22260
22261 /* If there are more collisions, we can't handle it by reordering.
22262 Do an lea to the last part and use only one colliding move. */
22263 else if (collisions > 1)
22264 {
22265 rtx base;
22266
22267 collisions = 1;
22268
22269 base = part[0][nparts - 1];
22270
22271 /* Handle the case when the last part isn't valid for lea.
22272 Happens in 64-bit mode storing the 12-byte XFmode. */
22273 if (GET_MODE (base) != Pmode)
22274 base = gen_rtx_REG (Pmode, REGNO (base));
22275
22276 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22277 part[1][0] = replace_equiv_address (part[1][0], base);
22278 for (i = 1; i < nparts; i++)
22279 {
22280 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22281 part[1][i] = replace_equiv_address (part[1][i], tmp);
22282 }
22283 }
22284 }
22285
22286 if (push)
22287 {
22288 if (!TARGET_64BIT)
22289 {
22290 if (nparts == 3)
22291 {
22292 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22293 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22294 stack_pointer_rtx, GEN_INT (-4)));
22295 emit_move_insn (part[0][2], part[1][2]);
22296 }
22297 else if (nparts == 4)
22298 {
22299 emit_move_insn (part[0][3], part[1][3]);
22300 emit_move_insn (part[0][2], part[1][2]);
22301 }
22302 }
22303 else
22304 {
22305 	      /* In 64-bit mode we don't have a 32-bit push available.  In case this
22306 	         is a register, it is OK - we will just use the larger counterpart.
22307 	         We also retype the memory - this comes from an attempt to avoid a
22308 	         REX prefix when moving the second half of a TFmode value.  */
22309 if (GET_MODE (part[1][1]) == SImode)
22310 {
22311 switch (GET_CODE (part[1][1]))
22312 {
22313 case MEM:
22314 part[1][1] = adjust_address (part[1][1], DImode, 0);
22315 break;
22316
22317 case REG:
22318 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22319 break;
22320
22321 default:
22322 gcc_unreachable ();
22323 }
22324
22325 if (GET_MODE (part[1][0]) == SImode)
22326 part[1][0] = part[1][1];
22327 }
22328 }
22329 emit_move_insn (part[0][1], part[1][1]);
22330 emit_move_insn (part[0][0], part[1][0]);
22331 return;
22332 }
22333
22334 /* Choose correct order to not overwrite the source before it is copied. */
22335 if ((REG_P (part[0][0])
22336 && REG_P (part[1][1])
22337 && (REGNO (part[0][0]) == REGNO (part[1][1])
22338 || (nparts == 3
22339 && REGNO (part[0][0]) == REGNO (part[1][2]))
22340 || (nparts == 4
22341 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22342 || (collisions > 0
22343 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22344 {
22345 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22346 {
22347 operands[2 + i] = part[0][j];
22348 operands[6 + i] = part[1][j];
22349 }
22350 }
22351 else
22352 {
22353 for (i = 0; i < nparts; i++)
22354 {
22355 operands[2 + i] = part[0][i];
22356 operands[6 + i] = part[1][i];
22357 }
22358 }
22359
22360 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22361 if (optimize_insn_for_size_p ())
22362 {
22363 for (j = 0; j < nparts - 1; j++)
22364 if (CONST_INT_P (operands[6 + j])
22365 && operands[6 + j] != const0_rtx
22366 && REG_P (operands[2 + j]))
22367 for (i = j; i < nparts - 1; i++)
22368 if (CONST_INT_P (operands[7 + i])
22369 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22370 operands[7 + i] = operands[2 + j];
22371 }
22372
22373 for (i = 0; i < nparts; i++)
22374 emit_move_insn (operands[2 + i], operands[6 + i]);
22375
22376 return;
22377 }
22378
22379 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22380 left shift by a constant, either using a single shift or
22381 a sequence of add instructions. */
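/* For example (illustrative only), a shift by 1 is always emitted as a single
   "add reg, reg"; larger counts use a chain of such adds only when the cost
   tables say the adds are no more expensive than one shift by a constant and
   we are not optimizing for size.  */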
22382
22383 static void
22384 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22385 {
22386 rtx (*insn)(rtx, rtx, rtx);
22387
22388 if (count == 1
22389 || (count * ix86_cost->add <= ix86_cost->shift_const
22390 && !optimize_insn_for_size_p ()))
22391 {
22392 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22393 while (count-- > 0)
22394 emit_insn (insn (operand, operand, operand));
22395 }
22396 else
22397 {
22398 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22399 emit_insn (insn (operand, operand, GEN_INT (count)));
22400 }
22401 }
22402
22403 void
22404 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22405 {
22406 rtx (*gen_ashl3)(rtx, rtx, rtx);
22407 rtx (*gen_shld)(rtx, rtx, rtx);
22408 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22409
22410 rtx low[2], high[2];
22411 int count;
22412
22413 if (CONST_INT_P (operands[2]))
22414 {
22415 split_double_mode (mode, operands, 2, low, high);
22416 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22417
22418 if (count >= half_width)
22419 {
22420 emit_move_insn (high[0], low[1]);
22421 emit_move_insn (low[0], const0_rtx);
22422
22423 if (count > half_width)
22424 ix86_expand_ashl_const (high[0], count - half_width, mode);
22425 }
22426 else
22427 {
22428 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22429
22430 if (!rtx_equal_p (operands[0], operands[1]))
22431 emit_move_insn (operands[0], operands[1]);
22432
22433 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22434 ix86_expand_ashl_const (low[0], count, mode);
22435 }
22436 return;
22437 }
22438
22439 split_double_mode (mode, operands, 1, low, high);
22440
22441 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22442
22443 if (operands[1] == const1_rtx)
22444 {
22445 	      /* Assuming we've chosen QImode capable registers, then 1 << N
22446 	         can be done with two 32/64-bit shifts, no branches, no cmoves.  */
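      /* A rough sketch of what this path emits for DImode 1 << count on a
	 32-bit target (illustrative only):
	     low = 0;  high = 0;
	     test count & 32;
	     low  = ((count & 32) == 0);    sete on the low byte
	     high = ((count & 32) != 0);    setne on the low byte
	     low <<= count;  high <<= count;   the shifts use count mod 32  */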
22447 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22448 {
22449 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22450
22451 ix86_expand_clear (low[0]);
22452 ix86_expand_clear (high[0]);
22453 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22454
22455 d = gen_lowpart (QImode, low[0]);
22456 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22457 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22458 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22459
22460 d = gen_lowpart (QImode, high[0]);
22461 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22462 s = gen_rtx_NE (QImode, flags, const0_rtx);
22463 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22464 }
22465
22466 /* Otherwise, we can get the same results by manually performing
22467 a bit extract operation on bit 5/6, and then performing the two
22468 shifts. The two methods of getting 0/1 into low/high are exactly
22469 the same size. Avoiding the shift in the bit extract case helps
22470 pentium4 a bit; no one else seems to care much either way. */
22471 else
22472 {
22473 enum machine_mode half_mode;
22474 rtx (*gen_lshr3)(rtx, rtx, rtx);
22475 rtx (*gen_and3)(rtx, rtx, rtx);
22476 rtx (*gen_xor3)(rtx, rtx, rtx);
22477 HOST_WIDE_INT bits;
22478 rtx x;
22479
22480 if (mode == DImode)
22481 {
22482 half_mode = SImode;
22483 gen_lshr3 = gen_lshrsi3;
22484 gen_and3 = gen_andsi3;
22485 gen_xor3 = gen_xorsi3;
22486 bits = 5;
22487 }
22488 else
22489 {
22490 half_mode = DImode;
22491 gen_lshr3 = gen_lshrdi3;
22492 gen_and3 = gen_anddi3;
22493 gen_xor3 = gen_xordi3;
22494 bits = 6;
22495 }
22496
22497 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22498 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22499 else
22500 x = gen_lowpart (half_mode, operands[2]);
22501 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22502
22503 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22504 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22505 emit_move_insn (low[0], high[0]);
22506 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22507 }
22508
22509 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22510 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22511 return;
22512 }
22513
22514 if (operands[1] == constm1_rtx)
22515 {
22516 /* For -1 << N, we can avoid the shld instruction, because we
22517 know that we're shifting 0...31/63 ones into a -1. */
22518 emit_move_insn (low[0], constm1_rtx);
22519 if (optimize_insn_for_size_p ())
22520 emit_move_insn (high[0], low[0]);
22521 else
22522 emit_move_insn (high[0], constm1_rtx);
22523 }
22524 else
22525 {
22526 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22527
22528 if (!rtx_equal_p (operands[0], operands[1]))
22529 emit_move_insn (operands[0], operands[1]);
22530
22531 split_double_mode (mode, operands, 1, low, high);
22532 emit_insn (gen_shld (high[0], low[0], operands[2]));
22533 }
22534
22535 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22536
22537 if (TARGET_CMOVE && scratch)
22538 {
22539 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22540 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22541
22542 ix86_expand_clear (scratch);
22543 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22544 }
22545 else
22546 {
22547 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22548 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22549
22550 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22551 }
22552 }
22553
22554 void
22555 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22556 {
22557 rtx (*gen_ashr3)(rtx, rtx, rtx)
22558 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22559 rtx (*gen_shrd)(rtx, rtx, rtx);
22560 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22561
22562 rtx low[2], high[2];
22563 int count;
22564
22565 if (CONST_INT_P (operands[2]))
22566 {
22567 split_double_mode (mode, operands, 2, low, high);
22568 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22569
22570 if (count == GET_MODE_BITSIZE (mode) - 1)
22571 {
22572 emit_move_insn (high[0], high[1]);
22573 emit_insn (gen_ashr3 (high[0], high[0],
22574 GEN_INT (half_width - 1)));
22575 emit_move_insn (low[0], high[0]);
22576
22577 }
22578 else if (count >= half_width)
22579 {
22580 emit_move_insn (low[0], high[1]);
22581 emit_move_insn (high[0], low[0]);
22582 emit_insn (gen_ashr3 (high[0], high[0],
22583 GEN_INT (half_width - 1)));
22584
22585 if (count > half_width)
22586 emit_insn (gen_ashr3 (low[0], low[0],
22587 GEN_INT (count - half_width)));
22588 }
22589 else
22590 {
22591 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22592
22593 if (!rtx_equal_p (operands[0], operands[1]))
22594 emit_move_insn (operands[0], operands[1]);
22595
22596 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22597 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22598 }
22599 }
22600 else
22601 {
22602 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22603
22604 if (!rtx_equal_p (operands[0], operands[1]))
22605 emit_move_insn (operands[0], operands[1]);
22606
22607 split_double_mode (mode, operands, 1, low, high);
22608
22609 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22610 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22611
22612 if (TARGET_CMOVE && scratch)
22613 {
22614 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22615 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22616
22617 emit_move_insn (scratch, high[0]);
22618 emit_insn (gen_ashr3 (scratch, scratch,
22619 GEN_INT (half_width - 1)));
22620 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22621 scratch));
22622 }
22623 else
22624 {
22625 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22626 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22627
22628 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22629 }
22630 }
22631 }
22632
22633 void
22634 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22635 {
22636 rtx (*gen_lshr3)(rtx, rtx, rtx)
22637 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22638 rtx (*gen_shrd)(rtx, rtx, rtx);
22639 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22640
22641 rtx low[2], high[2];
22642 int count;
22643
22644 if (CONST_INT_P (operands[2]))
22645 {
22646 split_double_mode (mode, operands, 2, low, high);
22647 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22648
22649 if (count >= half_width)
22650 {
22651 emit_move_insn (low[0], high[1]);
22652 ix86_expand_clear (high[0]);
22653
22654 if (count > half_width)
22655 emit_insn (gen_lshr3 (low[0], low[0],
22656 GEN_INT (count - half_width)));
22657 }
22658 else
22659 {
22660 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22661
22662 if (!rtx_equal_p (operands[0], operands[1]))
22663 emit_move_insn (operands[0], operands[1]);
22664
22665 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22666 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22667 }
22668 }
22669 else
22670 {
22671 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22672
22673 if (!rtx_equal_p (operands[0], operands[1]))
22674 emit_move_insn (operands[0], operands[1]);
22675
22676 split_double_mode (mode, operands, 1, low, high);
22677
22678 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22679 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22680
22681 if (TARGET_CMOVE && scratch)
22682 {
22683 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22684 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22685
22686 ix86_expand_clear (scratch);
22687 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22688 scratch));
22689 }
22690 else
22691 {
22692 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22693 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22694
22695 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22696 }
22697 }
22698 }
22699
22700 /* Predict just emitted jump instruction to be taken with probability PROB. */
22701 static void
22702 predict_jump (int prob)
22703 {
22704 rtx insn = get_last_insn ();
22705 gcc_assert (JUMP_P (insn));
22706 add_int_reg_note (insn, REG_BR_PROB, prob);
22707 }
22708
22709 	/* Helper function for the string operations below.  Test VARIABLE whether
22710 	   it is aligned to VALUE bytes.  If it is, jump to the label.  */
22711 static rtx_code_label *
22712 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22713 {
22714 rtx_code_label *label = gen_label_rtx ();
22715 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22716 if (GET_MODE (variable) == DImode)
22717 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22718 else
22719 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22720 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22721 1, label);
22722 if (epilogue)
22723 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22724 else
22725 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22726 return label;
22727 }
22728
22729 	/* Decrease COUNTREG by VALUE.  */
22730 static void
22731 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22732 {
22733 rtx (*gen_add)(rtx, rtx, rtx)
22734 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22735
22736 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22737 }
22738
22739 	/* Zero extend EXP, which may be in SImode, to a Pmode register.  */
22740 rtx
22741 ix86_zero_extend_to_Pmode (rtx exp)
22742 {
22743 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22744 }
22745
22746 /* Divide COUNTREG by SCALE. */
22747 static rtx
22748 scale_counter (rtx countreg, int scale)
22749 {
22750 rtx sc;
22751
22752 if (scale == 1)
22753 return countreg;
22754 if (CONST_INT_P (countreg))
22755 return GEN_INT (INTVAL (countreg) / scale);
22756 gcc_assert (REG_P (countreg));
22757
22758 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22759 GEN_INT (exact_log2 (scale)),
22760 NULL, 1, OPTAB_DIRECT);
22761 return sc;
22762 }
22763
22764 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22765 DImode for constant loop counts. */
22766
22767 static enum machine_mode
22768 counter_mode (rtx count_exp)
22769 {
22770 if (GET_MODE (count_exp) != VOIDmode)
22771 return GET_MODE (count_exp);
22772 if (!CONST_INT_P (count_exp))
22773 return Pmode;
22774 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22775 return DImode;
22776 return SImode;
22777 }
22778
22779 /* Copy the address to a Pmode register. This is used for x32 to
22780 truncate DImode TLS address to a SImode register. */
22781
22782 static rtx
22783 ix86_copy_addr_to_reg (rtx addr)
22784 {
22785 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
22786 return copy_addr_to_reg (addr);
22787 else
22788 {
22789 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22790 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22791 }
22792 }
22793
22794 	/* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by
22795 	   SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times; the overall size
22796 	   is COUNT, specified in bytes.  When ISSETMEM is TRUE, output the equivalent
22797 	   loop to set memory to VALUE (supposed to be in MODE).
22798 	
22799 	   The size is rounded down to a whole number of the chunk size moved at once.
22800 	   SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info.  */
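/* Schematically (an illustrative sketch, not literal output), the code emitted
   for the !ISSETMEM case looks like:

       size = COUNT rounded down to a multiple of MODE_SIZE * UNROLL;
       iter = 0;
     top:
       copy UNROLL chunks of MODE from SRCPTR + iter to DESTPTR + iter;
       iter += MODE_SIZE * UNROLL;
       if (iter < size) goto top;
       DESTPTR += iter;  SRCPTR += iter;

   with stores of VALUE replacing the copies when ISSETMEM is true.  */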
22801
22802
22803 static void
22804 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22805 rtx destptr, rtx srcptr, rtx value,
22806 rtx count, enum machine_mode mode, int unroll,
22807 int expected_size, bool issetmem)
22808 {
22809 rtx_code_label *out_label, *top_label;
22810 rtx iter, tmp;
22811 enum machine_mode iter_mode = counter_mode (count);
22812 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22813 rtx piece_size = GEN_INT (piece_size_n);
22814 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22815 rtx size;
22816 int i;
22817
22818 top_label = gen_label_rtx ();
22819 out_label = gen_label_rtx ();
22820 iter = gen_reg_rtx (iter_mode);
22821
22822 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
22823 NULL, 1, OPTAB_DIRECT);
22824 /* Those two should combine. */
22825 if (piece_size == const1_rtx)
22826 {
22827 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
22828 true, out_label);
22829 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22830 }
22831 emit_move_insn (iter, const0_rtx);
22832
22833 emit_label (top_label);
22834
22835 tmp = convert_modes (Pmode, iter_mode, iter, true);
22836
22837 	  /* This assert could be relaxed - in that case we'll need to compute
22838 	     the smallest power of two containing PIECE_SIZE_N and pass it to
22839 	     offset_address.  */
22840 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
22841 destmem = offset_address (destmem, tmp, piece_size_n);
22842 destmem = adjust_address (destmem, mode, 0);
22843
22844 if (!issetmem)
22845 {
22846 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
22847 srcmem = adjust_address (srcmem, mode, 0);
22848
22849 	      /* When unrolling for chips that reorder memory reads and writes,
22850 	         we can save registers by using a single temporary.
22851 	         Also, using 4 temporaries is overkill in 32-bit mode.  */
22852 if (!TARGET_64BIT && 0)
22853 {
22854 for (i = 0; i < unroll; i++)
22855 {
22856 if (i)
22857 {
22858 destmem =
22859 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22860 srcmem =
22861 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22862 }
22863 emit_move_insn (destmem, srcmem);
22864 }
22865 }
22866 else
22867 {
22868 rtx tmpreg[4];
22869 gcc_assert (unroll <= 4);
22870 for (i = 0; i < unroll; i++)
22871 {
22872 tmpreg[i] = gen_reg_rtx (mode);
22873 if (i)
22874 {
22875 srcmem =
22876 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22877 }
22878 emit_move_insn (tmpreg[i], srcmem);
22879 }
22880 for (i = 0; i < unroll; i++)
22881 {
22882 if (i)
22883 {
22884 destmem =
22885 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22886 }
22887 emit_move_insn (destmem, tmpreg[i]);
22888 }
22889 }
22890 }
22891 else
22892 for (i = 0; i < unroll; i++)
22893 {
22894 if (i)
22895 destmem =
22896 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22897 emit_move_insn (destmem, value);
22898 }
22899
22900 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
22901 true, OPTAB_LIB_WIDEN);
22902 if (tmp != iter)
22903 emit_move_insn (iter, tmp);
22904
22905 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
22906 true, top_label);
22907 if (expected_size != -1)
22908 {
22909 expected_size /= GET_MODE_SIZE (mode) * unroll;
22910 if (expected_size == 0)
22911 predict_jump (0);
22912 else if (expected_size > REG_BR_PROB_BASE)
22913 predict_jump (REG_BR_PROB_BASE - 1);
22914 else
22915 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
22916 }
22917 else
22918 predict_jump (REG_BR_PROB_BASE * 80 / 100);
22919 iter = ix86_zero_extend_to_Pmode (iter);
22920 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
22921 true, OPTAB_LIB_WIDEN);
22922 if (tmp != destptr)
22923 emit_move_insn (destptr, tmp);
22924 if (!issetmem)
22925 {
22926 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
22927 true, OPTAB_LIB_WIDEN);
22928 if (tmp != srcptr)
22929 emit_move_insn (srcptr, tmp);
22930 }
22931 emit_label (out_label);
22932 }
22933
22934 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
22935 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22936 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22937 	   For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
22938 	   ORIG_VALUE is the original value passed to memset to fill the memory with.
22939 	   Other arguments have the same meaning as for the previous function.  */
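/* For example (illustrative only), when MODE is SImode the count register is
   loaded with COUNT / 4 and DESTEXP becomes destptr + (countreg << 2), which
   is roughly the value the destination pointer holds after the "rep" insn.  */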
22940
22941 static void
22942 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
22943 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
22944 rtx count,
22945 enum machine_mode mode, bool issetmem)
22946 {
22947 rtx destexp;
22948 rtx srcexp;
22949 rtx countreg;
22950 HOST_WIDE_INT rounded_count;
22951
22952 /* If possible, it is shorter to use rep movs.
22953 TODO: Maybe it is better to move this logic to decide_alg. */
22954 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
22955 && (!issetmem || orig_value == const0_rtx))
22956 mode = SImode;
22957
22958 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
22959 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
22960
22961 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
22962 GET_MODE_SIZE (mode)));
22963 if (mode != QImode)
22964 {
22965 destexp = gen_rtx_ASHIFT (Pmode, countreg,
22966 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22967 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
22968 }
22969 else
22970 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
22971 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
22972 {
22973 rounded_count = (INTVAL (count)
22974 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22975 destmem = shallow_copy_rtx (destmem);
22976 set_mem_size (destmem, rounded_count);
22977 }
22978 else if (MEM_SIZE_KNOWN_P (destmem))
22979 clear_mem_size (destmem);
22980
22981 if (issetmem)
22982 {
22983 value = force_reg (mode, gen_lowpart (mode, value));
22984 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
22985 }
22986 else
22987 {
22988 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
22989 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
22990 if (mode != QImode)
22991 {
22992 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
22993 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22994 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
22995 }
22996 else
22997 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
22998 if (CONST_INT_P (count))
22999 {
23000 rounded_count = (INTVAL (count)
23001 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23002 srcmem = shallow_copy_rtx (srcmem);
23003 set_mem_size (srcmem, rounded_count);
23004 }
23005 else
23006 {
23007 if (MEM_SIZE_KNOWN_P (srcmem))
23008 clear_mem_size (srcmem);
23009 }
23010 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23011 destexp, srcexp));
23012 }
23013 }
23014
23015 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23016 DESTMEM.
23017 	   SRCMEM is passed by pointer to be updated on return.
23018 	   The return value is the updated DESTMEM.  */
23019 static rtx
23020 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23021 HOST_WIDE_INT size_to_move)
23022 {
23023 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23024 enum insn_code code;
23025 enum machine_mode move_mode;
23026 int piece_size, i;
23027
23028 	  /* Find the widest mode in which we could perform moves.
23029 	     Start with the biggest power of 2 not larger than SIZE_TO_MOVE and
23030 	     halve it until a move of such size is supported.  */
23031 piece_size = 1 << floor_log2 (size_to_move);
23032 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23033 code = optab_handler (mov_optab, move_mode);
23034 while (code == CODE_FOR_nothing && piece_size > 1)
23035 {
23036 piece_size >>= 1;
23037 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23038 code = optab_handler (mov_optab, move_mode);
23039 }
23040
23041 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23042 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23043 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23044 {
23045 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23046 move_mode = mode_for_vector (word_mode, nunits);
23047 code = optab_handler (mov_optab, move_mode);
23048 if (code == CODE_FOR_nothing)
23049 {
23050 move_mode = word_mode;
23051 piece_size = GET_MODE_SIZE (move_mode);
23052 code = optab_handler (mov_optab, move_mode);
23053 }
23054 }
23055 gcc_assert (code != CODE_FOR_nothing);
23056
23057 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23058 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23059
23060 	  /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZE moves.  */
23061 gcc_assert (size_to_move % piece_size == 0);
23062 adjust = GEN_INT (piece_size);
23063 for (i = 0; i < size_to_move; i += piece_size)
23064 {
23065 /* We move from memory to memory, so we'll need to do it via
23066 a temporary register. */
23067 tempreg = gen_reg_rtx (move_mode);
23068 emit_insn (GEN_FCN (code) (tempreg, src));
23069 emit_insn (GEN_FCN (code) (dst, tempreg));
23070
23071 emit_move_insn (destptr,
23072 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23073 emit_move_insn (srcptr,
23074 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23075
23076 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23077 piece_size);
23078 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23079 piece_size);
23080 }
23081
23082 /* Update DST and SRC rtx. */
23083 *srcmem = src;
23084 return dst;
23085 }
23086
23087 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
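/* For a constant COUNT (illustrative only), e.g. COUNT % MAX_SIZE == 7 with
   MAX_SIZE == 8, this emits one 4-byte, one 2-byte and one 1-byte move,
   walking the set bits of the epilogue size from the largest chunk down.  */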
23088 static void
23089 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23090 rtx destptr, rtx srcptr, rtx count, int max_size)
23091 {
23092 rtx src, dest;
23093 if (CONST_INT_P (count))
23094 {
23095 HOST_WIDE_INT countval = INTVAL (count);
23096 HOST_WIDE_INT epilogue_size = countval % max_size;
23097 int i;
23098
23099 /* For now MAX_SIZE should be a power of 2. This assert could be
23100 relaxed, but it'll require a bit more complicated epilogue
23101 expanding. */
23102 gcc_assert ((max_size & (max_size - 1)) == 0);
23103 for (i = max_size; i >= 1; i >>= 1)
23104 {
23105 if (epilogue_size & i)
23106 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23107 }
23108 return;
23109 }
23110 if (max_size > 8)
23111 {
23112 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23113 count, 1, OPTAB_DIRECT);
23114 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23115 count, QImode, 1, 4, false);
23116 return;
23117 }
23118
23119 /* When there are stringops, we can cheaply increase dest and src pointers.
23120 Otherwise we save code size by maintaining offset (zero is readily
23121 available from preceding rep operation) and using x86 addressing modes.
23122 */
23123 if (TARGET_SINGLE_STRINGOP)
23124 {
23125 if (max_size > 4)
23126 {
23127 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23128 src = change_address (srcmem, SImode, srcptr);
23129 dest = change_address (destmem, SImode, destptr);
23130 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23131 emit_label (label);
23132 LABEL_NUSES (label) = 1;
23133 }
23134 if (max_size > 2)
23135 {
23136 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23137 src = change_address (srcmem, HImode, srcptr);
23138 dest = change_address (destmem, HImode, destptr);
23139 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23140 emit_label (label);
23141 LABEL_NUSES (label) = 1;
23142 }
23143 if (max_size > 1)
23144 {
23145 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23146 src = change_address (srcmem, QImode, srcptr);
23147 dest = change_address (destmem, QImode, destptr);
23148 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23149 emit_label (label);
23150 LABEL_NUSES (label) = 1;
23151 }
23152 }
23153 else
23154 {
23155 rtx offset = force_reg (Pmode, const0_rtx);
23156 rtx tmp;
23157
23158 if (max_size > 4)
23159 {
23160 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23161 src = change_address (srcmem, SImode, srcptr);
23162 dest = change_address (destmem, SImode, destptr);
23163 emit_move_insn (dest, src);
23164 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23165 true, OPTAB_LIB_WIDEN);
23166 if (tmp != offset)
23167 emit_move_insn (offset, tmp);
23168 emit_label (label);
23169 LABEL_NUSES (label) = 1;
23170 }
23171 if (max_size > 2)
23172 {
23173 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23174 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23175 src = change_address (srcmem, HImode, tmp);
23176 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23177 dest = change_address (destmem, HImode, tmp);
23178 emit_move_insn (dest, src);
23179 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23180 true, OPTAB_LIB_WIDEN);
23181 if (tmp != offset)
23182 emit_move_insn (offset, tmp);
23183 emit_label (label);
23184 LABEL_NUSES (label) = 1;
23185 }
23186 if (max_size > 1)
23187 {
23188 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23189 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23190 src = change_address (srcmem, QImode, tmp);
23191 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23192 dest = change_address (destmem, QImode, tmp);
23193 emit_move_insn (dest, src);
23194 emit_label (label);
23195 LABEL_NUSES (label) = 1;
23196 }
23197 }
23198 }
23199
23200 	/* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23201 	   with value PROMOTED_VAL.
23202 	   The return value is the updated DESTMEM.  */
23204 static rtx
23205 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23206 HOST_WIDE_INT size_to_move)
23207 {
23208 rtx dst = destmem, adjust;
23209 enum insn_code code;
23210 enum machine_mode move_mode;
23211 int piece_size, i;
23212
23213 	  /* Determine the mode in which to perform the stores: the mode of
23214 	     PROMOTED_VAL (QImode if it has none), narrowed to an integer mode
23215 	     that fits if SIZE_TO_MOVE is smaller than that mode.  */
23216 move_mode = GET_MODE (promoted_val);
23217 if (move_mode == VOIDmode)
23218 move_mode = QImode;
23219 if (size_to_move < GET_MODE_SIZE (move_mode))
23220 {
23221 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23222 promoted_val = gen_lowpart (move_mode, promoted_val);
23223 }
23224 piece_size = GET_MODE_SIZE (move_mode);
23225 code = optab_handler (mov_optab, move_mode);
23226 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23227
23228 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23229
23230 	  /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZE moves.  */
23231 gcc_assert (size_to_move % piece_size == 0);
23232 adjust = GEN_INT (piece_size);
23233 for (i = 0; i < size_to_move; i += piece_size)
23234 {
23235 if (piece_size <= GET_MODE_SIZE (word_mode))
23236 {
23237 emit_insn (gen_strset (destptr, dst, promoted_val));
23238 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23239 piece_size);
23240 continue;
23241 }
23242
23243 emit_insn (GEN_FCN (code) (dst, promoted_val));
23244
23245 emit_move_insn (destptr,
23246 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23247
23248 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23249 piece_size);
23250 }
23251
23252 /* Update DST rtx. */
23253 return dst;
23254 }
23255 	/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
23256 static void
23257 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23258 rtx count, int max_size)
23259 {
23260 count =
23261 expand_simple_binop (counter_mode (count), AND, count,
23262 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23263 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23264 gen_lowpart (QImode, value), count, QImode,
23265 1, max_size / 2, true);
23266 }
23267
23268 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23269 static void
23270 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23271 rtx count, int max_size)
23272 {
23273 rtx dest;
23274
23275 if (CONST_INT_P (count))
23276 {
23277 HOST_WIDE_INT countval = INTVAL (count);
23278 HOST_WIDE_INT epilogue_size = countval % max_size;
23279 int i;
23280
23281 /* For now MAX_SIZE should be a power of 2. This assert could be
23282 relaxed, but that would require a somewhat more complicated epilogue
23283 expansion. */
23284 gcc_assert ((max_size & (max_size - 1)) == 0);
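/* For example (illustrative values only): when the constant COUNT leaves an
   epilogue of 13 bytes with MAX_SIZE == 16, the loop below emits
   unconditional stores of 8, 4 and 1 bytes (13 = 8 + 4 + 1), using the
   vector value for pieces wider than VALUE when one is available.  */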
23285 for (i = max_size; i >= 1; i >>= 1)
23286 {
23287 if (epilogue_size & i)
23288 {
23289 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23290 destmem = emit_memset (destmem, destptr, vec_value, i);
23291 else
23292 destmem = emit_memset (destmem, destptr, value, i);
23293 }
23294 }
23295 return;
23296 }
23297 if (max_size > 32)
23298 {
23299 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23300 return;
23301 }
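/* For a non-constant COUNT we instead emit a chain of conditional stores:
   e.g. with MAX_SIZE == 32 the tests below check bits 16, 8, 4, 2 and 1 of
   COUNT and store that many bytes for each bit that is set.  */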
23302 if (max_size > 16)
23303 {
23304 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23305 if (TARGET_64BIT)
23306 {
23307 dest = change_address (destmem, DImode, destptr);
23308 emit_insn (gen_strset (destptr, dest, value));
23309 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23310 emit_insn (gen_strset (destptr, dest, value));
23311 }
23312 else
23313 {
23314 dest = change_address (destmem, SImode, destptr);
23315 emit_insn (gen_strset (destptr, dest, value));
23316 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23317 emit_insn (gen_strset (destptr, dest, value));
23318 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23319 emit_insn (gen_strset (destptr, dest, value));
23320 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23321 emit_insn (gen_strset (destptr, dest, value));
23322 }
23323 emit_label (label);
23324 LABEL_NUSES (label) = 1;
23325 }
23326 if (max_size > 8)
23327 {
23328 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
23329 if (TARGET_64BIT)
23330 {
23331 dest = change_address (destmem, DImode, destptr);
23332 emit_insn (gen_strset (destptr, dest, value));
23333 }
23334 else
23335 {
23336 dest = change_address (destmem, SImode, destptr);
23337 emit_insn (gen_strset (destptr, dest, value));
23338 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23339 emit_insn (gen_strset (destptr, dest, value));
23340 }
23341 emit_label (label);
23342 LABEL_NUSES (label) = 1;
23343 }
23344 if (max_size > 4)
23345 {
23346 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23347 dest = change_address (destmem, SImode, destptr);
23348 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23349 emit_label (label);
23350 LABEL_NUSES (label) = 1;
23351 }
23352 if (max_size > 2)
23353 {
23354 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23355 dest = change_address (destmem, HImode, destptr);
23356 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23357 emit_label (label);
23358 LABEL_NUSES (label) = 1;
23359 }
23360 if (max_size > 1)
23361 {
23362 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23363 dest = change_address (destmem, QImode, destptr);
23364 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23365 emit_label (label);
23366 LABEL_NUSES (label) = 1;
23367 }
23368 }
23369
23370 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough of
23371 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23372 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23373 ignored.
23374 Return value is updated DESTMEM. */
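/* For example, with ALIGN == 1 and DESIRED_ALIGNMENT == 8, the loop below
   emits conditional 1-, 2- and 4-byte copies (or stores), each guarded by a
   test of the corresponding bit of DESTPTR, and bumps the recorded alignment
   of DESTMEM after each step.  */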
23375 static rtx
23376 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23377 rtx destptr, rtx srcptr, rtx value,
23378 rtx vec_value, rtx count, int align,
23379 int desired_alignment, bool issetmem)
23380 {
23381 int i;
23382 for (i = 1; i < desired_alignment; i <<= 1)
23383 {
23384 if (align <= i)
23385 {
23386 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
23387 if (issetmem)
23388 {
23389 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23390 destmem = emit_memset (destmem, destptr, vec_value, i);
23391 else
23392 destmem = emit_memset (destmem, destptr, value, i);
23393 }
23394 else
23395 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23396 ix86_adjust_counter (count, i);
23397 emit_label (label);
23398 LABEL_NUSES (label) = 1;
23399 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23400 }
23401 }
23402 return destmem;
23403 }
23404
23405 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
23406 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23407 and jump to DONE_LABEL. */
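/* For example, with SIZE == 4 and COUNT == 6 the sequence below copies
   bytes 0..3 and then bytes 2..5 (the last SIZE bytes), so the two possibly
   overlapping moves cover any length in SIZE..2*SIZE-1.  */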
23408 static void
23409 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23410 rtx destptr, rtx srcptr,
23411 rtx value, rtx vec_value,
23412 rtx count, int size,
23413 rtx done_label, bool issetmem)
23414 {
23415 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
23416 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23417 rtx modesize;
23418 int n;
23419
23420 /* If we do not have a vector value to copy, we must reduce the size. */
23421 if (issetmem)
23422 {
23423 if (!vec_value)
23424 {
23425 if (GET_MODE (value) == VOIDmode && size > 8)
23426 mode = Pmode;
23427 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23428 mode = GET_MODE (value);
23429 }
23430 else
23431 mode = GET_MODE (vec_value), value = vec_value;
23432 }
23433 else
23434 {
23435 /* Choose appropriate vector mode. */
23436 if (size >= 32)
23437 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23438 else if (size >= 16)
23439 mode = TARGET_SSE ? V16QImode : DImode;
23440 srcmem = change_address (srcmem, mode, srcptr);
23441 }
23442 destmem = change_address (destmem, mode, destptr);
23443 modesize = GEN_INT (GET_MODE_SIZE (mode));
23444 gcc_assert (GET_MODE_SIZE (mode) <= size);
23445 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23446 {
23447 if (issetmem)
23448 emit_move_insn (destmem, gen_lowpart (mode, value));
23449 else
23450 {
23451 emit_move_insn (destmem, srcmem);
23452 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23453 }
23454 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23455 }
23456
23457 destmem = offset_address (destmem, count, 1);
23458 destmem = offset_address (destmem, GEN_INT (-2 * size),
23459 GET_MODE_SIZE (mode));
23460 if (!issetmem)
23461 {
23462 srcmem = offset_address (srcmem, count, 1);
23463 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23464 GET_MODE_SIZE (mode));
23465 }
23466 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23467 {
23468 if (issetmem)
23469 emit_move_insn (destmem, gen_lowpart (mode, value));
23470 else
23471 {
23472 emit_move_insn (destmem, srcmem);
23473 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23474 }
23475 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23476 }
23477 emit_jump_insn (gen_jump (done_label));
23478 emit_barrier ();
23479
23480 emit_label (label);
23481 LABEL_NUSES (label) = 1;
23482 }
23483
23484 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
23485 and get ready for the main memcpy loop by copying initial DESIRED_ALIGN-ALIGN
23486 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way we can
23487 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
23488 DONE_LABEL is a label after the whole copying sequence. The label is created
23489 on demand if *DONE_LABEL is NULL.
23490 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
23491 bounds after the initial copies.
23492
23493 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23494 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
23495 we will dispatch to a library call for large blocks.
23496
23497 In pseudocode we do:
23498
23499 if (COUNT < SIZE)
23500 {
23501 Assume that SIZE is 4. Bigger sizes are handled analogously
23502 if (COUNT & 4)
23503 {
23504 copy 4 bytes from SRCPTR to DESTPTR
23505 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23506 goto done_label
23507 }
23508 if (!COUNT)
23509 goto done_label;
23510 copy 1 byte from SRCPTR to DESTPTR
23511 if (COUNT & 2)
23512 {
23513 copy 2 bytes from SRCPTR to DESTPTR
23514 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23515 }
23516 }
23517 else
23518 {
23519 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23520 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
23521
23522 OLD_DESTPTR = DESTPTR;
23523 Align DESTPTR up to DESIRED_ALIGN
23524 SRCPTR += DESTPTR - OLD_DESTPTR
23525 COUNT -= DESTPTR - OLD_DESTPTR
23526 if (DYNAMIC_CHECK)
23527 Round COUNT down to multiple of SIZE
23528 << optional caller supplied zero size guard is here >>
23529 << optional caller supplied dynamic check is here >>
23530 << caller supplied main copy loop is here >>
23531 }
23532 done_label:
23533 */
23534 static void
23535 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23536 rtx *destptr, rtx *srcptr,
23537 enum machine_mode mode,
23538 rtx value, rtx vec_value,
23539 rtx *count,
23540 rtx_code_label **done_label,
23541 int size,
23542 int desired_align,
23543 int align,
23544 unsigned HOST_WIDE_INT *min_size,
23545 bool dynamic_check,
23546 bool issetmem)
23547 {
23548 rtx_code_label *loop_label = NULL, *label;
23549 int n;
23550 rtx modesize;
23551 int prolog_size = 0;
23552 rtx mode_value;
23553
23554 /* Choose the proper value to copy. */
23555 if (issetmem && VECTOR_MODE_P (mode))
23556 mode_value = vec_value;
23557 else
23558 mode_value = value;
23559 gcc_assert (GET_MODE_SIZE (mode) <= size);
23560
23561 /* See if block is big or small, handle small blocks. */
23562 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23563 {
23564 int size2 = size;
23565 loop_label = gen_label_rtx ();
23566
23567 if (!*done_label)
23568 *done_label = gen_label_rtx ();
23569
23570 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23571 1, loop_label);
23572 size2 >>= 1;
23573
23574 /* Handle sizes > 3. */
23575 for (;size2 > 2; size2 >>= 1)
23576 expand_small_movmem_or_setmem (destmem, srcmem,
23577 *destptr, *srcptr,
23578 value, vec_value,
23579 *count,
23580 size2, *done_label, issetmem);
23581 /* Nothing to copy? Jump to DONE_LABEL if so. */
23582 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23583 1, *done_label);
23584
23585 /* Do a byte copy. */
23586 destmem = change_address (destmem, QImode, *destptr);
23587 if (issetmem)
23588 emit_move_insn (destmem, gen_lowpart (QImode, value));
23589 else
23590 {
23591 srcmem = change_address (srcmem, QImode, *srcptr);
23592 emit_move_insn (destmem, srcmem);
23593 }
23594
23595 /* Handle sizes 2 and 3. */
23596 label = ix86_expand_aligntest (*count, 2, false);
23597 destmem = change_address (destmem, HImode, *destptr);
23598 destmem = offset_address (destmem, *count, 1);
23599 destmem = offset_address (destmem, GEN_INT (-2), 2);
23600 if (issetmem)
23601 emit_move_insn (destmem, gen_lowpart (HImode, value));
23602 else
23603 {
23604 srcmem = change_address (srcmem, HImode, *srcptr);
23605 srcmem = offset_address (srcmem, *count, 1);
23606 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23607 emit_move_insn (destmem, srcmem);
23608 }
23609
23610 emit_label (label);
23611 LABEL_NUSES (label) = 1;
23612 emit_jump_insn (gen_jump (*done_label));
23613 emit_barrier ();
23614 }
23615 else
23616 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23617 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23618
23619 /* Start memcpy for COUNT >= SIZE. */
23620 if (loop_label)
23621 {
23622 emit_label (loop_label);
23623 LABEL_NUSES (loop_label) = 1;
23624 }
23625
23626 /* Copy first desired_align bytes. */
23627 if (!issetmem)
23628 srcmem = change_address (srcmem, mode, *srcptr);
23629 destmem = change_address (destmem, mode, *destptr);
23630 modesize = GEN_INT (GET_MODE_SIZE (mode));
23631 for (n = 0; prolog_size < desired_align - align; n++)
23632 {
23633 if (issetmem)
23634 emit_move_insn (destmem, mode_value);
23635 else
23636 {
23637 emit_move_insn (destmem, srcmem);
23638 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23639 }
23640 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23641 prolog_size += GET_MODE_SIZE (mode);
23642 }
23643
23644
23645 /* Copy last SIZE bytes. */
23646 destmem = offset_address (destmem, *count, 1);
23647 destmem = offset_address (destmem,
23648 GEN_INT (-size - prolog_size),
23649 1);
23650 if (issetmem)
23651 emit_move_insn (destmem, mode_value);
23652 else
23653 {
23654 srcmem = offset_address (srcmem, *count, 1);
23655 srcmem = offset_address (srcmem,
23656 GEN_INT (-size - prolog_size),
23657 1);
23658 emit_move_insn (destmem, srcmem);
23659 }
23660 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23661 {
23662 destmem = offset_address (destmem, modesize, 1);
23663 if (issetmem)
23664 emit_move_insn (destmem, mode_value);
23665 else
23666 {
23667 srcmem = offset_address (srcmem, modesize, 1);
23668 emit_move_insn (destmem, srcmem);
23669 }
23670 }
23671
23672 /* Align destination. */
23673 if (desired_align > 1 && desired_align > align)
23674 {
23675 rtx saveddest = *destptr;
23676
23677 gcc_assert (desired_align <= size);
23678 /* Align destptr up, and place it in a new register. */
23679 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23680 GEN_INT (prolog_size),
23681 NULL_RTX, 1, OPTAB_DIRECT);
23682 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23683 GEN_INT (-desired_align),
23684 *destptr, 1, OPTAB_DIRECT);
23685 /* See how many bytes we skipped. */
23686 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23687 *destptr,
23688 saveddest, 1, OPTAB_DIRECT);
23689 /* Adjust srcptr and count. */
23690 if (!issetmem)
23691 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23692 *srcptr, 1, OPTAB_DIRECT);
23693 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23694 saveddest, *count, 1, OPTAB_DIRECT);
23695 /* We copied at most size + prolog_size. */
23696 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23697 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23698 else
23699 *min_size = 0;
23700
23701 /* Our loops always round down the block size, but for dispatch to a library
23702 call we need the precise value. */
23703 if (dynamic_check)
23704 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23705 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23706 }
23707 else
23708 {
23709 gcc_assert (prolog_size == 0);
23710 /* Decrease count, so we won't end up copying last word twice. */
23711 if (!CONST_INT_P (*count))
23712 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23713 constm1_rtx, *count, 1, OPTAB_DIRECT);
23714 else
23715 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23716 if (*min_size)
23717 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
23718 }
23719 }
23720
23721
23722 /* This function is like the previous one, except here we know how many bytes
23723 need to be copied. That allows us to update alignment not only of DST, which
23724 is returned, but also of SRC, which is passed as a pointer for that
23725 reason. */
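/* For example, with ALIGN_BYTES == 7 and DESIRED_ALIGN == 8, the loop below
   emits an unconditional 1-byte, 2-byte and 4-byte copy (or store), since
   bits 1, 2 and 4 of ALIGN_BYTES are set, and the recorded alignment of DST
   is then raised to DESIRED_ALIGN.  */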
23726 static rtx
23727 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23728 rtx srcreg, rtx value, rtx vec_value,
23729 int desired_align, int align_bytes,
23730 bool issetmem)
23731 {
23732 rtx src = NULL;
23733 rtx orig_dst = dst;
23734 rtx orig_src = NULL;
23735 int piece_size = 1;
23736 int copied_bytes = 0;
23737
23738 if (!issetmem)
23739 {
23740 gcc_assert (srcp != NULL);
23741 src = *srcp;
23742 orig_src = src;
23743 }
23744
23745 for (piece_size = 1;
23746 piece_size <= desired_align && copied_bytes < align_bytes;
23747 piece_size <<= 1)
23748 {
23749 if (align_bytes & piece_size)
23750 {
23751 if (issetmem)
23752 {
23753 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23754 dst = emit_memset (dst, destreg, vec_value, piece_size);
23755 else
23756 dst = emit_memset (dst, destreg, value, piece_size);
23757 }
23758 else
23759 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23760 copied_bytes += piece_size;
23761 }
23762 }
23763 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23764 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23765 if (MEM_SIZE_KNOWN_P (orig_dst))
23766 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23767
23768 if (!issetmem)
23769 {
23770 int src_align_bytes = get_mem_align_offset (src, desired_align
23771 * BITS_PER_UNIT);
23772 if (src_align_bytes >= 0)
23773 src_align_bytes = desired_align - src_align_bytes;
23774 if (src_align_bytes >= 0)
23775 {
23776 unsigned int src_align;
23777 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23778 {
23779 if ((src_align_bytes & (src_align - 1))
23780 == (align_bytes & (src_align - 1)))
23781 break;
23782 }
23783 if (src_align > (unsigned int) desired_align)
23784 src_align = desired_align;
23785 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23786 set_mem_align (src, src_align * BITS_PER_UNIT);
23787 }
23788 if (MEM_SIZE_KNOWN_P (orig_src))
23789 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23790 *srcp = src;
23791 }
23792
23793 return dst;
23794 }
23795
23796 /* Return true if ALG can be used in current context.
23797 Assume we expand memset if MEMSET is true. */
23798 static bool
23799 alg_usable_p (enum stringop_alg alg, bool memset)
23800 {
23801 if (alg == no_stringop)
23802 return false;
23803 if (alg == vector_loop)
23804 return TARGET_SSE || TARGET_AVX;
23805 /* Algorithms using the rep prefix want at least edi and ecx;
23806 additionally, memset wants eax and memcpy wants esi. Don't
23807 consider such algorithms if the user has appropriated those
23808 registers for their own purposes. */
23809 if (alg == rep_prefix_1_byte
23810 || alg == rep_prefix_4_byte
23811 || alg == rep_prefix_8_byte)
23812 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23813 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23814 return true;
23815 }
23816
23817 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
23818 static enum stringop_alg
23819 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23820 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23821 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
23822 {
23823 const struct stringop_algs * algs;
23824 bool optimize_for_speed;
23825 int max = 0;
23826 const struct processor_costs *cost;
23827 int i;
23828 bool any_alg_usable_p = false;
23829
23830 *noalign = false;
23831 *dynamic_check = -1;
23832
23833 /* Even if the string operation call is cold, we still might spend a lot
23834 of time processing large blocks. */
23835 if (optimize_function_for_size_p (cfun)
23836 || (optimize_insn_for_size_p ()
23837 && (max_size < 256
23838 || (expected_size != -1 && expected_size < 256))))
23839 optimize_for_speed = false;
23840 else
23841 optimize_for_speed = true;
23842
23843 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
23844 if (memset)
23845 algs = &cost->memset[TARGET_64BIT != 0];
23846 else
23847 algs = &cost->memcpy[TARGET_64BIT != 0];
23848
23849 /* Find the maximal size covered by a non-libcall algorithm. */
23850 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23851 {
23852 enum stringop_alg candidate = algs->size[i].alg;
23853 bool usable = alg_usable_p (candidate, memset);
23854 any_alg_usable_p |= usable;
23855
23856 if (candidate != libcall && candidate && usable)
23857 max = algs->size[i].max;
23858 }
23859
23860 /* If the expected size is not known but the max size is small enough
23861 so that the inline version is a win, set the expected size into
23862 the range. */
23863 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
23864 && expected_size == -1)
23865 expected_size = min_size / 2 + max_size / 2;
23866
23867 /* If the user specified the algorithm, honor it if possible. */
23868 if (ix86_stringop_alg != no_stringop
23869 && alg_usable_p (ix86_stringop_alg, memset))
23870 return ix86_stringop_alg;
23871 /* rep; movq or rep; movl is the smallest variant. */
23872 else if (!optimize_for_speed)
23873 {
23874 *noalign = true;
23875 if (!count || (count & 3) || (memset && !zero_memset))
23876 return alg_usable_p (rep_prefix_1_byte, memset)
23877 ? rep_prefix_1_byte : loop_1_byte;
23878 else
23879 return alg_usable_p (rep_prefix_4_byte, memset)
23880 ? rep_prefix_4_byte : loop;
23881 }
23882 /* Very tiny blocks are best handled via the loop; REP is expensive to
23883 set up. */
23884 else if (expected_size != -1 && expected_size < 4)
23885 return loop_1_byte;
23886 else if (expected_size != -1)
23887 {
23888 enum stringop_alg alg = libcall;
23889 bool alg_noalign = false;
23890 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23891 {
23892 /* We get here if the algorithms that were not libcall-based
23893 were rep-prefix based and we are unable to use rep prefixes
23894 based on global register usage. Break out of the loop and
23895 use the heuristic below. */
23896 if (algs->size[i].max == 0)
23897 break;
23898 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
23899 {
23900 enum stringop_alg candidate = algs->size[i].alg;
23901
23902 if (candidate != libcall && alg_usable_p (candidate, memset))
23903 {
23904 alg = candidate;
23905 alg_noalign = algs->size[i].noalign;
23906 }
23907 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23908 last non-libcall inline algorithm. */
23909 if (TARGET_INLINE_ALL_STRINGOPS)
23910 {
23911 /* When the current size is best to be copied by a libcall,
23912 but we are still forced to inline, run the heuristic below
23913 that will pick code for medium sized blocks. */
23914 if (alg != libcall)
23915 {
23916 *noalign = alg_noalign;
23917 return alg;
23918 }
23919 break;
23920 }
23921 else if (alg_usable_p (candidate, memset))
23922 {
23923 *noalign = algs->size[i].noalign;
23924 return candidate;
23925 }
23926 }
23927 }
23928 }
23929 /* When asked to inline the call anyway, try to pick a meaningful choice.
23930 We look for the maximal block size that is faster to copy by hand and
23931 take blocks of at most that size, guessing that the average size will
23932 be roughly half of the block.
23933
23934 If this turns out to be bad, we might simply specify the preferred
23935 choice in ix86_costs. */
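/* For example (illustrative values only): with -minline-stringops-dynamically
   and a maximal inline size of 4096, the recursive call below picks the
   algorithm that is best for roughly 2048-byte blocks, and DYNAMIC_CHECK is
   set to 4096 so that larger blocks are dispatched to the library call.  */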
23936 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23937 && (algs->unknown_size == libcall
23938 || !alg_usable_p (algs->unknown_size, memset)))
23939 {
23940 enum stringop_alg alg;
23941
23942 /* If there aren't any usable algorithms, then recursing on
23943 smaller sizes isn't going to find anything. Just return the
23944 simple byte-at-a-time copy loop. */
23945 if (!any_alg_usable_p)
23946 {
23947 /* Pick something reasonable. */
23948 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23949 *dynamic_check = 128;
23950 return loop_1_byte;
23951 }
23952 if (max <= 0)
23953 max = 4096;
23954 alg = decide_alg (count, max / 2, min_size, max_size, memset,
23955 zero_memset, dynamic_check, noalign);
23956 gcc_assert (*dynamic_check == -1);
23957 gcc_assert (alg != libcall);
23958 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23959 *dynamic_check = max;
23960 return alg;
23961 }
23962 return (alg_usable_p (algs->unknown_size, memset)
23963 ? algs->unknown_size : libcall);
23964 }
23965
23966 /* Decide on alignment. We know that the operand is already aligned to ALIGN
23967 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
23968 static int
23969 decide_alignment (int align,
23970 enum stringop_alg alg,
23971 int expected_size,
23972 enum machine_mode move_mode)
23973 {
23974 int desired_align = 0;
23975
23976 gcc_assert (alg != no_stringop);
23977
23978 if (alg == libcall)
23979 return 0;
23980 if (move_mode == VOIDmode)
23981 return 0;
23982
23983 desired_align = GET_MODE_SIZE (move_mode);
23984 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
23985 copying the whole cache line at once. */
23986 if (TARGET_PENTIUMPRO
23987 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
23988 desired_align = 8;
23989
23990 if (optimize_size)
23991 desired_align = 1;
23992 if (desired_align < align)
23993 desired_align = align;
23994 if (expected_size != -1 && expected_size < 4)
23995 desired_align = align;
23996
23997 return desired_align;
23998 }
23999
24000
24001 /* Helper function for memset. For a QImode value 0xXY produce
24002 0xXYXYXYXY of the width specified by MODE. This is essentially
24003 a * 0x01010101, but we can do slightly better than
24004 synth_mult by unwinding the sequence by hand on CPUs with
24005 a slow multiply. */
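/* For example, with MODE == SImode and VAL == 0xAB the result register holds
   0xABABABAB (0xAB * 0x01010101); the non-constant path builds the same
   value with a shift/OR (or insv) sequence when that is cheaper than the
   multiply.  */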
24006 static rtx
24007 promote_duplicated_reg (enum machine_mode mode, rtx val)
24008 {
24009 enum machine_mode valmode = GET_MODE (val);
24010 rtx tmp;
24011 int nops = mode == DImode ? 3 : 2;
24012
24013 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24014 if (val == const0_rtx)
24015 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24016 if (CONST_INT_P (val))
24017 {
24018 HOST_WIDE_INT v = INTVAL (val) & 255;
24019
24020 v |= v << 8;
24021 v |= v << 16;
24022 if (mode == DImode)
24023 v |= (v << 16) << 16;
24024 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24025 }
24026
24027 if (valmode == VOIDmode)
24028 valmode = QImode;
24029 if (valmode != QImode)
24030 val = gen_lowpart (QImode, val);
24031 if (mode == QImode)
24032 return val;
24033 if (!TARGET_PARTIAL_REG_STALL)
24034 nops--;
24035 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24036 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24037 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24038 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24039 {
24040 rtx reg = convert_modes (mode, QImode, val, true);
24041 tmp = promote_duplicated_reg (mode, const1_rtx);
24042 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24043 OPTAB_DIRECT);
24044 }
24045 else
24046 {
24047 rtx reg = convert_modes (mode, QImode, val, true);
24048
24049 if (!TARGET_PARTIAL_REG_STALL)
24050 if (mode == SImode)
24051 emit_insn (gen_movsi_insv_1 (reg, reg));
24052 else
24053 emit_insn (gen_movdi_insv_1 (reg, reg));
24054 else
24055 {
24056 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24057 NULL, 1, OPTAB_DIRECT);
24058 reg =
24059 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24060 }
24061 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24062 NULL, 1, OPTAB_DIRECT);
24063 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24064 if (mode == SImode)
24065 return reg;
24066 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24067 NULL, 1, OPTAB_DIRECT);
24068 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24069 return reg;
24070 }
24071 }
24072
24073 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
24074 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
24075 alignment from ALIGN to DESIRED_ALIGN. */
24076 static rtx
24077 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24078 int align)
24079 {
24080 rtx promoted_val;
24081
24082 if (TARGET_64BIT
24083 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24084 promoted_val = promote_duplicated_reg (DImode, val);
24085 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24086 promoted_val = promote_duplicated_reg (SImode, val);
24087 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24088 promoted_val = promote_duplicated_reg (HImode, val);
24089 else
24090 promoted_val = val;
24091
24092 return promoted_val;
24093 }
24094
24095 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24096 operations when profitable. The code depends upon architecture, block size
24097 and alignment, but always has one of the following overall structures:
24098
24099 Aligned move sequence:
24100
24101 1) Prologue guard: Conditional that jumps up to epilogues for small
24102 blocks that can be handled by the epilogue alone. This is faster
24103 but also needed for correctness, since the prologue assumes the block
24104 is larger than the desired alignment.
24105
24106 Optional dynamic check for size and libcall for large
24107 blocks is emitted here too, with -minline-stringops-dynamically.
24108
24109 2) Prologue: copy first few bytes in order to get destination
24110 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24111 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24112 copied. We emit either a jump tree on power of two sized
24113 blocks, or a byte loop.
24114
24115 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24116 with specified algorithm.
24117
24118 4) Epilogue: code copying tail of the block that is too small to be
24119 handled by main body (or up to size guarded by prologue guard).
24120
24121 Misaligned move sequence
24122
24123 1) misaligned move prologue/epilogue containing:
24124 a) Prologue handling small memory blocks and jumping to done_label
24125 (skipped if blocks are known to be large enough)
24126 b) Single possibly misaligned move copying the first DESIRED_ALIGN-ALIGN
24127 bytes, if alignment is needed
24128 (skipped if alignment is not needed)
24129 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24130
24131 2) Zero size guard dispatching to done_label, if needed
24132
24133 3) dispatch to library call, if needed,
24134
24135 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24136 with specified algorithm. */
24137 bool
24138 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24139 rtx align_exp, rtx expected_align_exp,
24140 rtx expected_size_exp, rtx min_size_exp,
24141 rtx max_size_exp, rtx probable_max_size_exp,
24142 bool issetmem)
24143 {
24144 rtx destreg;
24145 rtx srcreg = NULL;
24146 rtx_code_label *label = NULL;
24147 rtx tmp;
24148 rtx_code_label *jump_around_label = NULL;
24149 HOST_WIDE_INT align = 1;
24150 unsigned HOST_WIDE_INT count = 0;
24151 HOST_WIDE_INT expected_size = -1;
24152 int size_needed = 0, epilogue_size_needed;
24153 int desired_align = 0, align_bytes = 0;
24154 enum stringop_alg alg;
24155 rtx promoted_val = NULL;
24156 rtx vec_promoted_val = NULL;
24157 bool force_loopy_epilogue = false;
24158 int dynamic_check;
24159 bool need_zero_guard = false;
24160 bool noalign;
24161 enum machine_mode move_mode = VOIDmode;
24162 int unroll_factor = 1;
24163 /* TODO: Once value ranges are available, fill in proper data. */
24164 unsigned HOST_WIDE_INT min_size = 0;
24165 unsigned HOST_WIDE_INT max_size = -1;
24166 unsigned HOST_WIDE_INT probable_max_size = -1;
24167 bool misaligned_prologue_used = false;
24168
24169 if (CONST_INT_P (align_exp))
24170 align = INTVAL (align_exp);
24171 /* i386 can do misaligned access at a reasonably increased cost. */
24172 if (CONST_INT_P (expected_align_exp)
24173 && INTVAL (expected_align_exp) > align)
24174 align = INTVAL (expected_align_exp);
24175 /* ALIGN is the minimum of destination and source alignment, but we care here
24176 just about destination alignment. */
24177 else if (!issetmem
24178 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24179 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24180
24181 if (CONST_INT_P (count_exp))
24182 {
24183 min_size = max_size = probable_max_size = count = expected_size
24184 = INTVAL (count_exp);
24185 /* When COUNT is 0, there is nothing to do. */
24186 if (!count)
24187 return true;
24188 }
24189 else
24190 {
24191 if (min_size_exp)
24192 min_size = INTVAL (min_size_exp);
24193 if (max_size_exp)
24194 max_size = INTVAL (max_size_exp);
24195 if (probable_max_size_exp)
24196 probable_max_size = INTVAL (probable_max_size_exp);
24197 if (CONST_INT_P (expected_size_exp))
24198 expected_size = INTVAL (expected_size_exp);
24199 }
24200
24201 /* Make sure we don't need to care about overflow later on. */
24202 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24203 return false;
24204
24205 /* Step 0: Decide on preferred algorithm, desired alignment and
24206 size of chunks to be copied by main loop. */
24207 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24208 issetmem,
24209 issetmem && val_exp == const0_rtx,
24210 &dynamic_check, &noalign);
24211 if (alg == libcall)
24212 return false;
24213 gcc_assert (alg != no_stringop);
24214
24215 /* For now the vector version of memset is generated only for memory zeroing, as
24216 creating the promoted vector value is very cheap in this case. */
24217 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24218 alg = unrolled_loop;
24219
24220 if (!count)
24221 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24222 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24223 if (!issetmem)
24224 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24225
24226 unroll_factor = 1;
24227 move_mode = word_mode;
24228 switch (alg)
24229 {
24230 case libcall:
24231 case no_stringop:
24232 case last_alg:
24233 gcc_unreachable ();
24234 case loop_1_byte:
24235 need_zero_guard = true;
24236 move_mode = QImode;
24237 break;
24238 case loop:
24239 need_zero_guard = true;
24240 break;
24241 case unrolled_loop:
24242 need_zero_guard = true;
24243 unroll_factor = (TARGET_64BIT ? 4 : 2);
24244 break;
24245 case vector_loop:
24246 need_zero_guard = true;
24247 unroll_factor = 4;
24248 /* Find the widest supported mode. */
24249 move_mode = word_mode;
24250 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24251 != CODE_FOR_nothing)
24252 move_mode = GET_MODE_WIDER_MODE (move_mode);
24253
24254 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24255 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24256 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24257 {
24258 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24259 move_mode = mode_for_vector (word_mode, nunits);
24260 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24261 move_mode = word_mode;
24262 }
24263 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24264 break;
24265 case rep_prefix_8_byte:
24266 move_mode = DImode;
24267 break;
24268 case rep_prefix_4_byte:
24269 move_mode = SImode;
24270 break;
24271 case rep_prefix_1_byte:
24272 move_mode = QImode;
24273 break;
24274 }
24275 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24276 epilogue_size_needed = size_needed;
24277
24278 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24279 if (!TARGET_ALIGN_STRINGOPS || noalign)
24280 align = desired_align;
24281
24282 /* Step 1: Prologue guard. */
24283
24284 /* Alignment code needs count to be in register. */
24285 if (CONST_INT_P (count_exp) && desired_align > align)
24286 {
24287 if (INTVAL (count_exp) > desired_align
24288 && INTVAL (count_exp) > size_needed)
24289 {
24290 align_bytes
24291 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24292 if (align_bytes <= 0)
24293 align_bytes = 0;
24294 else
24295 align_bytes = desired_align - align_bytes;
24296 }
24297 if (align_bytes == 0)
24298 count_exp = force_reg (counter_mode (count_exp), count_exp);
24299 }
24300 gcc_assert (desired_align >= 1 && align >= 1);
24301
24302 /* Misaligned move sequences handle both prologue and epilogue at once.
24303 Default code generation results in smaller code for large alignments
24304 and also avoids redundant work when sizes are known precisely. */
24305 misaligned_prologue_used
24306 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24307 && MAX (desired_align, epilogue_size_needed) <= 32
24308 && desired_align <= epilogue_size_needed
24309 && ((desired_align > align && !align_bytes)
24310 || (!count && epilogue_size_needed > 1)));
24311
24312 /* Do the cheap promotion to allow better CSE across the
24313 main loop and epilogue (i.e. one load of the big constant in
24314 front of all the code).
24315 For now the misaligned move sequences do not have a fast path
24316 without broadcasting. */
24317 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24318 {
24319 if (alg == vector_loop)
24320 {
24321 gcc_assert (val_exp == const0_rtx);
24322 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24323 promoted_val = promote_duplicated_reg_to_size (val_exp,
24324 GET_MODE_SIZE (word_mode),
24325 desired_align, align);
24326 }
24327 else
24328 {
24329 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24330 desired_align, align);
24331 }
24332 }
24333 /* Misaligned move sequences handle both prologue and epilogue at once.
24334 Default code generation results in smaller code for large alignments and
24335 also avoids redundant work when sizes are known precisely. */
24336 if (misaligned_prologue_used)
24337 {
24338 /* The misaligned move prologue handles small blocks by itself. */
24339 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24340 (dst, src, &destreg, &srcreg,
24341 move_mode, promoted_val, vec_promoted_val,
24342 &count_exp,
24343 &jump_around_label,
24344 desired_align < align
24345 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24346 desired_align, align, &min_size, dynamic_check, issetmem);
24347 if (!issetmem)
24348 src = change_address (src, BLKmode, srcreg);
24349 dst = change_address (dst, BLKmode, destreg);
24350 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24351 epilogue_size_needed = 0;
24352 if (need_zero_guard && !min_size)
24353 {
24354 /* It is possible that we copied enough so the main loop will not
24355 execute. */
24356 gcc_assert (size_needed > 1);
24357 if (jump_around_label == NULL_RTX)
24358 jump_around_label = gen_label_rtx ();
24359 emit_cmp_and_jump_insns (count_exp,
24360 GEN_INT (size_needed),
24361 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24362 if (expected_size == -1
24363 || expected_size < (desired_align - align) / 2 + size_needed)
24364 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24365 else
24366 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24367 }
24368 }
24369 /* Ensure that alignment prologue won't copy past end of block. */
24370 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24371 {
24372 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24373 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24374 Make sure it is power of 2. */
24375 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24376
24377 /* To improve performance of small blocks, we jump around the VAL
24378 promoting code. This means that if the promoted VAL is not constant,
24379 we might not use it in the epilogue and have to use the byte
24380 loop variant. */
24381 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24382 force_loopy_epilogue = true;
24383 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24384 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24385 {
24386 /* If main algorithm works on QImode, no epilogue is needed.
24387 For small sizes just don't align anything. */
24388 if (size_needed == 1)
24389 desired_align = align;
24390 else
24391 goto epilogue;
24392 }
24393 else if (!count
24394 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24395 {
24396 label = gen_label_rtx ();
24397 emit_cmp_and_jump_insns (count_exp,
24398 GEN_INT (epilogue_size_needed),
24399 LTU, 0, counter_mode (count_exp), 1, label);
24400 if (expected_size == -1 || expected_size < epilogue_size_needed)
24401 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24402 else
24403 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24404 }
24405 }
24406
24407 /* Emit code to decide at runtime whether a library call or inline code should be
24408 used. */
24409 if (dynamic_check != -1)
24410 {
24411 if (!issetmem && CONST_INT_P (count_exp))
24412 {
24413 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24414 {
24415 emit_block_move_via_libcall (dst, src, count_exp, false);
24416 count_exp = const0_rtx;
24417 goto epilogue;
24418 }
24419 }
24420 else
24421 {
24422 rtx_code_label *hot_label = gen_label_rtx ();
24423 if (jump_around_label == NULL_RTX)
24424 jump_around_label = gen_label_rtx ();
24425 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24426 LEU, 0, counter_mode (count_exp),
24427 1, hot_label);
24428 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24429 if (issetmem)
24430 set_storage_via_libcall (dst, count_exp, val_exp, false);
24431 else
24432 emit_block_move_via_libcall (dst, src, count_exp, false);
24433 emit_jump (jump_around_label);
24434 emit_label (hot_label);
24435 }
24436 }
24437
24438 /* Step 2: Alignment prologue. */
24439 /* Do the expensive promotion once we branched off the small blocks. */
24440 if (issetmem && !promoted_val)
24441 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24442 desired_align, align);
24443
24444 if (desired_align > align && !misaligned_prologue_used)
24445 {
24446 if (align_bytes == 0)
24447 {
24448 /* Except for the first move in the prologue, we no longer know
24449 the constant offset in aliasing info. It doesn't seem worth
24450 the pain to maintain it for the first move, so throw away
24451 the info early. */
24452 dst = change_address (dst, BLKmode, destreg);
24453 if (!issetmem)
24454 src = change_address (src, BLKmode, srcreg);
24455 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24456 promoted_val, vec_promoted_val,
24457 count_exp, align, desired_align,
24458 issetmem);
24459 /* At most desired_align - align bytes are copied. */
24460 if (min_size < (unsigned)(desired_align - align))
24461 min_size = 0;
24462 else
24463 min_size -= desired_align - align;
24464 }
24465 else
24466 {
24467 /* If we know how many bytes need to be stored before dst is
24468 sufficiently aligned, maintain aliasing info accurately. */
24469 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24470 srcreg,
24471 promoted_val,
24472 vec_promoted_val,
24473 desired_align,
24474 align_bytes,
24475 issetmem);
24476
24477 count_exp = plus_constant (counter_mode (count_exp),
24478 count_exp, -align_bytes);
24479 count -= align_bytes;
24480 min_size -= align_bytes;
24481 max_size -= align_bytes;
24482 }
24483 if (need_zero_guard
24484 && !min_size
24485 && (count < (unsigned HOST_WIDE_INT) size_needed
24486 || (align_bytes == 0
24487 && count < ((unsigned HOST_WIDE_INT) size_needed
24488 + desired_align - align))))
24489 {
24490 /* It is possible that we copied enough so the main loop will not
24491 execute. */
24492 gcc_assert (size_needed > 1);
24493 if (label == NULL_RTX)
24494 label = gen_label_rtx ();
24495 emit_cmp_and_jump_insns (count_exp,
24496 GEN_INT (size_needed),
24497 LTU, 0, counter_mode (count_exp), 1, label);
24498 if (expected_size == -1
24499 || expected_size < (desired_align - align) / 2 + size_needed)
24500 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24501 else
24502 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24503 }
24504 }
24505 if (label && size_needed == 1)
24506 {
24507 emit_label (label);
24508 LABEL_NUSES (label) = 1;
24509 label = NULL;
24510 epilogue_size_needed = 1;
24511 if (issetmem)
24512 promoted_val = val_exp;
24513 }
24514 else if (label == NULL_RTX && !misaligned_prologue_used)
24515 epilogue_size_needed = size_needed;
24516
24517 /* Step 3: Main loop. */
24518
24519 switch (alg)
24520 {
24521 case libcall:
24522 case no_stringop:
24523 case last_alg:
24524 gcc_unreachable ();
24525 case loop_1_byte:
24526 case loop:
24527 case unrolled_loop:
24528 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24529 count_exp, move_mode, unroll_factor,
24530 expected_size, issetmem);
24531 break;
24532 case vector_loop:
24533 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24534 vec_promoted_val, count_exp, move_mode,
24535 unroll_factor, expected_size, issetmem);
24536 break;
24537 case rep_prefix_8_byte:
24538 case rep_prefix_4_byte:
24539 case rep_prefix_1_byte:
24540 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24541 val_exp, count_exp, move_mode, issetmem);
24542 break;
24543 }
24544 /* Properly adjust the offsets of src and dest memory for aliasing. */
24545 if (CONST_INT_P (count_exp))
24546 {
24547 if (!issetmem)
24548 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24549 (count / size_needed) * size_needed);
24550 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24551 (count / size_needed) * size_needed);
24552 }
24553 else
24554 {
24555 if (!issetmem)
24556 src = change_address (src, BLKmode, srcreg);
24557 dst = change_address (dst, BLKmode, destreg);
24558 }
24559
24560 /* Step 4: Epilogue to copy the remaining bytes. */
24561 epilogue:
24562 if (label)
24563 {
24564 /* When the main loop is done, COUNT_EXP might hold original count,
24565 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
24566 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
24567 bytes. Compensate if needed. */
24568
24569 if (size_needed < epilogue_size_needed)
24570 {
24571 tmp =
24572 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24573 GEN_INT (size_needed - 1), count_exp, 1,
24574 OPTAB_DIRECT);
24575 if (tmp != count_exp)
24576 emit_move_insn (count_exp, tmp);
24577 }
24578 emit_label (label);
24579 LABEL_NUSES (label) = 1;
24580 }
24581
24582 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24583 {
24584 if (force_loopy_epilogue)
24585 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24586 epilogue_size_needed);
24587 else
24588 {
24589 if (issetmem)
24590 expand_setmem_epilogue (dst, destreg, promoted_val,
24591 vec_promoted_val, count_exp,
24592 epilogue_size_needed);
24593 else
24594 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24595 epilogue_size_needed);
24596 }
24597 }
24598 if (jump_around_label)
24599 emit_label (jump_around_label);
24600 return true;
24601 }
24602
24603
24604 /* Expand the appropriate insns for doing strlen if not just doing
24605 repnz; scasb
24606
24607 out = result, initialized with the start address
24608 align_rtx = alignment of the address.
24609 scratch = scratch register, initialized with the start address when
24610 not aligned, otherwise undefined
24611
24612 This is just the body. It needs the initializations mentioned above and
24613 some address computing at the end. These things are done in i386.md. */
24614
24615 static void
24616 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24617 {
24618 int align;
24619 rtx tmp;
24620 rtx_code_label *align_2_label = NULL;
24621 rtx_code_label *align_3_label = NULL;
24622 rtx_code_label *align_4_label = gen_label_rtx ();
24623 rtx_code_label *end_0_label = gen_label_rtx ();
24624 rtx mem;
24625 rtx tmpreg = gen_reg_rtx (SImode);
24626 rtx scratch = gen_reg_rtx (SImode);
24627 rtx cmp;
24628
24629 align = 0;
24630 if (CONST_INT_P (align_rtx))
24631 align = INTVAL (align_rtx);
24632
24633 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24634
24635 /* Is there a known alignment and is it less than 4? */
24636 if (align < 4)
24637 {
24638 rtx scratch1 = gen_reg_rtx (Pmode);
24639 emit_move_insn (scratch1, out);
24640 /* Is there a known alignment and is it not 2? */
24641 if (align != 2)
24642 {
24643 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24644 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24645
24646 /* Leave just the 3 lower bits. */
24647 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24648 NULL_RTX, 0, OPTAB_WIDEN);
24649
24650 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24651 Pmode, 1, align_4_label);
24652 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24653 Pmode, 1, align_2_label);
24654 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24655 Pmode, 1, align_3_label);
24656 }
24657 else
24658 {
24659 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24660 check whether it is aligned to 4 bytes. */
24661
24662 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24663 NULL_RTX, 0, OPTAB_WIDEN);
24664
24665 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24666 Pmode, 1, align_4_label);
24667 }
24668
24669 mem = change_address (src, QImode, out);
24670
24671 /* Now compare the bytes. */
24672
24673 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
24674 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24675 QImode, 1, end_0_label);
24676
24677 /* Increment the address. */
24678 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24679
24680 /* Not needed with an alignment of 2 */
24681 if (align != 2)
24682 {
24683 emit_label (align_2_label);
24684
24685 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24686 end_0_label);
24687
24688 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24689
24690 emit_label (align_3_label);
24691 }
24692
24693 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24694 end_0_label);
24695
24696 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24697 }
24698
24699 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
24700 align this loop; that only makes the program bigger and does not help to
24701 speed it up. */
24702 emit_label (align_4_label);
24703
24704 mem = change_address (src, SImode, out);
24705 emit_move_insn (scratch, mem);
24706 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24707
24708 /* This formula yields a nonzero result iff one of the bytes is zero.
24709 This saves three branches inside the loop and many cycles. */
24710
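/* The value computed below is (x - 0x01010101) & ~x & 0x80808080.
   For example, x == 0x41004141 gives (0x3FFF4040 & 0xBEFFBEBE) & 0x80808080
   == 0x00800000: the set bit marks the zero byte.  */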
24711 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24712 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24713 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24714 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24715 gen_int_mode (0x80808080, SImode)));
24716 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24717 align_4_label);
24718
24719 if (TARGET_CMOVE)
24720 {
24721 rtx reg = gen_reg_rtx (SImode);
24722 rtx reg2 = gen_reg_rtx (Pmode);
24723 emit_move_insn (reg, tmpreg);
24724 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24725
24726 /* If zero is not in the first two bytes, move two bytes forward. */
24727 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24728 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24729 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24730 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24731 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24732 reg,
24733 tmpreg)));
24734 /* Emit lea manually to avoid clobbering of flags. */
24735 emit_insn (gen_rtx_SET (SImode, reg2,
24736 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24737
24738 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24739 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24740 emit_insn (gen_rtx_SET (VOIDmode, out,
24741 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24742 reg2,
24743 out)));
24744 }
24745 else
24746 {
24747 rtx_code_label *end_2_label = gen_label_rtx ();
24748 /* Is zero in the first two bytes? */
24749
24750 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24751 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24752 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24753 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24754 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24755 pc_rtx);
24756 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24757 JUMP_LABEL (tmp) = end_2_label;
24758
24759 /* Not in the first two. Move two bytes forward. */
24760 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24761 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24762
24763 emit_label (end_2_label);
24764
24765 }
24766
24767 /* Avoid branch in fixing the byte. */
24768 tmpreg = gen_lowpart (QImode, tmpreg);
24769 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24770 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24771 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24772 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24773
24774 emit_label (end_0_label);
24775 }
24776
24777 /* Expand strlen. */
24778
24779 bool
24780 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24781 {
24782 rtx addr, scratch1, scratch2, scratch3, scratch4;
24783
24784 /* The generic case of the strlen expander is long. Avoid expanding it
24785 unless TARGET_INLINE_ALL_STRINGOPS. */
24786
24787 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24788 && !TARGET_INLINE_ALL_STRINGOPS
24789 && !optimize_insn_for_size_p ()
24790 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24791 return false;
24792
24793 addr = force_reg (Pmode, XEXP (src, 0));
24794 scratch1 = gen_reg_rtx (Pmode);
24795
24796 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24797 && !optimize_insn_for_size_p ())
24798 {
24799 /* Well it seems that some optimizer does not combine a call like
24800 foo(strlen(bar), strlen(bar));
24801 when the move and the subtraction are done here. It does calculate
24802 the length just once when these instructions are done inside of
24803 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
24804 often used and I use one fewer register for the lifetime of
24805 output_strlen_unroll() this is better. */
24806
24807 emit_move_insn (out, addr);
24808
24809 ix86_expand_strlensi_unroll_1 (out, src, align);
24810
24811 /* strlensi_unroll_1 returns the address of the zero at the end of
24812 the string, like memchr(), so compute the length by subtracting
24813 the start address. */
24814 emit_insn (ix86_gen_sub3 (out, out, addr));
24815 }
24816 else
24817 {
24818 rtx unspec;
24819
24820 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24821 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24822 return false;
24823
24824 scratch2 = gen_reg_rtx (Pmode);
24825 scratch3 = gen_reg_rtx (Pmode);
24826 scratch4 = force_reg (Pmode, constm1_rtx);
24827
24828 emit_move_insn (scratch3, addr);
24829 eoschar = force_reg (QImode, eoschar);
24830
24831 src = replace_equiv_address_nv (src, scratch3);
24832
24833 /* If .md starts supporting :P, this can be done in .md. */
24834 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
24835 scratch4), UNSPEC_SCAS);
24836 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
24837 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
24838 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
24839 }
24840 return true;
24841 }
24842
24843 /* For a given symbol (function), construct code to compute the address of its PLT
24844 entry in the large x86-64 PIC model. */
24845 static rtx
24846 construct_plt_address (rtx symbol)
24847 {
24848 rtx tmp, unspec;
24849
24850 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
24851 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
24852 gcc_assert (Pmode == DImode);
24853
24854 tmp = gen_reg_rtx (Pmode);
24855 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
24856
24857 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
24858 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
24859 return tmp;
24860 }
24861
24862 rtx
24863 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
24864 rtx callarg2,
24865 rtx pop, bool sibcall)
24866 {
24867 unsigned int const cregs_size
24868 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
24869 rtx vec[3 + cregs_size];
24870 rtx use = NULL, call;
24871 unsigned int vec_len = 0;
24872
24873 if (pop == const0_rtx)
24874 pop = NULL;
24875 gcc_assert (!TARGET_64BIT || !pop);
24876
24877 if (TARGET_MACHO && !TARGET_64BIT)
24878 {
24879 #if TARGET_MACHO
24880 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
24881 fnaddr = machopic_indirect_call_target (fnaddr);
24882 #endif
24883 }
24884 else
24885 {
24886 /* Static functions and indirect calls don't need the pic register. */
24887 if (flag_pic
24888 && (!TARGET_64BIT
24889 || (ix86_cmodel == CM_LARGE_PIC
24890 && DEFAULT_ABI != MS_ABI))
24891 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24892 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
24893 use_reg (&use, pic_offset_table_rtx);
24894 }
24895
24896 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
24897 {
24898 rtx al = gen_rtx_REG (QImode, AX_REG);
24899 emit_move_insn (al, callarg2);
24900 use_reg (&use, al);
24901 }
24902
24903 if (ix86_cmodel == CM_LARGE_PIC
24904 && !TARGET_PECOFF
24905 && MEM_P (fnaddr)
24906 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24907 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
24908 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
24909 else if (sibcall
24910 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
24911 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
24912 {
24913 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
24914 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
24915 }
24916
24917 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
24918 if (retval)
24919 call = gen_rtx_SET (VOIDmode, retval, call);
24920 vec[vec_len++] = call;
24921
24922 if (pop)
24923 {
24924 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
24925 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
24926 vec[vec_len++] = pop;
24927 }
24928
24929 if (TARGET_64BIT_MS_ABI
24930 && (!callarg2 || INTVAL (callarg2) != -2))
24931 {
24932 unsigned i;
24933
24934 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
24935 UNSPEC_MS_TO_SYSV_CALL);
24936
24937 for (i = 0; i < cregs_size; i++)
24938 {
24939 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
24940 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
24941
24942 vec[vec_len++]
24943 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
24944 }
24945 }
24946
24947 if (vec_len > 1)
24948 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
24949 call = emit_call_insn (call);
24950 if (use)
24951 CALL_INSN_FUNCTION_USAGE (call) = use;
24952
24953 return call;
24954 }
24955
24956 /* Output the assembly for a call instruction. */
24957
24958 const char *
24959 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
24960 {
24961 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
24962 bool seh_nop_p = false;
24963 const char *xasm;
24964
24965 if (SIBLING_CALL_P (insn))
24966 {
24967 if (direct_p)
24968 xasm = "jmp\t%P0";
24969 /* SEH epilogue detection requires the indirect branch case
24970 to include REX.W. */
24971 else if (TARGET_SEH)
24972 xasm = "rex.W jmp %A0";
24973 else
24974 xasm = "jmp\t%A0";
24975
24976 output_asm_insn (xasm, &call_op);
24977 return "";
24978 }
24979
24980 /* SEH unwinding can require an extra nop to be emitted in several
24981 circumstances. Determine if we have one of those. */
24982 if (TARGET_SEH)
24983 {
24984 rtx_insn *i;
24985
24986 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
24987 {
24988 /* If we get to another real insn, we don't need the nop. */
24989 if (INSN_P (i))
24990 break;
24991
24992 /* If we get to the epilogue note, prevent a catch region from
24993 being adjacent to the standard epilogue sequence. If non-call
24994 exceptions are enabled, we'll have done this during epilogue emission. */
24995 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
24996 && !flag_non_call_exceptions
24997 && !can_throw_internal (insn))
24998 {
24999 seh_nop_p = true;
25000 break;
25001 }
25002 }
25003
25004 /* If we didn't find a real insn following the call, prevent the
25005 unwinder from looking into the next function. */
25006 if (i == NULL)
25007 seh_nop_p = true;
25008 }
25009
25010 if (direct_p)
25011 xasm = "call\t%P0";
25012 else
25013 xasm = "call\t%A0";
25014
25015 output_asm_insn (xasm, &call_op);
25016
25017 if (seh_nop_p)
25018 return "nop";
25019
25020 return "";
25021 }
25022 \f
25023 /* Clear stack slot assignments remembered from previous functions.
25024 This is called from INIT_EXPANDERS once before RTL is emitted for each
25025 function. */
25026
25027 static struct machine_function *
25028 ix86_init_machine_status (void)
25029 {
25030 struct machine_function *f;
25031
25032 f = ggc_cleared_alloc<machine_function> ();
25033 f->use_fast_prologue_epilogue_nregs = -1;
25034 f->call_abi = ix86_abi;
25035
25036 return f;
25037 }
25038
25039 /* Return a MEM corresponding to a stack slot with mode MODE.
25040 Allocate a new slot if necessary.
25041
25042 The RTL for a function can have several slots available: N is
25043 which slot to use. */
25044
25045 rtx
25046 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25047 {
25048 struct stack_local_entry *s;
25049
25050 gcc_assert (n < MAX_386_STACK_LOCALS);
25051
25052 for (s = ix86_stack_locals; s; s = s->next)
25053 if (s->mode == mode && s->n == n)
25054 return validize_mem (copy_rtx (s->rtl));
25055
25056 s = ggc_alloc<stack_local_entry> ();
25057 s->n = n;
25058 s->mode = mode;
25059 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25060
25061 s->next = ix86_stack_locals;
25062 ix86_stack_locals = s;
25063 return validize_mem (copy_rtx (s->rtl));
25064 }
25065
25066 static void
25067 ix86_instantiate_decls (void)
25068 {
25069 struct stack_local_entry *s;
25070
25071 for (s = ix86_stack_locals; s; s = s->next)
25072 if (s->rtl != NULL_RTX)
25073 instantiate_decl_rtl (s->rtl);
25074 }
25075 \f
25076 /* Check whether x86 address PARTS is a pc-relative address. */
25077
25078 static bool
25079 rip_relative_addr_p (struct ix86_address *parts)
25080 {
25081 rtx base, index, disp;
25082
25083 base = parts->base;
25084 index = parts->index;
25085 disp = parts->disp;
25086
25087 if (disp && !base && !index)
25088 {
25089 if (TARGET_64BIT)
25090 {
25091 rtx symbol = disp;
25092
25093 if (GET_CODE (disp) == CONST)
25094 symbol = XEXP (disp, 0);
25095 if (GET_CODE (symbol) == PLUS
25096 && CONST_INT_P (XEXP (symbol, 1)))
25097 symbol = XEXP (symbol, 0);
25098
25099 if (GET_CODE (symbol) == LABEL_REF
25100 || (GET_CODE (symbol) == SYMBOL_REF
25101 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25102 || (GET_CODE (symbol) == UNSPEC
25103 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25104 || XINT (symbol, 1) == UNSPEC_PCREL
25105 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25106 return true;
25107 }
25108 }
25109 return false;
25110 }
25111
25112 /* Calculate the length of the memory address in the instruction encoding.
25113 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25114 or other prefixes. We never generate addr32 prefix for LEA insn. */
25115
25116 int
25117 memory_address_length (rtx addr, bool lea)
25118 {
25119 struct ix86_address parts;
25120 rtx base, index, disp;
25121 int len;
25122 int ok;
25123
25124 if (GET_CODE (addr) == PRE_DEC
25125 || GET_CODE (addr) == POST_INC
25126 || GET_CODE (addr) == PRE_MODIFY
25127 || GET_CODE (addr) == POST_MODIFY)
25128 return 0;
25129
25130 ok = ix86_decompose_address (addr, &parts);
25131 gcc_assert (ok);
25132
25133 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25134
25135 /* If this is not LEA instruction, add the length of addr32 prefix. */
25136 if (TARGET_64BIT && !lea
25137 && (SImode_address_operand (addr, VOIDmode)
25138 || (parts.base && GET_MODE (parts.base) == SImode)
25139 || (parts.index && GET_MODE (parts.index) == SImode)))
25140 len++;
25141
25142 base = parts.base;
25143 index = parts.index;
25144 disp = parts.disp;
25145
25146 if (base && GET_CODE (base) == SUBREG)
25147 base = SUBREG_REG (base);
25148 if (index && GET_CODE (index) == SUBREG)
25149 index = SUBREG_REG (index);
25150
25151 gcc_assert (base == NULL_RTX || REG_P (base));
25152 gcc_assert (index == NULL_RTX || REG_P (index));
25153
25154 /* Rule of thumb:
25155 - esp as the base always wants an index,
25156 - ebp as the base always wants a displacement,
25157 - r12 as the base always wants an index,
25158 - r13 as the base always wants a displacement. */
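/* Background for the rules above (standard ModRM/SIB encoding, noted here
   as an aid to the length computation below): an esp/r12 base selects the
   "SIB byte follows" encoding in the ModRM r/m field, so it always costs an
   extra byte, while an ebp/r13 base with mod=00 would mean "disp32" (or
   RIP-relative in 64-bit mode), so a displacement byte must be emitted even
   when it is zero, e.g. 0(%ebp).  */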
25159
25160 /* Register Indirect. */
25161 if (base && !index && !disp)
25162 {
25163 /* esp (for its index) and ebp (for its displacement) need
25164 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25165 code. */
25166 if (base == arg_pointer_rtx
25167 || base == frame_pointer_rtx
25168 || REGNO (base) == SP_REG
25169 || REGNO (base) == BP_REG
25170 || REGNO (base) == R12_REG
25171 || REGNO (base) == R13_REG)
25172 len++;
25173 }
25174
25175 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25176 is not disp32, but disp32(%rip), so for disp32
25177 a SIB byte is needed, unless print_operand_address
25178 optimizes it into disp32(%rip) or (%rip) is implied
25179 by UNSPEC. */
25180 else if (disp && !base && !index)
25181 {
25182 len += 4;
25183 if (rip_relative_addr_p (&parts))
25184 len++;
25185 }
25186 else
25187 {
25188 /* Find the length of the displacement constant. */
25189 if (disp)
25190 {
25191 if (base && satisfies_constraint_K (disp))
25192 len += 1;
25193 else
25194 len += 4;
25195 }
25196 /* ebp always wants a displacement. Similarly r13. */
25197 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25198 len++;
25199
25200 /* An index requires the two-byte modrm form.... */
25201 if (index
25202 /* ...like esp (or r12), which always wants an index. */
25203 || base == arg_pointer_rtx
25204 || base == frame_pointer_rtx
25205 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25206 len++;
25207 }
25208
25209 return len;
25210 }
25211
25212 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25213 is set, expect that the insn has an 8-bit immediate alternative. */
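/* For example (illustrative): "addl $5, %ebx" can use the sign-extended
   8-bit immediate form, so it counts 1 byte here, while "addl $1000, %ebx"
   needs a full 32-bit immediate and counts 4 bytes.  */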
25214 int
25215 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25216 {
25217 int len = 0;
25218 int i;
25219 extract_insn_cached (insn);
25220 for (i = recog_data.n_operands - 1; i >= 0; --i)
25221 if (CONSTANT_P (recog_data.operand[i]))
25222 {
25223 enum attr_mode mode = get_attr_mode (insn);
25224
25225 gcc_assert (!len);
25226 if (shortform && CONST_INT_P (recog_data.operand[i]))
25227 {
25228 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25229 switch (mode)
25230 {
25231 case MODE_QI:
25232 len = 1;
25233 continue;
25234 case MODE_HI:
25235 ival = trunc_int_for_mode (ival, HImode);
25236 break;
25237 case MODE_SI:
25238 ival = trunc_int_for_mode (ival, SImode);
25239 break;
25240 default:
25241 break;
25242 }
25243 if (IN_RANGE (ival, -128, 127))
25244 {
25245 len = 1;
25246 continue;
25247 }
25248 }
25249 switch (mode)
25250 {
25251 case MODE_QI:
25252 len = 1;
25253 break;
25254 case MODE_HI:
25255 len = 2;
25256 break;
25257 case MODE_SI:
25258 len = 4;
25259 break;
25260 /* Immediates for DImode instructions are encoded
25261 as 32-bit sign-extended values. */
25262 case MODE_DI:
25263 len = 4;
25264 break;
25265 default:
25266 fatal_insn ("unknown insn mode", insn);
25267 }
25268 }
25269 return len;
25270 }
25271
25272 /* Compute default value for "length_address" attribute. */
25273 int
25274 ix86_attr_length_address_default (rtx_insn *insn)
25275 {
25276 int i;
25277
25278 if (get_attr_type (insn) == TYPE_LEA)
25279 {
25280 rtx set = PATTERN (insn), addr;
25281
25282 if (GET_CODE (set) == PARALLEL)
25283 set = XVECEXP (set, 0, 0);
25284
25285 gcc_assert (GET_CODE (set) == SET);
25286
25287 addr = SET_SRC (set);
25288
25289 return memory_address_length (addr, true);
25290 }
25291
25292 extract_insn_cached (insn);
25293 for (i = recog_data.n_operands - 1; i >= 0; --i)
25294 if (MEM_P (recog_data.operand[i]))
25295 {
25296 constrain_operands_cached (reload_completed);
25297 if (which_alternative != -1)
25298 {
25299 const char *constraints = recog_data.constraints[i];
25300 int alt = which_alternative;
25301
25302 while (*constraints == '=' || *constraints == '+')
25303 constraints++;
25304 while (alt-- > 0)
25305 while (*constraints++ != ',')
25306 ;
25307 /* Skip ignored operands. */
25308 if (*constraints == 'X')
25309 continue;
25310 }
25311 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25312 }
25313 return 0;
25314 }
25315
25316 /* Compute default value for "length_vex" attribute. It includes
25317 2 or 3 byte VEX prefix and 1 opcode byte. */
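/* For reference (standard VEX encoding): the 2-byte prefix (C5 xx) can only
   express the 0F opcode map with VEX.W clear and without the REX.X/REX.B
   extension bits, while the 3-byte prefix is C4 xx xx.  Hence the checks
   below return 3 + 1 when the 0F map cannot be used, when VEX.W is needed
   (DImode general registers), or when an extended register is used inside
   a memory operand.  */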
25318
25319 int
25320 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
25321 bool has_vex_w)
25322 {
25323 int i;
25324
25325 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
25326 byte VEX prefix. */
25327 if (!has_0f_opcode || has_vex_w)
25328 return 3 + 1;
25329
25330 /* We can always use 2 byte VEX prefix in 32bit. */
25331 if (!TARGET_64BIT)
25332 return 2 + 1;
25333
25334 extract_insn_cached (insn);
25335
25336 for (i = recog_data.n_operands - 1; i >= 0; --i)
25337 if (REG_P (recog_data.operand[i]))
25338 {
25339 /* REX.W bit uses 3 byte VEX prefix. */
25340 if (GET_MODE (recog_data.operand[i]) == DImode
25341 && GENERAL_REG_P (recog_data.operand[i]))
25342 return 3 + 1;
25343 }
25344 else
25345 {
25346 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25347 if (MEM_P (recog_data.operand[i])
25348 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25349 return 3 + 1;
25350 }
25351
25352 return 2 + 1;
25353 }
25354 \f
25355 /* Return the maximum number of instructions a cpu can issue. */
25356
25357 static int
25358 ix86_issue_rate (void)
25359 {
25360 switch (ix86_tune)
25361 {
25362 case PROCESSOR_PENTIUM:
25363 case PROCESSOR_BONNELL:
25364 case PROCESSOR_SILVERMONT:
25365 case PROCESSOR_INTEL:
25366 case PROCESSOR_K6:
25367 case PROCESSOR_BTVER2:
25368 case PROCESSOR_PENTIUM4:
25369 case PROCESSOR_NOCONA:
25370 return 2;
25371
25372 case PROCESSOR_PENTIUMPRO:
25373 case PROCESSOR_ATHLON:
25374 case PROCESSOR_K8:
25375 case PROCESSOR_AMDFAM10:
25376 case PROCESSOR_GENERIC:
25377 case PROCESSOR_BTVER1:
25378 return 3;
25379
25380 case PROCESSOR_BDVER1:
25381 case PROCESSOR_BDVER2:
25382 case PROCESSOR_BDVER3:
25383 case PROCESSOR_BDVER4:
25384 case PROCESSOR_CORE2:
25385 case PROCESSOR_NEHALEM:
25386 case PROCESSOR_SANDYBRIDGE:
25387 case PROCESSOR_HASWELL:
25388 return 4;
25389
25390 default:
25391 return 1;
25392 }
25393 }
25394
25395 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
25396 by DEP_INSN and nothing else set by DEP_INSN. */
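/* Example: for the pair "cmp %eax, %ebx" / "jne .L2" the jump reads only
   the flags produced by the compare, so this returns true and the Pentium
   tuning below treats the dependence as free (the compare pairs with the
   jump).  */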
25397
25398 static bool
25399 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
25400 {
25401 rtx set, set2;
25402
25403 /* Simplify the test for uninteresting insns. */
25404 if (insn_type != TYPE_SETCC
25405 && insn_type != TYPE_ICMOV
25406 && insn_type != TYPE_FCMOV
25407 && insn_type != TYPE_IBR)
25408 return false;
25409
25410 if ((set = single_set (dep_insn)) != 0)
25411 {
25412 set = SET_DEST (set);
25413 set2 = NULL_RTX;
25414 }
25415 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25416 && XVECLEN (PATTERN (dep_insn), 0) == 2
25417 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25418 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25419 {
25420 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25421 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25422 }
25423 else
25424 return false;
25425
25426 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25427 return false;
25428
25429 /* This test is true if the dependent insn reads the flags but
25430 not any other potentially set register. */
25431 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25432 return false;
25433
25434 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25435 return false;
25436
25437 return true;
25438 }
25439
25440 /* Return true iff USE_INSN has a memory address with operands set by
25441 SET_INSN. */
25442
25443 bool
25444 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
25445 {
25446 int i;
25447 extract_insn_cached (use_insn);
25448 for (i = recog_data.n_operands - 1; i >= 0; --i)
25449 if (MEM_P (recog_data.operand[i]))
25450 {
25451 rtx addr = XEXP (recog_data.operand[i], 0);
25452 return modified_in_p (addr, set_insn) != 0;
25453 }
25454 return false;
25455 }
25456
25457 /* Helper function for exact_store_load_dependency.
25458 Return true if addr is found in insn. */
25459 static bool
25460 exact_dependency_1 (rtx addr, rtx insn)
25461 {
25462 enum rtx_code code;
25463 const char *format_ptr;
25464 int i, j;
25465
25466 code = GET_CODE (insn);
25467 switch (code)
25468 {
25469 case MEM:
25470 if (rtx_equal_p (addr, insn))
25471 return true;
25472 break;
25473 case REG:
25474 CASE_CONST_ANY:
25475 case SYMBOL_REF:
25476 case CODE_LABEL:
25477 case PC:
25478 case CC0:
25479 case EXPR_LIST:
25480 return false;
25481 default:
25482 break;
25483 }
25484
25485 format_ptr = GET_RTX_FORMAT (code);
25486 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25487 {
25488 switch (*format_ptr++)
25489 {
25490 case 'e':
25491 if (exact_dependency_1 (addr, XEXP (insn, i)))
25492 return true;
25493 break;
25494 case 'E':
25495 for (j = 0; j < XVECLEN (insn, i); j++)
25496 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25497 return true;
25498 break;
25499 }
25500 }
25501 return false;
25502 }
25503
25504 /* Return true if there is an exact dependency between STORE and LOAD, i.e.
25505 the same memory address is used in both. */
25506 static bool
25507 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
25508 {
25509 rtx set1, set2;
25510
25511 set1 = single_set (store);
25512 if (!set1)
25513 return false;
25514 if (!MEM_P (SET_DEST (set1)))
25515 return false;
25516 set2 = single_set (load);
25517 if (!set2)
25518 return false;
25519 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25520 return true;
25521 return false;
25522 }
25523
25524 static int
25525 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
25526 {
25527 enum attr_type insn_type, dep_insn_type;
25528 enum attr_memory memory;
25529 rtx set, set2;
25530 int dep_insn_code_number;
25531
25532 /* Anti and output dependencies have zero cost on all CPUs. */
25533 if (REG_NOTE_KIND (link) != 0)
25534 return 0;
25535
25536 dep_insn_code_number = recog_memoized (dep_insn);
25537
25538 /* If we can't recognize the insns, we can't really do anything. */
25539 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25540 return cost;
25541
25542 insn_type = get_attr_type (insn);
25543 dep_insn_type = get_attr_type (dep_insn);
25544
25545 switch (ix86_tune)
25546 {
25547 case PROCESSOR_PENTIUM:
25548 /* Address Generation Interlock adds a cycle of latency. */
25549 if (insn_type == TYPE_LEA)
25550 {
25551 rtx addr = PATTERN (insn);
25552
25553 if (GET_CODE (addr) == PARALLEL)
25554 addr = XVECEXP (addr, 0, 0);
25555
25556 gcc_assert (GET_CODE (addr) == SET);
25557
25558 addr = SET_SRC (addr);
25559 if (modified_in_p (addr, dep_insn))
25560 cost += 1;
25561 }
25562 else if (ix86_agi_dependent (dep_insn, insn))
25563 cost += 1;
25564
25565 /* ??? Compares pair with jump/setcc. */
25566 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25567 cost = 0;
25568
25569 /* Floating point stores require value to be ready one cycle earlier. */
25570 if (insn_type == TYPE_FMOV
25571 && get_attr_memory (insn) == MEMORY_STORE
25572 && !ix86_agi_dependent (dep_insn, insn))
25573 cost += 1;
25574 break;
25575
25576 case PROCESSOR_PENTIUMPRO:
25577 /* INT->FP conversion is expensive. */
25578 if (get_attr_fp_int_src (dep_insn))
25579 cost += 5;
25580
25581 /* There is one cycle extra latency between an FP op and a store. */
25582 if (insn_type == TYPE_FMOV
25583 && (set = single_set (dep_insn)) != NULL_RTX
25584 && (set2 = single_set (insn)) != NULL_RTX
25585 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25586 && MEM_P (SET_DEST (set2)))
25587 cost += 1;
25588
25589 memory = get_attr_memory (insn);
25590
25591 /* Show ability of reorder buffer to hide latency of load by executing
25592 in parallel with previous instruction in case
25593 previous instruction is not needed to compute the address. */
25594 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25595 && !ix86_agi_dependent (dep_insn, insn))
25596 {
25597 /* Claim moves to take one cycle, as the core can issue one load
25598 at a time and the next load can start a cycle later. */
25599 if (dep_insn_type == TYPE_IMOV
25600 || dep_insn_type == TYPE_FMOV)
25601 cost = 1;
25602 else if (cost > 1)
25603 cost--;
25604 }
25605 break;
25606
25607 case PROCESSOR_K6:
25608 /* The esp dependency is resolved before
25609 the instruction is really finished. */
25610 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25611 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25612 return 1;
25613
25614 /* INT->FP conversion is expensive. */
25615 if (get_attr_fp_int_src (dep_insn))
25616 cost += 5;
25617
25618 memory = get_attr_memory (insn);
25619
25620 /* Show ability of reorder buffer to hide latency of load by executing
25621 in parallel with previous instruction in case
25622 previous instruction is not needed to compute the address. */
25623 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25624 && !ix86_agi_dependent (dep_insn, insn))
25625 {
25626 /* Claim moves to take one cycle, as the core can issue one load
25627 at a time and the next load can start a cycle later. */
25628 if (dep_insn_type == TYPE_IMOV
25629 || dep_insn_type == TYPE_FMOV)
25630 cost = 1;
25631 else if (cost > 2)
25632 cost -= 2;
25633 else
25634 cost = 1;
25635 }
25636 break;
25637
25638 case PROCESSOR_AMDFAM10:
25639 case PROCESSOR_BDVER1:
25640 case PROCESSOR_BDVER2:
25641 case PROCESSOR_BDVER3:
25642 case PROCESSOR_BDVER4:
25643 case PROCESSOR_BTVER1:
25644 case PROCESSOR_BTVER2:
25645 case PROCESSOR_GENERIC:
25646 /* The stack engine allows push&pop instructions to execute in parallel. */
25647 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25648 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25649 return 0;
25650 /* FALLTHRU */
25651
25652 case PROCESSOR_ATHLON:
25653 case PROCESSOR_K8:
25654 memory = get_attr_memory (insn);
25655
25656 /* Show ability of reorder buffer to hide latency of load by executing
25657 in parallel with previous instruction in case
25658 previous instruction is not needed to compute the address. */
25659 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25660 && !ix86_agi_dependent (dep_insn, insn))
25661 {
25662 enum attr_unit unit = get_attr_unit (insn);
25663 int loadcost = 3;
25664
25665 /* Because of the difference between the length of integer and
25666 floating unit pipeline preparation stages, the memory operands
25667 for floating point are cheaper.
25668
25669 ??? For Athlon the difference is most probably 2. */
25670 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25671 loadcost = 3;
25672 else
25673 loadcost = TARGET_ATHLON ? 2 : 0;
25674
25675 if (cost >= loadcost)
25676 cost -= loadcost;
25677 else
25678 cost = 0;
25679 }
25680 break;
25681
25682 case PROCESSOR_CORE2:
25683 case PROCESSOR_NEHALEM:
25684 case PROCESSOR_SANDYBRIDGE:
25685 case PROCESSOR_HASWELL:
25686 /* The stack engine allows push&pop instructions to execute in parallel. */
25687 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25688 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25689 return 0;
25690
25691 memory = get_attr_memory (insn);
25692
25693 /* Show ability of reorder buffer to hide latency of load by executing
25694 in parallel with previous instruction in case
25695 previous instruction is not needed to compute the address. */
25696 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25697 && !ix86_agi_dependent (dep_insn, insn))
25698 {
25699 if (cost >= 4)
25700 cost -= 4;
25701 else
25702 cost = 0;
25703 }
25704 break;
25705
25706 case PROCESSOR_SILVERMONT:
25707 case PROCESSOR_INTEL:
25708 if (!reload_completed)
25709 return cost;
25710
25711 /* Increase cost of integer loads. */
25712 memory = get_attr_memory (dep_insn);
25713 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25714 {
25715 enum attr_unit unit = get_attr_unit (dep_insn);
25716 if (unit == UNIT_INTEGER && cost == 1)
25717 {
25718 if (memory == MEMORY_LOAD)
25719 cost = 3;
25720 else
25721 {
25722 /* Increase cost of ld/st for short int types only
25723 because of store forwarding issue. */
25724 rtx set = single_set (dep_insn);
25725 if (set && (GET_MODE (SET_DEST (set)) == QImode
25726 || GET_MODE (SET_DEST (set)) == HImode))
25727 {
25728 /* Increase cost of store/load insn if exact
25729 dependence exists and it is load insn. */
25730 enum attr_memory insn_memory = get_attr_memory (insn);
25731 if (insn_memory == MEMORY_LOAD
25732 && exact_store_load_dependency (dep_insn, insn))
25733 cost = 3;
25734 }
25735 }
25736 }
25737 }
25738
25739 default:
25740 break;
25741 }
25742
25743 return cost;
25744 }
25745
25746 /* How many alternative schedules to try. This should be as wide as the
25747 scheduling freedom in the DFA, but no wider. Making this value too
25748 large results in extra work for the scheduler. */
25749
25750 static int
25751 ia32_multipass_dfa_lookahead (void)
25752 {
25753 switch (ix86_tune)
25754 {
25755 case PROCESSOR_PENTIUM:
25756 return 2;
25757
25758 case PROCESSOR_PENTIUMPRO:
25759 case PROCESSOR_K6:
25760 return 1;
25761
25762 case PROCESSOR_BDVER1:
25763 case PROCESSOR_BDVER2:
25764 case PROCESSOR_BDVER3:
25765 case PROCESSOR_BDVER4:
25766 /* We use lookahead value 4 for BD both before and after reload
25767 schedules. Plan is to have value 8 included for O3. */
25768 return 4;
25769
25770 case PROCESSOR_CORE2:
25771 case PROCESSOR_NEHALEM:
25772 case PROCESSOR_SANDYBRIDGE:
25773 case PROCESSOR_HASWELL:
25774 case PROCESSOR_BONNELL:
25775 case PROCESSOR_SILVERMONT:
25776 case PROCESSOR_INTEL:
25777 /* Generally, we want haifa-sched:max_issue() to look ahead as far
25778 as the number of instructions that can be executed in a cycle, i.e.,
25779 issue_rate. I wonder why tuning for many CPUs does not do this. */
25780 if (reload_completed)
25781 return ix86_issue_rate ();
25782 /* Don't use lookahead for pre-reload schedule to save compile time. */
25783 return 0;
25784
25785 default:
25786 return 0;
25787 }
25788 }
25789
25790 /* Return true if target platform supports macro-fusion. */
25791
25792 static bool
25793 ix86_macro_fusion_p ()
25794 {
25795 return TARGET_FUSE_CMP_AND_BRANCH;
25796 }
25797
25798 /* Check whether the current microarchitecture supports macro fusion
25799 for insn pair "CONDGEN + CONDJMP". Refer to
25800 "Intel Architectures Optimization Reference Manual". */
25801
25802 static bool
25803 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
25804 {
25805 rtx src, dest;
25806 enum rtx_code ccode;
25807 rtx compare_set = NULL_RTX, test_if, cond;
25808 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25809
25810 if (!any_condjump_p (condjmp))
25811 return false;
25812
25813 if (get_attr_type (condgen) != TYPE_TEST
25814 && get_attr_type (condgen) != TYPE_ICMP
25815 && get_attr_type (condgen) != TYPE_INCDEC
25816 && get_attr_type (condgen) != TYPE_ALU)
25817 return false;
25818
25819 compare_set = single_set (condgen);
25820 if (compare_set == NULL_RTX
25821 && !TARGET_FUSE_ALU_AND_BRANCH)
25822 return false;
25823
25824 if (compare_set == NULL_RTX)
25825 {
25826 int i;
25827 rtx pat = PATTERN (condgen);
25828 for (i = 0; i < XVECLEN (pat, 0); i++)
25829 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25830 {
25831 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
25832 if (GET_CODE (set_src) == COMPARE)
25833 compare_set = XVECEXP (pat, 0, i);
25834 else
25835 alu_set = XVECEXP (pat, 0, i);
25836 }
25837 }
25838 if (compare_set == NULL_RTX)
25839 return false;
25840 src = SET_SRC (compare_set);
25841 if (GET_CODE (src) != COMPARE)
25842 return false;
25843
25844 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25845 supported. */
25846 if ((MEM_P (XEXP (src, 0))
25847 && CONST_INT_P (XEXP (src, 1)))
25848 || (MEM_P (XEXP (src, 1))
25849 && CONST_INT_P (XEXP (src, 0))))
25850 return false;
25851
25852 /* No fusion for RIP-relative address. */
25853 if (MEM_P (XEXP (src, 0)))
25854 addr = XEXP (XEXP (src, 0), 0);
25855 else if (MEM_P (XEXP (src, 1)))
25856 addr = XEXP (XEXP (src, 1), 0);
25857
25858 if (addr) {
25859 ix86_address parts;
25860 int ok = ix86_decompose_address (addr, &parts);
25861 gcc_assert (ok);
25862
25863 if (rip_relative_addr_p (&parts))
25864 return false;
25865 }
25866
25867 test_if = SET_SRC (pc_set (condjmp));
25868 cond = XEXP (test_if, 0);
25869 ccode = GET_CODE (cond);
25870 /* Check whether the conditional jump uses the Sign or Overflow flags. */
25871 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25872 && (ccode == GE
25873 || ccode == GT
25874 || ccode == LE
25875 || ccode == LT))
25876 return false;
25877
25878 /* Return true for TYPE_TEST and TYPE_ICMP. */
25879 if (get_attr_type (condgen) == TYPE_TEST
25880 || get_attr_type (condgen) == TYPE_ICMP)
25881 return true;
25882
25883 /* The following handles the case of macro-fusion for alu + jmp. */
25884 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
25885 return false;
25886
25887 /* No fusion for alu op with memory destination operand. */
25888 dest = SET_DEST (alu_set);
25889 if (MEM_P (dest))
25890 return false;
25891
25892 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25893 supported. */
25894 if (get_attr_type (condgen) == TYPE_INCDEC
25895 && (ccode == GEU
25896 || ccode == GTU
25897 || ccode == LEU
25898 || ccode == LTU))
25899 return false;
25900
25901 return true;
25902 }
25903
25904 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
25905 execution. It is applied if
25906 (1) IMUL instruction is on the top of list;
25907 (2) there is a single producer of an independent IMUL instruction in the
25908 ready list.
25909 Return index of IMUL producer if it was found and -1 otherwise. */
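/* Illustration: if the insn on top of the ready list is an SImode multiply
   and another ready insn is the sole producer feeding a different,
   independent SImode multiply, that producer's index is returned so the
   caller can move it to the top and keep Atom's pipelined IMUL unit busy.  */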
25910 static int
25911 do_reorder_for_imul (rtx_insn **ready, int n_ready)
25912 {
25913 rtx_insn *insn;
25914 rtx set, insn1, insn2;
25915 sd_iterator_def sd_it;
25916 dep_t dep;
25917 int index = -1;
25918 int i;
25919
25920 if (!TARGET_BONNELL)
25921 return index;
25922
25923 /* Check that IMUL instruction is on the top of ready list. */
25924 insn = ready[n_ready - 1];
25925 set = single_set (insn);
25926 if (!set)
25927 return index;
25928 if (!(GET_CODE (SET_SRC (set)) == MULT
25929 && GET_MODE (SET_SRC (set)) == SImode))
25930 return index;
25931
25932 /* Search for producer of independent IMUL instruction. */
25933 for (i = n_ready - 2; i >= 0; i--)
25934 {
25935 insn = ready[i];
25936 if (!NONDEBUG_INSN_P (insn))
25937 continue;
25938 /* Skip IMUL instruction. */
25939 insn2 = PATTERN (insn);
25940 if (GET_CODE (insn2) == PARALLEL)
25941 insn2 = XVECEXP (insn2, 0, 0);
25942 if (GET_CODE (insn2) == SET
25943 && GET_CODE (SET_SRC (insn2)) == MULT
25944 && GET_MODE (SET_SRC (insn2)) == SImode)
25945 continue;
25946
25947 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
25948 {
25949 rtx con;
25950 con = DEP_CON (dep);
25951 if (!NONDEBUG_INSN_P (con))
25952 continue;
25953 insn1 = PATTERN (con);
25954 if (GET_CODE (insn1) == PARALLEL)
25955 insn1 = XVECEXP (insn1, 0, 0);
25956
25957 if (GET_CODE (insn1) == SET
25958 && GET_CODE (SET_SRC (insn1)) == MULT
25959 && GET_MODE (SET_SRC (insn1)) == SImode)
25960 {
25961 sd_iterator_def sd_it1;
25962 dep_t dep1;
25963 /* Check if there is no other dependee for IMUL. */
25964 index = i;
25965 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
25966 {
25967 rtx pro;
25968 pro = DEP_PRO (dep1);
25969 if (!NONDEBUG_INSN_P (pro))
25970 continue;
25971 if (pro != insn)
25972 index = -1;
25973 }
25974 if (index >= 0)
25975 break;
25976 }
25977 }
25978 if (index >= 0)
25979 break;
25980 }
25981 return index;
25982 }
25983
25984 /* Try to find the best candidate on the top of ready list if two insns
25985 have the same priority - candidate is best if its dependees were
25986 scheduled earlier. Applied for Silvermont only.
25987 Return true if top 2 insns must be interchanged. */
25988 static bool
25989 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
25990 {
25991 rtx_insn *top = ready[n_ready - 1];
25992 rtx_insn *next = ready[n_ready - 2];
25993 rtx set;
25994 sd_iterator_def sd_it;
25995 dep_t dep;
25996 int clock1 = -1;
25997 int clock2 = -1;
25998 #define INSN_TICK(INSN) (HID (INSN)->tick)
25999
26000 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26001 return false;
26002
26003 if (!NONDEBUG_INSN_P (top))
26004 return false;
26005 if (!NONJUMP_INSN_P (top))
26006 return false;
26007 if (!NONDEBUG_INSN_P (next))
26008 return false;
26009 if (!NONJUMP_INSN_P (next))
26010 return false;
26011 set = single_set (top);
26012 if (!set)
26013 return false;
26014 set = single_set (next);
26015 if (!set)
26016 return false;
26017
26018 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26019 {
26020 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26021 return false;
26022 /* Determine the winner more precisely. */
26023 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26024 {
26025 rtx pro;
26026 pro = DEP_PRO (dep);
26027 if (!NONDEBUG_INSN_P (pro))
26028 continue;
26029 if (INSN_TICK (pro) > clock1)
26030 clock1 = INSN_TICK (pro);
26031 }
26032 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26033 {
26034 rtx pro;
26035 pro = DEP_PRO (dep);
26036 if (!NONDEBUG_INSN_P (pro))
26037 continue;
26038 if (INSN_TICK (pro) > clock2)
26039 clock2 = INSN_TICK (pro);
26040 }
26041
26042 if (clock1 == clock2)
26043 {
26044 /* Determine winner - load must win. */
26045 enum attr_memory memory1, memory2;
26046 memory1 = get_attr_memory (top);
26047 memory2 = get_attr_memory (next);
26048 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26049 return true;
26050 }
26051 return (bool) (clock2 < clock1);
26052 }
26053 return false;
26054 #undef INSN_TICK
26055 }
26056
26057 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26058 Return issue rate. */
26059 static int
26060 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26061 int *pn_ready, int clock_var)
26062 {
26063 int issue_rate = -1;
26064 int n_ready = *pn_ready;
26065 int i;
26066 rtx_insn *insn;
26067 int index = -1;
26068
26069 /* Set up issue rate. */
26070 issue_rate = ix86_issue_rate ();
26071
26072 /* Do reordering for BONNELL/SILVERMONT only. */
26073 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26074 return issue_rate;
26075
26076 /* Nothing to do if ready list contains only 1 instruction. */
26077 if (n_ready <= 1)
26078 return issue_rate;
26079
26080 /* Do reordering for the post-reload scheduler only. */
26081 if (!reload_completed)
26082 return issue_rate;
26083
26084 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26085 {
26086 if (sched_verbose > 1)
26087 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26088 INSN_UID (ready[index]));
26089
26090 /* Put IMUL producer (ready[index]) at the top of ready list. */
26091 insn = ready[index];
26092 for (i = index; i < n_ready - 1; i++)
26093 ready[i] = ready[i + 1];
26094 ready[n_ready - 1] = insn;
26095 return issue_rate;
26096 }
26097 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26098 {
26099 if (sched_verbose > 1)
26100 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26101 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26102 /* Swap 2 top elements of ready list. */
26103 insn = ready[n_ready - 1];
26104 ready[n_ready - 1] = ready[n_ready - 2];
26105 ready[n_ready - 2] = insn;
26106 }
26107 return issue_rate;
26108 }
26109
26110 static bool
26111 ix86_class_likely_spilled_p (reg_class_t);
26112
26113 /* Return true if the lhs of INSN is a HW function argument register and set
26114 is_spilled to true if it is a likely spilled HW register. */
26115 static bool
26116 insn_is_function_arg (rtx insn, bool* is_spilled)
26117 {
26118 rtx dst;
26119
26120 if (!NONDEBUG_INSN_P (insn))
26121 return false;
26122 /* Call instructions are not movable; ignore them. */
26123 if (CALL_P (insn))
26124 return false;
26125 insn = PATTERN (insn);
26126 if (GET_CODE (insn) == PARALLEL)
26127 insn = XVECEXP (insn, 0, 0);
26128 if (GET_CODE (insn) != SET)
26129 return false;
26130 dst = SET_DEST (insn);
26131 if (REG_P (dst) && HARD_REGISTER_P (dst)
26132 && ix86_function_arg_regno_p (REGNO (dst)))
26133 {
26134 /* Is it likely spilled HW register? */
26135 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26136 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26137 *is_spilled = true;
26138 return true;
26139 }
26140 return false;
26141 }
26142
26143 /* Add output dependencies for a chain of adjacent function arguments, but only
26144 if there is a move to a likely spilled HW register. Return the first argument
26145 if at least one dependence was added, or NULL otherwise. */
26146 static rtx_insn *
26147 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26148 {
26149 rtx_insn *insn;
26150 rtx_insn *last = call;
26151 rtx_insn *first_arg = NULL;
26152 bool is_spilled = false;
26153
26154 head = PREV_INSN (head);
26155
26156 /* Find the argument-passing instruction nearest to the call. */
26157 while (true)
26158 {
26159 last = PREV_INSN (last);
26160 if (last == head)
26161 return NULL;
26162 if (!NONDEBUG_INSN_P (last))
26163 continue;
26164 if (insn_is_function_arg (last, &is_spilled))
26165 break;
26166 return NULL;
26167 }
26168
26169 first_arg = last;
26170 while (true)
26171 {
26172 insn = PREV_INSN (last);
26173 if (!INSN_P (insn))
26174 break;
26175 if (insn == head)
26176 break;
26177 if (!NONDEBUG_INSN_P (insn))
26178 {
26179 last = insn;
26180 continue;
26181 }
26182 if (insn_is_function_arg (insn, &is_spilled))
26183 {
26184 /* Add an output dependence between two function arguments if the chain
26185 of output arguments contains likely spilled HW registers. */
26186 if (is_spilled)
26187 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26188 first_arg = last = insn;
26189 }
26190 else
26191 break;
26192 }
26193 if (!is_spilled)
26194 return NULL;
26195 return first_arg;
26196 }
26197
26198 /* Add output or anti dependency from insn to first_arg to restrict its code
26199 motion. */
26200 static void
26201 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26202 {
26203 rtx set;
26204 rtx tmp;
26205
26206 set = single_set (insn);
26207 if (!set)
26208 return;
26209 tmp = SET_DEST (set);
26210 if (REG_P (tmp))
26211 {
26212 /* Add output dependency to the first function argument. */
26213 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26214 return;
26215 }
26216 /* Add anti dependency. */
26217 add_dependence (first_arg, insn, REG_DEP_ANTI);
26218 }
26219
26220 /* Avoid cross-block motion of a function argument by adding a dependency
26221 from the first non-jump instruction in BB. */
26222 static void
26223 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26224 {
26225 rtx_insn *insn = BB_END (bb);
26226
26227 while (insn)
26228 {
26229 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26230 {
26231 rtx set = single_set (insn);
26232 if (set)
26233 {
26234 avoid_func_arg_motion (arg, insn);
26235 return;
26236 }
26237 }
26238 if (insn == BB_HEAD (bb))
26239 return;
26240 insn = PREV_INSN (insn);
26241 }
26242 }
26243
26244 /* Hook for pre-reload schedule - avoid motion of function arguments
26245 passed in likely spilled HW registers. */
26246 static void
26247 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26248 {
26249 rtx_insn *insn;
26250 rtx_insn *first_arg = NULL;
26251 if (reload_completed)
26252 return;
26253 while (head != tail && DEBUG_INSN_P (head))
26254 head = NEXT_INSN (head);
26255 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26256 if (INSN_P (insn) && CALL_P (insn))
26257 {
26258 first_arg = add_parameter_dependencies (insn, head);
26259 if (first_arg)
26260 {
26261 /* Add a dependee for the first argument to predecessors only if the
26262 region contains more than one block. */
26263 basic_block bb = BLOCK_FOR_INSN (insn);
26264 int rgn = CONTAINING_RGN (bb->index);
26265 int nr_blks = RGN_NR_BLOCKS (rgn);
26266 /* Skip trivial regions and region head blocks that can have
26267 predecessors outside of region. */
26268 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26269 {
26270 edge e;
26271 edge_iterator ei;
26272
26273 /* Regions are SCCs with the exception of selective
26274 scheduling with pipelining of outer blocks enabled.
26275 So also check that immediate predecessors of a non-head
26276 block are in the same region. */
26277 FOR_EACH_EDGE (e, ei, bb->preds)
26278 {
26279 /* Avoid creating loop-carried dependencies by using the
26280 topological ordering in the region. */
26281 if (rgn == CONTAINING_RGN (e->src->index)
26282 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26283 add_dependee_for_func_arg (first_arg, e->src);
26284 }
26285 }
26286 insn = first_arg;
26287 if (insn == head)
26288 break;
26289 }
26290 }
26291 else if (first_arg)
26292 avoid_func_arg_motion (first_arg, insn);
26293 }
26294
26295 /* Hook for pre-reload schedule - set priority of moves from likely spilled
26296 HW registers to maximum, to schedule them as soon as possible. These are
26297 moves from function argument registers at the top of the function entry
26298 and moves from function return value registers after call. */
26299 static int
26300 ix86_adjust_priority (rtx_insn *insn, int priority)
26301 {
26302 rtx set;
26303
26304 if (reload_completed)
26305 return priority;
26306
26307 if (!NONDEBUG_INSN_P (insn))
26308 return priority;
26309
26310 set = single_set (insn);
26311 if (set)
26312 {
26313 rtx tmp = SET_SRC (set);
26314 if (REG_P (tmp)
26315 && HARD_REGISTER_P (tmp)
26316 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26317 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26318 return current_sched_info->sched_max_insns_priority;
26319 }
26320
26321 return priority;
26322 }
26323
26324 /* Model decoder of Core 2/i7.
26325 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
26326 track the instruction fetch block boundaries and make sure that long
26327 (9+ bytes) instructions are assigned to D0. */
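/* Worked example with the parameter values set below (16-byte ifetch block,
   at most 6 insns per cycle, 8-byte limit for secondary decoders): after a
   7-byte and a 6-byte insn have been issued in a cycle, a further 4-byte
   insn no longer fits in the 16-byte block and is filtered out until the
   next cycle; likewise a 9-byte insn is only accepted as the first insn of
   the group, since it exceeds the limit of the secondary decoders.  */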
26328
26329 /* Maximum length of an insn that can be handled by
26330 a secondary decoder unit. '8' for Core 2/i7. */
26331 static int core2i7_secondary_decoder_max_insn_size;
26332
26333 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26334 '16' for Core 2/i7. */
26335 static int core2i7_ifetch_block_size;
26336
26337 /* Maximum number of instructions decoder can handle per cycle.
26338 '6' for Core 2/i7. */
26339 static int core2i7_ifetch_block_max_insns;
26340
26341 typedef struct ix86_first_cycle_multipass_data_ *
26342 ix86_first_cycle_multipass_data_t;
26343 typedef const struct ix86_first_cycle_multipass_data_ *
26344 const_ix86_first_cycle_multipass_data_t;
26345
26346 /* A variable to store target state across calls to max_issue within
26347 one cycle. */
26348 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26349 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26350
26351 /* Initialize DATA. */
26352 static void
26353 core2i7_first_cycle_multipass_init (void *_data)
26354 {
26355 ix86_first_cycle_multipass_data_t data
26356 = (ix86_first_cycle_multipass_data_t) _data;
26357
26358 data->ifetch_block_len = 0;
26359 data->ifetch_block_n_insns = 0;
26360 data->ready_try_change = NULL;
26361 data->ready_try_change_size = 0;
26362 }
26363
26364 /* Advancing the cycle; reset ifetch block counts. */
26365 static void
26366 core2i7_dfa_post_advance_cycle (void)
26367 {
26368 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26369
26370 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26371
26372 data->ifetch_block_len = 0;
26373 data->ifetch_block_n_insns = 0;
26374 }
26375
26376 static int min_insn_size (rtx);
26377
26378 /* Filter out insns from ready_try that the core will not be able to issue
26379 on current cycle due to decoder. */
26380 static void
26381 core2i7_first_cycle_multipass_filter_ready_try
26382 (const_ix86_first_cycle_multipass_data_t data,
26383 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
26384 {
26385 while (n_ready--)
26386 {
26387 rtx insn;
26388 int insn_size;
26389
26390 if (ready_try[n_ready])
26391 continue;
26392
26393 insn = get_ready_element (n_ready);
26394 insn_size = min_insn_size (insn);
26395
26396 if (/* If this is too long an insn for a secondary decoder ... */
26397 (!first_cycle_insn_p
26398 && insn_size > core2i7_secondary_decoder_max_insn_size)
26399 /* ... or it would not fit into the ifetch block ... */
26400 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26401 /* ... or the decoder is full already ... */
26402 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26403 /* ... mask the insn out. */
26404 {
26405 ready_try[n_ready] = 1;
26406
26407 if (data->ready_try_change)
26408 bitmap_set_bit (data->ready_try_change, n_ready);
26409 }
26410 }
26411 }
26412
26413 /* Prepare for a new round of multipass lookahead scheduling. */
26414 static void
26415 core2i7_first_cycle_multipass_begin (void *_data,
26416 signed char *ready_try, int n_ready,
26417 bool first_cycle_insn_p)
26418 {
26419 ix86_first_cycle_multipass_data_t data
26420 = (ix86_first_cycle_multipass_data_t) _data;
26421 const_ix86_first_cycle_multipass_data_t prev_data
26422 = ix86_first_cycle_multipass_data;
26423
26424 /* Restore the state from the end of the previous round. */
26425 data->ifetch_block_len = prev_data->ifetch_block_len;
26426 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26427
26428 /* Filter instructions that cannot be issued on current cycle due to
26429 decoder restrictions. */
26430 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26431 first_cycle_insn_p);
26432 }
26433
26434 /* INSN is being issued in current solution. Account for its impact on
26435 the decoder model. */
26436 static void
26437 core2i7_first_cycle_multipass_issue (void *_data,
26438 signed char *ready_try, int n_ready,
26439 rtx_insn *insn, const void *_prev_data)
26440 {
26441 ix86_first_cycle_multipass_data_t data
26442 = (ix86_first_cycle_multipass_data_t) _data;
26443 const_ix86_first_cycle_multipass_data_t prev_data
26444 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26445
26446 int insn_size = min_insn_size (insn);
26447
26448 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26449 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26450 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26451 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26452
26453 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26454 if (!data->ready_try_change)
26455 {
26456 data->ready_try_change = sbitmap_alloc (n_ready);
26457 data->ready_try_change_size = n_ready;
26458 }
26459 else if (data->ready_try_change_size < n_ready)
26460 {
26461 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26462 n_ready, 0);
26463 data->ready_try_change_size = n_ready;
26464 }
26465 bitmap_clear (data->ready_try_change);
26466
26467 /* Filter out insns from ready_try that the core will not be able to issue
26468 on current cycle due to decoder. */
26469 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26470 false);
26471 }
26472
26473 /* Revert the effect on ready_try. */
26474 static void
26475 core2i7_first_cycle_multipass_backtrack (const void *_data,
26476 signed char *ready_try,
26477 int n_ready ATTRIBUTE_UNUSED)
26478 {
26479 const_ix86_first_cycle_multipass_data_t data
26480 = (const_ix86_first_cycle_multipass_data_t) _data;
26481 unsigned int i = 0;
26482 sbitmap_iterator sbi;
26483
26484 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26485 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26486 {
26487 ready_try[i] = 0;
26488 }
26489 }
26490
26491 /* Save the result of multipass lookahead scheduling for the next round. */
26492 static void
26493 core2i7_first_cycle_multipass_end (const void *_data)
26494 {
26495 const_ix86_first_cycle_multipass_data_t data
26496 = (const_ix86_first_cycle_multipass_data_t) _data;
26497 ix86_first_cycle_multipass_data_t next_data
26498 = ix86_first_cycle_multipass_data;
26499
26500 if (data != NULL)
26501 {
26502 next_data->ifetch_block_len = data->ifetch_block_len;
26503 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26504 }
26505 }
26506
26507 /* Deallocate target data. */
26508 static void
26509 core2i7_first_cycle_multipass_fini (void *_data)
26510 {
26511 ix86_first_cycle_multipass_data_t data
26512 = (ix86_first_cycle_multipass_data_t) _data;
26513
26514 if (data->ready_try_change)
26515 {
26516 sbitmap_free (data->ready_try_change);
26517 data->ready_try_change = NULL;
26518 data->ready_try_change_size = 0;
26519 }
26520 }
26521
26522 /* Prepare for scheduling pass. */
26523 static void
26524 ix86_sched_init_global (FILE *, int, int)
26525 {
26526 /* Install scheduling hooks for current CPU. Some of these hooks are used
26527 in time-critical parts of the scheduler, so we only set them up when
26528 they are actually used. */
26529 switch (ix86_tune)
26530 {
26531 case PROCESSOR_CORE2:
26532 case PROCESSOR_NEHALEM:
26533 case PROCESSOR_SANDYBRIDGE:
26534 case PROCESSOR_HASWELL:
26535 /* Do not perform multipass scheduling for pre-reload schedule
26536 to save compile time. */
26537 if (reload_completed)
26538 {
26539 targetm.sched.dfa_post_advance_cycle
26540 = core2i7_dfa_post_advance_cycle;
26541 targetm.sched.first_cycle_multipass_init
26542 = core2i7_first_cycle_multipass_init;
26543 targetm.sched.first_cycle_multipass_begin
26544 = core2i7_first_cycle_multipass_begin;
26545 targetm.sched.first_cycle_multipass_issue
26546 = core2i7_first_cycle_multipass_issue;
26547 targetm.sched.first_cycle_multipass_backtrack
26548 = core2i7_first_cycle_multipass_backtrack;
26549 targetm.sched.first_cycle_multipass_end
26550 = core2i7_first_cycle_multipass_end;
26551 targetm.sched.first_cycle_multipass_fini
26552 = core2i7_first_cycle_multipass_fini;
26553
26554 /* Set decoder parameters. */
26555 core2i7_secondary_decoder_max_insn_size = 8;
26556 core2i7_ifetch_block_size = 16;
26557 core2i7_ifetch_block_max_insns = 6;
26558 break;
26559 }
26560 /* ... Fall through ... */
26561 default:
26562 targetm.sched.dfa_post_advance_cycle = NULL;
26563 targetm.sched.first_cycle_multipass_init = NULL;
26564 targetm.sched.first_cycle_multipass_begin = NULL;
26565 targetm.sched.first_cycle_multipass_issue = NULL;
26566 targetm.sched.first_cycle_multipass_backtrack = NULL;
26567 targetm.sched.first_cycle_multipass_end = NULL;
26568 targetm.sched.first_cycle_multipass_fini = NULL;
26569 break;
26570 }
26571 }
26572
26573 \f
26574 /* Compute the alignment given to a constant that is being placed in memory.
26575 EXP is the constant and ALIGN is the alignment that the object would
26576 ordinarily have.
26577 The value of this function is used instead of that alignment to align
26578 the object. */
26579
26580 int
26581 ix86_constant_alignment (tree exp, int align)
26582 {
26583 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26584 || TREE_CODE (exp) == INTEGER_CST)
26585 {
26586 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26587 return 64;
26588 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26589 return 128;
26590 }
26591 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26592 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26593 return BITS_PER_WORD;
26594
26595 return align;
26596 }
26597
26598 /* Compute the alignment for a static variable.
26599 TYPE is the data type, and ALIGN is the alignment that
26600 the object would ordinarily have. The value of this function is used
26601 instead of that alignment to align the object. */
26602
26603 int
26604 ix86_data_alignment (tree type, int align, bool opt)
26605 {
26606 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26607 for symbols from other compilation units or symbols that don't need
26608 to bind locally. In order to preserve some ABI compatibility with
26609 those compilers, ensure we don't decrease alignment from what we
26610 used to assume. */
26611
26612 int max_align_compat
26613 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26614
26615 /* A data structure equal to or greater than the size of a cache line
26616 (64 bytes in the Pentium 4 and other recent Intel processors, including
26617 processors based on Intel Core microarchitecture) should be aligned
26618 so that its base address is a multiple of a cache line size. */
26619
26620 int max_align
26621 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
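/* For instance, assuming a 64-byte prefetch block in the tuning costs, this
   yields 64 * 8 = 512 bits (one cache line), capped by MAX_OFILE_ALIGNMENT.  */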
26622
26623 if (max_align < BITS_PER_WORD)
26624 max_align = BITS_PER_WORD;
26625
26626 if (opt
26627 && AGGREGATE_TYPE_P (type)
26628 && TYPE_SIZE (type)
26629 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26630 {
26631 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
26632 && align < max_align_compat)
26633 align = max_align_compat;
26634 if (wi::geu_p (TYPE_SIZE (type), max_align)
26635 && align < max_align)
26636 align = max_align;
26637 }
26638
26639 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
26640 to a 16-byte boundary. */
26641 if (TARGET_64BIT)
26642 {
26643 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26644 && TYPE_SIZE (type)
26645 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26646 && wi::geu_p (TYPE_SIZE (type), 128)
26647 && align < 128)
26648 return 128;
26649 }
26650
26651 if (!opt)
26652 return align;
26653
26654 if (TREE_CODE (type) == ARRAY_TYPE)
26655 {
26656 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26657 return 64;
26658 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26659 return 128;
26660 }
26661 else if (TREE_CODE (type) == COMPLEX_TYPE)
26662 {
26663
26664 if (TYPE_MODE (type) == DCmode && align < 64)
26665 return 64;
26666 if ((TYPE_MODE (type) == XCmode
26667 || TYPE_MODE (type) == TCmode) && align < 128)
26668 return 128;
26669 }
26670 else if ((TREE_CODE (type) == RECORD_TYPE
26671 || TREE_CODE (type) == UNION_TYPE
26672 || TREE_CODE (type) == QUAL_UNION_TYPE)
26673 && TYPE_FIELDS (type))
26674 {
26675 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26676 return 64;
26677 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26678 return 128;
26679 }
26680 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26681 || TREE_CODE (type) == INTEGER_TYPE)
26682 {
26683 if (TYPE_MODE (type) == DFmode && align < 64)
26684 return 64;
26685 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26686 return 128;
26687 }
26688
26689 return align;
26690 }
26691
26692 /* Compute the alignment for a local variable or a stack slot. EXP is
26693 the data type or decl itself, MODE is the widest mode available and
26694 ALIGN is the alignment that the object would ordinarily have. The
26695 value of this macro is used instead of that alignment to align the
26696 object. */
26697
26698 unsigned int
26699 ix86_local_alignment (tree exp, enum machine_mode mode,
26700 unsigned int align)
26701 {
26702 tree type, decl;
26703
26704 if (exp && DECL_P (exp))
26705 {
26706 type = TREE_TYPE (exp);
26707 decl = exp;
26708 }
26709 else
26710 {
26711 type = exp;
26712 decl = NULL;
26713 }
26714
26715 /* Don't do dynamic stack realignment for long long objects with
26716 -mpreferred-stack-boundary=2. */
26717 if (!TARGET_64BIT
26718 && align == 64
26719 && ix86_preferred_stack_boundary < 64
26720 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26721 && (!type || !TYPE_USER_ALIGN (type))
26722 && (!decl || !DECL_USER_ALIGN (decl)))
26723 align = 32;
26724
26725 /* If TYPE is NULL, we are allocating a stack slot for caller-save
26726 register in MODE. We will return the largest alignment of XF
26727 and DF. */
26728 if (!type)
26729 {
26730 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26731 align = GET_MODE_ALIGNMENT (DFmode);
26732 return align;
26733 }
26734
26735 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
26736 to a 16-byte boundary. The exact wording is:
26737
26738 An array uses the same alignment as its elements, except that a local or
26739 global array variable of length at least 16 bytes or
26740 a C99 variable-length array variable always has alignment of at least 16 bytes.
26741
26742 This was added to allow use of aligned SSE instructions on arrays. The
26743 rule is meant for static storage (where the compiler cannot do the analysis
26744 by itself). We follow it for automatic variables only when convenient.
26745 We fully control everything in the function being compiled, and functions
26746 from other units cannot rely on the alignment.
26747
26748 Exclude the va_list type. It is the common case of a local array where
26749 we cannot benefit from the alignment.
26750
26751 TODO: Probably one should optimize for size only when var is not escaping. */
26752 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26753 && TARGET_SSE)
26754 {
26755 if (AGGREGATE_TYPE_P (type)
26756 && (va_list_type_node == NULL_TREE
26757 || (TYPE_MAIN_VARIANT (type)
26758 != TYPE_MAIN_VARIANT (va_list_type_node)))
26759 && TYPE_SIZE (type)
26760 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26761 && wi::geu_p (TYPE_SIZE (type), 16)
26762 && align < 128)
26763 return 128;
26764 }
26765 if (TREE_CODE (type) == ARRAY_TYPE)
26766 {
26767 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26768 return 64;
26769 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26770 return 128;
26771 }
26772 else if (TREE_CODE (type) == COMPLEX_TYPE)
26773 {
26774 if (TYPE_MODE (type) == DCmode && align < 64)
26775 return 64;
26776 if ((TYPE_MODE (type) == XCmode
26777 || TYPE_MODE (type) == TCmode) && align < 128)
26778 return 128;
26779 }
26780 else if ((TREE_CODE (type) == RECORD_TYPE
26781 || TREE_CODE (type) == UNION_TYPE
26782 || TREE_CODE (type) == QUAL_UNION_TYPE)
26783 && TYPE_FIELDS (type))
26784 {
26785 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26786 return 64;
26787 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26788 return 128;
26789 }
26790 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26791 || TREE_CODE (type) == INTEGER_TYPE)
26792 {
26793
26794 if (TYPE_MODE (type) == DFmode && align < 64)
26795 return 64;
26796 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26797 return 128;
26798 }
26799 return align;
26800 }
26801
26802 /* Compute the minimum required alignment for dynamic stack realignment
26803 purposes for a local variable, parameter or a stack slot. EXP is
26804 the data type or decl itself, MODE is its mode and ALIGN is the
26805 alignment that the object would ordinarily have. */
26806
26807 unsigned int
26808 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26809 unsigned int align)
26810 {
26811 tree type, decl;
26812
26813 if (exp && DECL_P (exp))
26814 {
26815 type = TREE_TYPE (exp);
26816 decl = exp;
26817 }
26818 else
26819 {
26820 type = exp;
26821 decl = NULL;
26822 }
26823
26824 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
26825 return align;
26826
26827 /* Don't do dynamic stack realignment for long long objects with
26828 -mpreferred-stack-boundary=2. */
26829 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
26830 && (!type || !TYPE_USER_ALIGN (type))
26831 && (!decl || !DECL_USER_ALIGN (decl)))
26832 return 32;
26833
26834 return align;
26835 }
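/* Editorial illustration, not part of the original source: with -m32 and
   -mpreferred-stack-boundary=2, a local "long long x" would ordinarily
   require 64-bit alignment and hence dynamic stack realignment; the check
   above lowers its minimum alignment to 32 bits instead, unless the type
   or the decl carries a user-specified alignment.  */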
26836 \f
26837 /* Find a location for the static chain incoming to a nested function.
26838 This is a register, unless all free registers are used by arguments. */
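/* Editorial illustration, not part of the original source: for a GNU C
   nested function such as

     int outer (int x)
     {
       int inner (void) { return x; }
       return inner ();
     }

   the static chain is the pointer to OUTER's frame that INNER needs in
   order to reach X. On 64-bit targets it is passed in r10; on 32-bit
   targets it normally goes in ECX, with EAX or a stack slot used for the
   calling conventions handled below.  */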
26839
26840 static rtx
26841 ix86_static_chain (const_tree fndecl, bool incoming_p)
26842 {
26843 unsigned regno;
26844
26845 if (!DECL_STATIC_CHAIN (fndecl))
26846 return NULL;
26847
26848 if (TARGET_64BIT)
26849 {
26850 /* We always use R10 in 64-bit mode. */
26851 regno = R10_REG;
26852 }
26853 else
26854 {
26855 tree fntype;
26856 unsigned int ccvt;
26857
26858 /* By default in 32-bit mode we use ECX to pass the static chain. */
26859 regno = CX_REG;
26860
26861 fntype = TREE_TYPE (fndecl);
26862 ccvt = ix86_get_callcvt (fntype);
26863 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
26864 {
26865 /* Fastcall functions use ecx/edx for arguments, which leaves
26866 us with EAX for the static chain.
26867 Thiscall functions use ecx for arguments, which also
26868 leaves us with EAX for the static chain. */
26869 regno = AX_REG;
26870 }
26871 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
26872 {
26873 /* Thiscall functions use ecx for arguments, which leaves
26874 us with EAX and EDX for the static chain.
26875 We use EAX for ABI compatibility. */
26876 regno = AX_REG;
26877 }
26878 else if (ix86_function_regparm (fntype, fndecl) == 3)
26879 {
26880 /* For regparm 3, we have no free call-clobbered registers in
26881 which to store the static chain. In order to implement this,
26882 we have the trampoline push the static chain to the stack.
26883 However, we can't push a value below the return address when
26884 we call the nested function directly, so we have to use an
26885 alternate entry point. For this we use ESI, and have the
26886 alternate entry point push ESI, so that things appear the
26887 same once we're executing the nested function. */
26888 if (incoming_p)
26889 {
26890 if (fndecl == current_function_decl)
26891 ix86_static_chain_on_stack = true;
26892 return gen_frame_mem (SImode,
26893 plus_constant (Pmode,
26894 arg_pointer_rtx, -8));
26895 }
26896 regno = SI_REG;
26897 }
26898 }
26899
26900 return gen_rtx_REG (Pmode, regno);
26901 }
26902
26903 /* Emit RTL insns to initialize the variable parts of a trampoline.
26904 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26905 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26906 to be passed to the target function. */
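/* Editorial illustration, not part of the original source: when TARGET_64BIT
   and ptr_mode == DImode, the stores below assemble the trampoline as

     49 bb <fnaddr, 8 bytes>   movabs $fnaddr, %r11
     49 ba <chain, 8 bytes>    movabs $chain,  %r10
     49 ff e3                  jmp    *%r11
     90                        nop (pads the final 32-bit store)

   24 bytes in total, which the gcc_assert at the end checks against
   TRAMPOLINE_SIZE.  */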
26907
26908 static void
26909 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
26910 {
26911 rtx mem, fnaddr;
26912 int opcode;
26913 int offset = 0;
26914
26915 fnaddr = XEXP (DECL_RTL (fndecl), 0);
26916
26917 if (TARGET_64BIT)
26918 {
26919 int size;
26920
26921 /* Load the function address into r11. Try to load the address using
26922 the shorter movl instead of movabs. We may want to support
26923 movq for kernel mode, but the kernel does not use trampolines at
26924 the moment. FNADDR is a 32-bit address and may not be in
26925 DImode when ptr_mode == SImode. Always use movl in this
26926 case. */
26927 if (ptr_mode == SImode
26928 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
26929 {
26930 fnaddr = copy_addr_to_reg (fnaddr);
26931
26932 mem = adjust_address (m_tramp, HImode, offset);
26933 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
26934
26935 mem = adjust_address (m_tramp, SImode, offset + 2);
26936 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
26937 offset += 6;
26938 }
26939 else
26940 {
26941 mem = adjust_address (m_tramp, HImode, offset);
26942 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
26943
26944 mem = adjust_address (m_tramp, DImode, offset + 2);
26945 emit_move_insn (mem, fnaddr);
26946 offset += 10;
26947 }
26948
26949 /* Load the static chain into r10 using movabs. Use the shorter movl
26950 instead of movabs when ptr_mode == SImode. */
26951 if (ptr_mode == SImode)
26952 {
26953 opcode = 0xba41;
26954 size = 6;
26955 }
26956 else
26957 {
26958 opcode = 0xba49;
26959 size = 10;
26960 }
26961
26962 mem = adjust_address (m_tramp, HImode, offset);
26963 emit_move_insn (mem, gen_int_mode (opcode, HImode));
26964
26965 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
26966 emit_move_insn (mem, chain_value);
26967 offset += size;
26968
26969 /* Jump to r11; the last (unused) byte is a nop, only there to
26970 pad the write out to a single 32-bit store. */
26971 mem = adjust_address (m_tramp, SImode, offset);
26972 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
26973 offset += 4;
26974 }
26975 else
26976 {
26977 rtx disp, chain;
26978
26979 /* Depending on the static chain location, either load a register
26980 with a constant, or push the constant to the stack. All of the
26981 instructions are the same size. */
26982 chain = ix86_static_chain (fndecl, true);
26983 if (REG_P (chain))
26984 {
26985 switch (REGNO (chain))
26986 {
26987 case AX_REG:
26988 opcode = 0xb8; break;
26989 case CX_REG:
26990 opcode = 0xb9; break;
26991 default:
26992 gcc_unreachable ();
26993 }
26994 }
26995 else
26996 opcode = 0x68;
26997
26998 mem = adjust_address (m_tramp, QImode, offset);
26999 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27000
27001 mem = adjust_address (m_tramp, SImode, offset + 1);
27002 emit_move_insn (mem, chain_value);
27003 offset += 5;
27004
27005 mem = adjust_address (m_tramp, QImode, offset);
27006 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27007
27008 mem = adjust_address (m_tramp, SImode, offset + 1);
27009
27010 /* Compute the offset from the end of the jmp to the target function.
27011 When the trampoline stores the static chain on the stack, we need
27012 to skip the first insn, which pushes the (call-saved) static chain
27013 register; this push is 1 byte. */
27014 offset += 5;
27015 disp = expand_binop (SImode, sub_optab, fnaddr,
27016 plus_constant (Pmode, XEXP (m_tramp, 0),
27017 offset - (MEM_P (chain) ? 1 : 0)),
27018 NULL_RTX, 1, OPTAB_DIRECT);
27019 emit_move_insn (mem, disp);
27020 }
27021
27022 gcc_assert (offset <= TRAMPOLINE_SIZE);
27023
27024 #ifdef HAVE_ENABLE_EXECUTE_STACK
27025 #ifdef CHECK_EXECUTE_STACK_ENABLED
27026 if (CHECK_EXECUTE_STACK_ENABLED)
27027 #endif
27028 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27029 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27030 #endif
27031 }
27032 \f
27033 /* The following file contains several enumerations and data structures
27034 built from the definitions in i386-builtin-types.def. */
27035
27036 #include "i386-builtin-types.inc"
27037
27038 /* Table for the ix86 builtin non-function types. */
27039 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27040
27041 /* Retrieve an element from the above table, building some of
27042 the types lazily. */
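/* Editorial sketch, not part of the original source: a caller requests a
   type by its generated enumerator, e.g.

     tree v4sf_type = ix86_get_builtin_type (IX86_BT_V4SF);

   where IX86_BT_V4SF is assumed to be one of the enumerators produced
   from i386-builtin-types.def. The first call builds the vector (or
   pointer) type and caches it in ix86_builtin_type_tab; later calls
   return the cached tree.  */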
27043
27044 static tree
27045 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27046 {
27047 unsigned int index;
27048 tree type, itype;
27049
27050 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27051
27052 type = ix86_builtin_type_tab[(int) tcode];
27053 if (type != NULL)
27054 return type;
27055
27056 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27057 if (tcode <= IX86_BT_LAST_VECT)
27058 {
27059 enum machine_mode mode;
27060
27061 index = tcode - IX86_BT_LAST_PRIM - 1;
27062 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27063 mode = ix86_builtin_type_vect_mode[index];
27064
27065 type = build_vector_type_for_mode (itype, mode);
27066 }
27067 else
27068 {
27069 int quals;
27070
27071 index = tcode - IX86_BT_LAST_VECT - 1;
27072 if (tcode <= IX86_BT_LAST_PTR)
27073 quals = TYPE_UNQUALIFIED;
27074 else
27075 quals = TYPE_QUAL_CONST;
27076
27077 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27078 if (quals != TYPE_UNQUALIFIED)
27079 itype = build_qualified_type (itype, quals);
27080
27081 type = build_pointer_type (itype);
27082 }
27083
27084 ix86_builtin_type_tab[(int) tcode] = type;
27085 return type;
27086 }
27087
27088 /* Table for the ix86 builtin function types. */
27089 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27090
27091 /* Retrieve an element from the above table, building some of
27092 the types lazily. */
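/* Editorial sketch, not part of the original source: each function-type
   enumerator indexes a slice of ix86_builtin_func_args laid out as the
   return type followed by the argument types, so an enumerator assumed to
   be named like V4SF_FTYPE_V4SF_V4SF is turned into the tree for
   "v4sf (v4sf, v4sf)" by walking its slice backwards and consing the
   argument list onto void_list_node.  */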
27093
27094 static tree
27095 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27096 {
27097 tree type;
27098
27099 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27100
27101 type = ix86_builtin_func_type_tab[(int) tcode];
27102 if (type != NULL)
27103 return type;
27104
27105 if (tcode <= IX86_BT_LAST_FUNC)
27106 {
27107 unsigned start = ix86_builtin_func_start[(int) tcode];
27108 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27109 tree rtype, atype, args = void_list_node;
27110 unsigned i;
27111
27112 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27113 for (i = after - 1; i > start; --i)
27114 {
27115 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27116 args = tree_cons (NULL, atype, args);
27117 }
27118
27119 type = build_function_type (rtype, args);
27120 }
27121 else
27122 {
27123 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27124 enum ix86_builtin_func_type icode;
27125
27126 icode = ix86_builtin_func_alias_base[index];
27127 type = ix86_get_builtin_func_type (icode);
27128 }
27129
27130 ix86_builtin_func_type_tab[(int) tcode] = type;
27131 return type;
27132 }
27133
27134
27135 /* Codes for all the SSE/MMX builtins. */
27136 enum ix86_builtins
27137 {
27138 IX86_BUILTIN_ADDPS,
27139 IX86_BUILTIN_ADDSS,
27140 IX86_BUILTIN_DIVPS,
27141 IX86_BUILTIN_DIVSS,
27142 IX86_BUILTIN_MULPS,
27143 IX86_BUILTIN_MULSS,
27144 IX86_BUILTIN_SUBPS,
27145 IX86_BUILTIN_SUBSS,
27146
27147 IX86_BUILTIN_CMPEQPS,
27148 IX86_BUILTIN_CMPLTPS,
27149 IX86_BUILTIN_CMPLEPS,
27150 IX86_BUILTIN_CMPGTPS,
27151 IX86_BUILTIN_CMPGEPS,
27152 IX86_BUILTIN_CMPNEQPS,
27153 IX86_BUILTIN_CMPNLTPS,
27154 IX86_BUILTIN_CMPNLEPS,
27155 IX86_BUILTIN_CMPNGTPS,
27156 IX86_BUILTIN_CMPNGEPS,
27157 IX86_BUILTIN_CMPORDPS,
27158 IX86_BUILTIN_CMPUNORDPS,
27159 IX86_BUILTIN_CMPEQSS,
27160 IX86_BUILTIN_CMPLTSS,
27161 IX86_BUILTIN_CMPLESS,
27162 IX86_BUILTIN_CMPNEQSS,
27163 IX86_BUILTIN_CMPNLTSS,
27164 IX86_BUILTIN_CMPNLESS,
27165 IX86_BUILTIN_CMPORDSS,
27166 IX86_BUILTIN_CMPUNORDSS,
27167
27168 IX86_BUILTIN_COMIEQSS,
27169 IX86_BUILTIN_COMILTSS,
27170 IX86_BUILTIN_COMILESS,
27171 IX86_BUILTIN_COMIGTSS,
27172 IX86_BUILTIN_COMIGESS,
27173 IX86_BUILTIN_COMINEQSS,
27174 IX86_BUILTIN_UCOMIEQSS,
27175 IX86_BUILTIN_UCOMILTSS,
27176 IX86_BUILTIN_UCOMILESS,
27177 IX86_BUILTIN_UCOMIGTSS,
27178 IX86_BUILTIN_UCOMIGESS,
27179 IX86_BUILTIN_UCOMINEQSS,
27180
27181 IX86_BUILTIN_CVTPI2PS,
27182 IX86_BUILTIN_CVTPS2PI,
27183 IX86_BUILTIN_CVTSI2SS,
27184 IX86_BUILTIN_CVTSI642SS,
27185 IX86_BUILTIN_CVTSS2SI,
27186 IX86_BUILTIN_CVTSS2SI64,
27187 IX86_BUILTIN_CVTTPS2PI,
27188 IX86_BUILTIN_CVTTSS2SI,
27189 IX86_BUILTIN_CVTTSS2SI64,
27190
27191 IX86_BUILTIN_MAXPS,
27192 IX86_BUILTIN_MAXSS,
27193 IX86_BUILTIN_MINPS,
27194 IX86_BUILTIN_MINSS,
27195
27196 IX86_BUILTIN_LOADUPS,
27197 IX86_BUILTIN_STOREUPS,
27198 IX86_BUILTIN_MOVSS,
27199
27200 IX86_BUILTIN_MOVHLPS,
27201 IX86_BUILTIN_MOVLHPS,
27202 IX86_BUILTIN_LOADHPS,
27203 IX86_BUILTIN_LOADLPS,
27204 IX86_BUILTIN_STOREHPS,
27205 IX86_BUILTIN_STORELPS,
27206
27207 IX86_BUILTIN_MASKMOVQ,
27208 IX86_BUILTIN_MOVMSKPS,
27209 IX86_BUILTIN_PMOVMSKB,
27210
27211 IX86_BUILTIN_MOVNTPS,
27212 IX86_BUILTIN_MOVNTQ,
27213
27214 IX86_BUILTIN_LOADDQU,
27215 IX86_BUILTIN_STOREDQU,
27216
27217 IX86_BUILTIN_PACKSSWB,
27218 IX86_BUILTIN_PACKSSDW,
27219 IX86_BUILTIN_PACKUSWB,
27220
27221 IX86_BUILTIN_PADDB,
27222 IX86_BUILTIN_PADDW,
27223 IX86_BUILTIN_PADDD,
27224 IX86_BUILTIN_PADDQ,
27225 IX86_BUILTIN_PADDSB,
27226 IX86_BUILTIN_PADDSW,
27227 IX86_BUILTIN_PADDUSB,
27228 IX86_BUILTIN_PADDUSW,
27229 IX86_BUILTIN_PSUBB,
27230 IX86_BUILTIN_PSUBW,
27231 IX86_BUILTIN_PSUBD,
27232 IX86_BUILTIN_PSUBQ,
27233 IX86_BUILTIN_PSUBSB,
27234 IX86_BUILTIN_PSUBSW,
27235 IX86_BUILTIN_PSUBUSB,
27236 IX86_BUILTIN_PSUBUSW,
27237
27238 IX86_BUILTIN_PAND,
27239 IX86_BUILTIN_PANDN,
27240 IX86_BUILTIN_POR,
27241 IX86_BUILTIN_PXOR,
27242
27243 IX86_BUILTIN_PAVGB,
27244 IX86_BUILTIN_PAVGW,
27245
27246 IX86_BUILTIN_PCMPEQB,
27247 IX86_BUILTIN_PCMPEQW,
27248 IX86_BUILTIN_PCMPEQD,
27249 IX86_BUILTIN_PCMPGTB,
27250 IX86_BUILTIN_PCMPGTW,
27251 IX86_BUILTIN_PCMPGTD,
27252
27253 IX86_BUILTIN_PMADDWD,
27254
27255 IX86_BUILTIN_PMAXSW,
27256 IX86_BUILTIN_PMAXUB,
27257 IX86_BUILTIN_PMINSW,
27258 IX86_BUILTIN_PMINUB,
27259
27260 IX86_BUILTIN_PMULHUW,
27261 IX86_BUILTIN_PMULHW,
27262 IX86_BUILTIN_PMULLW,
27263
27264 IX86_BUILTIN_PSADBW,
27265 IX86_BUILTIN_PSHUFW,
27266
27267 IX86_BUILTIN_PSLLW,
27268 IX86_BUILTIN_PSLLD,
27269 IX86_BUILTIN_PSLLQ,
27270 IX86_BUILTIN_PSRAW,
27271 IX86_BUILTIN_PSRAD,
27272 IX86_BUILTIN_PSRLW,
27273 IX86_BUILTIN_PSRLD,
27274 IX86_BUILTIN_PSRLQ,
27275 IX86_BUILTIN_PSLLWI,
27276 IX86_BUILTIN_PSLLDI,
27277 IX86_BUILTIN_PSLLQI,
27278 IX86_BUILTIN_PSRAWI,
27279 IX86_BUILTIN_PSRADI,
27280 IX86_BUILTIN_PSRLWI,
27281 IX86_BUILTIN_PSRLDI,
27282 IX86_BUILTIN_PSRLQI,
27283
27284 IX86_BUILTIN_PUNPCKHBW,
27285 IX86_BUILTIN_PUNPCKHWD,
27286 IX86_BUILTIN_PUNPCKHDQ,
27287 IX86_BUILTIN_PUNPCKLBW,
27288 IX86_BUILTIN_PUNPCKLWD,
27289 IX86_BUILTIN_PUNPCKLDQ,
27290
27291 IX86_BUILTIN_SHUFPS,
27292
27293 IX86_BUILTIN_RCPPS,
27294 IX86_BUILTIN_RCPSS,
27295 IX86_BUILTIN_RSQRTPS,
27296 IX86_BUILTIN_RSQRTPS_NR,
27297 IX86_BUILTIN_RSQRTSS,
27298 IX86_BUILTIN_RSQRTF,
27299 IX86_BUILTIN_SQRTPS,
27300 IX86_BUILTIN_SQRTPS_NR,
27301 IX86_BUILTIN_SQRTSS,
27302
27303 IX86_BUILTIN_UNPCKHPS,
27304 IX86_BUILTIN_UNPCKLPS,
27305
27306 IX86_BUILTIN_ANDPS,
27307 IX86_BUILTIN_ANDNPS,
27308 IX86_BUILTIN_ORPS,
27309 IX86_BUILTIN_XORPS,
27310
27311 IX86_BUILTIN_EMMS,
27312 IX86_BUILTIN_LDMXCSR,
27313 IX86_BUILTIN_STMXCSR,
27314 IX86_BUILTIN_SFENCE,
27315
27316 IX86_BUILTIN_FXSAVE,
27317 IX86_BUILTIN_FXRSTOR,
27318 IX86_BUILTIN_FXSAVE64,
27319 IX86_BUILTIN_FXRSTOR64,
27320
27321 IX86_BUILTIN_XSAVE,
27322 IX86_BUILTIN_XRSTOR,
27323 IX86_BUILTIN_XSAVE64,
27324 IX86_BUILTIN_XRSTOR64,
27325
27326 IX86_BUILTIN_XSAVEOPT,
27327 IX86_BUILTIN_XSAVEOPT64,
27328
27329 IX86_BUILTIN_XSAVEC,
27330 IX86_BUILTIN_XSAVEC64,
27331
27332 IX86_BUILTIN_XSAVES,
27333 IX86_BUILTIN_XRSTORS,
27334 IX86_BUILTIN_XSAVES64,
27335 IX86_BUILTIN_XRSTORS64,
27336
27337 /* 3DNow! Original */
27338 IX86_BUILTIN_FEMMS,
27339 IX86_BUILTIN_PAVGUSB,
27340 IX86_BUILTIN_PF2ID,
27341 IX86_BUILTIN_PFACC,
27342 IX86_BUILTIN_PFADD,
27343 IX86_BUILTIN_PFCMPEQ,
27344 IX86_BUILTIN_PFCMPGE,
27345 IX86_BUILTIN_PFCMPGT,
27346 IX86_BUILTIN_PFMAX,
27347 IX86_BUILTIN_PFMIN,
27348 IX86_BUILTIN_PFMUL,
27349 IX86_BUILTIN_PFRCP,
27350 IX86_BUILTIN_PFRCPIT1,
27351 IX86_BUILTIN_PFRCPIT2,
27352 IX86_BUILTIN_PFRSQIT1,
27353 IX86_BUILTIN_PFRSQRT,
27354 IX86_BUILTIN_PFSUB,
27355 IX86_BUILTIN_PFSUBR,
27356 IX86_BUILTIN_PI2FD,
27357 IX86_BUILTIN_PMULHRW,
27358
27359 /* 3DNow! Athlon Extensions */
27360 IX86_BUILTIN_PF2IW,
27361 IX86_BUILTIN_PFNACC,
27362 IX86_BUILTIN_PFPNACC,
27363 IX86_BUILTIN_PI2FW,
27364 IX86_BUILTIN_PSWAPDSI,
27365 IX86_BUILTIN_PSWAPDSF,
27366
27367 /* SSE2 */
27368 IX86_BUILTIN_ADDPD,
27369 IX86_BUILTIN_ADDSD,
27370 IX86_BUILTIN_DIVPD,
27371 IX86_BUILTIN_DIVSD,
27372 IX86_BUILTIN_MULPD,
27373 IX86_BUILTIN_MULSD,
27374 IX86_BUILTIN_SUBPD,
27375 IX86_BUILTIN_SUBSD,
27376
27377 IX86_BUILTIN_CMPEQPD,
27378 IX86_BUILTIN_CMPLTPD,
27379 IX86_BUILTIN_CMPLEPD,
27380 IX86_BUILTIN_CMPGTPD,
27381 IX86_BUILTIN_CMPGEPD,
27382 IX86_BUILTIN_CMPNEQPD,
27383 IX86_BUILTIN_CMPNLTPD,
27384 IX86_BUILTIN_CMPNLEPD,
27385 IX86_BUILTIN_CMPNGTPD,
27386 IX86_BUILTIN_CMPNGEPD,
27387 IX86_BUILTIN_CMPORDPD,
27388 IX86_BUILTIN_CMPUNORDPD,
27389 IX86_BUILTIN_CMPEQSD,
27390 IX86_BUILTIN_CMPLTSD,
27391 IX86_BUILTIN_CMPLESD,
27392 IX86_BUILTIN_CMPNEQSD,
27393 IX86_BUILTIN_CMPNLTSD,
27394 IX86_BUILTIN_CMPNLESD,
27395 IX86_BUILTIN_CMPORDSD,
27396 IX86_BUILTIN_CMPUNORDSD,
27397
27398 IX86_BUILTIN_COMIEQSD,
27399 IX86_BUILTIN_COMILTSD,
27400 IX86_BUILTIN_COMILESD,
27401 IX86_BUILTIN_COMIGTSD,
27402 IX86_BUILTIN_COMIGESD,
27403 IX86_BUILTIN_COMINEQSD,
27404 IX86_BUILTIN_UCOMIEQSD,
27405 IX86_BUILTIN_UCOMILTSD,
27406 IX86_BUILTIN_UCOMILESD,
27407 IX86_BUILTIN_UCOMIGTSD,
27408 IX86_BUILTIN_UCOMIGESD,
27409 IX86_BUILTIN_UCOMINEQSD,
27410
27411 IX86_BUILTIN_MAXPD,
27412 IX86_BUILTIN_MAXSD,
27413 IX86_BUILTIN_MINPD,
27414 IX86_BUILTIN_MINSD,
27415
27416 IX86_BUILTIN_ANDPD,
27417 IX86_BUILTIN_ANDNPD,
27418 IX86_BUILTIN_ORPD,
27419 IX86_BUILTIN_XORPD,
27420
27421 IX86_BUILTIN_SQRTPD,
27422 IX86_BUILTIN_SQRTSD,
27423
27424 IX86_BUILTIN_UNPCKHPD,
27425 IX86_BUILTIN_UNPCKLPD,
27426
27427 IX86_BUILTIN_SHUFPD,
27428
27429 IX86_BUILTIN_LOADUPD,
27430 IX86_BUILTIN_STOREUPD,
27431 IX86_BUILTIN_MOVSD,
27432
27433 IX86_BUILTIN_LOADHPD,
27434 IX86_BUILTIN_LOADLPD,
27435
27436 IX86_BUILTIN_CVTDQ2PD,
27437 IX86_BUILTIN_CVTDQ2PS,
27438
27439 IX86_BUILTIN_CVTPD2DQ,
27440 IX86_BUILTIN_CVTPD2PI,
27441 IX86_BUILTIN_CVTPD2PS,
27442 IX86_BUILTIN_CVTTPD2DQ,
27443 IX86_BUILTIN_CVTTPD2PI,
27444
27445 IX86_BUILTIN_CVTPI2PD,
27446 IX86_BUILTIN_CVTSI2SD,
27447 IX86_BUILTIN_CVTSI642SD,
27448
27449 IX86_BUILTIN_CVTSD2SI,
27450 IX86_BUILTIN_CVTSD2SI64,
27451 IX86_BUILTIN_CVTSD2SS,
27452 IX86_BUILTIN_CVTSS2SD,
27453 IX86_BUILTIN_CVTTSD2SI,
27454 IX86_BUILTIN_CVTTSD2SI64,
27455
27456 IX86_BUILTIN_CVTPS2DQ,
27457 IX86_BUILTIN_CVTPS2PD,
27458 IX86_BUILTIN_CVTTPS2DQ,
27459
27460 IX86_BUILTIN_MOVNTI,
27461 IX86_BUILTIN_MOVNTI64,
27462 IX86_BUILTIN_MOVNTPD,
27463 IX86_BUILTIN_MOVNTDQ,
27464
27465 IX86_BUILTIN_MOVQ128,
27466
27467 /* SSE2 MMX */
27468 IX86_BUILTIN_MASKMOVDQU,
27469 IX86_BUILTIN_MOVMSKPD,
27470 IX86_BUILTIN_PMOVMSKB128,
27471
27472 IX86_BUILTIN_PACKSSWB128,
27473 IX86_BUILTIN_PACKSSDW128,
27474 IX86_BUILTIN_PACKUSWB128,
27475
27476 IX86_BUILTIN_PADDB128,
27477 IX86_BUILTIN_PADDW128,
27478 IX86_BUILTIN_PADDD128,
27479 IX86_BUILTIN_PADDQ128,
27480 IX86_BUILTIN_PADDSB128,
27481 IX86_BUILTIN_PADDSW128,
27482 IX86_BUILTIN_PADDUSB128,
27483 IX86_BUILTIN_PADDUSW128,
27484 IX86_BUILTIN_PSUBB128,
27485 IX86_BUILTIN_PSUBW128,
27486 IX86_BUILTIN_PSUBD128,
27487 IX86_BUILTIN_PSUBQ128,
27488 IX86_BUILTIN_PSUBSB128,
27489 IX86_BUILTIN_PSUBSW128,
27490 IX86_BUILTIN_PSUBUSB128,
27491 IX86_BUILTIN_PSUBUSW128,
27492
27493 IX86_BUILTIN_PAND128,
27494 IX86_BUILTIN_PANDN128,
27495 IX86_BUILTIN_POR128,
27496 IX86_BUILTIN_PXOR128,
27497
27498 IX86_BUILTIN_PAVGB128,
27499 IX86_BUILTIN_PAVGW128,
27500
27501 IX86_BUILTIN_PCMPEQB128,
27502 IX86_BUILTIN_PCMPEQW128,
27503 IX86_BUILTIN_PCMPEQD128,
27504 IX86_BUILTIN_PCMPGTB128,
27505 IX86_BUILTIN_PCMPGTW128,
27506 IX86_BUILTIN_PCMPGTD128,
27507
27508 IX86_BUILTIN_PMADDWD128,
27509
27510 IX86_BUILTIN_PMAXSW128,
27511 IX86_BUILTIN_PMAXUB128,
27512 IX86_BUILTIN_PMINSW128,
27513 IX86_BUILTIN_PMINUB128,
27514
27515 IX86_BUILTIN_PMULUDQ,
27516 IX86_BUILTIN_PMULUDQ128,
27517 IX86_BUILTIN_PMULHUW128,
27518 IX86_BUILTIN_PMULHW128,
27519 IX86_BUILTIN_PMULLW128,
27520
27521 IX86_BUILTIN_PSADBW128,
27522 IX86_BUILTIN_PSHUFHW,
27523 IX86_BUILTIN_PSHUFLW,
27524 IX86_BUILTIN_PSHUFD,
27525
27526 IX86_BUILTIN_PSLLDQI128,
27527 IX86_BUILTIN_PSLLWI128,
27528 IX86_BUILTIN_PSLLDI128,
27529 IX86_BUILTIN_PSLLQI128,
27530 IX86_BUILTIN_PSRAWI128,
27531 IX86_BUILTIN_PSRADI128,
27532 IX86_BUILTIN_PSRLDQI128,
27533 IX86_BUILTIN_PSRLWI128,
27534 IX86_BUILTIN_PSRLDI128,
27535 IX86_BUILTIN_PSRLQI128,
27536
27537 IX86_BUILTIN_PSLLDQ128,
27538 IX86_BUILTIN_PSLLW128,
27539 IX86_BUILTIN_PSLLD128,
27540 IX86_BUILTIN_PSLLQ128,
27541 IX86_BUILTIN_PSRAW128,
27542 IX86_BUILTIN_PSRAD128,
27543 IX86_BUILTIN_PSRLW128,
27544 IX86_BUILTIN_PSRLD128,
27545 IX86_BUILTIN_PSRLQ128,
27546
27547 IX86_BUILTIN_PUNPCKHBW128,
27548 IX86_BUILTIN_PUNPCKHWD128,
27549 IX86_BUILTIN_PUNPCKHDQ128,
27550 IX86_BUILTIN_PUNPCKHQDQ128,
27551 IX86_BUILTIN_PUNPCKLBW128,
27552 IX86_BUILTIN_PUNPCKLWD128,
27553 IX86_BUILTIN_PUNPCKLDQ128,
27554 IX86_BUILTIN_PUNPCKLQDQ128,
27555
27556 IX86_BUILTIN_CLFLUSH,
27557 IX86_BUILTIN_MFENCE,
27558 IX86_BUILTIN_LFENCE,
27559 IX86_BUILTIN_PAUSE,
27560
27561 IX86_BUILTIN_FNSTENV,
27562 IX86_BUILTIN_FLDENV,
27563 IX86_BUILTIN_FNSTSW,
27564 IX86_BUILTIN_FNCLEX,
27565
27566 IX86_BUILTIN_BSRSI,
27567 IX86_BUILTIN_BSRDI,
27568 IX86_BUILTIN_RDPMC,
27569 IX86_BUILTIN_RDTSC,
27570 IX86_BUILTIN_RDTSCP,
27571 IX86_BUILTIN_ROLQI,
27572 IX86_BUILTIN_ROLHI,
27573 IX86_BUILTIN_RORQI,
27574 IX86_BUILTIN_RORHI,
27575
27576 /* SSE3. */
27577 IX86_BUILTIN_ADDSUBPS,
27578 IX86_BUILTIN_HADDPS,
27579 IX86_BUILTIN_HSUBPS,
27580 IX86_BUILTIN_MOVSHDUP,
27581 IX86_BUILTIN_MOVSLDUP,
27582 IX86_BUILTIN_ADDSUBPD,
27583 IX86_BUILTIN_HADDPD,
27584 IX86_BUILTIN_HSUBPD,
27585 IX86_BUILTIN_LDDQU,
27586
27587 IX86_BUILTIN_MONITOR,
27588 IX86_BUILTIN_MWAIT,
27589
27590 /* SSSE3. */
27591 IX86_BUILTIN_PHADDW,
27592 IX86_BUILTIN_PHADDD,
27593 IX86_BUILTIN_PHADDSW,
27594 IX86_BUILTIN_PHSUBW,
27595 IX86_BUILTIN_PHSUBD,
27596 IX86_BUILTIN_PHSUBSW,
27597 IX86_BUILTIN_PMADDUBSW,
27598 IX86_BUILTIN_PMULHRSW,
27599 IX86_BUILTIN_PSHUFB,
27600 IX86_BUILTIN_PSIGNB,
27601 IX86_BUILTIN_PSIGNW,
27602 IX86_BUILTIN_PSIGND,
27603 IX86_BUILTIN_PALIGNR,
27604 IX86_BUILTIN_PABSB,
27605 IX86_BUILTIN_PABSW,
27606 IX86_BUILTIN_PABSD,
27607
27608 IX86_BUILTIN_PHADDW128,
27609 IX86_BUILTIN_PHADDD128,
27610 IX86_BUILTIN_PHADDSW128,
27611 IX86_BUILTIN_PHSUBW128,
27612 IX86_BUILTIN_PHSUBD128,
27613 IX86_BUILTIN_PHSUBSW128,
27614 IX86_BUILTIN_PMADDUBSW128,
27615 IX86_BUILTIN_PMULHRSW128,
27616 IX86_BUILTIN_PSHUFB128,
27617 IX86_BUILTIN_PSIGNB128,
27618 IX86_BUILTIN_PSIGNW128,
27619 IX86_BUILTIN_PSIGND128,
27620 IX86_BUILTIN_PALIGNR128,
27621 IX86_BUILTIN_PABSB128,
27622 IX86_BUILTIN_PABSW128,
27623 IX86_BUILTIN_PABSD128,
27624
27625 /* AMDFAM10 - SSE4A New Instructions. */
27626 IX86_BUILTIN_MOVNTSD,
27627 IX86_BUILTIN_MOVNTSS,
27628 IX86_BUILTIN_EXTRQI,
27629 IX86_BUILTIN_EXTRQ,
27630 IX86_BUILTIN_INSERTQI,
27631 IX86_BUILTIN_INSERTQ,
27632
27633 /* SSE4.1. */
27634 IX86_BUILTIN_BLENDPD,
27635 IX86_BUILTIN_BLENDPS,
27636 IX86_BUILTIN_BLENDVPD,
27637 IX86_BUILTIN_BLENDVPS,
27638 IX86_BUILTIN_PBLENDVB128,
27639 IX86_BUILTIN_PBLENDW128,
27640
27641 IX86_BUILTIN_DPPD,
27642 IX86_BUILTIN_DPPS,
27643
27644 IX86_BUILTIN_INSERTPS128,
27645
27646 IX86_BUILTIN_MOVNTDQA,
27647 IX86_BUILTIN_MPSADBW128,
27648 IX86_BUILTIN_PACKUSDW128,
27649 IX86_BUILTIN_PCMPEQQ,
27650 IX86_BUILTIN_PHMINPOSUW128,
27651
27652 IX86_BUILTIN_PMAXSB128,
27653 IX86_BUILTIN_PMAXSD128,
27654 IX86_BUILTIN_PMAXUD128,
27655 IX86_BUILTIN_PMAXUW128,
27656
27657 IX86_BUILTIN_PMINSB128,
27658 IX86_BUILTIN_PMINSD128,
27659 IX86_BUILTIN_PMINUD128,
27660 IX86_BUILTIN_PMINUW128,
27661
27662 IX86_BUILTIN_PMOVSXBW128,
27663 IX86_BUILTIN_PMOVSXBD128,
27664 IX86_BUILTIN_PMOVSXBQ128,
27665 IX86_BUILTIN_PMOVSXWD128,
27666 IX86_BUILTIN_PMOVSXWQ128,
27667 IX86_BUILTIN_PMOVSXDQ128,
27668
27669 IX86_BUILTIN_PMOVZXBW128,
27670 IX86_BUILTIN_PMOVZXBD128,
27671 IX86_BUILTIN_PMOVZXBQ128,
27672 IX86_BUILTIN_PMOVZXWD128,
27673 IX86_BUILTIN_PMOVZXWQ128,
27674 IX86_BUILTIN_PMOVZXDQ128,
27675
27676 IX86_BUILTIN_PMULDQ128,
27677 IX86_BUILTIN_PMULLD128,
27678
27679 IX86_BUILTIN_ROUNDSD,
27680 IX86_BUILTIN_ROUNDSS,
27681
27682 IX86_BUILTIN_ROUNDPD,
27683 IX86_BUILTIN_ROUNDPS,
27684
27685 IX86_BUILTIN_FLOORPD,
27686 IX86_BUILTIN_CEILPD,
27687 IX86_BUILTIN_TRUNCPD,
27688 IX86_BUILTIN_RINTPD,
27689 IX86_BUILTIN_ROUNDPD_AZ,
27690
27691 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27692 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27693 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27694
27695 IX86_BUILTIN_FLOORPS,
27696 IX86_BUILTIN_CEILPS,
27697 IX86_BUILTIN_TRUNCPS,
27698 IX86_BUILTIN_RINTPS,
27699 IX86_BUILTIN_ROUNDPS_AZ,
27700
27701 IX86_BUILTIN_FLOORPS_SFIX,
27702 IX86_BUILTIN_CEILPS_SFIX,
27703 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27704
27705 IX86_BUILTIN_PTESTZ,
27706 IX86_BUILTIN_PTESTC,
27707 IX86_BUILTIN_PTESTNZC,
27708
27709 IX86_BUILTIN_VEC_INIT_V2SI,
27710 IX86_BUILTIN_VEC_INIT_V4HI,
27711 IX86_BUILTIN_VEC_INIT_V8QI,
27712 IX86_BUILTIN_VEC_EXT_V2DF,
27713 IX86_BUILTIN_VEC_EXT_V2DI,
27714 IX86_BUILTIN_VEC_EXT_V4SF,
27715 IX86_BUILTIN_VEC_EXT_V4SI,
27716 IX86_BUILTIN_VEC_EXT_V8HI,
27717 IX86_BUILTIN_VEC_EXT_V2SI,
27718 IX86_BUILTIN_VEC_EXT_V4HI,
27719 IX86_BUILTIN_VEC_EXT_V16QI,
27720 IX86_BUILTIN_VEC_SET_V2DI,
27721 IX86_BUILTIN_VEC_SET_V4SF,
27722 IX86_BUILTIN_VEC_SET_V4SI,
27723 IX86_BUILTIN_VEC_SET_V8HI,
27724 IX86_BUILTIN_VEC_SET_V4HI,
27725 IX86_BUILTIN_VEC_SET_V16QI,
27726
27727 IX86_BUILTIN_VEC_PACK_SFIX,
27728 IX86_BUILTIN_VEC_PACK_SFIX256,
27729
27730 /* SSE4.2. */
27731 IX86_BUILTIN_CRC32QI,
27732 IX86_BUILTIN_CRC32HI,
27733 IX86_BUILTIN_CRC32SI,
27734 IX86_BUILTIN_CRC32DI,
27735
27736 IX86_BUILTIN_PCMPESTRI128,
27737 IX86_BUILTIN_PCMPESTRM128,
27738 IX86_BUILTIN_PCMPESTRA128,
27739 IX86_BUILTIN_PCMPESTRC128,
27740 IX86_BUILTIN_PCMPESTRO128,
27741 IX86_BUILTIN_PCMPESTRS128,
27742 IX86_BUILTIN_PCMPESTRZ128,
27743 IX86_BUILTIN_PCMPISTRI128,
27744 IX86_BUILTIN_PCMPISTRM128,
27745 IX86_BUILTIN_PCMPISTRA128,
27746 IX86_BUILTIN_PCMPISTRC128,
27747 IX86_BUILTIN_PCMPISTRO128,
27748 IX86_BUILTIN_PCMPISTRS128,
27749 IX86_BUILTIN_PCMPISTRZ128,
27750
27751 IX86_BUILTIN_PCMPGTQ,
27752
27753 /* AES instructions */
27754 IX86_BUILTIN_AESENC128,
27755 IX86_BUILTIN_AESENCLAST128,
27756 IX86_BUILTIN_AESDEC128,
27757 IX86_BUILTIN_AESDECLAST128,
27758 IX86_BUILTIN_AESIMC128,
27759 IX86_BUILTIN_AESKEYGENASSIST128,
27760
27761 /* PCLMUL instruction */
27762 IX86_BUILTIN_PCLMULQDQ128,
27763
27764 /* AVX */
27765 IX86_BUILTIN_ADDPD256,
27766 IX86_BUILTIN_ADDPS256,
27767 IX86_BUILTIN_ADDSUBPD256,
27768 IX86_BUILTIN_ADDSUBPS256,
27769 IX86_BUILTIN_ANDPD256,
27770 IX86_BUILTIN_ANDPS256,
27771 IX86_BUILTIN_ANDNPD256,
27772 IX86_BUILTIN_ANDNPS256,
27773 IX86_BUILTIN_BLENDPD256,
27774 IX86_BUILTIN_BLENDPS256,
27775 IX86_BUILTIN_BLENDVPD256,
27776 IX86_BUILTIN_BLENDVPS256,
27777 IX86_BUILTIN_DIVPD256,
27778 IX86_BUILTIN_DIVPS256,
27779 IX86_BUILTIN_DPPS256,
27780 IX86_BUILTIN_HADDPD256,
27781 IX86_BUILTIN_HADDPS256,
27782 IX86_BUILTIN_HSUBPD256,
27783 IX86_BUILTIN_HSUBPS256,
27784 IX86_BUILTIN_MAXPD256,
27785 IX86_BUILTIN_MAXPS256,
27786 IX86_BUILTIN_MINPD256,
27787 IX86_BUILTIN_MINPS256,
27788 IX86_BUILTIN_MULPD256,
27789 IX86_BUILTIN_MULPS256,
27790 IX86_BUILTIN_ORPD256,
27791 IX86_BUILTIN_ORPS256,
27792 IX86_BUILTIN_SHUFPD256,
27793 IX86_BUILTIN_SHUFPS256,
27794 IX86_BUILTIN_SUBPD256,
27795 IX86_BUILTIN_SUBPS256,
27796 IX86_BUILTIN_XORPD256,
27797 IX86_BUILTIN_XORPS256,
27798 IX86_BUILTIN_CMPSD,
27799 IX86_BUILTIN_CMPSS,
27800 IX86_BUILTIN_CMPPD,
27801 IX86_BUILTIN_CMPPS,
27802 IX86_BUILTIN_CMPPD256,
27803 IX86_BUILTIN_CMPPS256,
27804 IX86_BUILTIN_CVTDQ2PD256,
27805 IX86_BUILTIN_CVTDQ2PS256,
27806 IX86_BUILTIN_CVTPD2PS256,
27807 IX86_BUILTIN_CVTPS2DQ256,
27808 IX86_BUILTIN_CVTPS2PD256,
27809 IX86_BUILTIN_CVTTPD2DQ256,
27810 IX86_BUILTIN_CVTPD2DQ256,
27811 IX86_BUILTIN_CVTTPS2DQ256,
27812 IX86_BUILTIN_EXTRACTF128PD256,
27813 IX86_BUILTIN_EXTRACTF128PS256,
27814 IX86_BUILTIN_EXTRACTF128SI256,
27815 IX86_BUILTIN_VZEROALL,
27816 IX86_BUILTIN_VZEROUPPER,
27817 IX86_BUILTIN_VPERMILVARPD,
27818 IX86_BUILTIN_VPERMILVARPS,
27819 IX86_BUILTIN_VPERMILVARPD256,
27820 IX86_BUILTIN_VPERMILVARPS256,
27821 IX86_BUILTIN_VPERMILPD,
27822 IX86_BUILTIN_VPERMILPS,
27823 IX86_BUILTIN_VPERMILPD256,
27824 IX86_BUILTIN_VPERMILPS256,
27825 IX86_BUILTIN_VPERMIL2PD,
27826 IX86_BUILTIN_VPERMIL2PS,
27827 IX86_BUILTIN_VPERMIL2PD256,
27828 IX86_BUILTIN_VPERMIL2PS256,
27829 IX86_BUILTIN_VPERM2F128PD256,
27830 IX86_BUILTIN_VPERM2F128PS256,
27831 IX86_BUILTIN_VPERM2F128SI256,
27832 IX86_BUILTIN_VBROADCASTSS,
27833 IX86_BUILTIN_VBROADCASTSD256,
27834 IX86_BUILTIN_VBROADCASTSS256,
27835 IX86_BUILTIN_VBROADCASTPD256,
27836 IX86_BUILTIN_VBROADCASTPS256,
27837 IX86_BUILTIN_VINSERTF128PD256,
27838 IX86_BUILTIN_VINSERTF128PS256,
27839 IX86_BUILTIN_VINSERTF128SI256,
27840 IX86_BUILTIN_LOADUPD256,
27841 IX86_BUILTIN_LOADUPS256,
27842 IX86_BUILTIN_STOREUPD256,
27843 IX86_BUILTIN_STOREUPS256,
27844 IX86_BUILTIN_LDDQU256,
27845 IX86_BUILTIN_MOVNTDQ256,
27846 IX86_BUILTIN_MOVNTPD256,
27847 IX86_BUILTIN_MOVNTPS256,
27848 IX86_BUILTIN_LOADDQU256,
27849 IX86_BUILTIN_STOREDQU256,
27850 IX86_BUILTIN_MASKLOADPD,
27851 IX86_BUILTIN_MASKLOADPS,
27852 IX86_BUILTIN_MASKSTOREPD,
27853 IX86_BUILTIN_MASKSTOREPS,
27854 IX86_BUILTIN_MASKLOADPD256,
27855 IX86_BUILTIN_MASKLOADPS256,
27856 IX86_BUILTIN_MASKSTOREPD256,
27857 IX86_BUILTIN_MASKSTOREPS256,
27858 IX86_BUILTIN_MOVSHDUP256,
27859 IX86_BUILTIN_MOVSLDUP256,
27860 IX86_BUILTIN_MOVDDUP256,
27861
27862 IX86_BUILTIN_SQRTPD256,
27863 IX86_BUILTIN_SQRTPS256,
27864 IX86_BUILTIN_SQRTPS_NR256,
27865 IX86_BUILTIN_RSQRTPS256,
27866 IX86_BUILTIN_RSQRTPS_NR256,
27867
27868 IX86_BUILTIN_RCPPS256,
27869
27870 IX86_BUILTIN_ROUNDPD256,
27871 IX86_BUILTIN_ROUNDPS256,
27872
27873 IX86_BUILTIN_FLOORPD256,
27874 IX86_BUILTIN_CEILPD256,
27875 IX86_BUILTIN_TRUNCPD256,
27876 IX86_BUILTIN_RINTPD256,
27877 IX86_BUILTIN_ROUNDPD_AZ256,
27878
27879 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
27880 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
27881 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
27882
27883 IX86_BUILTIN_FLOORPS256,
27884 IX86_BUILTIN_CEILPS256,
27885 IX86_BUILTIN_TRUNCPS256,
27886 IX86_BUILTIN_RINTPS256,
27887 IX86_BUILTIN_ROUNDPS_AZ256,
27888
27889 IX86_BUILTIN_FLOORPS_SFIX256,
27890 IX86_BUILTIN_CEILPS_SFIX256,
27891 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
27892
27893 IX86_BUILTIN_UNPCKHPD256,
27894 IX86_BUILTIN_UNPCKLPD256,
27895 IX86_BUILTIN_UNPCKHPS256,
27896 IX86_BUILTIN_UNPCKLPS256,
27897
27898 IX86_BUILTIN_SI256_SI,
27899 IX86_BUILTIN_PS256_PS,
27900 IX86_BUILTIN_PD256_PD,
27901 IX86_BUILTIN_SI_SI256,
27902 IX86_BUILTIN_PS_PS256,
27903 IX86_BUILTIN_PD_PD256,
27904
27905 IX86_BUILTIN_VTESTZPD,
27906 IX86_BUILTIN_VTESTCPD,
27907 IX86_BUILTIN_VTESTNZCPD,
27908 IX86_BUILTIN_VTESTZPS,
27909 IX86_BUILTIN_VTESTCPS,
27910 IX86_BUILTIN_VTESTNZCPS,
27911 IX86_BUILTIN_VTESTZPD256,
27912 IX86_BUILTIN_VTESTCPD256,
27913 IX86_BUILTIN_VTESTNZCPD256,
27914 IX86_BUILTIN_VTESTZPS256,
27915 IX86_BUILTIN_VTESTCPS256,
27916 IX86_BUILTIN_VTESTNZCPS256,
27917 IX86_BUILTIN_PTESTZ256,
27918 IX86_BUILTIN_PTESTC256,
27919 IX86_BUILTIN_PTESTNZC256,
27920
27921 IX86_BUILTIN_MOVMSKPD256,
27922 IX86_BUILTIN_MOVMSKPS256,
27923
27924 /* AVX2 */
27925 IX86_BUILTIN_MPSADBW256,
27926 IX86_BUILTIN_PABSB256,
27927 IX86_BUILTIN_PABSW256,
27928 IX86_BUILTIN_PABSD256,
27929 IX86_BUILTIN_PACKSSDW256,
27930 IX86_BUILTIN_PACKSSWB256,
27931 IX86_BUILTIN_PACKUSDW256,
27932 IX86_BUILTIN_PACKUSWB256,
27933 IX86_BUILTIN_PADDB256,
27934 IX86_BUILTIN_PADDW256,
27935 IX86_BUILTIN_PADDD256,
27936 IX86_BUILTIN_PADDQ256,
27937 IX86_BUILTIN_PADDSB256,
27938 IX86_BUILTIN_PADDSW256,
27939 IX86_BUILTIN_PADDUSB256,
27940 IX86_BUILTIN_PADDUSW256,
27941 IX86_BUILTIN_PALIGNR256,
27942 IX86_BUILTIN_AND256I,
27943 IX86_BUILTIN_ANDNOT256I,
27944 IX86_BUILTIN_PAVGB256,
27945 IX86_BUILTIN_PAVGW256,
27946 IX86_BUILTIN_PBLENDVB256,
27947 IX86_BUILTIN_PBLENDVW256,
27948 IX86_BUILTIN_PCMPEQB256,
27949 IX86_BUILTIN_PCMPEQW256,
27950 IX86_BUILTIN_PCMPEQD256,
27951 IX86_BUILTIN_PCMPEQQ256,
27952 IX86_BUILTIN_PCMPGTB256,
27953 IX86_BUILTIN_PCMPGTW256,
27954 IX86_BUILTIN_PCMPGTD256,
27955 IX86_BUILTIN_PCMPGTQ256,
27956 IX86_BUILTIN_PHADDW256,
27957 IX86_BUILTIN_PHADDD256,
27958 IX86_BUILTIN_PHADDSW256,
27959 IX86_BUILTIN_PHSUBW256,
27960 IX86_BUILTIN_PHSUBD256,
27961 IX86_BUILTIN_PHSUBSW256,
27962 IX86_BUILTIN_PMADDUBSW256,
27963 IX86_BUILTIN_PMADDWD256,
27964 IX86_BUILTIN_PMAXSB256,
27965 IX86_BUILTIN_PMAXSW256,
27966 IX86_BUILTIN_PMAXSD256,
27967 IX86_BUILTIN_PMAXUB256,
27968 IX86_BUILTIN_PMAXUW256,
27969 IX86_BUILTIN_PMAXUD256,
27970 IX86_BUILTIN_PMINSB256,
27971 IX86_BUILTIN_PMINSW256,
27972 IX86_BUILTIN_PMINSD256,
27973 IX86_BUILTIN_PMINUB256,
27974 IX86_BUILTIN_PMINUW256,
27975 IX86_BUILTIN_PMINUD256,
27976 IX86_BUILTIN_PMOVMSKB256,
27977 IX86_BUILTIN_PMOVSXBW256,
27978 IX86_BUILTIN_PMOVSXBD256,
27979 IX86_BUILTIN_PMOVSXBQ256,
27980 IX86_BUILTIN_PMOVSXWD256,
27981 IX86_BUILTIN_PMOVSXWQ256,
27982 IX86_BUILTIN_PMOVSXDQ256,
27983 IX86_BUILTIN_PMOVZXBW256,
27984 IX86_BUILTIN_PMOVZXBD256,
27985 IX86_BUILTIN_PMOVZXBQ256,
27986 IX86_BUILTIN_PMOVZXWD256,
27987 IX86_BUILTIN_PMOVZXWQ256,
27988 IX86_BUILTIN_PMOVZXDQ256,
27989 IX86_BUILTIN_PMULDQ256,
27990 IX86_BUILTIN_PMULHRSW256,
27991 IX86_BUILTIN_PMULHUW256,
27992 IX86_BUILTIN_PMULHW256,
27993 IX86_BUILTIN_PMULLW256,
27994 IX86_BUILTIN_PMULLD256,
27995 IX86_BUILTIN_PMULUDQ256,
27996 IX86_BUILTIN_POR256,
27997 IX86_BUILTIN_PSADBW256,
27998 IX86_BUILTIN_PSHUFB256,
27999 IX86_BUILTIN_PSHUFD256,
28000 IX86_BUILTIN_PSHUFHW256,
28001 IX86_BUILTIN_PSHUFLW256,
28002 IX86_BUILTIN_PSIGNB256,
28003 IX86_BUILTIN_PSIGNW256,
28004 IX86_BUILTIN_PSIGND256,
28005 IX86_BUILTIN_PSLLDQI256,
28006 IX86_BUILTIN_PSLLWI256,
28007 IX86_BUILTIN_PSLLW256,
28008 IX86_BUILTIN_PSLLDI256,
28009 IX86_BUILTIN_PSLLD256,
28010 IX86_BUILTIN_PSLLQI256,
28011 IX86_BUILTIN_PSLLQ256,
28012 IX86_BUILTIN_PSRAWI256,
28013 IX86_BUILTIN_PSRAW256,
28014 IX86_BUILTIN_PSRADI256,
28015 IX86_BUILTIN_PSRAD256,
28016 IX86_BUILTIN_PSRLDQI256,
28017 IX86_BUILTIN_PSRLWI256,
28018 IX86_BUILTIN_PSRLW256,
28019 IX86_BUILTIN_PSRLDI256,
28020 IX86_BUILTIN_PSRLD256,
28021 IX86_BUILTIN_PSRLQI256,
28022 IX86_BUILTIN_PSRLQ256,
28023 IX86_BUILTIN_PSUBB256,
28024 IX86_BUILTIN_PSUBW256,
28025 IX86_BUILTIN_PSUBD256,
28026 IX86_BUILTIN_PSUBQ256,
28027 IX86_BUILTIN_PSUBSB256,
28028 IX86_BUILTIN_PSUBSW256,
28029 IX86_BUILTIN_PSUBUSB256,
28030 IX86_BUILTIN_PSUBUSW256,
28031 IX86_BUILTIN_PUNPCKHBW256,
28032 IX86_BUILTIN_PUNPCKHWD256,
28033 IX86_BUILTIN_PUNPCKHDQ256,
28034 IX86_BUILTIN_PUNPCKHQDQ256,
28035 IX86_BUILTIN_PUNPCKLBW256,
28036 IX86_BUILTIN_PUNPCKLWD256,
28037 IX86_BUILTIN_PUNPCKLDQ256,
28038 IX86_BUILTIN_PUNPCKLQDQ256,
28039 IX86_BUILTIN_PXOR256,
28040 IX86_BUILTIN_MOVNTDQA256,
28041 IX86_BUILTIN_VBROADCASTSS_PS,
28042 IX86_BUILTIN_VBROADCASTSS_PS256,
28043 IX86_BUILTIN_VBROADCASTSD_PD256,
28044 IX86_BUILTIN_VBROADCASTSI256,
28045 IX86_BUILTIN_PBLENDD256,
28046 IX86_BUILTIN_PBLENDD128,
28047 IX86_BUILTIN_PBROADCASTB256,
28048 IX86_BUILTIN_PBROADCASTW256,
28049 IX86_BUILTIN_PBROADCASTD256,
28050 IX86_BUILTIN_PBROADCASTQ256,
28051 IX86_BUILTIN_PBROADCASTB128,
28052 IX86_BUILTIN_PBROADCASTW128,
28053 IX86_BUILTIN_PBROADCASTD128,
28054 IX86_BUILTIN_PBROADCASTQ128,
28055 IX86_BUILTIN_VPERMVARSI256,
28056 IX86_BUILTIN_VPERMDF256,
28057 IX86_BUILTIN_VPERMVARSF256,
28058 IX86_BUILTIN_VPERMDI256,
28059 IX86_BUILTIN_VPERMTI256,
28060 IX86_BUILTIN_VEXTRACT128I256,
28061 IX86_BUILTIN_VINSERT128I256,
28062 IX86_BUILTIN_MASKLOADD,
28063 IX86_BUILTIN_MASKLOADQ,
28064 IX86_BUILTIN_MASKLOADD256,
28065 IX86_BUILTIN_MASKLOADQ256,
28066 IX86_BUILTIN_MASKSTORED,
28067 IX86_BUILTIN_MASKSTOREQ,
28068 IX86_BUILTIN_MASKSTORED256,
28069 IX86_BUILTIN_MASKSTOREQ256,
28070 IX86_BUILTIN_PSLLVV4DI,
28071 IX86_BUILTIN_PSLLVV2DI,
28072 IX86_BUILTIN_PSLLVV8SI,
28073 IX86_BUILTIN_PSLLVV4SI,
28074 IX86_BUILTIN_PSRAVV8SI,
28075 IX86_BUILTIN_PSRAVV4SI,
28076 IX86_BUILTIN_PSRLVV4DI,
28077 IX86_BUILTIN_PSRLVV2DI,
28078 IX86_BUILTIN_PSRLVV8SI,
28079 IX86_BUILTIN_PSRLVV4SI,
28080
28081 IX86_BUILTIN_GATHERSIV2DF,
28082 IX86_BUILTIN_GATHERSIV4DF,
28083 IX86_BUILTIN_GATHERDIV2DF,
28084 IX86_BUILTIN_GATHERDIV4DF,
28085 IX86_BUILTIN_GATHERSIV4SF,
28086 IX86_BUILTIN_GATHERSIV8SF,
28087 IX86_BUILTIN_GATHERDIV4SF,
28088 IX86_BUILTIN_GATHERDIV8SF,
28089 IX86_BUILTIN_GATHERSIV2DI,
28090 IX86_BUILTIN_GATHERSIV4DI,
28091 IX86_BUILTIN_GATHERDIV2DI,
28092 IX86_BUILTIN_GATHERDIV4DI,
28093 IX86_BUILTIN_GATHERSIV4SI,
28094 IX86_BUILTIN_GATHERSIV8SI,
28095 IX86_BUILTIN_GATHERDIV4SI,
28096 IX86_BUILTIN_GATHERDIV8SI,
28097
28098 /* AVX512F */
28099 IX86_BUILTIN_SI512_SI256,
28100 IX86_BUILTIN_PD512_PD256,
28101 IX86_BUILTIN_PS512_PS256,
28102 IX86_BUILTIN_SI512_SI,
28103 IX86_BUILTIN_PD512_PD,
28104 IX86_BUILTIN_PS512_PS,
28105 IX86_BUILTIN_ADDPD512,
28106 IX86_BUILTIN_ADDPS512,
28107 IX86_BUILTIN_ADDSD_ROUND,
28108 IX86_BUILTIN_ADDSS_ROUND,
28109 IX86_BUILTIN_ALIGND512,
28110 IX86_BUILTIN_ALIGNQ512,
28111 IX86_BUILTIN_BLENDMD512,
28112 IX86_BUILTIN_BLENDMPD512,
28113 IX86_BUILTIN_BLENDMPS512,
28114 IX86_BUILTIN_BLENDMQ512,
28115 IX86_BUILTIN_BROADCASTF32X4_512,
28116 IX86_BUILTIN_BROADCASTF64X4_512,
28117 IX86_BUILTIN_BROADCASTI32X4_512,
28118 IX86_BUILTIN_BROADCASTI64X4_512,
28119 IX86_BUILTIN_BROADCASTSD512,
28120 IX86_BUILTIN_BROADCASTSS512,
28121 IX86_BUILTIN_CMPD512,
28122 IX86_BUILTIN_CMPPD512,
28123 IX86_BUILTIN_CMPPS512,
28124 IX86_BUILTIN_CMPQ512,
28125 IX86_BUILTIN_CMPSD_MASK,
28126 IX86_BUILTIN_CMPSS_MASK,
28127 IX86_BUILTIN_COMIDF,
28128 IX86_BUILTIN_COMISF,
28129 IX86_BUILTIN_COMPRESSPD512,
28130 IX86_BUILTIN_COMPRESSPDSTORE512,
28131 IX86_BUILTIN_COMPRESSPS512,
28132 IX86_BUILTIN_COMPRESSPSSTORE512,
28133 IX86_BUILTIN_CVTDQ2PD512,
28134 IX86_BUILTIN_CVTDQ2PS512,
28135 IX86_BUILTIN_CVTPD2DQ512,
28136 IX86_BUILTIN_CVTPD2PS512,
28137 IX86_BUILTIN_CVTPD2UDQ512,
28138 IX86_BUILTIN_CVTPH2PS512,
28139 IX86_BUILTIN_CVTPS2DQ512,
28140 IX86_BUILTIN_CVTPS2PD512,
28141 IX86_BUILTIN_CVTPS2PH512,
28142 IX86_BUILTIN_CVTPS2UDQ512,
28143 IX86_BUILTIN_CVTSD2SS_ROUND,
28144 IX86_BUILTIN_CVTSI2SD64,
28145 IX86_BUILTIN_CVTSI2SS32,
28146 IX86_BUILTIN_CVTSI2SS64,
28147 IX86_BUILTIN_CVTSS2SD_ROUND,
28148 IX86_BUILTIN_CVTTPD2DQ512,
28149 IX86_BUILTIN_CVTTPD2UDQ512,
28150 IX86_BUILTIN_CVTTPS2DQ512,
28151 IX86_BUILTIN_CVTTPS2UDQ512,
28152 IX86_BUILTIN_CVTUDQ2PD512,
28153 IX86_BUILTIN_CVTUDQ2PS512,
28154 IX86_BUILTIN_CVTUSI2SD32,
28155 IX86_BUILTIN_CVTUSI2SD64,
28156 IX86_BUILTIN_CVTUSI2SS32,
28157 IX86_BUILTIN_CVTUSI2SS64,
28158 IX86_BUILTIN_DIVPD512,
28159 IX86_BUILTIN_DIVPS512,
28160 IX86_BUILTIN_DIVSD_ROUND,
28161 IX86_BUILTIN_DIVSS_ROUND,
28162 IX86_BUILTIN_EXPANDPD512,
28163 IX86_BUILTIN_EXPANDPD512Z,
28164 IX86_BUILTIN_EXPANDPDLOAD512,
28165 IX86_BUILTIN_EXPANDPDLOAD512Z,
28166 IX86_BUILTIN_EXPANDPS512,
28167 IX86_BUILTIN_EXPANDPS512Z,
28168 IX86_BUILTIN_EXPANDPSLOAD512,
28169 IX86_BUILTIN_EXPANDPSLOAD512Z,
28170 IX86_BUILTIN_EXTRACTF32X4,
28171 IX86_BUILTIN_EXTRACTF64X4,
28172 IX86_BUILTIN_EXTRACTI32X4,
28173 IX86_BUILTIN_EXTRACTI64X4,
28174 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28175 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28176 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28177 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28178 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28179 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28180 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28181 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28182 IX86_BUILTIN_GETEXPPD512,
28183 IX86_BUILTIN_GETEXPPS512,
28184 IX86_BUILTIN_GETEXPSD128,
28185 IX86_BUILTIN_GETEXPSS128,
28186 IX86_BUILTIN_GETMANTPD512,
28187 IX86_BUILTIN_GETMANTPS512,
28188 IX86_BUILTIN_GETMANTSD128,
28189 IX86_BUILTIN_GETMANTSS128,
28190 IX86_BUILTIN_INSERTF32X4,
28191 IX86_BUILTIN_INSERTF64X4,
28192 IX86_BUILTIN_INSERTI32X4,
28193 IX86_BUILTIN_INSERTI64X4,
28194 IX86_BUILTIN_LOADAPD512,
28195 IX86_BUILTIN_LOADAPS512,
28196 IX86_BUILTIN_LOADDQUDI512,
28197 IX86_BUILTIN_LOADDQUSI512,
28198 IX86_BUILTIN_LOADUPD512,
28199 IX86_BUILTIN_LOADUPS512,
28200 IX86_BUILTIN_MAXPD512,
28201 IX86_BUILTIN_MAXPS512,
28202 IX86_BUILTIN_MAXSD_ROUND,
28203 IX86_BUILTIN_MAXSS_ROUND,
28204 IX86_BUILTIN_MINPD512,
28205 IX86_BUILTIN_MINPS512,
28206 IX86_BUILTIN_MINSD_ROUND,
28207 IX86_BUILTIN_MINSS_ROUND,
28208 IX86_BUILTIN_MOVAPD512,
28209 IX86_BUILTIN_MOVAPS512,
28210 IX86_BUILTIN_MOVDDUP512,
28211 IX86_BUILTIN_MOVDQA32LOAD512,
28212 IX86_BUILTIN_MOVDQA32STORE512,
28213 IX86_BUILTIN_MOVDQA32_512,
28214 IX86_BUILTIN_MOVDQA64LOAD512,
28215 IX86_BUILTIN_MOVDQA64STORE512,
28216 IX86_BUILTIN_MOVDQA64_512,
28217 IX86_BUILTIN_MOVNTDQ512,
28218 IX86_BUILTIN_MOVNTDQA512,
28219 IX86_BUILTIN_MOVNTPD512,
28220 IX86_BUILTIN_MOVNTPS512,
28221 IX86_BUILTIN_MOVSHDUP512,
28222 IX86_BUILTIN_MOVSLDUP512,
28223 IX86_BUILTIN_MULPD512,
28224 IX86_BUILTIN_MULPS512,
28225 IX86_BUILTIN_MULSD_ROUND,
28226 IX86_BUILTIN_MULSS_ROUND,
28227 IX86_BUILTIN_PABSD512,
28228 IX86_BUILTIN_PABSQ512,
28229 IX86_BUILTIN_PADDD512,
28230 IX86_BUILTIN_PADDQ512,
28231 IX86_BUILTIN_PANDD512,
28232 IX86_BUILTIN_PANDND512,
28233 IX86_BUILTIN_PANDNQ512,
28234 IX86_BUILTIN_PANDQ512,
28235 IX86_BUILTIN_PBROADCASTD512,
28236 IX86_BUILTIN_PBROADCASTD512_GPR,
28237 IX86_BUILTIN_PBROADCASTMB512,
28238 IX86_BUILTIN_PBROADCASTMW512,
28239 IX86_BUILTIN_PBROADCASTQ512,
28240 IX86_BUILTIN_PBROADCASTQ512_GPR,
28241 IX86_BUILTIN_PBROADCASTQ512_MEM,
28242 IX86_BUILTIN_PCMPEQD512_MASK,
28243 IX86_BUILTIN_PCMPEQQ512_MASK,
28244 IX86_BUILTIN_PCMPGTD512_MASK,
28245 IX86_BUILTIN_PCMPGTQ512_MASK,
28246 IX86_BUILTIN_PCOMPRESSD512,
28247 IX86_BUILTIN_PCOMPRESSDSTORE512,
28248 IX86_BUILTIN_PCOMPRESSQ512,
28249 IX86_BUILTIN_PCOMPRESSQSTORE512,
28250 IX86_BUILTIN_PEXPANDD512,
28251 IX86_BUILTIN_PEXPANDD512Z,
28252 IX86_BUILTIN_PEXPANDDLOAD512,
28253 IX86_BUILTIN_PEXPANDDLOAD512Z,
28254 IX86_BUILTIN_PEXPANDQ512,
28255 IX86_BUILTIN_PEXPANDQ512Z,
28256 IX86_BUILTIN_PEXPANDQLOAD512,
28257 IX86_BUILTIN_PEXPANDQLOAD512Z,
28258 IX86_BUILTIN_PMAXSD512,
28259 IX86_BUILTIN_PMAXSQ512,
28260 IX86_BUILTIN_PMAXUD512,
28261 IX86_BUILTIN_PMAXUQ512,
28262 IX86_BUILTIN_PMINSD512,
28263 IX86_BUILTIN_PMINSQ512,
28264 IX86_BUILTIN_PMINUD512,
28265 IX86_BUILTIN_PMINUQ512,
28266 IX86_BUILTIN_PMOVDB512,
28267 IX86_BUILTIN_PMOVDB512_MEM,
28268 IX86_BUILTIN_PMOVDW512,
28269 IX86_BUILTIN_PMOVDW512_MEM,
28270 IX86_BUILTIN_PMOVQB512,
28271 IX86_BUILTIN_PMOVQB512_MEM,
28272 IX86_BUILTIN_PMOVQD512,
28273 IX86_BUILTIN_PMOVQD512_MEM,
28274 IX86_BUILTIN_PMOVQW512,
28275 IX86_BUILTIN_PMOVQW512_MEM,
28276 IX86_BUILTIN_PMOVSDB512,
28277 IX86_BUILTIN_PMOVSDB512_MEM,
28278 IX86_BUILTIN_PMOVSDW512,
28279 IX86_BUILTIN_PMOVSDW512_MEM,
28280 IX86_BUILTIN_PMOVSQB512,
28281 IX86_BUILTIN_PMOVSQB512_MEM,
28282 IX86_BUILTIN_PMOVSQD512,
28283 IX86_BUILTIN_PMOVSQD512_MEM,
28284 IX86_BUILTIN_PMOVSQW512,
28285 IX86_BUILTIN_PMOVSQW512_MEM,
28286 IX86_BUILTIN_PMOVSXBD512,
28287 IX86_BUILTIN_PMOVSXBQ512,
28288 IX86_BUILTIN_PMOVSXDQ512,
28289 IX86_BUILTIN_PMOVSXWD512,
28290 IX86_BUILTIN_PMOVSXWQ512,
28291 IX86_BUILTIN_PMOVUSDB512,
28292 IX86_BUILTIN_PMOVUSDB512_MEM,
28293 IX86_BUILTIN_PMOVUSDW512,
28294 IX86_BUILTIN_PMOVUSDW512_MEM,
28295 IX86_BUILTIN_PMOVUSQB512,
28296 IX86_BUILTIN_PMOVUSQB512_MEM,
28297 IX86_BUILTIN_PMOVUSQD512,
28298 IX86_BUILTIN_PMOVUSQD512_MEM,
28299 IX86_BUILTIN_PMOVUSQW512,
28300 IX86_BUILTIN_PMOVUSQW512_MEM,
28301 IX86_BUILTIN_PMOVZXBD512,
28302 IX86_BUILTIN_PMOVZXBQ512,
28303 IX86_BUILTIN_PMOVZXDQ512,
28304 IX86_BUILTIN_PMOVZXWD512,
28305 IX86_BUILTIN_PMOVZXWQ512,
28306 IX86_BUILTIN_PMULDQ512,
28307 IX86_BUILTIN_PMULLD512,
28308 IX86_BUILTIN_PMULUDQ512,
28309 IX86_BUILTIN_PORD512,
28310 IX86_BUILTIN_PORQ512,
28311 IX86_BUILTIN_PROLD512,
28312 IX86_BUILTIN_PROLQ512,
28313 IX86_BUILTIN_PROLVD512,
28314 IX86_BUILTIN_PROLVQ512,
28315 IX86_BUILTIN_PRORD512,
28316 IX86_BUILTIN_PRORQ512,
28317 IX86_BUILTIN_PRORVD512,
28318 IX86_BUILTIN_PRORVQ512,
28319 IX86_BUILTIN_PSHUFD512,
28320 IX86_BUILTIN_PSLLD512,
28321 IX86_BUILTIN_PSLLDI512,
28322 IX86_BUILTIN_PSLLQ512,
28323 IX86_BUILTIN_PSLLQI512,
28324 IX86_BUILTIN_PSLLVV16SI,
28325 IX86_BUILTIN_PSLLVV8DI,
28326 IX86_BUILTIN_PSRAD512,
28327 IX86_BUILTIN_PSRADI512,
28328 IX86_BUILTIN_PSRAQ512,
28329 IX86_BUILTIN_PSRAQI512,
28330 IX86_BUILTIN_PSRAVV16SI,
28331 IX86_BUILTIN_PSRAVV8DI,
28332 IX86_BUILTIN_PSRLD512,
28333 IX86_BUILTIN_PSRLDI512,
28334 IX86_BUILTIN_PSRLQ512,
28335 IX86_BUILTIN_PSRLQI512,
28336 IX86_BUILTIN_PSRLVV16SI,
28337 IX86_BUILTIN_PSRLVV8DI,
28338 IX86_BUILTIN_PSUBD512,
28339 IX86_BUILTIN_PSUBQ512,
28340 IX86_BUILTIN_PTESTMD512,
28341 IX86_BUILTIN_PTESTMQ512,
28342 IX86_BUILTIN_PTESTNMD512,
28343 IX86_BUILTIN_PTESTNMQ512,
28344 IX86_BUILTIN_PUNPCKHDQ512,
28345 IX86_BUILTIN_PUNPCKHQDQ512,
28346 IX86_BUILTIN_PUNPCKLDQ512,
28347 IX86_BUILTIN_PUNPCKLQDQ512,
28348 IX86_BUILTIN_PXORD512,
28349 IX86_BUILTIN_PXORQ512,
28350 IX86_BUILTIN_RCP14PD512,
28351 IX86_BUILTIN_RCP14PS512,
28352 IX86_BUILTIN_RCP14SD,
28353 IX86_BUILTIN_RCP14SS,
28354 IX86_BUILTIN_RNDSCALEPD,
28355 IX86_BUILTIN_RNDSCALEPS,
28356 IX86_BUILTIN_RNDSCALESD,
28357 IX86_BUILTIN_RNDSCALESS,
28358 IX86_BUILTIN_RSQRT14PD512,
28359 IX86_BUILTIN_RSQRT14PS512,
28360 IX86_BUILTIN_RSQRT14SD,
28361 IX86_BUILTIN_RSQRT14SS,
28362 IX86_BUILTIN_SCALEFPD512,
28363 IX86_BUILTIN_SCALEFPS512,
28364 IX86_BUILTIN_SCALEFSD,
28365 IX86_BUILTIN_SCALEFSS,
28366 IX86_BUILTIN_SHUFPD512,
28367 IX86_BUILTIN_SHUFPS512,
28368 IX86_BUILTIN_SHUF_F32x4,
28369 IX86_BUILTIN_SHUF_F64x2,
28370 IX86_BUILTIN_SHUF_I32x4,
28371 IX86_BUILTIN_SHUF_I64x2,
28372 IX86_BUILTIN_SQRTPD512,
28373 IX86_BUILTIN_SQRTPD512_MASK,
28374 IX86_BUILTIN_SQRTPS512_MASK,
28375 IX86_BUILTIN_SQRTPS_NR512,
28376 IX86_BUILTIN_SQRTSD_ROUND,
28377 IX86_BUILTIN_SQRTSS_ROUND,
28378 IX86_BUILTIN_STOREAPD512,
28379 IX86_BUILTIN_STOREAPS512,
28380 IX86_BUILTIN_STOREDQUDI512,
28381 IX86_BUILTIN_STOREDQUSI512,
28382 IX86_BUILTIN_STOREUPD512,
28383 IX86_BUILTIN_STOREUPS512,
28384 IX86_BUILTIN_SUBPD512,
28385 IX86_BUILTIN_SUBPS512,
28386 IX86_BUILTIN_SUBSD_ROUND,
28387 IX86_BUILTIN_SUBSS_ROUND,
28388 IX86_BUILTIN_UCMPD512,
28389 IX86_BUILTIN_UCMPQ512,
28390 IX86_BUILTIN_UNPCKHPD512,
28391 IX86_BUILTIN_UNPCKHPS512,
28392 IX86_BUILTIN_UNPCKLPD512,
28393 IX86_BUILTIN_UNPCKLPS512,
28394 IX86_BUILTIN_VCVTSD2SI32,
28395 IX86_BUILTIN_VCVTSD2SI64,
28396 IX86_BUILTIN_VCVTSD2USI32,
28397 IX86_BUILTIN_VCVTSD2USI64,
28398 IX86_BUILTIN_VCVTSS2SI32,
28399 IX86_BUILTIN_VCVTSS2SI64,
28400 IX86_BUILTIN_VCVTSS2USI32,
28401 IX86_BUILTIN_VCVTSS2USI64,
28402 IX86_BUILTIN_VCVTTSD2SI32,
28403 IX86_BUILTIN_VCVTTSD2SI64,
28404 IX86_BUILTIN_VCVTTSD2USI32,
28405 IX86_BUILTIN_VCVTTSD2USI64,
28406 IX86_BUILTIN_VCVTTSS2SI32,
28407 IX86_BUILTIN_VCVTTSS2SI64,
28408 IX86_BUILTIN_VCVTTSS2USI32,
28409 IX86_BUILTIN_VCVTTSS2USI64,
28410 IX86_BUILTIN_VFMADDPD512_MASK,
28411 IX86_BUILTIN_VFMADDPD512_MASK3,
28412 IX86_BUILTIN_VFMADDPD512_MASKZ,
28413 IX86_BUILTIN_VFMADDPS512_MASK,
28414 IX86_BUILTIN_VFMADDPS512_MASK3,
28415 IX86_BUILTIN_VFMADDPS512_MASKZ,
28416 IX86_BUILTIN_VFMADDSD3_ROUND,
28417 IX86_BUILTIN_VFMADDSS3_ROUND,
28418 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28419 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28420 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28421 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28422 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28423 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28424 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28425 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28426 IX86_BUILTIN_VFMSUBPD512_MASK3,
28427 IX86_BUILTIN_VFMSUBPS512_MASK3,
28428 IX86_BUILTIN_VFMSUBSD3_MASK3,
28429 IX86_BUILTIN_VFMSUBSS3_MASK3,
28430 IX86_BUILTIN_VFNMADDPD512_MASK,
28431 IX86_BUILTIN_VFNMADDPS512_MASK,
28432 IX86_BUILTIN_VFNMSUBPD512_MASK,
28433 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28434 IX86_BUILTIN_VFNMSUBPS512_MASK,
28435 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28436 IX86_BUILTIN_VPCLZCNTD512,
28437 IX86_BUILTIN_VPCLZCNTQ512,
28438 IX86_BUILTIN_VPCONFLICTD512,
28439 IX86_BUILTIN_VPCONFLICTQ512,
28440 IX86_BUILTIN_VPERMDF512,
28441 IX86_BUILTIN_VPERMDI512,
28442 IX86_BUILTIN_VPERMI2VARD512,
28443 IX86_BUILTIN_VPERMI2VARPD512,
28444 IX86_BUILTIN_VPERMI2VARPS512,
28445 IX86_BUILTIN_VPERMI2VARQ512,
28446 IX86_BUILTIN_VPERMILPD512,
28447 IX86_BUILTIN_VPERMILPS512,
28448 IX86_BUILTIN_VPERMILVARPD512,
28449 IX86_BUILTIN_VPERMILVARPS512,
28450 IX86_BUILTIN_VPERMT2VARD512,
28451 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28452 IX86_BUILTIN_VPERMT2VARPD512,
28453 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28454 IX86_BUILTIN_VPERMT2VARPS512,
28455 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28456 IX86_BUILTIN_VPERMT2VARQ512,
28457 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28458 IX86_BUILTIN_VPERMVARDF512,
28459 IX86_BUILTIN_VPERMVARDI512,
28460 IX86_BUILTIN_VPERMVARSF512,
28461 IX86_BUILTIN_VPERMVARSI512,
28462 IX86_BUILTIN_VTERNLOGD512_MASK,
28463 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28464 IX86_BUILTIN_VTERNLOGQ512_MASK,
28465 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28466
28467 /* Mask arithmetic operations */
28468 IX86_BUILTIN_KAND16,
28469 IX86_BUILTIN_KANDN16,
28470 IX86_BUILTIN_KNOT16,
28471 IX86_BUILTIN_KOR16,
28472 IX86_BUILTIN_KORTESTC16,
28473 IX86_BUILTIN_KORTESTZ16,
28474 IX86_BUILTIN_KUNPCKBW,
28475 IX86_BUILTIN_KXNOR16,
28476 IX86_BUILTIN_KXOR16,
28477 IX86_BUILTIN_KMOV16,
28478
28479 /* Alternate 4- and 8-element gather/scatter for the vectorizer,
28480 where all operands are 32 or 64 bytes wide, respectively. */
28481 IX86_BUILTIN_GATHERALTSIV4DF,
28482 IX86_BUILTIN_GATHERALTDIV8SF,
28483 IX86_BUILTIN_GATHERALTSIV4DI,
28484 IX86_BUILTIN_GATHERALTDIV8SI,
28485 IX86_BUILTIN_GATHER3ALTDIV16SF,
28486 IX86_BUILTIN_GATHER3ALTDIV16SI,
28487 IX86_BUILTIN_GATHER3ALTSIV8DF,
28488 IX86_BUILTIN_GATHER3ALTSIV8DI,
28489 IX86_BUILTIN_GATHER3DIV16SF,
28490 IX86_BUILTIN_GATHER3DIV16SI,
28491 IX86_BUILTIN_GATHER3DIV8DF,
28492 IX86_BUILTIN_GATHER3DIV8DI,
28493 IX86_BUILTIN_GATHER3SIV16SF,
28494 IX86_BUILTIN_GATHER3SIV16SI,
28495 IX86_BUILTIN_GATHER3SIV8DF,
28496 IX86_BUILTIN_GATHER3SIV8DI,
28497 IX86_BUILTIN_SCATTERDIV16SF,
28498 IX86_BUILTIN_SCATTERDIV16SI,
28499 IX86_BUILTIN_SCATTERDIV8DF,
28500 IX86_BUILTIN_SCATTERDIV8DI,
28501 IX86_BUILTIN_SCATTERSIV16SF,
28502 IX86_BUILTIN_SCATTERSIV16SI,
28503 IX86_BUILTIN_SCATTERSIV8DF,
28504 IX86_BUILTIN_SCATTERSIV8DI,
28505
28506 /* AVX512PF */
28507 IX86_BUILTIN_GATHERPFQPD,
28508 IX86_BUILTIN_GATHERPFDPS,
28509 IX86_BUILTIN_GATHERPFDPD,
28510 IX86_BUILTIN_GATHERPFQPS,
28511 IX86_BUILTIN_SCATTERPFDPD,
28512 IX86_BUILTIN_SCATTERPFDPS,
28513 IX86_BUILTIN_SCATTERPFQPD,
28514 IX86_BUILTIN_SCATTERPFQPS,
28515
28516 /* AVX-512ER */
28517 IX86_BUILTIN_EXP2PD_MASK,
28518 IX86_BUILTIN_EXP2PS_MASK,
28519 IX86_BUILTIN_EXP2PS,
28520 IX86_BUILTIN_RCP28PD,
28521 IX86_BUILTIN_RCP28PS,
28522 IX86_BUILTIN_RCP28SD,
28523 IX86_BUILTIN_RCP28SS,
28524 IX86_BUILTIN_RSQRT28PD,
28525 IX86_BUILTIN_RSQRT28PS,
28526 IX86_BUILTIN_RSQRT28SD,
28527 IX86_BUILTIN_RSQRT28SS,
28528
28529 /* SHA builtins. */
28530 IX86_BUILTIN_SHA1MSG1,
28531 IX86_BUILTIN_SHA1MSG2,
28532 IX86_BUILTIN_SHA1NEXTE,
28533 IX86_BUILTIN_SHA1RNDS4,
28534 IX86_BUILTIN_SHA256MSG1,
28535 IX86_BUILTIN_SHA256MSG2,
28536 IX86_BUILTIN_SHA256RNDS2,
28537
28538 /* CLFLUSHOPT instructions. */
28539 IX86_BUILTIN_CLFLUSHOPT,
28540
28541 /* TFmode support builtins. */
28542 IX86_BUILTIN_INFQ,
28543 IX86_BUILTIN_HUGE_VALQ,
28544 IX86_BUILTIN_FABSQ,
28545 IX86_BUILTIN_COPYSIGNQ,
28546
28547 /* Vectorizer support builtins. */
28548 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28549 IX86_BUILTIN_CPYSGNPS,
28550 IX86_BUILTIN_CPYSGNPD,
28551 IX86_BUILTIN_CPYSGNPS256,
28552 IX86_BUILTIN_CPYSGNPS512,
28553 IX86_BUILTIN_CPYSGNPD256,
28554 IX86_BUILTIN_CPYSGNPD512,
28555 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28556 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28557
28558
28559 /* FMA4 instructions. */
28560 IX86_BUILTIN_VFMADDSS,
28561 IX86_BUILTIN_VFMADDSD,
28562 IX86_BUILTIN_VFMADDPS,
28563 IX86_BUILTIN_VFMADDPD,
28564 IX86_BUILTIN_VFMADDPS256,
28565 IX86_BUILTIN_VFMADDPD256,
28566 IX86_BUILTIN_VFMADDSUBPS,
28567 IX86_BUILTIN_VFMADDSUBPD,
28568 IX86_BUILTIN_VFMADDSUBPS256,
28569 IX86_BUILTIN_VFMADDSUBPD256,
28570
28571 /* FMA3 instructions. */
28572 IX86_BUILTIN_VFMADDSS3,
28573 IX86_BUILTIN_VFMADDSD3,
28574
28575 /* XOP instructions. */
28576 IX86_BUILTIN_VPCMOV,
28577 IX86_BUILTIN_VPCMOV_V2DI,
28578 IX86_BUILTIN_VPCMOV_V4SI,
28579 IX86_BUILTIN_VPCMOV_V8HI,
28580 IX86_BUILTIN_VPCMOV_V16QI,
28581 IX86_BUILTIN_VPCMOV_V4SF,
28582 IX86_BUILTIN_VPCMOV_V2DF,
28583 IX86_BUILTIN_VPCMOV256,
28584 IX86_BUILTIN_VPCMOV_V4DI256,
28585 IX86_BUILTIN_VPCMOV_V8SI256,
28586 IX86_BUILTIN_VPCMOV_V16HI256,
28587 IX86_BUILTIN_VPCMOV_V32QI256,
28588 IX86_BUILTIN_VPCMOV_V8SF256,
28589 IX86_BUILTIN_VPCMOV_V4DF256,
28590
28591 IX86_BUILTIN_VPPERM,
28592
28593 IX86_BUILTIN_VPMACSSWW,
28594 IX86_BUILTIN_VPMACSWW,
28595 IX86_BUILTIN_VPMACSSWD,
28596 IX86_BUILTIN_VPMACSWD,
28597 IX86_BUILTIN_VPMACSSDD,
28598 IX86_BUILTIN_VPMACSDD,
28599 IX86_BUILTIN_VPMACSSDQL,
28600 IX86_BUILTIN_VPMACSSDQH,
28601 IX86_BUILTIN_VPMACSDQL,
28602 IX86_BUILTIN_VPMACSDQH,
28603 IX86_BUILTIN_VPMADCSSWD,
28604 IX86_BUILTIN_VPMADCSWD,
28605
28606 IX86_BUILTIN_VPHADDBW,
28607 IX86_BUILTIN_VPHADDBD,
28608 IX86_BUILTIN_VPHADDBQ,
28609 IX86_BUILTIN_VPHADDWD,
28610 IX86_BUILTIN_VPHADDWQ,
28611 IX86_BUILTIN_VPHADDDQ,
28612 IX86_BUILTIN_VPHADDUBW,
28613 IX86_BUILTIN_VPHADDUBD,
28614 IX86_BUILTIN_VPHADDUBQ,
28615 IX86_BUILTIN_VPHADDUWD,
28616 IX86_BUILTIN_VPHADDUWQ,
28617 IX86_BUILTIN_VPHADDUDQ,
28618 IX86_BUILTIN_VPHSUBBW,
28619 IX86_BUILTIN_VPHSUBWD,
28620 IX86_BUILTIN_VPHSUBDQ,
28621
28622 IX86_BUILTIN_VPROTB,
28623 IX86_BUILTIN_VPROTW,
28624 IX86_BUILTIN_VPROTD,
28625 IX86_BUILTIN_VPROTQ,
28626 IX86_BUILTIN_VPROTB_IMM,
28627 IX86_BUILTIN_VPROTW_IMM,
28628 IX86_BUILTIN_VPROTD_IMM,
28629 IX86_BUILTIN_VPROTQ_IMM,
28630
28631 IX86_BUILTIN_VPSHLB,
28632 IX86_BUILTIN_VPSHLW,
28633 IX86_BUILTIN_VPSHLD,
28634 IX86_BUILTIN_VPSHLQ,
28635 IX86_BUILTIN_VPSHAB,
28636 IX86_BUILTIN_VPSHAW,
28637 IX86_BUILTIN_VPSHAD,
28638 IX86_BUILTIN_VPSHAQ,
28639
28640 IX86_BUILTIN_VFRCZSS,
28641 IX86_BUILTIN_VFRCZSD,
28642 IX86_BUILTIN_VFRCZPS,
28643 IX86_BUILTIN_VFRCZPD,
28644 IX86_BUILTIN_VFRCZPS256,
28645 IX86_BUILTIN_VFRCZPD256,
28646
28647 IX86_BUILTIN_VPCOMEQUB,
28648 IX86_BUILTIN_VPCOMNEUB,
28649 IX86_BUILTIN_VPCOMLTUB,
28650 IX86_BUILTIN_VPCOMLEUB,
28651 IX86_BUILTIN_VPCOMGTUB,
28652 IX86_BUILTIN_VPCOMGEUB,
28653 IX86_BUILTIN_VPCOMFALSEUB,
28654 IX86_BUILTIN_VPCOMTRUEUB,
28655
28656 IX86_BUILTIN_VPCOMEQUW,
28657 IX86_BUILTIN_VPCOMNEUW,
28658 IX86_BUILTIN_VPCOMLTUW,
28659 IX86_BUILTIN_VPCOMLEUW,
28660 IX86_BUILTIN_VPCOMGTUW,
28661 IX86_BUILTIN_VPCOMGEUW,
28662 IX86_BUILTIN_VPCOMFALSEUW,
28663 IX86_BUILTIN_VPCOMTRUEUW,
28664
28665 IX86_BUILTIN_VPCOMEQUD,
28666 IX86_BUILTIN_VPCOMNEUD,
28667 IX86_BUILTIN_VPCOMLTUD,
28668 IX86_BUILTIN_VPCOMLEUD,
28669 IX86_BUILTIN_VPCOMGTUD,
28670 IX86_BUILTIN_VPCOMGEUD,
28671 IX86_BUILTIN_VPCOMFALSEUD,
28672 IX86_BUILTIN_VPCOMTRUEUD,
28673
28674 IX86_BUILTIN_VPCOMEQUQ,
28675 IX86_BUILTIN_VPCOMNEUQ,
28676 IX86_BUILTIN_VPCOMLTUQ,
28677 IX86_BUILTIN_VPCOMLEUQ,
28678 IX86_BUILTIN_VPCOMGTUQ,
28679 IX86_BUILTIN_VPCOMGEUQ,
28680 IX86_BUILTIN_VPCOMFALSEUQ,
28681 IX86_BUILTIN_VPCOMTRUEUQ,
28682
28683 IX86_BUILTIN_VPCOMEQB,
28684 IX86_BUILTIN_VPCOMNEB,
28685 IX86_BUILTIN_VPCOMLTB,
28686 IX86_BUILTIN_VPCOMLEB,
28687 IX86_BUILTIN_VPCOMGTB,
28688 IX86_BUILTIN_VPCOMGEB,
28689 IX86_BUILTIN_VPCOMFALSEB,
28690 IX86_BUILTIN_VPCOMTRUEB,
28691
28692 IX86_BUILTIN_VPCOMEQW,
28693 IX86_BUILTIN_VPCOMNEW,
28694 IX86_BUILTIN_VPCOMLTW,
28695 IX86_BUILTIN_VPCOMLEW,
28696 IX86_BUILTIN_VPCOMGTW,
28697 IX86_BUILTIN_VPCOMGEW,
28698 IX86_BUILTIN_VPCOMFALSEW,
28699 IX86_BUILTIN_VPCOMTRUEW,
28700
28701 IX86_BUILTIN_VPCOMEQD,
28702 IX86_BUILTIN_VPCOMNED,
28703 IX86_BUILTIN_VPCOMLTD,
28704 IX86_BUILTIN_VPCOMLED,
28705 IX86_BUILTIN_VPCOMGTD,
28706 IX86_BUILTIN_VPCOMGED,
28707 IX86_BUILTIN_VPCOMFALSED,
28708 IX86_BUILTIN_VPCOMTRUED,
28709
28710 IX86_BUILTIN_VPCOMEQQ,
28711 IX86_BUILTIN_VPCOMNEQ,
28712 IX86_BUILTIN_VPCOMLTQ,
28713 IX86_BUILTIN_VPCOMLEQ,
28714 IX86_BUILTIN_VPCOMGTQ,
28715 IX86_BUILTIN_VPCOMGEQ,
28716 IX86_BUILTIN_VPCOMFALSEQ,
28717 IX86_BUILTIN_VPCOMTRUEQ,
28718
28719 /* LWP instructions. */
28720 IX86_BUILTIN_LLWPCB,
28721 IX86_BUILTIN_SLWPCB,
28722 IX86_BUILTIN_LWPVAL32,
28723 IX86_BUILTIN_LWPVAL64,
28724 IX86_BUILTIN_LWPINS32,
28725 IX86_BUILTIN_LWPINS64,
28726
28727 IX86_BUILTIN_CLZS,
28728
28729 /* RTM */
28730 IX86_BUILTIN_XBEGIN,
28731 IX86_BUILTIN_XEND,
28732 IX86_BUILTIN_XABORT,
28733 IX86_BUILTIN_XTEST,
28734
28735 /* BMI instructions. */
28736 IX86_BUILTIN_BEXTR32,
28737 IX86_BUILTIN_BEXTR64,
28738 IX86_BUILTIN_CTZS,
28739
28740 /* TBM instructions. */
28741 IX86_BUILTIN_BEXTRI32,
28742 IX86_BUILTIN_BEXTRI64,
28743
28744 /* BMI2 instructions. */
28745 IX86_BUILTIN_BZHI32,
28746 IX86_BUILTIN_BZHI64,
28747 IX86_BUILTIN_PDEP32,
28748 IX86_BUILTIN_PDEP64,
28749 IX86_BUILTIN_PEXT32,
28750 IX86_BUILTIN_PEXT64,
28751
28752 /* ADX instructions. */
28753 IX86_BUILTIN_ADDCARRYX32,
28754 IX86_BUILTIN_ADDCARRYX64,
28755
28756 /* SBB instructions. */
28757 IX86_BUILTIN_SBB32,
28758 IX86_BUILTIN_SBB64,
28759
28760 /* FSGSBASE instructions. */
28761 IX86_BUILTIN_RDFSBASE32,
28762 IX86_BUILTIN_RDFSBASE64,
28763 IX86_BUILTIN_RDGSBASE32,
28764 IX86_BUILTIN_RDGSBASE64,
28765 IX86_BUILTIN_WRFSBASE32,
28766 IX86_BUILTIN_WRFSBASE64,
28767 IX86_BUILTIN_WRGSBASE32,
28768 IX86_BUILTIN_WRGSBASE64,
28769
28770 /* RDRND instructions. */
28771 IX86_BUILTIN_RDRAND16_STEP,
28772 IX86_BUILTIN_RDRAND32_STEP,
28773 IX86_BUILTIN_RDRAND64_STEP,
28774
28775 /* RDSEED instructions. */
28776 IX86_BUILTIN_RDSEED16_STEP,
28777 IX86_BUILTIN_RDSEED32_STEP,
28778 IX86_BUILTIN_RDSEED64_STEP,
28779
28780 /* F16C instructions. */
28781 IX86_BUILTIN_CVTPH2PS,
28782 IX86_BUILTIN_CVTPH2PS256,
28783 IX86_BUILTIN_CVTPS2PH,
28784 IX86_BUILTIN_CVTPS2PH256,
28785
28786 /* CFString built-in for Darwin. */
28787 IX86_BUILTIN_CFSTRING,
28788
28789 /* Builtins to get CPU type and supported features. */
28790 IX86_BUILTIN_CPU_INIT,
28791 IX86_BUILTIN_CPU_IS,
28792 IX86_BUILTIN_CPU_SUPPORTS,
28793
28794 /* Read/write FLAGS register built-ins. */
28795 IX86_BUILTIN_READ_FLAGS,
28796 IX86_BUILTIN_WRITE_FLAGS,
28797
28798 IX86_BUILTIN_MAX
28799 };
28800
28801 /* Table for the ix86 builtin decls. */
28802 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
28803
28804 /* Table of all of the builtin functions that are possible with different ISAs
28805 but are waiting to be built until a function is declared to use that
28806 ISA. */
28807 struct builtin_isa {
28808 const char *name; /* function name */
28809 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
28810 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
28811 bool const_p; /* true if the declaration is constant */
28812 bool set_and_not_built_p; /* true if deferred: recorded but decl not yet built */
28813 };
28814
28815 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
28816
28817
28818 /* Add an ix86 target builtin function with CODE, NAME and TCODE. Save the MASK
28819 of isa_flags to use in the ix86_builtins_isa array. Store the
28820 function decl in the ix86_builtins array. Return the function decl, or
28821 NULL_TREE if the builtin was not added.
28822
28823 If the front end has a special hook for builtin functions, delay adding
28824 builtin functions that aren't in the current ISA until the ISA is changed
28825 with function specific optimization. Doing so can save about 300K for the
28826 default compiler. When the builtin is expanded, check at that time whether
28827 it is valid.
28828
28829 If the front end doesn't have a special hook, record all builtins, even
28830 those not in the current ISA, in case the user uses function specific
28831 options for a different ISA, so that we don't get scope errors if a
28832 builtin is added in the middle of a function scope. */
28833
28834 static inline tree
28835 def_builtin (HOST_WIDE_INT mask, const char *name,
28836 enum ix86_builtin_func_type tcode,
28837 enum ix86_builtins code)
28838 {
28839 tree decl = NULL_TREE;
28840
28841 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
28842 {
28843 ix86_builtins_isa[(int) code].isa = mask;
28844
28845 mask &= ~OPTION_MASK_ISA_64BIT;
28846 if (mask == 0
28847 || (mask & ix86_isa_flags) != 0
28848 || (lang_hooks.builtin_function
28849 == lang_hooks.builtin_function_ext_scope))
28850
28851 {
28852 tree type = ix86_get_builtin_func_type (tcode);
28853 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
28854 NULL, NULL_TREE);
28855 ix86_builtins[(int) code] = decl;
28856 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
28857 }
28858 else
28859 {
28860 ix86_builtins[(int) code] = NULL_TREE;
28861 ix86_builtins_isa[(int) code].tcode = tcode;
28862 ix86_builtins_isa[(int) code].name = name;
28863 ix86_builtins_isa[(int) code].const_p = false;
28864 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
28865 }
28866 }
28867
28868 return decl;
28869 }
28870
28871 /* Like def_builtin, but also marks the function decl "const". */
28872
28873 static inline tree
28874 def_builtin_const (HOST_WIDE_INT mask, const char *name,
28875 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
28876 {
28877 tree decl = def_builtin (mask, name, tcode, code);
28878 if (decl)
28879 TREE_READONLY (decl) = 1;
28880 else
28881 ix86_builtins_isa[(int) code].const_p = true;
28882
28883 return decl;
28884 }
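/* Illustrative sketch only (not part of the builtin tables below): a typical
   registration through the helper above, assuming the usual mask / name /
   function-type / code quadruple, would look roughly like

       def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
                          V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);

   If SSE is enabled on the command line the decl is built immediately;
   otherwise only the name/type/code are recorded in ix86_builtins_isa and
   the decl is created later by ix86_add_new_builtins.  */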
28885
28886 /* Add any new builtin functions for a given ISA that may not have been
28887 declared. This saves a bit of space compared to adding all of the
28888 declarations to the tree up front, even when they are never used. */
28889
28890 static void
28891 ix86_add_new_builtins (HOST_WIDE_INT isa)
28892 {
28893 int i;
28894
28895 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
28896 {
28897 if ((ix86_builtins_isa[i].isa & isa) != 0
28898 && ix86_builtins_isa[i].set_and_not_built_p)
28899 {
28900 tree decl, type;
28901
28902 /* Don't define the builtin again. */
28903 ix86_builtins_isa[i].set_and_not_built_p = false;
28904
28905 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
28906 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
28907 type, i, BUILT_IN_MD, NULL,
28908 NULL_TREE);
28909
28910 ix86_builtins[i] = decl;
28911 if (ix86_builtins_isa[i].const_p)
28912 TREE_READONLY (decl) = 1;
28913 }
28914 }
28915 }
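/* Sketch of when the deferred path fires (hypothetical user code, shown only
   for illustration): compiling with plain -msse2, the AVX builtins are merely
   recorded; a function such as

       __attribute__((target ("avx")))
       void flush_upper (void) { __builtin_ia32_vzeroupper (); }

   switches isa_flags for that function, and ix86_add_new_builtins is then
   invoked for the newly enabled ISA bits, building the missing decls so the
   call above resolves without a scope error.  */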
28916
28917 /* Bits for builtin_description.flag. */
28918
28919 /* Set when we don't support the comparison natively, and should swap the
28920 comparison operands in order to support it. */
28921 #define BUILTIN_DESC_SWAP_OPERANDS 1
28922
28923 struct builtin_description
28924 {
28925 const HOST_WIDE_INT mask;
28926 const enum insn_code icode;
28927 const char *const name;
28928 const enum ix86_builtins code;
28929 const enum rtx_code comparison;
28930 const int flag;
28931 };
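/* Reading guide for the entries below (annotation only; the entry itself is
   the first row of bdesc_comi):

     { OPTION_MASK_ISA_SSE,         ISA mask required for the builtin
       CODE_FOR_sse_comi,           insn pattern used when it is expanded
       "__builtin_ia32_comieq",     user-visible builtin name
       IX86_BUILTIN_COMIEQSS,       enum ix86_builtins code
       UNEQ,                        rtx comparison code, or UNKNOWN
       0 },                         flag: swap bit, CC mode or function type

   Depending on the table, the flag field carries BUILTIN_DESC_SWAP_OPERANDS,
   a CC mode (pcmpestr/pcmpistr) or an ix86_builtin_func_type.  */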
28932
28933 static const struct builtin_description bdesc_comi[] =
28934 {
28935 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
28936 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
28937 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
28938 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
28939 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
28940 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
28941 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
28942 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
28943 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
28944 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
28945 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
28946 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
28947 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
28948 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
28949 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
28950 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
28951 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
28952 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
28953 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
28954 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
28955 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
28956 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
28957 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
28958 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
28959 };
28960
28961 static const struct builtin_description bdesc_pcmpestr[] =
28962 {
28963 /* SSE4.2 */
28964 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
28965 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
28966 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
28967 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
28968 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
28969 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
28970 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
28971 };
28972
28973 static const struct builtin_description bdesc_pcmpistr[] =
28974 {
28975 /* SSE4.2 */
28976 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
28977 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
28978 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
28979 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
28980 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
28981 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
28982 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
28983 };
28984
28985 /* Special builtins with variable number of arguments. */
28986 static const struct builtin_description bdesc_special_args[] =
28987 {
28988 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
28989 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
28990 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
28991
28992 /* 80387 (used internally for atomic compound assignment). */
28993 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
28994 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
28995 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
28996 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
28997
28998 /* MMX */
28999 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29000
29001 /* 3DNow! */
29002 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29003
29004 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
29005 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
29006 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
29007 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29008 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29009 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29010 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29011 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29012 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29013
29014 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29015 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29016 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29017 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29018 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29019 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29020 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29021 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29022
29023 /* SSE */
29024 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29025 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29026 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29027
29028 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29029 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29030 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29031 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29032
29033 /* SSE or 3DNow!A */
29034 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29035 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
29036
29037 /* SSE2 */
29038 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29039 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29040 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29041 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
29042 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29043 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
29044 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
29045 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
29046 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
29047 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29048
29049 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29050 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29051
29052 /* SSE3 */
29053 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29054
29055 /* SSE4.1 */
29056 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
29057
29058 /* SSE4A */
29059 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29060 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29061
29062 /* AVX */
29063 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
29064 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
29065
29066 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29067 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29068 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29069 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
29070 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
29071
29072 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29073 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29074 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29075 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29076 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29077 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29078 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29079
29080 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29081 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29082 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29083
29084 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29085 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29086 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29087 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29088 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29089 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29090 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29091 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29092
29093 /* AVX2 */
29094 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29095 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29096 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29097 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29098 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29099 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29100 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29101 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29102 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29103
29104 /* AVX512F */
29105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29152
29153 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29154 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29155 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29156 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29157 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29158 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29159
29160 /* FSGSBASE */
29161 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29162 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29163 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29164 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29165 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29166 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29167 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29168 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29169
29170 /* RTM */
29171 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29172 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29173 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29174 };
29175
29176 /* Builtins with variable number of arguments. */
29177 static const struct builtin_description bdesc_args[] =
29178 {
29179 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29180 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29181 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29182 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29183 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29184 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29185 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29186
29187 /* MMX */
29188 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29189 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29190 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29191 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29192 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29193 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29194
29195 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29196 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29197 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29198 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29199 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29200 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29201 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29202 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29203
29204 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29205 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29206
29207 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29208 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29209 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29210 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29211
29212 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29213 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29214 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29215 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29216 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29217 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29218
29219 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29220 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29221 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29222 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29223 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
29224 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
29225
29226 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29227 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29228 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29229
29230 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29231
29232 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29233 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29234 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29235 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29236 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29237 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29238
29239 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29240 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29241 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29242 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29243 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29244 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29245
29246 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29247 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29248 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29249 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29250
29251 /* 3DNow! */
29252 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29253 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29254 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29255 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29256
29257 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29258 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29259 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29260 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29261 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29262 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29263 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29264 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29265 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29266 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29267 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29268 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29269 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29270 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29271 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29272
29273 /* 3DNow!A */
29274 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29275 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29276 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29277 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29278 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29279 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29280
29281 /* SSE */
29282 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29283 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29284 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29285 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29286 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29287 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29288 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29289 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29290 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29291 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29292 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29293 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29294
29295 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29296
29297 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29298 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29299 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29300 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29301 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29302 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29303 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29304 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29305
29306 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29307 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29308 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29309 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29310 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29311 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29312 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29313 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29314 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29315 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29316 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
29317 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29318 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29319 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29320 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29321 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29322 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29323 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29324 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29325 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29326
29327 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29328 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29329 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29330 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29331
29332 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29333 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29334 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29335 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29336
29337 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29338
29339 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29340 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29341 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29342 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29343 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29344
29345 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29346 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29347 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
29348
29349 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
29350
29351 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29352 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29353 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29354
29355 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29356 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
29357
29358 /* SSE MMX or 3Dnow!A */
29359 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29360 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29361 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29362
29363 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29364 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29365 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29366 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29367
29368 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29369 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29370
29371 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29372
29373 /* SSE2 */
29374 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29375
29376 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29377 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29378 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29379 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29380 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29381
29382 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29383 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29384 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29386 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29387
29388 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29389
29390 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29391 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29392 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29393 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29394
29395 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29396 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29397 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29398
29399 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29400 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29401 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29402 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29407
29408 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
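  /* The _SWAP suffix asks the expander to exchange the two arguments:
     SSE2 only provides the "less than" comparison predicates, so cmpgtpd,
     cmpgepd, cmpngtpd and cmpngepd are expressed as LT, LE, UNGE and UNGT
     with the operands reversed.  For example, _mm_cmpgt_pd (a, b) in
     <emmintrin.h> calls __builtin_ia32_cmpgtpd (a, b), which this table
     lowers to an LT mask compare on the swapped operands.  */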
29428
29429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29430 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29433
29434 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29436 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29437 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29438
29439 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29440
29441 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29442 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29443 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29444
29445 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29446
29447 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29448 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29449 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29450 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29451 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29452 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29453 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29454 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29455
29456 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29457 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29458 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29459 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29460 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29461 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29462 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29463 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29464
29465 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29466 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29467
29468 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29469 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29470 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29471 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29472
29473 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29474 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29475
29476 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29477 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29478 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29479 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29480 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29481 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29482
29483 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29484 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29485 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29486 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29487
29488 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29489 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29490 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29491 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29492 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29493 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29494 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29495 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29496
29497 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29498 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29499 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29500
29501 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29502 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29503
29504 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29505 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29506
29507 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29508
29509 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29510 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29512 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
29513
29514 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29515 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29516 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29517 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29518 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29519 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29520 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29521
29522 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29523 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29524 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29525 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29526 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29527 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29528 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29529
29530 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29531 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29532 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29533 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
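  /* Three flavours of shift signature appear above: the _SI_COUNT entries
     take the shift count as a scalar integer (the "i" forms), the
     _V*_COUNT entries take it in the low quadword of a vector operand, and
     the pslldqi/psrldqi _INT_CONVERT entries shift the whole 128-bit value,
     apparently converting the V2DI argument to the V1TI mode used by the
     underlying shift patterns.  */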
29534
29535 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29536 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29537 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29538
29539 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29540
29541 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29542
29543 /* SSE2 MMX */
29544 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29545 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29546
29547 /* SSE3 */
29548 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29549 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29550
29551 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29552 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29553 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29554 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29555 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29556 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29557
29558 /* SSSE3 */
29559 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29560 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29561 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29562 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29563 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29564 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29565
29566 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29567 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29568 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29569 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29570 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29571 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29572 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29573 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29574 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29575 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29576 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29577 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29578 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29579 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29580 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29581 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29582 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29583 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29584 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29585 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29586 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29587 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29588 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29589 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29590
29591 /* SSSE3. */
29592 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29593 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
29594
29595 /* SSE4.1 */
29596 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29597 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29598 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29599 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29600 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29601 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29602 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29603 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29604 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29605 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29606
29607 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29608 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29609 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29610 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29611 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29612 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29613 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29614 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29615 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29616 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29617 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29618 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29619 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29620
29621 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29622 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29623 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29624 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29625 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29626 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29627 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29628 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29629 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29630 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29631 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29632 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29633
29634 /* SSE4.1 */
29635 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29636 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29637 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29638 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29639
29640 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29641 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29642 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29643 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
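  /* For the ROUND_* entries the comparison-code slot is reused: the
     ROUND_FLOOR/ROUND_CEIL/ROUND_TRUNC/ROUND_MXCSR values cast to rtx_code
     above are presumably forwarded as the rounding-control immediate of the
     underlying roundpd/roundps patterns, so floorpd is simply roundpd with
     the round-toward-negative-infinity mode hard-wired.  */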
29644
29645 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29646 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29647
29648 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29649 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29650
29651 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29652 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29653 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29654 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29655
29656 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29657 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29658
29659 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29660 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29661
29662 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29663 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29664 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
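  /* The ptest entries use the comparison code to select which flag the
     expander tests: EQ for ptestz (ZF set), LTU for ptestc (CF set) and GTU
     for ptestnzc (ZF and CF both clear).  <smmintrin.h> builds
     _mm_testz_si128, _mm_testc_si128 and _mm_testnzc_si128 on top of
     these builtins.  */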
29665
29666 /* SSE4.2 */
29667 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29668 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29669 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29670 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29671 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29672
29673 /* SSE4A */
29674 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29675 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29676 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29677 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29678
29679 /* AES */
29680 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29681 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29682
29683 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29684 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29685 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29686 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29687
29688 /* PCLMUL */
29689 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
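  /* Entries with a null name (the AES and PCLMUL group above) only supply
     expansion data here; their user-visible __builtin_ia32_* names, and the
     AES/PCLMUL ISA gating, are presumably registered separately, which is
     why the table itself gates them on plain SSE2.  */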
29690
29691 /* AVX */
29692 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29693 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29694 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29695 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29696 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29697 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29698 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29699 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29700 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29701 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29704 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29705 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29706 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29707 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29708 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29709 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29710 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29711 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29712 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29713 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29714 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29715 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29716 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29717 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29718
29719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29723
29724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29737 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29738 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29739 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29740 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29741 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29745 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29747 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29749 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29754 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29755 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29756 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29757 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29758
29759 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29760 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29761 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29762
29763 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29764 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29765 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29766 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29767 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29768
29769 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29770
29771 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29772 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29773
29774 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
29775 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
29776 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
29777 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
29778
29779 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29780 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29781
29782 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29783 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29784
29785 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
29786 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
29787 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
29788 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
29789
29790 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
29791 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
29792
29793 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29794 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29795
29796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29797 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29798 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29799 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29800
29801 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29802 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29803 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29804 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
29805 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
29806 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
29807
29808 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29809 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29810 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29811 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29812 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29813 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29814 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29815 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29816 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29817 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29818 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29819 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29820 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29821 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29822 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29823
29824 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
29825 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
29826
29827 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29828 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29829
29830 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29831
29832 /* AVX2 */
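  /* 256-bit integer operations; most entries simply widen the existing
     128-bit SSE2/SSSE3/SSE4.1 builtins to ymm registers.  */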
29833 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
29834 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
29835 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
29836 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
29837 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29838 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29839 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29840 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29841 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29842 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29843 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29844 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29845 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29846 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29847 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29848 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29849 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
29850 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29851 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29852 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29853 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29854 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
29855 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
29856 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29857 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29858 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29859 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29860 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29861 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29862 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29863 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29864 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29865 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29866 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29867 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29868 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29869 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29870 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29871 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
29872 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29873 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29874 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29875 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29876 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29877 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29878 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29879 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29880 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29881 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29882 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29883 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29884 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
29885 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29886 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29887 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29888 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29889 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29890 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29891 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29892 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29893 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29894 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29895 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29896 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29897 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29898 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29899 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29900 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29901 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29902 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29903 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29904 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29905 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29906 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29907 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
29908 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29909 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29910 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29911 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29912 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29913 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
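  /* Vector shifts come in two flavours: the *_SI_COUNT / *_INT_COUNT
     prototypes take the shift count as a scalar (usually an immediate),
     while the *_V8HI_COUNT, *_V4SI_COUNT and *_V2DI_COUNT prototypes
     take it from the low quadword of an xmm operand.  Compare
     __builtin_ia32_psllwi256 (x, 3) with __builtin_ia32_psllw256 (x, cnt).  */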
29914 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29915 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29916 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29917 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29918 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29919 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29920 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29921 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29922 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29923 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29924 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29925 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29926 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29927 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29928 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29929 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29930 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29931 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29932 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29933 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29934 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29935 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29936 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29937 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29938 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29939 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29940 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29941 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29942 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29943 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29944 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29945 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29946 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29947 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29948 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29949 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29950 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29951 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29952 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
29953 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29954 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
29955 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
29956 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29957 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29958 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29959 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29960 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29961 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29962 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29963 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29964 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29965 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
29966 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
29967 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
29968 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
29969 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29970 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29971 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29972 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29973 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29974 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29975 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29976 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29977 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29978 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29979
29980 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29981
29982 /* BMI */
29983 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29984 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29985 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29986
29987 /* TBM */
29988 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29989 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29990
29991 /* F16C */
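  /* Half-precision conversions; vcvtps2ph takes a rounding-control
     immediate as its last argument (the INT in the prototype).  */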
29992 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
29993 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
29994 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
29995 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
29996
29997 /* BMI2 */
29998 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29999 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30000 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30001 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30002 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30003 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30004
30005 /* AVX512F */
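  /* Masking convention: the _mask builtins take the merge source vector
     and the write mask (QImode for 8-element, HImode for 16-element
     vectors) as their last arguments, the _maskz builtins zero elements
     whose mask bit is clear, and the compare/test builtins return the
     result mask as a QImode or HImode integer.  E.g.
     __builtin_ia32_paddd512_mask (a, b, src, m) adds A and B and keeps
     SRC in the elements where the corresponding bit of M is zero.  */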
30006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
30007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
30008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
30009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
30010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
30011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
30012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
30020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
30022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
30023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
30030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
30032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
30037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
30038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
30039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
30040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
30041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
30042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
30043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
30044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
30061 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
30062 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
30063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
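  /* Broadcasting a DImode element from a general register requires a
     64-bit GPR, so the _gpr_mask variant below is gated on
     OPTION_MASK_ISA_64BIT; the _mem_mask variant reads the scalar from
     memory and remains usable on 32-bit targets.  */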
30064 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30065 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
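  /* vrcp14/vrsqrt14: reciprocal and reciprocal square root estimates
     with a maximum relative error of 2^-14.  */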
30153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30161 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30173 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30174 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30175 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30176 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30203
30204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30208 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30212
30213 /* Mask arithmetic operations */
30214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
30224
30225 /* SHA */
30226 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30227 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30228 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30229 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30230 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30231 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30232 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30233 };
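
/* Editor's note (illustrative, not part of GCC): each row in the table
   above pairs an ISA option mask, an insn pattern (CODE_FOR_*), the
   user-visible builtin name, its IX86_BUILTIN_* enumerator, an rtx
   comparison code (UNKNOWN when unused) and a function-type code; the
   rows become callable builtins when the builtin tables are walked at
   initialisation.  The sketch below shows how the mask arithmetic
   entries surface to user code; the __mmask16 typedef and
   <immintrin.h> are assumptions about the user-facing headers, not
   taken from this file.  */
#if 0
#include <immintrin.h>

static __mmask16
combine_masks (__mmask16 a, __mmask16 b)
{
  /* HI_FTYPE_HI_HI: both operands and the result are 16-bit masks.  */
  __mmask16 both = __builtin_ia32_kandhi (a, b);	/* KANDW */
  __mmask16 either = __builtin_ia32_korhi (a, b);	/* KORW */
  return __builtin_ia32_kxorhi (both, either);		/* KXORW */
}
#endif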
30234
30235 /* Builtins with rounding support. */
30236 static const struct builtin_description bdesc_round_args[] =
30237 {
30238 /* AVX512F */
30239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30258 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30260 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30267 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30269 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30316 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30318 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30319 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30320 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30321 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30322 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30323 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30324 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30325 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30326 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30327 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30328 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30329 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30330 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30331 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30333 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30334 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30335 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30336 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30337 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30338 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30339 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30341 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30342 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30344 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30345 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30346 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30347 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30348 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30349 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30350 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30351 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30352 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30353 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30354 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30355 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30356 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30357 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30358
30359 /* AVX512ER */
30360 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30361 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30362 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30363 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30364 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30365 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30366 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30367 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30368 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30369 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30370 };
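
/* Editor's note (illustrative, not part of GCC): every entry in
   bdesc_round_args carries a trailing integer operand -- the _INT
   suffix in the FTYPE codes -- that selects the rounding mode / SAE
   behaviour.  The sketch below shows one plausible call, following the
   usual AVX-512 mask-builtin convention that the third vector operand
   is the masked-off pass-through value; __m512d, __mmask8 and
   _MM_FROUND_CUR_DIRECTION are assumed to come from <immintrin.h>,
   not from this file.  */
#if 0
#include <immintrin.h>

static __m512d
masked_add_round (__m512d src, __mmask8 k, __m512d a, __m512d b)
{
  /* V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT: a, b, pass-through, write mask,
     rounding-mode immediate.  */
  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) a, (__v8df) b,
						 (__v8df) src, k,
						 _MM_FROUND_CUR_DIRECTION);
}
#endif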
30371
30372 /* FMA4 and XOP. */
30373 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30374 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30375 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30376 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30377 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30378 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30379 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30380 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30381 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30382 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30383 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30384 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30385 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30386 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30387 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30388 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30389 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30390 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30391 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30392 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30393 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30394 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30395 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30396 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30397 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30398 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30399 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30400 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30401 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30402 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30403 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30404 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30405 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30406 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30407 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30408 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30409 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30410 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30411 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30412 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30413 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30414 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30415 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30416 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30417 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30418 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30419 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30420 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30421 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30422 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30423 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30424 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
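
/* Editor's note (illustrative, not part of GCC): the MULTI_ARG_* names
   above are only shorthand for the ix86_builtin_func_type codes on
   their right-hand sides.  For example MULTI_ARG_3_SF expands to
   V4SF_FTYPE_V4SF_V4SF_V4SF, i.e. a builtin with the prototype
   sketched below (used by the FMA4 __builtin_ia32_vfmaddss entry in
   the table that follows; the v4sf typedef is an assumption for
   illustration).  */
#if 0
typedef float v4sf __attribute__ ((vector_size (16)));

v4sf __builtin_ia32_vfmaddss (v4sf, v4sf, v4sf);	/* MULTI_ARG_3_SF */
#endif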
30425
30426 static const struct builtin_description bdesc_multi_arg[] =
30427 {
30428 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30429 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30430 UNKNOWN, (int)MULTI_ARG_3_SF },
30431 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30432 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30433 UNKNOWN, (int)MULTI_ARG_3_DF },
30434
30435 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30436 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30437 UNKNOWN, (int)MULTI_ARG_3_SF },
30438 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30439 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30440 UNKNOWN, (int)MULTI_ARG_3_DF },
30441
30442 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30443 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30444 UNKNOWN, (int)MULTI_ARG_3_SF },
30445 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30446 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30447 UNKNOWN, (int)MULTI_ARG_3_DF },
30448 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30449 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30450 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30451 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30452 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30453 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30454
30455 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30456 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30457 UNKNOWN, (int)MULTI_ARG_3_SF },
30458 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30459 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30460 UNKNOWN, (int)MULTI_ARG_3_DF },
30461 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30462 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30463 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30464 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30465 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30466 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30467
30468 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30469 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30470 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30471 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
30473 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30475
30476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30478 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30481 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30483
30484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30485
30486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30489 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30498
30499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30515
30516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30522
30523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30538
30539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30546
30547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30550 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30554
30555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30556 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30557 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30558 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30559 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30560 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30562
30563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30566 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30568 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30570
30571 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30573 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30574 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30575 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30576 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30578
30579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30580 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30581 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30585 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30586
30587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30589 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30590 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30591 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30592 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30593 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30594
30595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30596 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30597 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30598 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30601 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30602
30603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30604 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30606 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30611
30612 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30613 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30614 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30615 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30616 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30617 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30618 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30619 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30620
30621 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30622 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30623 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30624 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30625
30626 };
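
/* Editor's note (illustrative, not part of GCC): the vpcom* rows above
   all reuse a single xop_maskcmp* pattern and differ only in the rtx
   comparison code recorded in the table (EQ, NE, LT, ...).  The sketch
   below shows one such builtin; the v16qi typedef is an assumption for
   illustration.  */
#if 0
typedef char v16qi __attribute__ ((vector_size (16)));

static v16qi
bytes_less_than (v16qi a, v16qi b)
{
  /* MULTI_ARG_2_QI_CMP: element-wise signed compare; each result byte
     is all-ones where a < b and zero elsewhere.  */
  return __builtin_ia32_vpcomltb (a, b);
}
#endif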
30627 \f
30628 /* TM vector builtins. */
30629
30630 /* Reuse the existing x86-specific `struct builtin_description' because
30631    we're lazy.  Add casts to make them fit. */
30632 static const struct builtin_description bdesc_tm[] =
30633 {
30634 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30635 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30636 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30637 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30638 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30639 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30640 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30641
30642 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30643 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30644 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30645 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30646 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30647 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30648 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30649
30650 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30651 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30652 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30653 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30654 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30655 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30656 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30657
30658 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30659 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30660 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30661 };
30662
30663 /* TM callbacks. */
30664
30665 /* Return the builtin decl needed to load a vector of TYPE. */
30666
30667 static tree
30668 ix86_builtin_tm_load (tree type)
30669 {
30670 if (TREE_CODE (type) == VECTOR_TYPE)
30671 {
30672 switch (tree_to_uhwi (TYPE_SIZE (type)))
30673 {
30674 case 64:
30675 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30676 case 128:
30677 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30678 case 256:
30679 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30680 }
30681 }
30682 return NULL_TREE;
30683 }
30684
30685 /* Return the builtin decl needed to store a vector of TYPE. */
30686
30687 static tree
30688 ix86_builtin_tm_store (tree type)
30689 {
30690 if (TREE_CODE (type) == VECTOR_TYPE)
30691 {
30692 switch (tree_to_uhwi (TYPE_SIZE (type)))
30693 {
30694 case 64:
30695 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30696 case 128:
30697 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30698 case 256:
30699 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30700 }
30701 }
30702 return NULL_TREE;
30703 }
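/* A worked example of the two helpers above (illustrative sketch): a vector
   type whose TYPE_SIZE is 128 bits, such as

     typedef float v4sf __attribute__ ((vector_size (16)));

   maps to BUILT_IN_TM_LOAD_M128 / BUILT_IN_TM_STORE_M128, which the bdesc_tm
   table registers as "__builtin__ITM_RM128" / "__builtin__ITM_WM128".  */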
30704 \f
30705 /* Initialize the transactional memory vector load/store builtins. */
30706
30707 static void
30708 ix86_init_tm_builtins (void)
30709 {
30710 enum ix86_builtin_func_type ftype;
30711 const struct builtin_description *d;
30712 size_t i;
30713 tree decl;
30714 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30715 tree attrs_log, attrs_type_log;
30716
30717 if (!flag_tm)
30718 return;
30719
30720 /* If there are no builtins defined, we must be compiling in a
30721 language without trans-mem support. */
30722 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30723 return;
30724
30725 /* Use whatever attributes a normal TM load has. */
30726 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30727 attrs_load = DECL_ATTRIBUTES (decl);
30728 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30729 /* Use whatever attributes a normal TM store has. */
30730 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30731 attrs_store = DECL_ATTRIBUTES (decl);
30732 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30733 /* Use whatever attributes a normal TM log has. */
30734 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30735 attrs_log = DECL_ATTRIBUTES (decl);
30736 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30737
30738 for (i = 0, d = bdesc_tm;
30739 i < ARRAY_SIZE (bdesc_tm);
30740 i++, d++)
30741 {
30742 if ((d->mask & ix86_isa_flags) != 0
30743 || (lang_hooks.builtin_function
30744 == lang_hooks.builtin_function_ext_scope))
30745 {
30746 tree type, attrs, attrs_type;
30747 enum built_in_function code = (enum built_in_function) d->code;
30748
30749 ftype = (enum ix86_builtin_func_type) d->flag;
30750 type = ix86_get_builtin_func_type (ftype);
30751
30752 if (BUILTIN_TM_LOAD_P (code))
30753 {
30754 attrs = attrs_load;
30755 attrs_type = attrs_type_load;
30756 }
30757 else if (BUILTIN_TM_STORE_P (code))
30758 {
30759 attrs = attrs_store;
30760 attrs_type = attrs_type_store;
30761 }
30762 else
30763 {
30764 attrs = attrs_log;
30765 attrs_type = attrs_type_log;
30766 }
30767 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30768 /* The builtin without the prefix for
30769 calling it directly. */
30770 d->name + strlen ("__builtin_"),
30771 attrs);
30772 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
30773 set the TYPE_ATTRIBUTES. */
30774 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30775
30776 set_builtin_decl (code, decl, false);
30777 }
30778 }
30779 }
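/* For example (sketch): the bdesc_tm entry named "__builtin__ITM_WM128" is
   registered by the loop above under both that name and the stripped name
   "_ITM_WM128" (d->name + strlen ("__builtin_")), so the libitm entry point
   can also be called directly.  */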
30780
30781 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
30782 in the current target ISA, so that the user can compile particular modules
30783 with target-specific options that differ from the command-line
30784 options. */
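/* For instance (user-level sketch, hypothetical function name), a module
   compiled with plain -msse2 may still contain

     __attribute__ ((target ("avx2"))) void f (int *p);

   whose body uses AVX2 intrinsics, so the corresponding builtins must
   already be defined here.  */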
30785 static void
30786 ix86_init_mmx_sse_builtins (void)
30787 {
30788 const struct builtin_description * d;
30789 enum ix86_builtin_func_type ftype;
30790 size_t i;
30791
30792 /* Add all special builtins with variable number of operands. */
30793 for (i = 0, d = bdesc_special_args;
30794 i < ARRAY_SIZE (bdesc_special_args);
30795 i++, d++)
30796 {
30797 if (d->name == 0)
30798 continue;
30799
30800 ftype = (enum ix86_builtin_func_type) d->flag;
30801 def_builtin (d->mask, d->name, ftype, d->code);
30802 }
30803
30804 /* Add all builtins with variable number of operands. */
30805 for (i = 0, d = bdesc_args;
30806 i < ARRAY_SIZE (bdesc_args);
30807 i++, d++)
30808 {
30809 if (d->name == 0)
30810 continue;
30811
30812 ftype = (enum ix86_builtin_func_type) d->flag;
30813 def_builtin_const (d->mask, d->name, ftype, d->code);
30814 }
30815
30816 /* Add all builtins with rounding. */
30817 for (i = 0, d = bdesc_round_args;
30818 i < ARRAY_SIZE (bdesc_round_args);
30819 i++, d++)
30820 {
30821 if (d->name == 0)
30822 continue;
30823
30824 ftype = (enum ix86_builtin_func_type) d->flag;
30825 def_builtin_const (d->mask, d->name, ftype, d->code);
30826 }
30827
30828 /* pcmpestr[im] insns. */
30829 for (i = 0, d = bdesc_pcmpestr;
30830 i < ARRAY_SIZE (bdesc_pcmpestr);
30831 i++, d++)
30832 {
30833 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30834 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30835 else
30836 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30837 def_builtin_const (d->mask, d->name, ftype, d->code);
30838 }
30839
30840 /* pcmpistr[im] insns. */
30841 for (i = 0, d = bdesc_pcmpistr;
30842 i < ARRAY_SIZE (bdesc_pcmpistr);
30843 i++, d++)
30844 {
30845 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30846 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30847 else
30848 ftype = INT_FTYPE_V16QI_V16QI_INT;
30849 def_builtin_const (d->mask, d->name, ftype, d->code);
30850 }
30851
30852 /* comi/ucomi insns. */
30853 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30854 {
30855 if (d->mask == OPTION_MASK_ISA_SSE2)
30856 ftype = INT_FTYPE_V2DF_V2DF;
30857 else
30858 ftype = INT_FTYPE_V4SF_V4SF;
30859 def_builtin_const (d->mask, d->name, ftype, d->code);
30860 }
30861
30862 /* SSE */
30863 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30864 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30865 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30866 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
30867
30868 /* SSE or 3DNow!A */
30869 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
30870 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30871 IX86_BUILTIN_MASKMOVQ);
30872
30873 /* SSE2 */
30874 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30875 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30876
30877 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30878 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30879 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30880 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30881
30882 /* SSE3. */
30883 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30884 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30885 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30886 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30887
30888 /* AES */
30889 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30890 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30891 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30892 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30893 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30894 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30895 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30896 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30897 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30898 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30899 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30900 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30901
30902 /* PCLMUL */
30903 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30904 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30905
30906 /* RDRND */
30907 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30908 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30909 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30910 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30911 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30912 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30913 IX86_BUILTIN_RDRAND64_STEP);
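/* Typical use of the step builtins just defined, as wrapped by the
   _rdrand32_step style intrinsics (sketch):

     unsigned int val;
     int ok = __builtin_ia32_rdrand32_step (&val);

   where a zero return means no random value was available yet.  */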
30914
30915 /* AVX2 */
30916 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30917 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30918 IX86_BUILTIN_GATHERSIV2DF);
30919
30920 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30921 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30922 IX86_BUILTIN_GATHERSIV4DF);
30923
30924 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30925 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30926 IX86_BUILTIN_GATHERDIV2DF);
30927
30928 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30929 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30930 IX86_BUILTIN_GATHERDIV4DF);
30931
30932 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30933 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30934 IX86_BUILTIN_GATHERSIV4SF);
30935
30936 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30937 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30938 IX86_BUILTIN_GATHERSIV8SF);
30939
30940 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30941 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
30942 IX86_BUILTIN_GATHERDIV4SF);
30943
30944 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
30945 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
30946 IX86_BUILTIN_GATHERDIV8SF);
30947
30948 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
30949 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
30950 IX86_BUILTIN_GATHERSIV2DI);
30951
30952 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
30953 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
30954 IX86_BUILTIN_GATHERSIV4DI);
30955
30956 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
30957 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
30958 IX86_BUILTIN_GATHERDIV2DI);
30959
30960 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
30961 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
30962 IX86_BUILTIN_GATHERDIV4DI);
30963
30964 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
30965 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
30966 IX86_BUILTIN_GATHERSIV4SI);
30967
30968 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
30969 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
30970 IX86_BUILTIN_GATHERSIV8SI);
30971
30972 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
30973 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
30974 IX86_BUILTIN_GATHERDIV4SI);
30975
30976 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
30977 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
30978 IX86_BUILTIN_GATHERDIV8SI);
30979
30980 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
30981 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
30982 IX86_BUILTIN_GATHERALTSIV4DF);
30983
30984 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
30985 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
30986 IX86_BUILTIN_GATHERALTDIV8SF);
30987
30988 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
30989 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
30990 IX86_BUILTIN_GATHERALTSIV4DI);
30991
30992 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
30993 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
30994 IX86_BUILTIN_GATHERALTDIV8SI);
30995
30996 /* AVX512F */
30997 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
30998 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
30999 IX86_BUILTIN_GATHER3SIV16SF);
31000
31001 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
31002 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
31003 IX86_BUILTIN_GATHER3SIV8DF);
31004
31005 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
31006 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
31007 IX86_BUILTIN_GATHER3DIV16SF);
31008
31009 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
31010 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
31011 IX86_BUILTIN_GATHER3DIV8DF);
31012
31013 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
31014 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
31015 IX86_BUILTIN_GATHER3SIV16SI);
31016
31017 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
31018 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
31019 IX86_BUILTIN_GATHER3SIV8DI);
31020
31021 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
31022 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
31023 IX86_BUILTIN_GATHER3DIV16SI);
31024
31025 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
31026 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
31027 IX86_BUILTIN_GATHER3DIV8DI);
31028
31029 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
31030 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
31031 IX86_BUILTIN_GATHER3ALTSIV8DF);
31032
31033 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
31034 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
31035 IX86_BUILTIN_GATHER3ALTDIV16SF);
31036
31037 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
31038 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
31039 IX86_BUILTIN_GATHER3ALTSIV8DI);
31040
31041 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
31042 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
31043 IX86_BUILTIN_GATHER3ALTDIV16SI);
31044
31045 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
31046 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
31047 IX86_BUILTIN_SCATTERSIV16SF);
31048
31049 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
31050 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
31051 IX86_BUILTIN_SCATTERSIV8DF);
31052
31053 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
31054 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
31055 IX86_BUILTIN_SCATTERDIV16SF);
31056
31057 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
31058 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
31059 IX86_BUILTIN_SCATTERDIV8DF);
31060
31061 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
31062 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
31063 IX86_BUILTIN_SCATTERSIV16SI);
31064
31065 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
31066 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
31067 IX86_BUILTIN_SCATTERSIV8DI);
31068
31069 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
31070 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
31071 IX86_BUILTIN_SCATTERDIV16SI);
31072
31073 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31074 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
31075 IX86_BUILTIN_SCATTERDIV8DI);
31076
31077 /* AVX512PF */
31078 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31079 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31080 IX86_BUILTIN_GATHERPFDPD);
31081 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31082 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31083 IX86_BUILTIN_GATHERPFDPS);
31084 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31085 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31086 IX86_BUILTIN_GATHERPFQPD);
31087 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31088 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31089 IX86_BUILTIN_GATHERPFQPS);
31090 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31091 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31092 IX86_BUILTIN_SCATTERPFDPD);
31093 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31094 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31095 IX86_BUILTIN_SCATTERPFDPS);
31096 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31097 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31098 IX86_BUILTIN_SCATTERPFQPD);
31099 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31100 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31101 IX86_BUILTIN_SCATTERPFQPS);
31102
31103 /* SHA */
31104 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31105 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31106 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31107 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31108 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31109 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31110 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31111 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31112 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31113 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31114 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31115 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31116 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31117 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31118
31119 /* RTM. */
31120 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31121 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31122
31123 /* MMX access to the vec_init patterns. */
31124 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31125 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31126
31127 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31128 V4HI_FTYPE_HI_HI_HI_HI,
31129 IX86_BUILTIN_VEC_INIT_V4HI);
31130
31131 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31132 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31133 IX86_BUILTIN_VEC_INIT_V8QI);
31134
31135 /* Access to the vec_extract patterns. */
31136 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31137 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31138 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31139 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31140 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31141 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31142 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31143 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31144 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31145 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31146
31147 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31148 "__builtin_ia32_vec_ext_v4hi",
31149 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31150
31151 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31152 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31153
31154 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31155 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31156
31157 /* Access to the vec_set patterns. */
31158 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31159 "__builtin_ia32_vec_set_v2di",
31160 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31161
31162 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31163 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31164
31165 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31166 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31167
31168 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31169 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31170
31171 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31172 "__builtin_ia32_vec_set_v4hi",
31173 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31174
31175 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31176 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31177
31178 /* RDSEED */
31179 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31180 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31181 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31182 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31183 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31184 "__builtin_ia32_rdseed_di_step",
31185 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31186
31187 /* ADCX */
31188 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31189 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31190 def_builtin (OPTION_MASK_ISA_64BIT,
31191 "__builtin_ia32_addcarryx_u64",
31192 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31193 IX86_BUILTIN_ADDCARRYX64);
31194
31195 /* SBB */
31196 def_builtin (0, "__builtin_ia32_sbb_u32",
31197 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
31198 def_builtin (OPTION_MASK_ISA_64BIT,
31199 "__builtin_ia32_sbb_u64",
31200 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31201 IX86_BUILTIN_SBB64);
31202
31203 /* Read/write FLAGS. */
31204 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31205 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31206 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31207 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31208 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31209 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31210 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31211 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
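/* For example (sketch, 32-bit case), the pair just defined allows

     unsigned int flags = __builtin_ia32_readeflags_u32 ();
     __builtin_ia32_writeeflags_u32 (flags);

   to read and later restore the EFLAGS register.  */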
31212
31213 /* CLFLUSHOPT. */
31214 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
31215 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
31216
31217 /* Add FMA4 multi-arg argument instructions */
31218 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31219 {
31220 if (d->name == 0)
31221 continue;
31222
31223 ftype = (enum ix86_builtin_func_type) d->flag;
31224 def_builtin_const (d->mask, d->name, ftype, d->code);
31225 }
31226 }
31227
31228 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31229 to return a pointer to VERSION_DECL if the outcome of the expression
31230 formed by PREDICATE_CHAIN is true. This function will be called during
31231 version dispatch to decide which function version to execute. It returns
31232 the basic block at the end, to which more conditions can be added. */
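/* Roughly, the code appended to NEW_BB has the following shape
   (pseudo-GIMPLE sketch):

     cond_1 = predicate_1 (arg_1);
     ...
     and_var = MIN_EXPR <cond_n, and_var>;
     if (and_var > 0)
       return (void *) &version_decl;

   with the false edge leading to the returned block, where further
   conditions can be chained.  */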
31233
31234 static basic_block
31235 add_condition_to_bb (tree function_decl, tree version_decl,
31236 tree predicate_chain, basic_block new_bb)
31237 {
31238 gimple return_stmt;
31239 tree convert_expr, result_var;
31240 gimple convert_stmt;
31241 gimple call_cond_stmt;
31242 gimple if_else_stmt;
31243
31244 basic_block bb1, bb2, bb3;
31245 edge e12, e23;
31246
31247 tree cond_var, and_expr_var = NULL_TREE;
31248 gimple_seq gseq;
31249
31250 tree predicate_decl, predicate_arg;
31251
31252 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31253
31254 gcc_assert (new_bb != NULL);
31255 gseq = bb_seq (new_bb);
31256
31257
31258 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31259 build_fold_addr_expr (version_decl));
31260 result_var = create_tmp_var (ptr_type_node, NULL);
31261 convert_stmt = gimple_build_assign (result_var, convert_expr);
31262 return_stmt = gimple_build_return (result_var);
31263
31264 if (predicate_chain == NULL_TREE)
31265 {
31266 gimple_seq_add_stmt (&gseq, convert_stmt);
31267 gimple_seq_add_stmt (&gseq, return_stmt);
31268 set_bb_seq (new_bb, gseq);
31269 gimple_set_bb (convert_stmt, new_bb);
31270 gimple_set_bb (return_stmt, new_bb);
31271 pop_cfun ();
31272 return new_bb;
31273 }
31274
31275 while (predicate_chain != NULL)
31276 {
31277 cond_var = create_tmp_var (integer_type_node, NULL);
31278 predicate_decl = TREE_PURPOSE (predicate_chain);
31279 predicate_arg = TREE_VALUE (predicate_chain);
31280 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31281 gimple_call_set_lhs (call_cond_stmt, cond_var);
31282
31283 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31284 gimple_set_bb (call_cond_stmt, new_bb);
31285 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31286
31287 predicate_chain = TREE_CHAIN (predicate_chain);
31288
31289 if (and_expr_var == NULL)
31290 and_expr_var = cond_var;
31291 else
31292 {
31293 gimple assign_stmt;
31294 /* Use MIN_EXPR to check whether any integer is zero:
31295 and_expr_var = MIN_EXPR <cond_var, and_expr_var>. */
31296 assign_stmt = gimple_build_assign (and_expr_var,
31297 build2 (MIN_EXPR, integer_type_node,
31298 cond_var, and_expr_var));
31299
31300 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31301 gimple_set_bb (assign_stmt, new_bb);
31302 gimple_seq_add_stmt (&gseq, assign_stmt);
31303 }
31304 }
31305
31306 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31307 integer_zero_node,
31308 NULL_TREE, NULL_TREE);
31309 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31310 gimple_set_bb (if_else_stmt, new_bb);
31311 gimple_seq_add_stmt (&gseq, if_else_stmt);
31312
31313 gimple_seq_add_stmt (&gseq, convert_stmt);
31314 gimple_seq_add_stmt (&gseq, return_stmt);
31315 set_bb_seq (new_bb, gseq);
31316
31317 bb1 = new_bb;
31318 e12 = split_block (bb1, if_else_stmt);
31319 bb2 = e12->dest;
31320 e12->flags &= ~EDGE_FALLTHRU;
31321 e12->flags |= EDGE_TRUE_VALUE;
31322
31323 e23 = split_block (bb2, return_stmt);
31324
31325 gimple_set_bb (convert_stmt, bb2);
31326 gimple_set_bb (return_stmt, bb2);
31327
31328 bb3 = e23->dest;
31329 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31330
31331 remove_edge (e23);
31332 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31333
31334 pop_cfun ();
31335
31336 return bb3;
31337 }
31338
31339 /* This parses the attribute arguments to target in DECL and determines
31340 the right builtin to use to match the platform specification.
31341 It returns the priority value for this version decl. If PREDICATE_LIST
31342 is not NULL, it stores the list of cpu features that need to be checked
31343 before dispatching this function. */
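/* For example (user-level sketch), a version declared with
   __attribute__ ((target ("arch=core2,ssse3"))) yields a predicate list
   containing __builtin_cpu_is ("core2") and __builtin_cpu_supports ("ssse3"),
   and the returned priority is P_PROC_SSSE3.  */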
31344
31345 static unsigned int
31346 get_builtin_code_for_version (tree decl, tree *predicate_list)
31347 {
31348 tree attrs;
31349 struct cl_target_option cur_target;
31350 tree target_node;
31351 struct cl_target_option *new_target;
31352 const char *arg_str = NULL;
31353 const char *attrs_str = NULL;
31354 char *tok_str = NULL;
31355 char *token;
31356
31357 /* Priority of i386 features; a greater value means a higher priority. This is
31358 used to decide the order in which function dispatch must happen. For
31359 instance, a version specialized for SSE4.2 should be checked for dispatch
31360 before a version for SSE3, as SSE4.2 implies SSE3. */
31361 enum feature_priority
31362 {
31363 P_ZERO = 0,
31364 P_MMX,
31365 P_SSE,
31366 P_SSE2,
31367 P_SSE3,
31368 P_SSSE3,
31369 P_PROC_SSSE3,
31370 P_SSE4_A,
31371 P_PROC_SSE4_A,
31372 P_SSE4_1,
31373 P_SSE4_2,
31374 P_PROC_SSE4_2,
31375 P_POPCNT,
31376 P_AVX,
31377 P_PROC_AVX,
31378 P_FMA4,
31379 P_XOP,
31380 P_PROC_XOP,
31381 P_FMA,
31382 P_PROC_FMA,
31383 P_AVX2,
31384 P_PROC_AVX2
31385 };
31386
31387 enum feature_priority priority = P_ZERO;
31388
31389 /* These are the target attribute strings for which a dispatcher is
31390 available, from fold_builtin_cpu. */
31391
31392 static struct _feature_list
31393 {
31394 const char *const name;
31395 const enum feature_priority priority;
31396 }
31397 const feature_list[] =
31398 {
31399 {"mmx", P_MMX},
31400 {"sse", P_SSE},
31401 {"sse2", P_SSE2},
31402 {"sse3", P_SSE3},
31403 {"sse4a", P_SSE4_A},
31404 {"ssse3", P_SSSE3},
31405 {"sse4.1", P_SSE4_1},
31406 {"sse4.2", P_SSE4_2},
31407 {"popcnt", P_POPCNT},
31408 {"avx", P_AVX},
31409 {"fma4", P_FMA4},
31410 {"xop", P_XOP},
31411 {"fma", P_FMA},
31412 {"avx2", P_AVX2}
31413 };
31414
31415
31416 static unsigned int NUM_FEATURES
31417 = sizeof (feature_list) / sizeof (struct _feature_list);
31418
31419 unsigned int i;
31420
31421 tree predicate_chain = NULL_TREE;
31422 tree predicate_decl, predicate_arg;
31423
31424 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31425 gcc_assert (attrs != NULL);
31426
31427 attrs = TREE_VALUE (TREE_VALUE (attrs));
31428
31429 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31430 attrs_str = TREE_STRING_POINTER (attrs);
31431
31432 /* Return priority zero for default function. */
31433 if (strcmp (attrs_str, "default") == 0)
31434 return 0;
31435
31436 /* Handle arch= if specified. For priority, set it to be 1 more than
31437 the best instruction set the processor can handle. For instance, if
31438 there is a version for atom and a version for ssse3 (the highest ISA
31439 priority for atom), the atom version must be checked for dispatch
31440 before the ssse3 version. */
31441 if (strstr (attrs_str, "arch=") != NULL)
31442 {
31443 cl_target_option_save (&cur_target, &global_options);
31444 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31445 &global_options_set);
31446
31447 gcc_assert (target_node);
31448 new_target = TREE_TARGET_OPTION (target_node);
31449 gcc_assert (new_target);
31450
31451 if (new_target->arch_specified && new_target->arch > 0)
31452 {
31453 switch (new_target->arch)
31454 {
31455 case PROCESSOR_CORE2:
31456 arg_str = "core2";
31457 priority = P_PROC_SSSE3;
31458 break;
31459 case PROCESSOR_NEHALEM:
31460 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31461 arg_str = "westmere";
31462 else
31463 /* We translate "arch=corei7" and "arch=nehalem" to
31464 "corei7" so that it will be mapped to M_INTEL_COREI7
31465 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31466 arg_str = "corei7";
31467 priority = P_PROC_SSE4_2;
31468 break;
31469 case PROCESSOR_SANDYBRIDGE:
31470 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31471 arg_str = "ivybridge";
31472 else
31473 arg_str = "sandybridge";
31474 priority = P_PROC_AVX;
31475 break;
31476 case PROCESSOR_HASWELL:
31477 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31478 arg_str = "broadwell";
31479 else
31480 arg_str = "haswell";
31481 priority = P_PROC_AVX2;
31482 break;
31483 case PROCESSOR_BONNELL:
31484 arg_str = "bonnell";
31485 priority = P_PROC_SSSE3;
31486 break;
31487 case PROCESSOR_SILVERMONT:
31488 arg_str = "silvermont";
31489 priority = P_PROC_SSE4_2;
31490 break;
31491 case PROCESSOR_AMDFAM10:
31492 arg_str = "amdfam10h";
31493 priority = P_PROC_SSE4_A;
31494 break;
31495 case PROCESSOR_BTVER1:
31496 arg_str = "btver1";
31497 priority = P_PROC_SSE4_A;
31498 break;
31499 case PROCESSOR_BTVER2:
31500 arg_str = "btver2";
31501 priority = P_PROC_AVX;
31502 break;
31503 case PROCESSOR_BDVER1:
31504 arg_str = "bdver1";
31505 priority = P_PROC_XOP;
31506 break;
31507 case PROCESSOR_BDVER2:
31508 arg_str = "bdver2";
31509 priority = P_PROC_FMA;
31510 break;
31511 case PROCESSOR_BDVER3:
31512 arg_str = "bdver3";
31513 priority = P_PROC_FMA;
31514 break;
31515 case PROCESSOR_BDVER4:
31516 arg_str = "bdver4";
31517 priority = P_PROC_AVX2;
31518 break;
31519 }
31520 }
31521
31522 cl_target_option_restore (&global_options, &cur_target);
31523
31524 if (predicate_list && arg_str == NULL)
31525 {
31526 error_at (DECL_SOURCE_LOCATION (decl),
31527 "No dispatcher found for the versioning attributes");
31528 return 0;
31529 }
31530
31531 if (predicate_list)
31532 {
31533 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31534 /* For a C string literal the length includes the trailing NULL. */
31535 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31536 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31537 predicate_chain);
31538 }
31539 }
31540
31541 /* Process feature name. */
31542 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31543 strcpy (tok_str, attrs_str);
31544 token = strtok (tok_str, ",");
31545 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31546
31547 while (token != NULL)
31548 {
31549 /* Do not process "arch=" */
31550 if (strncmp (token, "arch=", 5) == 0)
31551 {
31552 token = strtok (NULL, ",");
31553 continue;
31554 }
31555 for (i = 0; i < NUM_FEATURES; ++i)
31556 {
31557 if (strcmp (token, feature_list[i].name) == 0)
31558 {
31559 if (predicate_list)
31560 {
31561 predicate_arg = build_string_literal (
31562 strlen (feature_list[i].name) + 1,
31563 feature_list[i].name);
31564 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31565 predicate_chain);
31566 }
31567 /* Find the maximum priority feature. */
31568 if (feature_list[i].priority > priority)
31569 priority = feature_list[i].priority;
31570
31571 break;
31572 }
31573 }
31574 if (predicate_list && i == NUM_FEATURES)
31575 {
31576 error_at (DECL_SOURCE_LOCATION (decl),
31577 "No dispatcher found for %s", token);
31578 return 0;
31579 }
31580 token = strtok (NULL, ",");
31581 }
31582 free (tok_str);
31583
31584 if (predicate_list && predicate_chain == NULL_TREE)
31585 {
31586 error_at (DECL_SOURCE_LOCATION (decl),
31587 "No dispatcher found for the versioning attributes : %s",
31588 attrs_str);
31589 return 0;
31590 }
31591 else if (predicate_list)
31592 {
31593 predicate_chain = nreverse (predicate_chain);
31594 *predicate_list = predicate_chain;
31595 }
31596
31597 return priority;
31598 }
31599
31600 /* This compares the priority of target features in function DECL1
31601 and DECL2. It returns positive value if DECL1 is higher priority,
31602 negative value if DECL2 is higher priority and 0 if they are the
31603 same. */
31604
31605 static int
31606 ix86_compare_version_priority (tree decl1, tree decl2)
31607 {
31608 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31609 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31610
31611 return (int)priority1 - (int)priority2;
31612 }
31613
31614 /* V1 and V2 point to function versions with different priorities
31615 based on the target ISA. This function compares their priorities. */
31616
31617 static int
31618 feature_compare (const void *v1, const void *v2)
31619 {
31620 typedef struct _function_version_info
31621 {
31622 tree version_decl;
31623 tree predicate_chain;
31624 unsigned int dispatch_priority;
31625 } function_version_info;
31626
31627 const function_version_info c1 = *(const function_version_info *)v1;
31628 const function_version_info c2 = *(const function_version_info *)v2;
31629 return (c2.dispatch_priority - c1.dispatch_priority);
31630 }
31631
31632 /* This function generates the dispatch function for
31633 multi-versioned functions. DISPATCH_DECL is the function which will
31634 contain the dispatch logic. FNDECLS is a vector of the function choices
31635 for dispatch. EMPTY_BB is the basic block pointer
31636 in DISPATCH_DECL in which the dispatch code is generated. */
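/* In outline (sketch): the generated body first calls __builtin_cpu_init,
   then tests the non-default versions in decreasing priority order via
   add_condition_to_bb, and finally returns the address of the default
   version if no predicate matched.  */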
31637
31638 static int
31639 dispatch_function_versions (tree dispatch_decl,
31640 void *fndecls_p,
31641 basic_block *empty_bb)
31642 {
31643 tree default_decl;
31644 gimple ifunc_cpu_init_stmt;
31645 gimple_seq gseq;
31646 int ix;
31647 tree ele;
31648 vec<tree> *fndecls;
31649 unsigned int num_versions = 0;
31650 unsigned int actual_versions = 0;
31651 unsigned int i;
31652
31653 struct _function_version_info
31654 {
31655 tree version_decl;
31656 tree predicate_chain;
31657 unsigned int dispatch_priority;
31658 }*function_version_info;
31659
31660 gcc_assert (dispatch_decl != NULL
31661 && fndecls_p != NULL
31662 && empty_bb != NULL);
31663
31664 /* fndecls_p is actually a vector. */
31665 fndecls = static_cast<vec<tree> *> (fndecls_p);
31666
31667 /* At least one more version other than the default. */
31668 num_versions = fndecls->length ();
31669 gcc_assert (num_versions >= 2);
31670
31671 function_version_info = (struct _function_version_info *)
31672 XNEWVEC (struct _function_version_info, (num_versions - 1));
31673
31674 /* The first version in the vector is the default decl. */
31675 default_decl = (*fndecls)[0];
31676
31677 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31678
31679 gseq = bb_seq (*empty_bb);
31680 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31681 constructors, so explicitly call __builtin_cpu_init here. */
31682 ifunc_cpu_init_stmt = gimple_build_call_vec (
31683 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31684 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31685 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31686 set_bb_seq (*empty_bb, gseq);
31687
31688 pop_cfun ();
31689
31690
31691 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31692 {
31693 tree version_decl = ele;
31694 tree predicate_chain = NULL_TREE;
31695 unsigned int priority;
31696 /* Get attribute string, parse it and find the right predicate decl.
31697 The predicate function could be a lengthy combination of many
31698 features, like arch-type and various isa-variants. */
31699 priority = get_builtin_code_for_version (version_decl,
31700 &predicate_chain);
31701
31702 if (predicate_chain == NULL_TREE)
31703 continue;
31704
31705 function_version_info [actual_versions].version_decl = version_decl;
31706 function_version_info [actual_versions].predicate_chain
31707 = predicate_chain;
31708 function_version_info [actual_versions].dispatch_priority = priority;
31709 actual_versions++;
31710 }
31711
31712 /* Sort the versions according to descending order of dispatch priority. The
31713 priority is based on the ISA. This is not a perfect solution. There
31714 could still be ambiguity. If more than one function version is suitable
31715 to execute, which one should be dispatched? In the future, allow the user
31716 to specify a dispatch priority next to the version. */
31717 qsort (function_version_info, actual_versions,
31718 sizeof (struct _function_version_info), feature_compare);
31719
31720 for (i = 0; i < actual_versions; ++i)
31721 *empty_bb = add_condition_to_bb (dispatch_decl,
31722 function_version_info[i].version_decl,
31723 function_version_info[i].predicate_chain,
31724 *empty_bb);
31725
31726 /* dispatch default version at the end. */
31727 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31728 NULL, *empty_bb);
31729
31730 free (function_version_info);
31731 return 0;
31732 }
31733
31734 /* Comparator function to be used in qsort routine to sort attribute
31735 specification strings to "target". */
31736
31737 static int
31738 attr_strcmp (const void *v1, const void *v2)
31739 {
31740 const char *c1 = *(char *const*)v1;
31741 const char *c2 = *(char *const*)v2;
31742 return strcmp (c1, c2);
31743 }
31744
31745 /* ARGLIST is the argument to target attribute. This function tokenizes
31746 the comma separated arguments, sorts them and returns a string which
31747 is a unique identifier for the comma separated arguments. It also
31748 replaces non-identifier characters "=,-" with "_". */
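/* For example (sketch): the argument of
   __attribute__ ((target ("sse4.2,arch=core2"))) is split into the tokens
   "sse4.2" and "arch_core2", which sort to the identifier
   "arch_core2_sse4.2".  */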
31749
31750 static char *
31751 sorted_attr_string (tree arglist)
31752 {
31753 tree arg;
31754 size_t str_len_sum = 0;
31755 char **args = NULL;
31756 char *attr_str, *ret_str;
31757 char *attr = NULL;
31758 unsigned int argnum = 1;
31759 unsigned int i;
31760
31761 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31762 {
31763 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31764 size_t len = strlen (str);
31765 str_len_sum += len + 1;
31766 if (arg != arglist)
31767 argnum++;
31768 for (i = 0; i < strlen (str); i++)
31769 if (str[i] == ',')
31770 argnum++;
31771 }
31772
31773 attr_str = XNEWVEC (char, str_len_sum);
31774 str_len_sum = 0;
31775 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31776 {
31777 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31778 size_t len = strlen (str);
31779 memcpy (attr_str + str_len_sum, str, len);
31780 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31781 str_len_sum += len + 1;
31782 }
31783
31784 /* Replace "=,-" with "_". */
31785 for (i = 0; i < strlen (attr_str); i++)
31786 if (attr_str[i] == '=' || attr_str[i]== '-')
31787 attr_str[i] = '_';
31788
31789 if (argnum == 1)
31790 return attr_str;
31791
31792 args = XNEWVEC (char *, argnum);
31793
31794 i = 0;
31795 attr = strtok (attr_str, ",");
31796 while (attr != NULL)
31797 {
31798 args[i] = attr;
31799 i++;
31800 attr = strtok (NULL, ",");
31801 }
31802
31803 qsort (args, argnum, sizeof (char *), attr_strcmp);
31804
31805 ret_str = XNEWVEC (char, str_len_sum);
31806 str_len_sum = 0;
31807 for (i = 0; i < argnum; i++)
31808 {
31809 size_t len = strlen (args[i]);
31810 memcpy (ret_str + str_len_sum, args[i], len);
31811 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31812 str_len_sum += len + 1;
31813 }
31814
31815 XDELETEVEC (args);
31816 XDELETEVEC (attr_str);
31817 return ret_str;
31818 }
31819
31820 /* This function changes the assembler name for functions that are
31821 versions. If DECL is a function version and has a "target"
31822 attribute, it appends the attribute string to its assembler name. */
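/* For example (sketch, C linkage): a version of foo declared with
   __attribute__ ((target ("avx"))) gets the assembler name "foo.avx",
   while the "default" version keeps its original name.  */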
31823
31824 static tree
31825 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31826 {
31827 tree version_attr;
31828 const char *orig_name, *version_string;
31829 char *attr_str, *assembler_name;
31830
31831 if (DECL_DECLARED_INLINE_P (decl)
31832 && lookup_attribute ("gnu_inline",
31833 DECL_ATTRIBUTES (decl)))
31834 error_at (DECL_SOURCE_LOCATION (decl),
31835 "Function versions cannot be marked as gnu_inline,"
31836 " bodies have to be generated");
31837
31838 if (DECL_VIRTUAL_P (decl)
31839 || DECL_VINDEX (decl))
31840 sorry ("Virtual function multiversioning not supported");
31841
31842 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31843
31844 /* The target attribute string cannot be NULL. */
31845 gcc_assert (version_attr != NULL_TREE);
31846
31847 orig_name = IDENTIFIER_POINTER (id);
31848 version_string
31849 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31850
31851 if (strcmp (version_string, "default") == 0)
31852 return id;
31853
31854 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31855 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31856
31857 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31858
31859 /* Allow assembler name to be modified if already set. */
31860 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31861 SET_DECL_RTL (decl, NULL);
31862
31863 tree ret = get_identifier (assembler_name);
31864 XDELETEVEC (attr_str);
31865 XDELETEVEC (assembler_name);
31866 return ret;
31867 }
31868
31869 /* This function returns true if FN1 and FN2 are versions of the same function,
31870 that is, the target strings of the function decls are different. This assumes
31871 that FN1 and FN2 have the same signature. */
31872
31873 static bool
31874 ix86_function_versions (tree fn1, tree fn2)
31875 {
31876 tree attr1, attr2;
31877 char *target1, *target2;
31878 bool result;
31879
31880 if (TREE_CODE (fn1) != FUNCTION_DECL
31881 || TREE_CODE (fn2) != FUNCTION_DECL)
31882 return false;
31883
31884 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
31885 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
31886
31887 /* At least one function decl should have the target attribute specified. */
31888 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
31889 return false;
31890
31891 /* Diagnose missing target attribute if one of the decls is already
31892 multi-versioned. */
31893 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
31894 {
31895 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
31896 {
31897 if (attr2 != NULL_TREE)
31898 {
31899 tree tem = fn1;
31900 fn1 = fn2;
31901 fn2 = tem;
31902 attr1 = attr2;
31903 }
31904 error_at (DECL_SOURCE_LOCATION (fn2),
31905 "missing %<target%> attribute for multi-versioned %D",
31906 fn2);
31907 inform (DECL_SOURCE_LOCATION (fn1),
31908 "previous declaration of %D", fn1);
31909 /* Prevent diagnosing of the same error multiple times. */
31910 DECL_ATTRIBUTES (fn2)
31911 = tree_cons (get_identifier ("target"),
31912 copy_node (TREE_VALUE (attr1)),
31913 DECL_ATTRIBUTES (fn2));
31914 }
31915 return false;
31916 }
31917
31918 target1 = sorted_attr_string (TREE_VALUE (attr1));
31919 target2 = sorted_attr_string (TREE_VALUE (attr2));
31920
31921 /* The sorted target strings must be different for fn1 and fn2
31922 to be versions. */
31923 if (strcmp (target1, target2) == 0)
31924 result = false;
31925 else
31926 result = true;
31927
31928 XDELETEVEC (target1);
31929 XDELETEVEC (target2);
31930
31931 return result;
31932 }
31933
31934 static tree
31935 ix86_mangle_decl_assembler_name (tree decl, tree id)
31936 {
31937 /* For function version, add the target suffix to the assembler name. */
31938 if (TREE_CODE (decl) == FUNCTION_DECL
31939 && DECL_FUNCTION_VERSIONED (decl))
31940 id = ix86_mangle_function_version_assembler_name (decl, id);
31941 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31942 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31943 #endif
31944
31945 return id;
31946 }
31947
31948 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
31949 is true, append the full path name of the source file. */
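/* For example (sketch): make_name (foo_decl, "resolver", false) returns
   "foo.resolver", and with MAKE_UNIQUE true a file-unique component is
   inserted, giving "foo.<unique>.resolver".  Here foo_decl stands for a
   hypothetical FUNCTION_DECL whose assembler name is "foo".  */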
31950
31951 static char *
31952 make_name (tree decl, const char *suffix, bool make_unique)
31953 {
31954 char *global_var_name;
31955 int name_len;
31956 const char *name;
31957 const char *unique_name = NULL;
31958
31959 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
31960
31961 /* Get a unique name that can be used globally without any chance
31962 of collision at link time. */
31963 if (make_unique)
31964 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
31965
31966 name_len = strlen (name) + strlen (suffix) + 2;
31967
31968 if (make_unique)
31969 name_len += strlen (unique_name) + 1;
31970 global_var_name = XNEWVEC (char, name_len);
31971
31972 /* Use '.' to concatenate names as it is demangler friendly. */
31973 if (make_unique)
31974 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
31975 suffix);
31976 else
31977 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
31978
31979 return global_var_name;
31980 }
31981
31982 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
31983
31984 /* Make a dispatcher declaration for the multi-versioned function DECL.
31985 Calls to DECL function will be replaced with calls to the dispatcher
31986 by the front-end. Return the decl created. */
31987
31988 static tree
31989 make_dispatcher_decl (const tree decl)
31990 {
31991 tree func_decl;
31992 char *func_name;
31993 tree fn_type, func_type;
31994 bool is_uniq = false;
31995
31996 if (TREE_PUBLIC (decl) == 0)
31997 is_uniq = true;
31998
31999 func_name = make_name (decl, "ifunc", is_uniq);
32000
32001 fn_type = TREE_TYPE (decl);
32002 func_type = build_function_type (TREE_TYPE (fn_type),
32003 TYPE_ARG_TYPES (fn_type));
32004
32005 func_decl = build_fn_decl (func_name, func_type);
32006 XDELETEVEC (func_name);
32007 TREE_USED (func_decl) = 1;
32008 DECL_CONTEXT (func_decl) = NULL_TREE;
32009 DECL_INITIAL (func_decl) = error_mark_node;
32010 DECL_ARTIFICIAL (func_decl) = 1;
32011 /* Mark this func as external, the resolver will flip it again if
32012 it gets generated. */
32013 DECL_EXTERNAL (func_decl) = 1;
32014 /* This decl will become an IFUNC; IFUNCs have to be externally visible. */
32015 TREE_PUBLIC (func_decl) = 1;
32016
32017 return func_decl;
32018 }
32019
32020 #endif
32021
32022 /* Returns true if DECL is multi-versioned and is the default function,
32023 that is, it is not tagged with a target-specific optimization. */
32024
32025 static bool
32026 is_function_default_version (const tree decl)
32027 {
32028 if (TREE_CODE (decl) != FUNCTION_DECL
32029 || !DECL_FUNCTION_VERSIONED (decl))
32030 return false;
32031 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32032 gcc_assert (attr);
32033 attr = TREE_VALUE (TREE_VALUE (attr));
32034 return (TREE_CODE (attr) == STRING_CST
32035 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
32036 }
32037
32038 /* Make a dispatcher declaration for the multi-versioned function DECL.
32039 Calls to DECL function will be replaced with calls to the dispatcher
32040 by the front-end. Returns the decl of the dispatcher function. */
32041
32042 static tree
32043 ix86_get_function_versions_dispatcher (void *decl)
32044 {
32045 tree fn = (tree) decl;
32046 struct cgraph_node *node = NULL;
32047 struct cgraph_node *default_node = NULL;
32048 struct cgraph_function_version_info *node_v = NULL;
32049 struct cgraph_function_version_info *first_v = NULL;
32050
32051 tree dispatch_decl = NULL;
32052
32053 struct cgraph_function_version_info *default_version_info = NULL;
32054
32055 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
32056
32057 node = cgraph_node::get (fn);
32058 gcc_assert (node != NULL);
32059
32060 node_v = node->function_version ();
32061 gcc_assert (node_v != NULL);
32062
32063 if (node_v->dispatcher_resolver != NULL)
32064 return node_v->dispatcher_resolver;
32065
32066 /* Find the default version and make it the first node. */
32067 first_v = node_v;
32068 /* Go to the beginning of the chain. */
32069 while (first_v->prev != NULL)
32070 first_v = first_v->prev;
32071 default_version_info = first_v;
32072 while (default_version_info != NULL)
32073 {
32074 if (is_function_default_version
32075 (default_version_info->this_node->decl))
32076 break;
32077 default_version_info = default_version_info->next;
32078 }
32079
32080 /* If there is no default node, just return NULL. */
32081 if (default_version_info == NULL)
32082 return NULL;
32083
32084 /* Make default info the first node. */
32085 if (first_v != default_version_info)
32086 {
32087 default_version_info->prev->next = default_version_info->next;
32088 if (default_version_info->next)
32089 default_version_info->next->prev = default_version_info->prev;
32090 first_v->prev = default_version_info;
32091 default_version_info->next = first_v;
32092 default_version_info->prev = NULL;
32093 }
32094
32095 default_node = default_version_info->this_node;
32096
32097 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32098 if (targetm.has_ifunc_p ())
32099 {
32100 struct cgraph_function_version_info *it_v = NULL;
32101 struct cgraph_node *dispatcher_node = NULL;
32102 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32103
32104 /* Right now, the dispatching is done via ifunc. */
32105 dispatch_decl = make_dispatcher_decl (default_node->decl);
32106
32107 dispatcher_node = cgraph_node::get_create (dispatch_decl);
32108 gcc_assert (dispatcher_node != NULL);
32109 dispatcher_node->dispatcher_function = 1;
32110 dispatcher_version_info
32111 = dispatcher_node->insert_new_function_version ();
32112 dispatcher_version_info->next = default_version_info;
32113 dispatcher_node->definition = 1;
32114
32115 /* Set the dispatcher for all the versions. */
32116 it_v = default_version_info;
32117 while (it_v != NULL)
32118 {
32119 it_v->dispatcher_resolver = dispatch_decl;
32120 it_v = it_v->next;
32121 }
32122 }
32123 else
32124 #endif
32125 {
32126 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32127 "multiversioning needs ifunc which is not supported "
32128 "on this target");
32129 }
32130
32131 return dispatch_decl;
32132 }
32133
32134 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32135 it to CHAIN. */
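/* For example (sketch): make_attribute ("ifunc", "foo.resolver", NULL_TREE)
   builds the attribute ifunc ("foo.resolver"), which is how the dispatcher
   declaration is marked below in make_resolver_func.  */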
32136
32137 static tree
32138 make_attribute (const char *name, const char *arg_name, tree chain)
32139 {
32140 tree attr_name;
32141 tree attr_arg_name;
32142 tree attr_args;
32143 tree attr;
32144
32145 attr_name = get_identifier (name);
32146 attr_arg_name = build_string (strlen (arg_name), arg_name);
32147 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32148 attr = tree_cons (attr_name, attr_args, chain);
32149 return attr;
32150 }
32151
32152 /* Make the resolver function decl to dispatch the versions of
32153 a multi-versioned function, DEFAULT_DECL. Create an
32154 empty basic block in the resolver and store the pointer in
32155 EMPTY_BB. Return the decl of the resolver function. */
32156
32157 static tree
32158 make_resolver_func (const tree default_decl,
32159 const tree dispatch_decl,
32160 basic_block *empty_bb)
32161 {
32162 char *resolver_name;
32163 tree decl, type, decl_name, t;
32164 bool is_uniq = false;
32165
32166 /* IFUNCs have to be globally visible. So, if the default_decl is
32167 not, then the name of the IFUNC should be made unique. */
32168 if (TREE_PUBLIC (default_decl) == 0)
32169 is_uniq = true;
32170
32171 /* Append the filename to the resolver function if the versions are
32172 not externally visible. This is because the resolver function has
32173 to be externally visible for the loader to find it. So, appending
32174 the filename will prevent conflicts with a resolver function from
32175 another module which is based on the same version name. */
32176 resolver_name = make_name (default_decl, "resolver", is_uniq);
32177
32178 /* The resolver function should return a (void *). */
32179 type = build_function_type_list (ptr_type_node, NULL_TREE);
32180
32181 decl = build_fn_decl (resolver_name, type);
32182 decl_name = get_identifier (resolver_name);
32183 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32184
32185 DECL_NAME (decl) = decl_name;
32186 TREE_USED (decl) = 1;
32187 DECL_ARTIFICIAL (decl) = 1;
32188 DECL_IGNORED_P (decl) = 0;
32189 /* IFUNC resolvers have to be externally visible. */
32190 TREE_PUBLIC (decl) = 1;
32191 DECL_UNINLINABLE (decl) = 1;
32192
32193 /* Resolver is not external, body is generated. */
32194 DECL_EXTERNAL (decl) = 0;
32195 DECL_EXTERNAL (dispatch_decl) = 0;
32196
32197 DECL_CONTEXT (decl) = NULL_TREE;
32198 DECL_INITIAL (decl) = make_node (BLOCK);
32199 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32200
32201 if (DECL_COMDAT_GROUP (default_decl)
32202 || TREE_PUBLIC (default_decl))
32203 {
32204 /* In this case, each translation unit with a call to this
32205 versioned function will put out a resolver. Ensure it
32206 is comdat to keep just one copy. */
32207 DECL_COMDAT (decl) = 1;
32208 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32209 }
32210 /* Build result decl and add to function_decl. */
32211 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32212 DECL_ARTIFICIAL (t) = 1;
32213 DECL_IGNORED_P (t) = 1;
32214 DECL_RESULT (decl) = t;
32215
32216 gimplify_function_tree (decl);
32217 push_cfun (DECL_STRUCT_FUNCTION (decl));
32218 *empty_bb = init_lowered_empty_function (decl, false);
32219
32220 cgraph_node::add_new_function (decl, true);
32221 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
32222
32223 pop_cfun ();
32224
32225 gcc_assert (dispatch_decl != NULL);
32226 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32227 DECL_ATTRIBUTES (dispatch_decl)
32228 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32229
32230 /* Create the alias for dispatch to resolver here. */
32231 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32232 cgraph_node::create_same_body_alias (dispatch_decl, decl);
32233 XDELETEVEC (resolver_name);
32234 return decl;
32235 }
32236
32237 /* Generate the dispatching code body to dispatch multi-versioned function
32238 DECL. The target hook is called to process the "target" attributes and
32239 provide the code to dispatch the right function at run-time. NODE points
32240 to the dispatcher decl whose body will be created. */
32241
32242 static tree
32243 ix86_generate_version_dispatcher_body (void *node_p)
32244 {
32245 tree resolver_decl;
32246 basic_block empty_bb;
32247 tree default_ver_decl;
32248 struct cgraph_node *versn;
32249 struct cgraph_node *node;
32250
32251 struct cgraph_function_version_info *node_version_info = NULL;
32252 struct cgraph_function_version_info *versn_info = NULL;
32253
32254 node = (cgraph_node *)node_p;
32255
32256 node_version_info = node->function_version ();
32257 gcc_assert (node->dispatcher_function
32258 && node_version_info != NULL);
32259
32260 if (node_version_info->dispatcher_resolver)
32261 return node_version_info->dispatcher_resolver;
32262
32263 /* The first version in the chain corresponds to the default version. */
32264 default_ver_decl = node_version_info->next->this_node->decl;
32265
32266 /* node is going to be an alias, so remove the finalized bit. */
32267 node->definition = false;
32268
32269 resolver_decl = make_resolver_func (default_ver_decl,
32270 node->decl, &empty_bb);
32271
32272 node_version_info->dispatcher_resolver = resolver_decl;
32273
32274 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32275
32276 auto_vec<tree, 2> fn_ver_vec;
32277
32278 for (versn_info = node_version_info->next; versn_info;
32279 versn_info = versn_info->next)
32280 {
32281 versn = versn_info->this_node;
32282 /* Check for virtual functions here again, as by this time it should
32283 have been determined if this function needs a vtable index or
32284 not. This happens for methods in derived classes that override
32285 virtual methods in base classes but are not explicitly marked as
32286 virtual. */
32287 if (DECL_VINDEX (versn->decl))
32288 sorry ("Virtual function multiversioning not supported");
32289
32290 fn_ver_vec.safe_push (versn->decl);
32291 }
32292
32293 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32294 cgraph_edge::rebuild_edges ();
32295 pop_cfun ();
32296 return resolver_decl;
32297 }
32298 /* This builds the processor_model struct type defined in
32299 libgcc/config/i386/cpuinfo.c */
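/* For reference, the record built below mirrors this layout (a sketch;
   the authoritative definition lives in libgcc/config/i386/cpuinfo.c):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */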
32300
32301 static tree
32302 build_processor_model_struct (void)
32303 {
32304 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32305 "__cpu_features"};
32306 tree field = NULL_TREE, field_chain = NULL_TREE;
32307 int i;
32308 tree type = make_node (RECORD_TYPE);
32309
32310 /* The first 3 fields are unsigned int. */
32311 for (i = 0; i < 3; ++i)
32312 {
32313 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32314 get_identifier (field_name[i]), unsigned_type_node);
32315 if (field_chain != NULL_TREE)
32316 DECL_CHAIN (field) = field_chain;
32317 field_chain = field;
32318 }
32319
32320 /* The last field is an array of unsigned integers of size one. */
32321 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32322 get_identifier (field_name[3]),
32323 build_array_type (unsigned_type_node,
32324 build_index_type (size_one_node)));
32325 if (field_chain != NULL_TREE)
32326 DECL_CHAIN (field) = field_chain;
32327 field_chain = field;
32328
32329 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32330 return type;
32331 }
32332
32333 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32334
32335 static tree
32336 make_var_decl (tree type, const char *name)
32337 {
32338 tree new_decl;
32339
32340 new_decl = build_decl (UNKNOWN_LOCATION,
32341 VAR_DECL,
32342 get_identifier (name),
32343 type);
32344
32345 DECL_EXTERNAL (new_decl) = 1;
32346 TREE_STATIC (new_decl) = 1;
32347 TREE_PUBLIC (new_decl) = 1;
32348 DECL_INITIAL (new_decl) = 0;
32349 DECL_ARTIFICIAL (new_decl) = 0;
32350 DECL_PRESERVE_P (new_decl) = 1;
32351
32352 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32353 assemble_variable (new_decl, 0, 0, 0);
32354
32355 return new_decl;
32356 }
32357
32358 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32359 into an integer defined in libgcc/config/i386/cpuinfo.c */
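/* Illustrative sketch of the trees built below (field names from
   build_processor_model_struct, enum values from this function):

     __builtin_cpu_is ("amd")        ~  __cpu_model.__cpu_vendor == M_AMD
     __builtin_cpu_supports ("avx2") ~  __cpu_model.__cpu_features[0]
                                        & (1 << F_AVX2)  */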
32360
32361 static tree
32362 fold_builtin_cpu (tree fndecl, tree *args)
32363 {
32364 unsigned int i;
32365 enum ix86_builtins fn_code = (enum ix86_builtins)
32366 DECL_FUNCTION_CODE (fndecl);
32367 tree param_string_cst = NULL;
32368
32369 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32370 enum processor_features
32371 {
32372 F_CMOV = 0,
32373 F_MMX,
32374 F_POPCNT,
32375 F_SSE,
32376 F_SSE2,
32377 F_SSE3,
32378 F_SSSE3,
32379 F_SSE4_1,
32380 F_SSE4_2,
32381 F_AVX,
32382 F_AVX2,
32383 F_SSE4_A,
32384 F_FMA4,
32385 F_XOP,
32386 F_FMA,
32387 F_MAX
32388 };
32389
32390 /* These are the values for the vendor types and the CPU types and
32391 subtypes in cpuinfo.c. The corresponding start value must be
32392 subtracted from CPU type and subtype values. */
32393 enum processor_model
32394 {
32395 M_INTEL = 1,
32396 M_AMD,
32397 M_CPU_TYPE_START,
32398 M_INTEL_BONNELL,
32399 M_INTEL_CORE2,
32400 M_INTEL_COREI7,
32401 M_AMDFAM10H,
32402 M_AMDFAM15H,
32403 M_INTEL_SILVERMONT,
32404 M_AMD_BTVER1,
32405 M_AMD_BTVER2,
32406 M_CPU_SUBTYPE_START,
32407 M_INTEL_COREI7_NEHALEM,
32408 M_INTEL_COREI7_WESTMERE,
32409 M_INTEL_COREI7_SANDYBRIDGE,
32410 M_AMDFAM10H_BARCELONA,
32411 M_AMDFAM10H_SHANGHAI,
32412 M_AMDFAM10H_ISTANBUL,
32413 M_AMDFAM15H_BDVER1,
32414 M_AMDFAM15H_BDVER2,
32415 M_AMDFAM15H_BDVER3,
32416 M_AMDFAM15H_BDVER4,
32417 M_INTEL_COREI7_IVYBRIDGE,
32418 M_INTEL_COREI7_HASWELL
32419 };
32420
32421 static struct _arch_names_table
32422 {
32423 const char *const name;
32424 const enum processor_model model;
32425 }
32426 const arch_names_table[] =
32427 {
32428 {"amd", M_AMD},
32429 {"intel", M_INTEL},
32430 {"atom", M_INTEL_BONNELL},
32431 {"slm", M_INTEL_SILVERMONT},
32432 {"core2", M_INTEL_CORE2},
32433 {"corei7", M_INTEL_COREI7},
32434 {"nehalem", M_INTEL_COREI7_NEHALEM},
32435 {"westmere", M_INTEL_COREI7_WESTMERE},
32436 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32437 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32438 {"haswell", M_INTEL_COREI7_HASWELL},
32439 {"bonnell", M_INTEL_BONNELL},
32440 {"silvermont", M_INTEL_SILVERMONT},
32441 {"amdfam10h", M_AMDFAM10H},
32442 {"barcelona", M_AMDFAM10H_BARCELONA},
32443 {"shanghai", M_AMDFAM10H_SHANGHAI},
32444 {"istanbul", M_AMDFAM10H_ISTANBUL},
32445 {"btver1", M_AMD_BTVER1},
32446 {"amdfam15h", M_AMDFAM15H},
32447 {"bdver1", M_AMDFAM15H_BDVER1},
32448 {"bdver2", M_AMDFAM15H_BDVER2},
32449 {"bdver3", M_AMDFAM15H_BDVER3},
32450 {"bdver4", M_AMDFAM15H_BDVER4},
32451 {"btver2", M_AMD_BTVER2},
32452 };
32453
32454 static struct _isa_names_table
32455 {
32456 const char *const name;
32457 const enum processor_features feature;
32458 }
32459 const isa_names_table[] =
32460 {
32461 {"cmov", F_CMOV},
32462 {"mmx", F_MMX},
32463 {"popcnt", F_POPCNT},
32464 {"sse", F_SSE},
32465 {"sse2", F_SSE2},
32466 {"sse3", F_SSE3},
32467 {"ssse3", F_SSSE3},
32468 {"sse4a", F_SSE4_A},
32469 {"sse4.1", F_SSE4_1},
32470 {"sse4.2", F_SSE4_2},
32471 {"avx", F_AVX},
32472 {"fma4", F_FMA4},
32473 {"xop", F_XOP},
32474 {"fma", F_FMA},
32475 {"avx2", F_AVX2}
32476 };
32477
32478 tree __processor_model_type = build_processor_model_struct ();
32479 tree __cpu_model_var = make_var_decl (__processor_model_type,
32480 "__cpu_model");
32481
32482
32483 varpool_node::add (__cpu_model_var);
32484
32485 gcc_assert ((args != NULL) && (*args != NULL));
32486
32487 param_string_cst = *args;
32488 while (param_string_cst
32489 && TREE_CODE (param_string_cst) != STRING_CST)
32490 {
32491 /* *args must be an expr that can contain other EXPRS leading to a
32492 STRING_CST. */
32493 if (!EXPR_P (param_string_cst))
32494 {
32495 error ("Parameter to builtin must be a string constant or literal");
32496 return integer_zero_node;
32497 }
32498 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32499 }
32500
32501 gcc_assert (param_string_cst);
32502
32503 if (fn_code == IX86_BUILTIN_CPU_IS)
32504 {
32505 tree ref;
32506 tree field;
32507 tree final;
32508
32509 unsigned int field_val = 0;
32510 unsigned int NUM_ARCH_NAMES
32511 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32512
32513 for (i = 0; i < NUM_ARCH_NAMES; i++)
32514 if (strcmp (arch_names_table[i].name,
32515 TREE_STRING_POINTER (param_string_cst)) == 0)
32516 break;
32517
32518 if (i == NUM_ARCH_NAMES)
32519 {
32520 error ("Parameter to builtin not valid: %s",
32521 TREE_STRING_POINTER (param_string_cst));
32522 return integer_zero_node;
32523 }
32524
32525 field = TYPE_FIELDS (__processor_model_type);
32526 field_val = arch_names_table[i].model;
32527
32528 /* CPU types are stored in the next field. */
32529 if (field_val > M_CPU_TYPE_START
32530 && field_val < M_CPU_SUBTYPE_START)
32531 {
32532 field = DECL_CHAIN (field);
32533 field_val -= M_CPU_TYPE_START;
32534 }
32535
32536 /* CPU subtypes are stored in the next field. */
32537 if (field_val > M_CPU_SUBTYPE_START)
32538 {
32539 field = DECL_CHAIN (DECL_CHAIN (field));
32540 field_val -= M_CPU_SUBTYPE_START;
32541 }
32542
32543 /* Get the appropriate field in __cpu_model. */
32544 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32545 field, NULL_TREE);
32546
32547 /* Check the value. */
32548 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32549 build_int_cstu (unsigned_type_node, field_val));
32550 return build1 (CONVERT_EXPR, integer_type_node, final);
32551 }
32552 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32553 {
32554 tree ref;
32555 tree array_elt;
32556 tree field;
32557 tree final;
32558
32559 unsigned int field_val = 0;
32560 unsigned int NUM_ISA_NAMES
32561 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32562
32563 for (i = 0; i < NUM_ISA_NAMES; i++)
32564 if (strcmp (isa_names_table[i].name,
32565 TREE_STRING_POINTER (param_string_cst)) == 0)
32566 break;
32567
32568 if (i == NUM_ISA_NAMES)
32569 {
32570 error ("Parameter to builtin not valid: %s",
32571 TREE_STRING_POINTER (param_string_cst));
32572 return integer_zero_node;
32573 }
32574
32575 field = TYPE_FIELDS (__processor_model_type);
32576 /* Get the last field, which is __cpu_features. */
32577 while (DECL_CHAIN (field))
32578 field = DECL_CHAIN (field);
32579
32580 /* Get the appropriate field: __cpu_model.__cpu_features */
32581 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32582 field, NULL_TREE);
32583
32584 /* Access the 0th element of __cpu_features array. */
32585 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32586 integer_zero_node, NULL_TREE, NULL_TREE);
32587
32588 field_val = (1 << isa_names_table[i].feature);
32589 /* Return __cpu_model.__cpu_features[0] & field_val */
32590 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32591 build_int_cstu (unsigned_type_node, field_val));
32592 return build1 (CONVERT_EXPR, integer_type_node, final);
32593 }
32594 gcc_unreachable ();
32595 }
32596
32597 static tree
32598 ix86_fold_builtin (tree fndecl, int n_args,
32599 tree *args, bool ignore ATTRIBUTE_UNUSED)
32600 {
32601 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32602 {
32603 enum ix86_builtins fn_code = (enum ix86_builtins)
32604 DECL_FUNCTION_CODE (fndecl);
32605 if (fn_code == IX86_BUILTIN_CPU_IS
32606 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32607 {
32608 gcc_assert (n_args == 1);
32609 return fold_builtin_cpu (fndecl, args);
32610 }
32611 }
32612
32613 #ifdef SUBTARGET_FOLD_BUILTIN
32614 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32615 #endif
32616
32617 return NULL_TREE;
32618 }
32619
32620 /* Make builtins to detect cpu type and features supported. NAME is
32621 the builtin name, CODE is the builtin code, and FTYPE is the function
32622 type of the builtin. */
32623
32624 static void
32625 make_cpu_type_builtin (const char* name, int code,
32626 enum ix86_builtin_func_type ftype, bool is_const)
32627 {
32628 tree decl;
32629 tree type;
32630
32631 type = ix86_get_builtin_func_type (ftype);
32632 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32633 NULL, NULL_TREE);
32634 gcc_assert (decl != NULL_TREE);
32635 ix86_builtins[(int) code] = decl;
32636 TREE_READONLY (decl) = is_const;
32637 }
32638
32639 /* Make builtins to get CPU type and features supported. The created
32640 builtins are:
32641
32642 __builtin_cpu_init (), to detect cpu type and features,
32643 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32644 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32645 */
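/* Example use from user code (illustrative only; the accepted CPU and
   feature names are those in arch_names_table and isa_names_table in
   fold_builtin_cpu above):

     int
     pick_impl (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_is ("corei7"))
         return 1;
       if (__builtin_cpu_supports ("sse4.2"))
         return 2;
       return 0;
     }  */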
32646
32647 static void
32648 ix86_init_platform_type_builtins (void)
32649 {
32650 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32651 INT_FTYPE_VOID, false);
32652 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32653 INT_FTYPE_PCCHAR, true);
32654 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32655 INT_FTYPE_PCCHAR, true);
32656 }
32657
32658 /* Internal method for ix86_init_builtins. */
32659
32660 static void
32661 ix86_init_builtins_va_builtins_abi (void)
32662 {
32663 tree ms_va_ref, sysv_va_ref;
32664 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32665 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32666 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32667 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32668
32669 if (!TARGET_64BIT)
32670 return;
32671 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32672 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32673 ms_va_ref = build_reference_type (ms_va_list_type_node);
32674 sysv_va_ref =
32675 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32676
32677 fnvoid_va_end_ms =
32678 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32679 fnvoid_va_start_ms =
32680 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32681 fnvoid_va_end_sysv =
32682 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32683 fnvoid_va_start_sysv =
32684 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32685 NULL_TREE);
32686 fnvoid_va_copy_ms =
32687 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32688 NULL_TREE);
32689 fnvoid_va_copy_sysv =
32690 build_function_type_list (void_type_node, sysv_va_ref,
32691 sysv_va_ref, NULL_TREE);
32692
32693 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32694 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32695 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32696 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32697 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32698 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32699 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32700 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32701 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32702 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32703 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32704 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32705 }
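/* Sketch of how the ABI-specific varargs builtins registered above can
   appear in user code on a 64-bit target (assumes the ms_abi attribute
   and the __builtin_ms_va_list type, both provided by this port):

     int __attribute__ ((ms_abi))
     sum (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;
       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }  */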
32706
32707 static void
32708 ix86_init_builtin_types (void)
32709 {
32710 tree float128_type_node, float80_type_node;
32711
32712 /* The __float80 type. */
32713 float80_type_node = long_double_type_node;
32714 if (TYPE_MODE (float80_type_node) != XFmode)
32715 {
32716 /* The __float80 type. */
32717 float80_type_node = make_node (REAL_TYPE);
32718
32719 TYPE_PRECISION (float80_type_node) = 80;
32720 layout_type (float80_type_node);
32721 }
32722 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32723
32724 /* The __float128 type. */
32725 float128_type_node = make_node (REAL_TYPE);
32726 TYPE_PRECISION (float128_type_node) = 128;
32727 layout_type (float128_type_node);
32728 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32729
32730 /* This macro is built by i386-builtin-types.awk. */
32731 DEFINE_BUILTIN_PRIMITIVE_TYPES;
32732 }
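/* The types registered above are usable directly from user code, e.g.
   (illustrative):

     __float80  x = 1.0L;
     __float128 y = 1.0;  */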
32733
32734 static void
32735 ix86_init_builtins (void)
32736 {
32737 tree t;
32738
32739 ix86_init_builtin_types ();
32740
32741 /* Builtins to get CPU type and features. */
32742 ix86_init_platform_type_builtins ();
32743
32744 /* TFmode support builtins. */
32745 def_builtin_const (0, "__builtin_infq",
32746 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32747 def_builtin_const (0, "__builtin_huge_valq",
32748 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32749
32750 /* We will expand them to normal calls if SSE isn't available, since
32751 they are used by libgcc. */
32752 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32753 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32754 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32755 TREE_READONLY (t) = 1;
32756 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32757
32758 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32759 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32760 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32761 TREE_READONLY (t) = 1;
32762 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
32763
32764 ix86_init_tm_builtins ();
32765 ix86_init_mmx_sse_builtins ();
32766
32767 if (TARGET_LP64)
32768 ix86_init_builtins_va_builtins_abi ();
32769
32770 #ifdef SUBTARGET_INIT_BUILTINS
32771 SUBTARGET_INIT_BUILTINS;
32772 #endif
32773 }
32774
32775 /* Return the ix86 builtin for CODE. */
32776
32777 static tree
32778 ix86_builtin_decl (unsigned code, bool)
32779 {
32780 if (code >= IX86_BUILTIN_MAX)
32781 return error_mark_node;
32782
32783 return ix86_builtins[code];
32784 }
32785
32786 /* Errors in the source file can cause expand_expr to return const0_rtx
32787 where we expect a vector. To avoid crashing, use one of the vector
32788 clear instructions. */
32789 static rtx
32790 safe_vector_operand (rtx x, enum machine_mode mode)
32791 {
32792 if (x == const0_rtx)
32793 x = CONST0_RTX (mode);
32794 return x;
32795 }
32796
32797 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32798
32799 static rtx
32800 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32801 {
32802 rtx pat;
32803 tree arg0 = CALL_EXPR_ARG (exp, 0);
32804 tree arg1 = CALL_EXPR_ARG (exp, 1);
32805 rtx op0 = expand_normal (arg0);
32806 rtx op1 = expand_normal (arg1);
32807 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32808 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32809 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32810
32811 if (VECTOR_MODE_P (mode0))
32812 op0 = safe_vector_operand (op0, mode0);
32813 if (VECTOR_MODE_P (mode1))
32814 op1 = safe_vector_operand (op1, mode1);
32815
32816 if (optimize || !target
32817 || GET_MODE (target) != tmode
32818 || !insn_data[icode].operand[0].predicate (target, tmode))
32819 target = gen_reg_rtx (tmode);
32820
32821 if (GET_MODE (op1) == SImode && mode1 == TImode)
32822 {
32823 rtx x = gen_reg_rtx (V4SImode);
32824 emit_insn (gen_sse2_loadd (x, op1));
32825 op1 = gen_lowpart (TImode, x);
32826 }
32827
32828 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32829 op0 = copy_to_mode_reg (mode0, op0);
32830 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32831 op1 = copy_to_mode_reg (mode1, op1);
32832
32833 pat = GEN_FCN (icode) (target, op0, op1);
32834 if (! pat)
32835 return 0;
32836
32837 emit_insn (pat);
32838
32839 return target;
32840 }
32841
32842 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
32843
32844 static rtx
32845 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32846 enum ix86_builtin_func_type m_type,
32847 enum rtx_code sub_code)
32848 {
32849 rtx pat;
32850 int i;
32851 int nargs;
32852 bool comparison_p = false;
32853 bool tf_p = false;
32854 bool last_arg_constant = false;
32855 int num_memory = 0;
32856 struct {
32857 rtx op;
32858 enum machine_mode mode;
32859 } args[4];
32860
32861 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32862
32863 switch (m_type)
32864 {
32865 case MULTI_ARG_4_DF2_DI_I:
32866 case MULTI_ARG_4_DF2_DI_I1:
32867 case MULTI_ARG_4_SF2_SI_I:
32868 case MULTI_ARG_4_SF2_SI_I1:
32869 nargs = 4;
32870 last_arg_constant = true;
32871 break;
32872
32873 case MULTI_ARG_3_SF:
32874 case MULTI_ARG_3_DF:
32875 case MULTI_ARG_3_SF2:
32876 case MULTI_ARG_3_DF2:
32877 case MULTI_ARG_3_DI:
32878 case MULTI_ARG_3_SI:
32879 case MULTI_ARG_3_SI_DI:
32880 case MULTI_ARG_3_HI:
32881 case MULTI_ARG_3_HI_SI:
32882 case MULTI_ARG_3_QI:
32883 case MULTI_ARG_3_DI2:
32884 case MULTI_ARG_3_SI2:
32885 case MULTI_ARG_3_HI2:
32886 case MULTI_ARG_3_QI2:
32887 nargs = 3;
32888 break;
32889
32890 case MULTI_ARG_2_SF:
32891 case MULTI_ARG_2_DF:
32892 case MULTI_ARG_2_DI:
32893 case MULTI_ARG_2_SI:
32894 case MULTI_ARG_2_HI:
32895 case MULTI_ARG_2_QI:
32896 nargs = 2;
32897 break;
32898
32899 case MULTI_ARG_2_DI_IMM:
32900 case MULTI_ARG_2_SI_IMM:
32901 case MULTI_ARG_2_HI_IMM:
32902 case MULTI_ARG_2_QI_IMM:
32903 nargs = 2;
32904 last_arg_constant = true;
32905 break;
32906
32907 case MULTI_ARG_1_SF:
32908 case MULTI_ARG_1_DF:
32909 case MULTI_ARG_1_SF2:
32910 case MULTI_ARG_1_DF2:
32911 case MULTI_ARG_1_DI:
32912 case MULTI_ARG_1_SI:
32913 case MULTI_ARG_1_HI:
32914 case MULTI_ARG_1_QI:
32915 case MULTI_ARG_1_SI_DI:
32916 case MULTI_ARG_1_HI_DI:
32917 case MULTI_ARG_1_HI_SI:
32918 case MULTI_ARG_1_QI_DI:
32919 case MULTI_ARG_1_QI_SI:
32920 case MULTI_ARG_1_QI_HI:
32921 nargs = 1;
32922 break;
32923
32924 case MULTI_ARG_2_DI_CMP:
32925 case MULTI_ARG_2_SI_CMP:
32926 case MULTI_ARG_2_HI_CMP:
32927 case MULTI_ARG_2_QI_CMP:
32928 nargs = 2;
32929 comparison_p = true;
32930 break;
32931
32932 case MULTI_ARG_2_SF_TF:
32933 case MULTI_ARG_2_DF_TF:
32934 case MULTI_ARG_2_DI_TF:
32935 case MULTI_ARG_2_SI_TF:
32936 case MULTI_ARG_2_HI_TF:
32937 case MULTI_ARG_2_QI_TF:
32938 nargs = 2;
32939 tf_p = true;
32940 break;
32941
32942 default:
32943 gcc_unreachable ();
32944 }
32945
32946 if (optimize || !target
32947 || GET_MODE (target) != tmode
32948 || !insn_data[icode].operand[0].predicate (target, tmode))
32949 target = gen_reg_rtx (tmode);
32950
32951 gcc_assert (nargs <= 4);
32952
32953 for (i = 0; i < nargs; i++)
32954 {
32955 tree arg = CALL_EXPR_ARG (exp, i);
32956 rtx op = expand_normal (arg);
32957 int adjust = (comparison_p) ? 1 : 0;
32958 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32959
32960 if (last_arg_constant && i == nargs - 1)
32961 {
32962 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
32963 {
32964 enum insn_code new_icode = icode;
32965 switch (icode)
32966 {
32967 case CODE_FOR_xop_vpermil2v2df3:
32968 case CODE_FOR_xop_vpermil2v4sf3:
32969 case CODE_FOR_xop_vpermil2v4df3:
32970 case CODE_FOR_xop_vpermil2v8sf3:
32971 error ("the last argument must be a 2-bit immediate");
32972 return gen_reg_rtx (tmode);
32973 case CODE_FOR_xop_rotlv2di3:
32974 new_icode = CODE_FOR_rotlv2di3;
32975 goto xop_rotl;
32976 case CODE_FOR_xop_rotlv4si3:
32977 new_icode = CODE_FOR_rotlv4si3;
32978 goto xop_rotl;
32979 case CODE_FOR_xop_rotlv8hi3:
32980 new_icode = CODE_FOR_rotlv8hi3;
32981 goto xop_rotl;
32982 case CODE_FOR_xop_rotlv16qi3:
32983 new_icode = CODE_FOR_rotlv16qi3;
32984 xop_rotl:
32985 if (CONST_INT_P (op))
32986 {
32987 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
32988 op = GEN_INT (INTVAL (op) & mask);
32989 gcc_checking_assert
32990 (insn_data[icode].operand[i + 1].predicate (op, mode));
32991 }
32992 else
32993 {
32994 gcc_checking_assert
32995 (nargs == 2
32996 && insn_data[new_icode].operand[0].mode == tmode
32997 && insn_data[new_icode].operand[1].mode == tmode
32998 && insn_data[new_icode].operand[2].mode == mode
32999 && insn_data[new_icode].operand[0].predicate
33000 == insn_data[icode].operand[0].predicate
33001 && insn_data[new_icode].operand[1].predicate
33002 == insn_data[icode].operand[1].predicate);
33003 icode = new_icode;
33004 goto non_constant;
33005 }
33006 break;
33007 default:
33008 gcc_unreachable ();
33009 }
33010 }
33011 }
33012 else
33013 {
33014 non_constant:
33015 if (VECTOR_MODE_P (mode))
33016 op = safe_vector_operand (op, mode);
33017
33018 /* If we aren't optimizing, only allow one memory operand to be
33019 generated. */
33020 if (memory_operand (op, mode))
33021 num_memory++;
33022
33023 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
33024
33025 if (optimize
33026 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
33027 || num_memory > 1)
33028 op = force_reg (mode, op);
33029 }
33030
33031 args[i].op = op;
33032 args[i].mode = mode;
33033 }
33034
33035 switch (nargs)
33036 {
33037 case 1:
33038 pat = GEN_FCN (icode) (target, args[0].op);
33039 break;
33040
33041 case 2:
33042 if (tf_p)
33043 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
33044 GEN_INT ((int)sub_code));
33045 else if (! comparison_p)
33046 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
33047 else
33048 {
33049 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
33050 args[0].op,
33051 args[1].op);
33052
33053 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
33054 }
33055 break;
33056
33057 case 3:
33058 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
33059 break;
33060
33061 case 4:
33062 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
33063 break;
33064
33065 default:
33066 gcc_unreachable ();
33067 }
33068
33069 if (! pat)
33070 return 0;
33071
33072 emit_insn (pat);
33073 return target;
33074 }
33075
33076 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
33077 insns with vec_merge. */
33078
33079 static rtx
33080 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
33081 rtx target)
33082 {
33083 rtx pat;
33084 tree arg0 = CALL_EXPR_ARG (exp, 0);
33085 rtx op1, op0 = expand_normal (arg0);
33086 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33087 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33088
33089 if (optimize || !target
33090 || GET_MODE (target) != tmode
33091 || !insn_data[icode].operand[0].predicate (target, tmode))
33092 target = gen_reg_rtx (tmode);
33093
33094 if (VECTOR_MODE_P (mode0))
33095 op0 = safe_vector_operand (op0, mode0);
33096
33097 if ((optimize && !register_operand (op0, mode0))
33098 || !insn_data[icode].operand[1].predicate (op0, mode0))
33099 op0 = copy_to_mode_reg (mode0, op0);
33100
33101 op1 = op0;
33102 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33103 op1 = copy_to_mode_reg (mode0, op1);
33104
33105 pat = GEN_FCN (icode) (target, op0, op1);
33106 if (! pat)
33107 return 0;
33108 emit_insn (pat);
33109 return target;
33110 }
33111
33112 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33113
33114 static rtx
33115 ix86_expand_sse_compare (const struct builtin_description *d,
33116 tree exp, rtx target, bool swap)
33117 {
33118 rtx pat;
33119 tree arg0 = CALL_EXPR_ARG (exp, 0);
33120 tree arg1 = CALL_EXPR_ARG (exp, 1);
33121 rtx op0 = expand_normal (arg0);
33122 rtx op1 = expand_normal (arg1);
33123 rtx op2;
33124 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33125 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33126 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33127 enum rtx_code comparison = d->comparison;
33128
33129 if (VECTOR_MODE_P (mode0))
33130 op0 = safe_vector_operand (op0, mode0);
33131 if (VECTOR_MODE_P (mode1))
33132 op1 = safe_vector_operand (op1, mode1);
33133
33134 /* Swap operands if we have a comparison that isn't available in
33135 hardware. */
33136 if (swap)
33137 {
33138 rtx tmp = gen_reg_rtx (mode1);
33139 emit_move_insn (tmp, op1);
33140 op1 = op0;
33141 op0 = tmp;
33142 }
33143
33144 if (optimize || !target
33145 || GET_MODE (target) != tmode
33146 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33147 target = gen_reg_rtx (tmode);
33148
33149 if ((optimize && !register_operand (op0, mode0))
33150 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33151 op0 = copy_to_mode_reg (mode0, op0);
33152 if ((optimize && !register_operand (op1, mode1))
33153 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33154 op1 = copy_to_mode_reg (mode1, op1);
33155
33156 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33157 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33158 if (! pat)
33159 return 0;
33160 emit_insn (pat);
33161 return target;
33162 }
33163
33164 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33165
33166 static rtx
33167 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33168 rtx target)
33169 {
33170 rtx pat;
33171 tree arg0 = CALL_EXPR_ARG (exp, 0);
33172 tree arg1 = CALL_EXPR_ARG (exp, 1);
33173 rtx op0 = expand_normal (arg0);
33174 rtx op1 = expand_normal (arg1);
33175 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33176 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33177 enum rtx_code comparison = d->comparison;
33178
33179 if (VECTOR_MODE_P (mode0))
33180 op0 = safe_vector_operand (op0, mode0);
33181 if (VECTOR_MODE_P (mode1))
33182 op1 = safe_vector_operand (op1, mode1);
33183
33184 /* Swap operands if we have a comparison that isn't available in
33185 hardware. */
33186 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33187 {
33188 rtx tmp = op1;
33189 op1 = op0;
33190 op0 = tmp;
33191 }
33192
33193 target = gen_reg_rtx (SImode);
33194 emit_move_insn (target, const0_rtx);
33195 target = gen_rtx_SUBREG (QImode, target, 0);
33196
33197 if ((optimize && !register_operand (op0, mode0))
33198 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33199 op0 = copy_to_mode_reg (mode0, op0);
33200 if ((optimize && !register_operand (op1, mode1))
33201 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33202 op1 = copy_to_mode_reg (mode1, op1);
33203
33204 pat = GEN_FCN (d->icode) (op0, op1);
33205 if (! pat)
33206 return 0;
33207 emit_insn (pat);
33208 emit_insn (gen_rtx_SET (VOIDmode,
33209 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33210 gen_rtx_fmt_ee (comparison, QImode,
33211 SET_DEST (pat),
33212 const0_rtx)));
33213
33214 return SUBREG_REG (target);
33215 }
33216
33217 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33218
33219 static rtx
33220 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33221 rtx target)
33222 {
33223 rtx pat;
33224 tree arg0 = CALL_EXPR_ARG (exp, 0);
33225 rtx op1, op0 = expand_normal (arg0);
33226 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33227 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33228
33229 if (optimize || target == 0
33230 || GET_MODE (target) != tmode
33231 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33232 target = gen_reg_rtx (tmode);
33233
33234 if (VECTOR_MODE_P (mode0))
33235 op0 = safe_vector_operand (op0, mode0);
33236
33237 if ((optimize && !register_operand (op0, mode0))
33238 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33239 op0 = copy_to_mode_reg (mode0, op0);
33240
33241 op1 = GEN_INT (d->comparison);
33242
33243 pat = GEN_FCN (d->icode) (target, op0, op1);
33244 if (! pat)
33245 return 0;
33246 emit_insn (pat);
33247 return target;
33248 }
33249
33250 static rtx
33251 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33252 tree exp, rtx target)
33253 {
33254 rtx pat;
33255 tree arg0 = CALL_EXPR_ARG (exp, 0);
33256 tree arg1 = CALL_EXPR_ARG (exp, 1);
33257 rtx op0 = expand_normal (arg0);
33258 rtx op1 = expand_normal (arg1);
33259 rtx op2;
33260 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33261 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33262 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33263
33264 if (optimize || target == 0
33265 || GET_MODE (target) != tmode
33266 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33267 target = gen_reg_rtx (tmode);
33268
33269 op0 = safe_vector_operand (op0, mode0);
33270 op1 = safe_vector_operand (op1, mode1);
33271
33272 if ((optimize && !register_operand (op0, mode0))
33273 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33274 op0 = copy_to_mode_reg (mode0, op0);
33275 if ((optimize && !register_operand (op1, mode1))
33276 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33277 op1 = copy_to_mode_reg (mode1, op1);
33278
33279 op2 = GEN_INT (d->comparison);
33280
33281 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33282 if (! pat)
33283 return 0;
33284 emit_insn (pat);
33285 return target;
33286 }
33287
33288 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33289
33290 static rtx
33291 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33292 rtx target)
33293 {
33294 rtx pat;
33295 tree arg0 = CALL_EXPR_ARG (exp, 0);
33296 tree arg1 = CALL_EXPR_ARG (exp, 1);
33297 rtx op0 = expand_normal (arg0);
33298 rtx op1 = expand_normal (arg1);
33299 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33300 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33301 enum rtx_code comparison = d->comparison;
33302
33303 if (VECTOR_MODE_P (mode0))
33304 op0 = safe_vector_operand (op0, mode0);
33305 if (VECTOR_MODE_P (mode1))
33306 op1 = safe_vector_operand (op1, mode1);
33307
33308 target = gen_reg_rtx (SImode);
33309 emit_move_insn (target, const0_rtx);
33310 target = gen_rtx_SUBREG (QImode, target, 0);
33311
33312 if ((optimize && !register_operand (op0, mode0))
33313 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33314 op0 = copy_to_mode_reg (mode0, op0);
33315 if ((optimize && !register_operand (op1, mode1))
33316 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33317 op1 = copy_to_mode_reg (mode1, op1);
33318
33319 pat = GEN_FCN (d->icode) (op0, op1);
33320 if (! pat)
33321 return 0;
33322 emit_insn (pat);
33323 emit_insn (gen_rtx_SET (VOIDmode,
33324 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33325 gen_rtx_fmt_ee (comparison, QImode,
33326 SET_DEST (pat),
33327 const0_rtx)));
33328
33329 return SUBREG_REG (target);
33330 }
33331
33332 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33333
33334 static rtx
33335 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33336 tree exp, rtx target)
33337 {
33338 rtx pat;
33339 tree arg0 = CALL_EXPR_ARG (exp, 0);
33340 tree arg1 = CALL_EXPR_ARG (exp, 1);
33341 tree arg2 = CALL_EXPR_ARG (exp, 2);
33342 tree arg3 = CALL_EXPR_ARG (exp, 3);
33343 tree arg4 = CALL_EXPR_ARG (exp, 4);
33344 rtx scratch0, scratch1;
33345 rtx op0 = expand_normal (arg0);
33346 rtx op1 = expand_normal (arg1);
33347 rtx op2 = expand_normal (arg2);
33348 rtx op3 = expand_normal (arg3);
33349 rtx op4 = expand_normal (arg4);
33350 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33351
33352 tmode0 = insn_data[d->icode].operand[0].mode;
33353 tmode1 = insn_data[d->icode].operand[1].mode;
33354 modev2 = insn_data[d->icode].operand[2].mode;
33355 modei3 = insn_data[d->icode].operand[3].mode;
33356 modev4 = insn_data[d->icode].operand[4].mode;
33357 modei5 = insn_data[d->icode].operand[5].mode;
33358 modeimm = insn_data[d->icode].operand[6].mode;
33359
33360 if (VECTOR_MODE_P (modev2))
33361 op0 = safe_vector_operand (op0, modev2);
33362 if (VECTOR_MODE_P (modev4))
33363 op2 = safe_vector_operand (op2, modev4);
33364
33365 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33366 op0 = copy_to_mode_reg (modev2, op0);
33367 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33368 op1 = copy_to_mode_reg (modei3, op1);
33369 if ((optimize && !register_operand (op2, modev4))
33370 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33371 op2 = copy_to_mode_reg (modev4, op2);
33372 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33373 op3 = copy_to_mode_reg (modei5, op3);
33374
33375 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33376 {
33377 error ("the fifth argument must be an 8-bit immediate");
33378 return const0_rtx;
33379 }
33380
33381 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33382 {
33383 if (optimize || !target
33384 || GET_MODE (target) != tmode0
33385 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33386 target = gen_reg_rtx (tmode0);
33387
33388 scratch1 = gen_reg_rtx (tmode1);
33389
33390 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33391 }
33392 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33393 {
33394 if (optimize || !target
33395 || GET_MODE (target) != tmode1
33396 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33397 target = gen_reg_rtx (tmode1);
33398
33399 scratch0 = gen_reg_rtx (tmode0);
33400
33401 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33402 }
33403 else
33404 {
33405 gcc_assert (d->flag);
33406
33407 scratch0 = gen_reg_rtx (tmode0);
33408 scratch1 = gen_reg_rtx (tmode1);
33409
33410 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33411 }
33412
33413 if (! pat)
33414 return 0;
33415
33416 emit_insn (pat);
33417
33418 if (d->flag)
33419 {
33420 target = gen_reg_rtx (SImode);
33421 emit_move_insn (target, const0_rtx);
33422 target = gen_rtx_SUBREG (QImode, target, 0);
33423
33424 emit_insn
33425 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33426 gen_rtx_fmt_ee (EQ, QImode,
33427 gen_rtx_REG ((enum machine_mode) d->flag,
33428 FLAGS_REG),
33429 const0_rtx)));
33430 return SUBREG_REG (target);
33431 }
33432 else
33433 return target;
33434 }
33435
33436
33437 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33438
33439 static rtx
33440 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33441 tree exp, rtx target)
33442 {
33443 rtx pat;
33444 tree arg0 = CALL_EXPR_ARG (exp, 0);
33445 tree arg1 = CALL_EXPR_ARG (exp, 1);
33446 tree arg2 = CALL_EXPR_ARG (exp, 2);
33447 rtx scratch0, scratch1;
33448 rtx op0 = expand_normal (arg0);
33449 rtx op1 = expand_normal (arg1);
33450 rtx op2 = expand_normal (arg2);
33451 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33452
33453 tmode0 = insn_data[d->icode].operand[0].mode;
33454 tmode1 = insn_data[d->icode].operand[1].mode;
33455 modev2 = insn_data[d->icode].operand[2].mode;
33456 modev3 = insn_data[d->icode].operand[3].mode;
33457 modeimm = insn_data[d->icode].operand[4].mode;
33458
33459 if (VECTOR_MODE_P (modev2))
33460 op0 = safe_vector_operand (op0, modev2);
33461 if (VECTOR_MODE_P (modev3))
33462 op1 = safe_vector_operand (op1, modev3);
33463
33464 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33465 op0 = copy_to_mode_reg (modev2, op0);
33466 if ((optimize && !register_operand (op1, modev3))
33467 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33468 op1 = copy_to_mode_reg (modev3, op1);
33469
33470 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33471 {
33472 error ("the third argument must be an 8-bit immediate");
33473 return const0_rtx;
33474 }
33475
33476 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33477 {
33478 if (optimize || !target
33479 || GET_MODE (target) != tmode0
33480 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33481 target = gen_reg_rtx (tmode0);
33482
33483 scratch1 = gen_reg_rtx (tmode1);
33484
33485 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33486 }
33487 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33488 {
33489 if (optimize || !target
33490 || GET_MODE (target) != tmode1
33491 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33492 target = gen_reg_rtx (tmode1);
33493
33494 scratch0 = gen_reg_rtx (tmode0);
33495
33496 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33497 }
33498 else
33499 {
33500 gcc_assert (d->flag);
33501
33502 scratch0 = gen_reg_rtx (tmode0);
33503 scratch1 = gen_reg_rtx (tmode1);
33504
33505 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33506 }
33507
33508 if (! pat)
33509 return 0;
33510
33511 emit_insn (pat);
33512
33513 if (d->flag)
33514 {
33515 target = gen_reg_rtx (SImode);
33516 emit_move_insn (target, const0_rtx);
33517 target = gen_rtx_SUBREG (QImode, target, 0);
33518
33519 emit_insn
33520 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33521 gen_rtx_fmt_ee (EQ, QImode,
33522 gen_rtx_REG ((enum machine_mode) d->flag,
33523 FLAGS_REG),
33524 const0_rtx)));
33525 return SUBREG_REG (target);
33526 }
33527 else
33528 return target;
33529 }
33530
33531 /* Subroutine of ix86_expand_builtin to take care of insns with
33532 a variable number of operands. */
33533
33534 static rtx
33535 ix86_expand_args_builtin (const struct builtin_description *d,
33536 tree exp, rtx target)
33537 {
33538 rtx pat, real_target;
33539 unsigned int i, nargs;
33540 unsigned int nargs_constant = 0;
33541 unsigned int mask_pos = 0;
33542 int num_memory = 0;
33543 struct
33544 {
33545 rtx op;
33546 enum machine_mode mode;
33547 } args[6];
33548 bool last_arg_count = false;
33549 enum insn_code icode = d->icode;
33550 const struct insn_data_d *insn_p = &insn_data[icode];
33551 enum machine_mode tmode = insn_p->operand[0].mode;
33552 enum machine_mode rmode = VOIDmode;
33553 bool swap = false;
33554 enum rtx_code comparison = d->comparison;
33555
33556 switch ((enum ix86_builtin_func_type) d->flag)
33557 {
33558 case V2DF_FTYPE_V2DF_ROUND:
33559 case V4DF_FTYPE_V4DF_ROUND:
33560 case V4SF_FTYPE_V4SF_ROUND:
33561 case V8SF_FTYPE_V8SF_ROUND:
33562 case V4SI_FTYPE_V4SF_ROUND:
33563 case V8SI_FTYPE_V8SF_ROUND:
33564 return ix86_expand_sse_round (d, exp, target);
33565 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33566 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33567 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33568 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33569 case INT_FTYPE_V8SF_V8SF_PTEST:
33570 case INT_FTYPE_V4DI_V4DI_PTEST:
33571 case INT_FTYPE_V4DF_V4DF_PTEST:
33572 case INT_FTYPE_V4SF_V4SF_PTEST:
33573 case INT_FTYPE_V2DI_V2DI_PTEST:
33574 case INT_FTYPE_V2DF_V2DF_PTEST:
33575 return ix86_expand_sse_ptest (d, exp, target);
33576 case FLOAT128_FTYPE_FLOAT128:
33577 case FLOAT_FTYPE_FLOAT:
33578 case INT_FTYPE_INT:
33579 case UINT64_FTYPE_INT:
33580 case UINT16_FTYPE_UINT16:
33581 case INT64_FTYPE_INT64:
33582 case INT64_FTYPE_V4SF:
33583 case INT64_FTYPE_V2DF:
33584 case INT_FTYPE_V16QI:
33585 case INT_FTYPE_V8QI:
33586 case INT_FTYPE_V8SF:
33587 case INT_FTYPE_V4DF:
33588 case INT_FTYPE_V4SF:
33589 case INT_FTYPE_V2DF:
33590 case INT_FTYPE_V32QI:
33591 case V16QI_FTYPE_V16QI:
33592 case V8SI_FTYPE_V8SF:
33593 case V8SI_FTYPE_V4SI:
33594 case V8HI_FTYPE_V8HI:
33595 case V8HI_FTYPE_V16QI:
33596 case V8QI_FTYPE_V8QI:
33597 case V8SF_FTYPE_V8SF:
33598 case V8SF_FTYPE_V8SI:
33599 case V8SF_FTYPE_V4SF:
33600 case V8SF_FTYPE_V8HI:
33601 case V4SI_FTYPE_V4SI:
33602 case V4SI_FTYPE_V16QI:
33603 case V4SI_FTYPE_V4SF:
33604 case V4SI_FTYPE_V8SI:
33605 case V4SI_FTYPE_V8HI:
33606 case V4SI_FTYPE_V4DF:
33607 case V4SI_FTYPE_V2DF:
33608 case V4HI_FTYPE_V4HI:
33609 case V4DF_FTYPE_V4DF:
33610 case V4DF_FTYPE_V4SI:
33611 case V4DF_FTYPE_V4SF:
33612 case V4DF_FTYPE_V2DF:
33613 case V4SF_FTYPE_V4SF:
33614 case V4SF_FTYPE_V4SI:
33615 case V4SF_FTYPE_V8SF:
33616 case V4SF_FTYPE_V4DF:
33617 case V4SF_FTYPE_V8HI:
33618 case V4SF_FTYPE_V2DF:
33619 case V2DI_FTYPE_V2DI:
33620 case V2DI_FTYPE_V16QI:
33621 case V2DI_FTYPE_V8HI:
33622 case V2DI_FTYPE_V4SI:
33623 case V2DF_FTYPE_V2DF:
33624 case V2DF_FTYPE_V4SI:
33625 case V2DF_FTYPE_V4DF:
33626 case V2DF_FTYPE_V4SF:
33627 case V2DF_FTYPE_V2SI:
33628 case V2SI_FTYPE_V2SI:
33629 case V2SI_FTYPE_V4SF:
33630 case V2SI_FTYPE_V2SF:
33631 case V2SI_FTYPE_V2DF:
33632 case V2SF_FTYPE_V2SF:
33633 case V2SF_FTYPE_V2SI:
33634 case V32QI_FTYPE_V32QI:
33635 case V32QI_FTYPE_V16QI:
33636 case V16HI_FTYPE_V16HI:
33637 case V16HI_FTYPE_V8HI:
33638 case V8SI_FTYPE_V8SI:
33639 case V16HI_FTYPE_V16QI:
33640 case V8SI_FTYPE_V16QI:
33641 case V4DI_FTYPE_V16QI:
33642 case V8SI_FTYPE_V8HI:
33643 case V4DI_FTYPE_V8HI:
33644 case V4DI_FTYPE_V4SI:
33645 case V4DI_FTYPE_V2DI:
33646 case HI_FTYPE_HI:
33647 case UINT_FTYPE_V2DF:
33648 case UINT_FTYPE_V4SF:
33649 case UINT64_FTYPE_V2DF:
33650 case UINT64_FTYPE_V4SF:
33651 case V16QI_FTYPE_V8DI:
33652 case V16HI_FTYPE_V16SI:
33653 case V16SI_FTYPE_HI:
33654 case V16SI_FTYPE_V16SI:
33655 case V16SI_FTYPE_INT:
33656 case V16SF_FTYPE_FLOAT:
33657 case V16SF_FTYPE_V8SF:
33658 case V16SI_FTYPE_V8SI:
33659 case V16SF_FTYPE_V4SF:
33660 case V16SI_FTYPE_V4SI:
33661 case V16SF_FTYPE_V16SF:
33662 case V8HI_FTYPE_V8DI:
33663 case V8UHI_FTYPE_V8UHI:
33664 case V8SI_FTYPE_V8DI:
33665 case V8USI_FTYPE_V8USI:
33666 case V8SF_FTYPE_V8DF:
33667 case V8DI_FTYPE_QI:
33668 case V8DI_FTYPE_INT64:
33669 case V8DI_FTYPE_V4DI:
33670 case V8DI_FTYPE_V8DI:
33671 case V8DF_FTYPE_DOUBLE:
33672 case V8DF_FTYPE_V4DF:
33673 case V8DF_FTYPE_V2DF:
33674 case V8DF_FTYPE_V8DF:
33675 case V8DF_FTYPE_V8SI:
33676 nargs = 1;
33677 break;
33678 case V4SF_FTYPE_V4SF_VEC_MERGE:
33679 case V2DF_FTYPE_V2DF_VEC_MERGE:
33680 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33681 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33682 case V16QI_FTYPE_V16QI_V16QI:
33683 case V16QI_FTYPE_V8HI_V8HI:
33684 case V16SI_FTYPE_V16SI_V16SI:
33685 case V16SF_FTYPE_V16SF_V16SF:
33686 case V16SF_FTYPE_V16SF_V16SI:
33687 case V8QI_FTYPE_V8QI_V8QI:
33688 case V8QI_FTYPE_V4HI_V4HI:
33689 case V8HI_FTYPE_V8HI_V8HI:
33690 case V8HI_FTYPE_V16QI_V16QI:
33691 case V8HI_FTYPE_V4SI_V4SI:
33692 case V8SF_FTYPE_V8SF_V8SF:
33693 case V8SF_FTYPE_V8SF_V8SI:
33694 case V8DI_FTYPE_V8DI_V8DI:
33695 case V8DF_FTYPE_V8DF_V8DF:
33696 case V8DF_FTYPE_V8DF_V8DI:
33697 case V4SI_FTYPE_V4SI_V4SI:
33698 case V4SI_FTYPE_V8HI_V8HI:
33699 case V4SI_FTYPE_V4SF_V4SF:
33700 case V4SI_FTYPE_V2DF_V2DF:
33701 case V4HI_FTYPE_V4HI_V4HI:
33702 case V4HI_FTYPE_V8QI_V8QI:
33703 case V4HI_FTYPE_V2SI_V2SI:
33704 case V4DF_FTYPE_V4DF_V4DF:
33705 case V4DF_FTYPE_V4DF_V4DI:
33706 case V4SF_FTYPE_V4SF_V4SF:
33707 case V4SF_FTYPE_V4SF_V4SI:
33708 case V4SF_FTYPE_V4SF_V2SI:
33709 case V4SF_FTYPE_V4SF_V2DF:
33710 case V4SF_FTYPE_V4SF_UINT:
33711 case V4SF_FTYPE_V4SF_UINT64:
33712 case V4SF_FTYPE_V4SF_DI:
33713 case V4SF_FTYPE_V4SF_SI:
33714 case V2DI_FTYPE_V2DI_V2DI:
33715 case V2DI_FTYPE_V16QI_V16QI:
33716 case V2DI_FTYPE_V4SI_V4SI:
33717 case V2UDI_FTYPE_V4USI_V4USI:
33718 case V2DI_FTYPE_V2DI_V16QI:
33719 case V2DI_FTYPE_V2DF_V2DF:
33720 case V2SI_FTYPE_V2SI_V2SI:
33721 case V2SI_FTYPE_V4HI_V4HI:
33722 case V2SI_FTYPE_V2SF_V2SF:
33723 case V2DF_FTYPE_V2DF_V2DF:
33724 case V2DF_FTYPE_V2DF_V4SF:
33725 case V2DF_FTYPE_V2DF_V2DI:
33726 case V2DF_FTYPE_V2DF_DI:
33727 case V2DF_FTYPE_V2DF_SI:
33728 case V2DF_FTYPE_V2DF_UINT:
33729 case V2DF_FTYPE_V2DF_UINT64:
33730 case V2SF_FTYPE_V2SF_V2SF:
33731 case V1DI_FTYPE_V1DI_V1DI:
33732 case V1DI_FTYPE_V8QI_V8QI:
33733 case V1DI_FTYPE_V2SI_V2SI:
33734 case V32QI_FTYPE_V16HI_V16HI:
33735 case V16HI_FTYPE_V8SI_V8SI:
33736 case V32QI_FTYPE_V32QI_V32QI:
33737 case V16HI_FTYPE_V32QI_V32QI:
33738 case V16HI_FTYPE_V16HI_V16HI:
33739 case V8SI_FTYPE_V4DF_V4DF:
33740 case V8SI_FTYPE_V8SI_V8SI:
33741 case V8SI_FTYPE_V16HI_V16HI:
33742 case V4DI_FTYPE_V4DI_V4DI:
33743 case V4DI_FTYPE_V8SI_V8SI:
33744 case V4UDI_FTYPE_V8USI_V8USI:
33745 case QI_FTYPE_V8DI_V8DI:
33746 case HI_FTYPE_V16SI_V16SI:
33747 if (comparison == UNKNOWN)
33748 return ix86_expand_binop_builtin (icode, exp, target);
33749 nargs = 2;
33750 break;
33751 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33752 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33753 gcc_assert (comparison != UNKNOWN);
33754 nargs = 2;
33755 swap = true;
33756 break;
33757 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33758 case V16HI_FTYPE_V16HI_SI_COUNT:
33759 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33760 case V8SI_FTYPE_V8SI_SI_COUNT:
33761 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33762 case V4DI_FTYPE_V4DI_INT_COUNT:
33763 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33764 case V8HI_FTYPE_V8HI_SI_COUNT:
33765 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33766 case V4SI_FTYPE_V4SI_SI_COUNT:
33767 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33768 case V4HI_FTYPE_V4HI_SI_COUNT:
33769 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33770 case V2DI_FTYPE_V2DI_SI_COUNT:
33771 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33772 case V2SI_FTYPE_V2SI_SI_COUNT:
33773 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33774 case V1DI_FTYPE_V1DI_SI_COUNT:
33775 nargs = 2;
33776 last_arg_count = true;
33777 break;
33778 case UINT64_FTYPE_UINT64_UINT64:
33779 case UINT_FTYPE_UINT_UINT:
33780 case UINT_FTYPE_UINT_USHORT:
33781 case UINT_FTYPE_UINT_UCHAR:
33782 case UINT16_FTYPE_UINT16_INT:
33783 case UINT8_FTYPE_UINT8_INT:
33784 case HI_FTYPE_HI_HI:
33785 case V16SI_FTYPE_V8DF_V8DF:
33786 nargs = 2;
33787 break;
33788 case V2DI_FTYPE_V2DI_INT_CONVERT:
33789 nargs = 2;
33790 rmode = V1TImode;
33791 nargs_constant = 1;
33792 break;
33793 case V4DI_FTYPE_V4DI_INT_CONVERT:
33794 nargs = 2;
33795 rmode = V2TImode;
33796 nargs_constant = 1;
33797 break;
33798 case V8HI_FTYPE_V8HI_INT:
33799 case V8HI_FTYPE_V8SF_INT:
33800 case V16HI_FTYPE_V16SF_INT:
33801 case V8HI_FTYPE_V4SF_INT:
33802 case V8SF_FTYPE_V8SF_INT:
33803 case V4SF_FTYPE_V16SF_INT:
33804 case V16SF_FTYPE_V16SF_INT:
33805 case V4SI_FTYPE_V4SI_INT:
33806 case V4SI_FTYPE_V8SI_INT:
33807 case V4HI_FTYPE_V4HI_INT:
33808 case V4DF_FTYPE_V4DF_INT:
33809 case V4DF_FTYPE_V8DF_INT:
33810 case V4SF_FTYPE_V4SF_INT:
33811 case V4SF_FTYPE_V8SF_INT:
33812 case V2DI_FTYPE_V2DI_INT:
33813 case V2DF_FTYPE_V2DF_INT:
33814 case V2DF_FTYPE_V4DF_INT:
33815 case V16HI_FTYPE_V16HI_INT:
33816 case V8SI_FTYPE_V8SI_INT:
33817 case V16SI_FTYPE_V16SI_INT:
33818 case V4SI_FTYPE_V16SI_INT:
33819 case V4DI_FTYPE_V4DI_INT:
33820 case V2DI_FTYPE_V4DI_INT:
33821 case V4DI_FTYPE_V8DI_INT:
33822 case HI_FTYPE_HI_INT:
33823 nargs = 2;
33824 nargs_constant = 1;
33825 break;
33826 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33827 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33828 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33829 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33830 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33831 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33832 case HI_FTYPE_V16SI_V16SI_HI:
33833 case QI_FTYPE_V8DI_V8DI_QI:
33834 case V16HI_FTYPE_V16SI_V16HI_HI:
33835 case V16QI_FTYPE_V16SI_V16QI_HI:
33836 case V16QI_FTYPE_V8DI_V16QI_QI:
33837 case V16SF_FTYPE_V16SF_V16SF_HI:
33838 case V16SF_FTYPE_V16SF_V16SF_V16SF:
33839 case V16SF_FTYPE_V16SF_V16SI_V16SF:
33840 case V16SF_FTYPE_V16SI_V16SF_HI:
33841 case V16SF_FTYPE_V16SI_V16SF_V16SF:
33842 case V16SF_FTYPE_V4SF_V16SF_HI:
33843 case V16SI_FTYPE_SI_V16SI_HI:
33844 case V16SI_FTYPE_V16HI_V16SI_HI:
33845 case V16SI_FTYPE_V16QI_V16SI_HI:
33846 case V16SI_FTYPE_V16SF_V16SI_HI:
33847 case V16SI_FTYPE_V16SI_V16SI_HI:
33848 case V16SI_FTYPE_V16SI_V16SI_V16SI:
33849 case V16SI_FTYPE_V4SI_V16SI_HI:
33850 case V2DI_FTYPE_V2DI_V2DI_V2DI:
33851 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33852 case V8DF_FTYPE_V2DF_V8DF_QI:
33853 case V8DF_FTYPE_V4DF_V8DF_QI:
33854 case V8DF_FTYPE_V8DF_V8DF_QI:
33855 case V8DF_FTYPE_V8DF_V8DF_V8DF:
33856 case V8DF_FTYPE_V8DF_V8DI_V8DF:
33857 case V8DF_FTYPE_V8DI_V8DF_V8DF:
33858 case V8DF_FTYPE_V8SF_V8DF_QI:
33859 case V8DF_FTYPE_V8SI_V8DF_QI:
33860 case V8DI_FTYPE_DI_V8DI_QI:
33861 case V8DI_FTYPE_V16QI_V8DI_QI:
33862 case V8DI_FTYPE_V2DI_V8DI_QI:
33863 case V8DI_FTYPE_V4DI_V8DI_QI:
33864 case V8DI_FTYPE_V8DI_V8DI_QI:
33865 case V8DI_FTYPE_V8DI_V8DI_V8DI:
33866 case V8DI_FTYPE_V8HI_V8DI_QI:
33867 case V8DI_FTYPE_V8SI_V8DI_QI:
33868 case V8HI_FTYPE_V8DI_V8HI_QI:
33869 case V8SF_FTYPE_V8DF_V8SF_QI:
33870 case V8SI_FTYPE_V8DF_V8SI_QI:
33871 case V8SI_FTYPE_V8DI_V8SI_QI:
33872 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33873 nargs = 3;
33874 break;
33875 case V32QI_FTYPE_V32QI_V32QI_INT:
33876 case V16HI_FTYPE_V16HI_V16HI_INT:
33877 case V16QI_FTYPE_V16QI_V16QI_INT:
33878 case V4DI_FTYPE_V4DI_V4DI_INT:
33879 case V8HI_FTYPE_V8HI_V8HI_INT:
33880 case V8SI_FTYPE_V8SI_V8SI_INT:
33881 case V8SI_FTYPE_V8SI_V4SI_INT:
33882 case V8SF_FTYPE_V8SF_V8SF_INT:
33883 case V8SF_FTYPE_V8SF_V4SF_INT:
33884 case V4SI_FTYPE_V4SI_V4SI_INT:
33885 case V4DF_FTYPE_V4DF_V4DF_INT:
33886 case V16SF_FTYPE_V16SF_V16SF_INT:
33887 case V16SF_FTYPE_V16SF_V4SF_INT:
33888 case V16SI_FTYPE_V16SI_V4SI_INT:
33889 case V4DF_FTYPE_V4DF_V2DF_INT:
33890 case V4SF_FTYPE_V4SF_V4SF_INT:
33891 case V2DI_FTYPE_V2DI_V2DI_INT:
33892 case V4DI_FTYPE_V4DI_V2DI_INT:
33893 case V2DF_FTYPE_V2DF_V2DF_INT:
33894 case QI_FTYPE_V8DI_V8DI_INT:
33895 case QI_FTYPE_V8DF_V8DF_INT:
33896 case QI_FTYPE_V2DF_V2DF_INT:
33897 case QI_FTYPE_V4SF_V4SF_INT:
33898 case HI_FTYPE_V16SI_V16SI_INT:
33899 case HI_FTYPE_V16SF_V16SF_INT:
33900 nargs = 3;
33901 nargs_constant = 1;
33902 break;
33903 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33904 nargs = 3;
33905 rmode = V4DImode;
33906 nargs_constant = 1;
33907 break;
33908 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33909 nargs = 3;
33910 rmode = V2DImode;
33911 nargs_constant = 1;
33912 break;
33913 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33914 nargs = 3;
33915 rmode = DImode;
33916 nargs_constant = 1;
33917 break;
33918 case V2DI_FTYPE_V2DI_UINT_UINT:
33919 nargs = 3;
33920 nargs_constant = 2;
33921 break;
33922 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
33923 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
33924 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
33925 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
33926 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
33927 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
33928 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
33929 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
33930 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
33931 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
33932 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
33933 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
33934 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
33935 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
33936 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
33937 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
33938 nargs = 4;
33939 break;
33940 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33941 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
33942 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
33943 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
33944 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
33945 nargs = 4;
33946 nargs_constant = 1;
33947 break;
33948 case QI_FTYPE_V2DF_V2DF_INT_QI:
33949 case QI_FTYPE_V4SF_V4SF_INT_QI:
33950 nargs = 4;
33951 mask_pos = 1;
33952 nargs_constant = 1;
33953 break;
33954 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
33955 nargs = 4;
33956 nargs_constant = 2;
33957 break;
33958 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
33959 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
33960 nargs = 4;
33961 break;
33962 case QI_FTYPE_V8DI_V8DI_INT_QI:
33963 case HI_FTYPE_V16SI_V16SI_INT_HI:
33964 case QI_FTYPE_V8DF_V8DF_INT_QI:
33965 case HI_FTYPE_V16SF_V16SF_INT_HI:
33966 mask_pos = 1;
33967 nargs = 4;
33968 nargs_constant = 1;
33969 break;
33970 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
33971 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
33972 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
33973 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
33974 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
33975 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
33976 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
33977 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
33978 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
33979 nargs = 4;
33980 mask_pos = 2;
33981 nargs_constant = 1;
33982 break;
33983 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
33984 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
33985 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
33986 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
33987 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
33988 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
33989 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
33990 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
33991 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
33992 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
33993 nargs = 5;
33994 mask_pos = 2;
33995 nargs_constant = 1;
33996 break;
33997 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
33998 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
33999 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
34000 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
34001 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
34002 nargs = 5;
34003 mask_pos = 1;
34004 nargs_constant = 1;
34005 break;
34006
34007 default:
34008 gcc_unreachable ();
34009 }
34010
34011 gcc_assert (nargs <= ARRAY_SIZE (args));
34012
34013 if (comparison != UNKNOWN)
34014 {
34015 gcc_assert (nargs == 2);
34016 return ix86_expand_sse_compare (d, exp, target, swap);
34017 }
34018
34019 if (rmode == VOIDmode || rmode == tmode)
34020 {
34021 if (optimize
34022 || target == 0
34023 || GET_MODE (target) != tmode
34024 || !insn_p->operand[0].predicate (target, tmode))
34025 target = gen_reg_rtx (tmode);
34026 real_target = target;
34027 }
34028 else
34029 {
34030 real_target = gen_reg_rtx (tmode);
34031 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
34032 }
34033
34034 for (i = 0; i < nargs; i++)
34035 {
34036 tree arg = CALL_EXPR_ARG (exp, i);
34037 rtx op = expand_normal (arg);
34038 enum machine_mode mode = insn_p->operand[i + 1].mode;
34039 bool match = insn_p->operand[i + 1].predicate (op, mode);
34040
34041 if (last_arg_count && (i + 1) == nargs)
34042 {
34043 /* SIMD shift insns take either an 8-bit immediate or a
34044 register as the count, but the builtin functions take an int as
34045 the count. If the count doesn't match, put it in a register. */
34046 if (!match)
34047 {
34048 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
34049 if (!insn_p->operand[i + 1].predicate (op, mode))
34050 op = copy_to_reg (op);
34051 }
34052 }
34053 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
34054 || (!mask_pos && (nargs - i) <= nargs_constant))
34055 {
34056 if (!match)
34057 switch (icode)
34058 {
34059 case CODE_FOR_avx2_inserti128:
34060 case CODE_FOR_avx2_extracti128:
34061 error ("the last argument must be a 1-bit immediate");
34062 return const0_rtx;
34063
34064 case CODE_FOR_avx512f_cmpv8di3_mask:
34065 case CODE_FOR_avx512f_cmpv16si3_mask:
34066 case CODE_FOR_avx512f_ucmpv8di3_mask:
34067 case CODE_FOR_avx512f_ucmpv16si3_mask:
34068 error ("the last argument must be a 3-bit immediate");
34069 return const0_rtx;
34070
34071 case CODE_FOR_sse4_1_roundsd:
34072 case CODE_FOR_sse4_1_roundss:
34073
34074 case CODE_FOR_sse4_1_roundpd:
34075 case CODE_FOR_sse4_1_roundps:
34076 case CODE_FOR_avx_roundpd256:
34077 case CODE_FOR_avx_roundps256:
34078
34079 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
34080 case CODE_FOR_sse4_1_roundps_sfix:
34081 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
34082 case CODE_FOR_avx_roundps_sfix256:
34083
34084 case CODE_FOR_sse4_1_blendps:
34085 case CODE_FOR_avx_blendpd256:
34086 case CODE_FOR_avx_vpermilv4df:
34087 case CODE_FOR_avx512f_getmantv8df_mask:
34088 case CODE_FOR_avx512f_getmantv16sf_mask:
34089 case CODE_FOR_avx512vl_getmantv8sf_mask:
34090 case CODE_FOR_avx512vl_getmantv4df_mask:
34091 case CODE_FOR_avx512vl_getmantv4sf_mask:
34092 case CODE_FOR_avx512vl_getmantv2df_mask:
34093 error ("the last argument must be a 4-bit immediate");
34094 return const0_rtx;
34095
34096 case CODE_FOR_sha1rnds4:
34097 case CODE_FOR_sse4_1_blendpd:
34098 case CODE_FOR_avx_vpermilv2df:
34099 case CODE_FOR_xop_vpermil2v2df3:
34100 case CODE_FOR_xop_vpermil2v4sf3:
34101 case CODE_FOR_xop_vpermil2v4df3:
34102 case CODE_FOR_xop_vpermil2v8sf3:
34103 case CODE_FOR_avx512f_vinsertf32x4_mask:
34104 case CODE_FOR_avx512f_vinserti32x4_mask:
34105 case CODE_FOR_avx512f_vextractf32x4_mask:
34106 case CODE_FOR_avx512f_vextracti32x4_mask:
34107 error ("the last argument must be a 2-bit immediate");
34108 return const0_rtx;
34109
34110 case CODE_FOR_avx_vextractf128v4df:
34111 case CODE_FOR_avx_vextractf128v8sf:
34112 case CODE_FOR_avx_vextractf128v8si:
34113 case CODE_FOR_avx_vinsertf128v4df:
34114 case CODE_FOR_avx_vinsertf128v8sf:
34115 case CODE_FOR_avx_vinsertf128v8si:
34116 case CODE_FOR_avx512f_vinsertf64x4_mask:
34117 case CODE_FOR_avx512f_vinserti64x4_mask:
34118 case CODE_FOR_avx512f_vextractf64x4_mask:
34119 case CODE_FOR_avx512f_vextracti64x4_mask:
34120 error ("the last argument must be a 1-bit immediate");
34121 return const0_rtx;
34122
34123 case CODE_FOR_avx_vmcmpv2df3:
34124 case CODE_FOR_avx_vmcmpv4sf3:
34125 case CODE_FOR_avx_cmpv2df3:
34126 case CODE_FOR_avx_cmpv4sf3:
34127 case CODE_FOR_avx_cmpv4df3:
34128 case CODE_FOR_avx_cmpv8sf3:
34129 case CODE_FOR_avx512f_cmpv8df3_mask:
34130 case CODE_FOR_avx512f_cmpv16sf3_mask:
34131 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34132 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34133 error ("the last argument must be a 5-bit immediate");
34134 return const0_rtx;
34135
34136 default:
34137 switch (nargs_constant)
34138 {
34139 case 2:
34140 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
34141 || (!mask_pos && (nargs - i) == nargs_constant))
34142 {
34143 error ("the next to last argument must be an 8-bit immediate");
34144 break;
34145 }
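/* FALLTHRU */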
34146 case 1:
34147 error ("the last argument must be an 8-bit immediate");
34148 break;
34149 default:
34150 gcc_unreachable ();
34151 }
34152 return const0_rtx;
34153 }
34154 }
34155 else
34156 {
34157 if (VECTOR_MODE_P (mode))
34158 op = safe_vector_operand (op, mode);
34159
34160 /* If we aren't optimizing, only allow one memory operand to
34161 be generated. */
34162 if (memory_operand (op, mode))
34163 num_memory++;
34164
34165 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34166 {
34167 if (optimize || !match || num_memory > 1)
34168 op = copy_to_mode_reg (mode, op);
34169 }
34170 else
34171 {
34172 op = copy_to_reg (op);
34173 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34174 }
34175 }
34176
34177 args[i].op = op;
34178 args[i].mode = mode;
34179 }
34180
34181 switch (nargs)
34182 {
34183 case 1:
34184 pat = GEN_FCN (icode) (real_target, args[0].op);
34185 break;
34186 case 2:
34187 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34188 break;
34189 case 3:
34190 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34191 args[2].op);
34192 break;
34193 case 4:
34194 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34195 args[2].op, args[3].op);
34196 break;
34197 case 5:
34198 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34199 args[2].op, args[3].op, args[4].op);
break;
34200 case 6:
34201 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34202 args[2].op, args[3].op, args[4].op,
34203 args[5].op);
34204 break;
34205 default:
34206 gcc_unreachable ();
34207 }
34208
34209 if (! pat)
34210 return 0;
34211
34212 emit_insn (pat);
34213 return target;
34214 }
34215
34216 /* Transform a pattern of the following layout:
34217 (parallel [
34218 (set (A B))
34219 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34220 ])
34221 into:
34222 (set (A B))
34223
34224 Or:
34225 (parallel [ A B
34226 ...
34227 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34228 ...
34229 ])
34230 into:
34231 (parallel [ A B ... ]) */
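/* (How this is used below, for reference: both ix86_expand_sse_comi_round
   and ix86_expand_round_builtin call it when the rounding operand of the
   pattern they generated turns out to be NO_ROUND, so the
   UNSPEC_EMBEDDED_ROUNDING marker is dropped and the plain form of the
   insn is emitted instead.) */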
34232
34233 static rtx
34234 ix86_erase_embedded_rounding (rtx pat)
34235 {
34236 if (GET_CODE (pat) == INSN)
34237 pat = PATTERN (pat);
34238
34239 gcc_assert (GET_CODE (pat) == PARALLEL);
34240
34241 if (XVECLEN (pat, 0) == 2)
34242 {
34243 rtx p0 = XVECEXP (pat, 0, 0);
34244 rtx p1 = XVECEXP (pat, 0, 1);
34245
34246 gcc_assert (GET_CODE (p0) == SET
34247 && GET_CODE (p1) == UNSPEC
34248 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34249
34250 return p0;
34251 }
34252 else
34253 {
34254 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34255 int i = 0;
34256 int j = 0;
34257
34258 for (; i < XVECLEN (pat, 0); ++i)
34259 {
34260 rtx elem = XVECEXP (pat, 0, i);
34261 if (GET_CODE (elem) != UNSPEC
34262 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34263 res [j++] = elem;
34264 }
34265
34266 /* No more than 1 occurrence was removed. */
34267 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34268
34269 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34270 }
34271 }
34272
34273 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34274 with rounding. */
34275 static rtx
34276 ix86_expand_sse_comi_round (const struct builtin_description *d,
34277 tree exp, rtx target)
34278 {
34279 rtx pat, set_dst;
34280 tree arg0 = CALL_EXPR_ARG (exp, 0);
34281 tree arg1 = CALL_EXPR_ARG (exp, 1);
34282 tree arg2 = CALL_EXPR_ARG (exp, 2);
34283 tree arg3 = CALL_EXPR_ARG (exp, 3);
34284 rtx op0 = expand_normal (arg0);
34285 rtx op1 = expand_normal (arg1);
34286 rtx op2 = expand_normal (arg2);
34287 rtx op3 = expand_normal (arg3);
34288 enum insn_code icode = d->icode;
34289 const struct insn_data_d *insn_p = &insn_data[icode];
34290 enum machine_mode mode0 = insn_p->operand[0].mode;
34291 enum machine_mode mode1 = insn_p->operand[1].mode;
34292 enum rtx_code comparison = UNEQ;
34293 bool need_ucomi = false;
34294
34295 /* See avxintrin.h for values. */
34296 enum rtx_code comi_comparisons[32] =
34297 {
34298 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34299 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34300 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34301 };
34302 bool need_ucomi_values[32] =
34303 {
34304 true, false, false, true, true, false, false, true,
34305 true, false, false, true, true, false, false, true,
34306 false, true, true, false, false, true, true, false,
34307 false, true, true, false, false, true, true, false
34308 };
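/* A worked example of how the two tables are read, with predicate values
   assumed to match avxintrin.h: predicate 0 (_CMP_EQ_OQ) selects UNEQ from
   comi_comparisons[] and, being a quiet predicate, has need_ucomi_values[]
   set, so the ucomi form of the insn is used. */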
34309
34310 if (!CONST_INT_P (op2))
34311 {
34312 error ("the third argument must be a comparison constant");
34313 return const0_rtx;
34314 }
34315 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34316 {
34317 error ("incorrect comparison mode");
34318 return const0_rtx;
34319 }
34320
34321 if (!insn_p->operand[2].predicate (op3, SImode))
34322 {
34323 error ("incorrect rounding operand");
34324 return const0_rtx;
34325 }
34326
34327 comparison = comi_comparisons[INTVAL (op2)];
34328 need_ucomi = need_ucomi_values[INTVAL (op2)];
34329
34330 if (VECTOR_MODE_P (mode0))
34331 op0 = safe_vector_operand (op0, mode0);
34332 if (VECTOR_MODE_P (mode1))
34333 op1 = safe_vector_operand (op1, mode1);
34334
34335 target = gen_reg_rtx (SImode);
34336 emit_move_insn (target, const0_rtx);
34337 target = gen_rtx_SUBREG (QImode, target, 0);
34338
34339 if ((optimize && !register_operand (op0, mode0))
34340 || !insn_p->operand[0].predicate (op0, mode0))
34341 op0 = copy_to_mode_reg (mode0, op0);
34342 if ((optimize && !register_operand (op1, mode1))
34343 || !insn_p->operand[1].predicate (op1, mode1))
34344 op1 = copy_to_mode_reg (mode1, op1);
34345
34346 if (need_ucomi)
34347 icode = icode == CODE_FOR_sse_comi_round
34348 ? CODE_FOR_sse_ucomi_round
34349 : CODE_FOR_sse2_ucomi_round;
34350
34351 pat = GEN_FCN (icode) (op0, op1, op3);
34352 if (! pat)
34353 return 0;
34354
34355 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34356 if (INTVAL (op3) == NO_ROUND)
34357 {
34358 pat = ix86_erase_embedded_rounding (pat);
34359 if (! pat)
34360 return 0;
34361
34362 set_dst = SET_DEST (pat);
34363 }
34364 else
34365 {
34366 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34367 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34368 }
34369
34370 emit_insn (pat);
34371 emit_insn (gen_rtx_SET (VOIDmode,
34372 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34373 gen_rtx_fmt_ee (comparison, QImode,
34374 set_dst,
34375 const0_rtx)));
34376
34377 return SUBREG_REG (target);
34378 }
34379
34380 static rtx
34381 ix86_expand_round_builtin (const struct builtin_description *d,
34382 tree exp, rtx target)
34383 {
34384 rtx pat;
34385 unsigned int i, nargs;
34386 struct
34387 {
34388 rtx op;
34389 enum machine_mode mode;
34390 } args[6];
34391 enum insn_code icode = d->icode;
34392 const struct insn_data_d *insn_p = &insn_data[icode];
34393 enum machine_mode tmode = insn_p->operand[0].mode;
34394 unsigned int nargs_constant = 0;
34395 unsigned int redundant_embed_rnd = 0;
34396
34397 switch ((enum ix86_builtin_func_type) d->flag)
34398 {
34399 case UINT64_FTYPE_V2DF_INT:
34400 case UINT64_FTYPE_V4SF_INT:
34401 case UINT_FTYPE_V2DF_INT:
34402 case UINT_FTYPE_V4SF_INT:
34403 case INT64_FTYPE_V2DF_INT:
34404 case INT64_FTYPE_V4SF_INT:
34405 case INT_FTYPE_V2DF_INT:
34406 case INT_FTYPE_V4SF_INT:
34407 nargs = 2;
34408 break;
34409 case V4SF_FTYPE_V4SF_UINT_INT:
34410 case V4SF_FTYPE_V4SF_UINT64_INT:
34411 case V2DF_FTYPE_V2DF_UINT64_INT:
34412 case V4SF_FTYPE_V4SF_INT_INT:
34413 case V4SF_FTYPE_V4SF_INT64_INT:
34414 case V2DF_FTYPE_V2DF_INT64_INT:
34415 case V4SF_FTYPE_V4SF_V4SF_INT:
34416 case V2DF_FTYPE_V2DF_V2DF_INT:
34417 case V4SF_FTYPE_V4SF_V2DF_INT:
34418 case V2DF_FTYPE_V2DF_V4SF_INT:
34419 nargs = 3;
34420 break;
34421 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34422 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34423 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34424 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34425 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34426 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34427 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34428 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34429 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34430 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34431 nargs = 4;
34432 break;
34433 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34434 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34435 nargs_constant = 2;
34436 nargs = 4;
34437 break;
34438 case INT_FTYPE_V4SF_V4SF_INT_INT:
34439 case INT_FTYPE_V2DF_V2DF_INT_INT:
34440 return ix86_expand_sse_comi_round (d, exp, target);
34441 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34442 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34443 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34444 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34445 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34446 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34447 nargs = 5;
34448 break;
34449 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34450 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34451 nargs_constant = 4;
34452 nargs = 5;
34453 break;
34454 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34455 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34456 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34457 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34458 nargs_constant = 3;
34459 nargs = 5;
34460 break;
34461 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34462 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34463 nargs = 6;
34464 nargs_constant = 4;
34465 break;
34466 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34467 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34468 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34469 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34470 nargs = 6;
34471 nargs_constant = 3;
34472 break;
34473 default:
34474 gcc_unreachable ();
34475 }
34476 gcc_assert (nargs <= ARRAY_SIZE (args));
34477
34478 if (optimize
34479 || target == 0
34480 || GET_MODE (target) != tmode
34481 || !insn_p->operand[0].predicate (target, tmode))
34482 target = gen_reg_rtx (tmode);
34483
34484 for (i = 0; i < nargs; i++)
34485 {
34486 tree arg = CALL_EXPR_ARG (exp, i);
34487 rtx op = expand_normal (arg);
34488 enum machine_mode mode = insn_p->operand[i + 1].mode;
34489 bool match = insn_p->operand[i + 1].predicate (op, mode);
34490
34491 if (i == nargs - nargs_constant)
34492 {
34493 if (!match)
34494 {
34495 switch (icode)
34496 {
34497 case CODE_FOR_avx512f_getmantv8df_mask_round:
34498 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34499 case CODE_FOR_avx512f_vgetmantv2df_round:
34500 case CODE_FOR_avx512f_vgetmantv4sf_round:
34501 error ("the immediate argument must be a 4-bit immediate");
34502 return const0_rtx;
34503 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34504 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34505 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34506 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34507 error ("the immediate argument must be a 5-bit immediate");
34508 return const0_rtx;
34509 default:
34510 error ("the immediate argument must be an 8-bit immediate");
34511 return const0_rtx;
34512 }
34513 }
34514 }
34515 else if (i == nargs-1)
34516 {
34517 if (!insn_p->operand[nargs].predicate (op, SImode))
34518 {
34519 error ("incorrect rounding operand");
34520 return const0_rtx;
34521 }
34522
34523 /* If there is no rounding, use the normal version of the pattern. */
34524 if (INTVAL (op) == NO_ROUND)
34525 redundant_embed_rnd = 1;
34526 }
34527 else
34528 {
34529 if (VECTOR_MODE_P (mode))
34530 op = safe_vector_operand (op, mode);
34531
34532 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34533 {
34534 if (optimize || !match)
34535 op = copy_to_mode_reg (mode, op);
34536 }
34537 else
34538 {
34539 op = copy_to_reg (op);
34540 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34541 }
34542 }
34543
34544 args[i].op = op;
34545 args[i].mode = mode;
34546 }
34547
34548 switch (nargs)
34549 {
34550 case 1:
34551 pat = GEN_FCN (icode) (target, args[0].op);
34552 break;
34553 case 2:
34554 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34555 break;
34556 case 3:
34557 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34558 args[2].op);
34559 break;
34560 case 4:
34561 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34562 args[2].op, args[3].op);
34563 break;
34564 case 5:
34565 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34566 args[2].op, args[3].op, args[4].op);
break;
34567 case 6:
34568 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34569 args[2].op, args[3].op, args[4].op,
34570 args[5].op);
34571 break;
34572 default:
34573 gcc_unreachable ();
34574 }
34575
34576 if (!pat)
34577 return 0;
34578
34579 if (redundant_embed_rnd)
34580 pat = ix86_erase_embedded_rounding (pat);
34581
34582 emit_insn (pat);
34583 return target;
34584 }
34585
34586 /* Subroutine of ix86_expand_builtin to take care of special insns
34587 with variable number of operands. */
34588
34589 static rtx
34590 ix86_expand_special_args_builtin (const struct builtin_description *d,
34591 tree exp, rtx target)
34592 {
34593 tree arg;
34594 rtx pat, op;
34595 unsigned int i, nargs, arg_adjust, memory;
34596 bool aligned_mem = false;
34597 struct
34598 {
34599 rtx op;
34600 enum machine_mode mode;
34601 } args[3];
34602 enum insn_code icode = d->icode;
34603 bool last_arg_constant = false;
34604 const struct insn_data_d *insn_p = &insn_data[icode];
34605 enum machine_mode tmode = insn_p->operand[0].mode;
34606 enum { load, store } klass;
34607
34608 switch ((enum ix86_builtin_func_type) d->flag)
34609 {
34610 case VOID_FTYPE_VOID:
34611 emit_insn (GEN_FCN (icode) (target));
34612 return 0;
34613 case VOID_FTYPE_UINT64:
34614 case VOID_FTYPE_UNSIGNED:
34615 nargs = 0;
34616 klass = store;
34617 memory = 0;
34618 break;
34619
34620 case INT_FTYPE_VOID:
34621 case USHORT_FTYPE_VOID:
34622 case UINT64_FTYPE_VOID:
34623 case UNSIGNED_FTYPE_VOID:
34624 nargs = 0;
34625 klass = load;
34626 memory = 0;
34627 break;
34628 case UINT64_FTYPE_PUNSIGNED:
34629 case V2DI_FTYPE_PV2DI:
34630 case V4DI_FTYPE_PV4DI:
34631 case V32QI_FTYPE_PCCHAR:
34632 case V16QI_FTYPE_PCCHAR:
34633 case V8SF_FTYPE_PCV4SF:
34634 case V8SF_FTYPE_PCFLOAT:
34635 case V4SF_FTYPE_PCFLOAT:
34636 case V4DF_FTYPE_PCV2DF:
34637 case V4DF_FTYPE_PCDOUBLE:
34638 case V2DF_FTYPE_PCDOUBLE:
34639 case VOID_FTYPE_PVOID:
34640 case V16SI_FTYPE_PV4SI:
34641 case V16SF_FTYPE_PV4SF:
34642 case V8DI_FTYPE_PV4DI:
34643 case V8DI_FTYPE_PV8DI:
34644 case V8DF_FTYPE_PV4DF:
34645 nargs = 1;
34646 klass = load;
34647 memory = 0;
34648 switch (icode)
34649 {
34650 case CODE_FOR_sse4_1_movntdqa:
34651 case CODE_FOR_avx2_movntdqa:
34652 case CODE_FOR_avx512f_movntdqa:
34653 aligned_mem = true;
34654 break;
34655 default:
34656 break;
34657 }
34658 break;
34659 case VOID_FTYPE_PV2SF_V4SF:
34660 case VOID_FTYPE_PV8DI_V8DI:
34661 case VOID_FTYPE_PV4DI_V4DI:
34662 case VOID_FTYPE_PV2DI_V2DI:
34663 case VOID_FTYPE_PCHAR_V32QI:
34664 case VOID_FTYPE_PCHAR_V16QI:
34665 case VOID_FTYPE_PFLOAT_V16SF:
34666 case VOID_FTYPE_PFLOAT_V8SF:
34667 case VOID_FTYPE_PFLOAT_V4SF:
34668 case VOID_FTYPE_PDOUBLE_V8DF:
34669 case VOID_FTYPE_PDOUBLE_V4DF:
34670 case VOID_FTYPE_PDOUBLE_V2DF:
34671 case VOID_FTYPE_PLONGLONG_LONGLONG:
34672 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34673 case VOID_FTYPE_PINT_INT:
34674 nargs = 1;
34675 klass = store;
34676 /* Reserve memory operand for target. */
34677 memory = ARRAY_SIZE (args);
34678 switch (icode)
34679 {
34680 /* These builtins and instructions require the memory
34681 to be properly aligned. */
34682 case CODE_FOR_avx_movntv4di:
34683 case CODE_FOR_sse2_movntv2di:
34684 case CODE_FOR_avx_movntv8sf:
34685 case CODE_FOR_sse_movntv4sf:
34686 case CODE_FOR_sse4a_vmmovntv4sf:
34687 case CODE_FOR_avx_movntv4df:
34688 case CODE_FOR_sse2_movntv2df:
34689 case CODE_FOR_sse4a_vmmovntv2df:
34690 case CODE_FOR_sse2_movntidi:
34691 case CODE_FOR_sse_movntq:
34692 case CODE_FOR_sse2_movntisi:
34693 case CODE_FOR_avx512f_movntv16sf:
34694 case CODE_FOR_avx512f_movntv8df:
34695 case CODE_FOR_avx512f_movntv8di:
34696 aligned_mem = true;
34697 break;
34698 default:
34699 break;
34700 }
34701 break;
34702 case V4SF_FTYPE_V4SF_PCV2SF:
34703 case V2DF_FTYPE_V2DF_PCDOUBLE:
34704 nargs = 2;
34705 klass = load;
34706 memory = 1;
34707 break;
34708 case V8SF_FTYPE_PCV8SF_V8SI:
34709 case V4DF_FTYPE_PCV4DF_V4DI:
34710 case V4SF_FTYPE_PCV4SF_V4SI:
34711 case V2DF_FTYPE_PCV2DF_V2DI:
34712 case V8SI_FTYPE_PCV8SI_V8SI:
34713 case V4DI_FTYPE_PCV4DI_V4DI:
34714 case V4SI_FTYPE_PCV4SI_V4SI:
34715 case V2DI_FTYPE_PCV2DI_V2DI:
34716 nargs = 2;
34717 klass = load;
34718 memory = 0;
34719 break;
34720 case VOID_FTYPE_PV8DF_V8DF_QI:
34721 case VOID_FTYPE_PV16SF_V16SF_HI:
34722 case VOID_FTYPE_PV8DI_V8DI_QI:
34723 case VOID_FTYPE_PV16SI_V16SI_HI:
34724 switch (icode)
34725 {
34726 /* These builtins and instructions require the memory
34727 to be properly aligned. */
34728 case CODE_FOR_avx512f_storev16sf_mask:
34729 case CODE_FOR_avx512f_storev16si_mask:
34730 case CODE_FOR_avx512f_storev8df_mask:
34731 case CODE_FOR_avx512f_storev8di_mask:
34732 case CODE_FOR_avx512vl_storev8sf_mask:
34733 case CODE_FOR_avx512vl_storev8si_mask:
34734 case CODE_FOR_avx512vl_storev4df_mask:
34735 case CODE_FOR_avx512vl_storev4di_mask:
34736 case CODE_FOR_avx512vl_storev4sf_mask:
34737 case CODE_FOR_avx512vl_storev4si_mask:
34738 case CODE_FOR_avx512vl_storev2df_mask:
34739 case CODE_FOR_avx512vl_storev2di_mask:
34740 aligned_mem = true;
34741 break;
34742 default:
34743 break;
34744 }
34745 /* FALLTHRU */
34746 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34747 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34748 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34749 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34750 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34751 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34752 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34753 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34754 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34755 case VOID_FTYPE_PFLOAT_V4SF_QI:
34756 case VOID_FTYPE_PV8SI_V8DI_QI:
34757 case VOID_FTYPE_PV8HI_V8DI_QI:
34758 case VOID_FTYPE_PV16HI_V16SI_HI:
34759 case VOID_FTYPE_PV16QI_V8DI_QI:
34760 case VOID_FTYPE_PV16QI_V16SI_HI:
34761 nargs = 2;
34762 klass = store;
34763 /* Reserve memory operand for target. */
34764 memory = ARRAY_SIZE (args);
34765 break;
34766 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34767 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34768 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34769 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34770 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34771 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34772 nargs = 3;
34773 klass = load;
34774 memory = 0;
34775 switch (icode)
34776 {
34777 /* These builtins and instructions require the memory
34778 to be properly aligned. */
34779 case CODE_FOR_avx512f_loadv16sf_mask:
34780 case CODE_FOR_avx512f_loadv16si_mask:
34781 case CODE_FOR_avx512f_loadv8df_mask:
34782 case CODE_FOR_avx512f_loadv8di_mask:
34783 case CODE_FOR_avx512vl_loadv8sf_mask:
34784 case CODE_FOR_avx512vl_loadv8si_mask:
34785 case CODE_FOR_avx512vl_loadv4df_mask:
34786 case CODE_FOR_avx512vl_loadv4di_mask:
34787 case CODE_FOR_avx512vl_loadv4sf_mask:
34788 case CODE_FOR_avx512vl_loadv4si_mask:
34789 case CODE_FOR_avx512vl_loadv2df_mask:
34790 case CODE_FOR_avx512vl_loadv2di_mask:
34791 case CODE_FOR_avx512bw_loadv64qi_mask:
34792 case CODE_FOR_avx512vl_loadv32qi_mask:
34793 case CODE_FOR_avx512vl_loadv16qi_mask:
34794 case CODE_FOR_avx512bw_loadv32hi_mask:
34795 case CODE_FOR_avx512vl_loadv16hi_mask:
34796 case CODE_FOR_avx512vl_loadv8hi_mask:
34797 aligned_mem = true;
34798 break;
34799 default:
34800 break;
34801 }
34802 break;
34803 case VOID_FTYPE_UINT_UINT_UINT:
34804 case VOID_FTYPE_UINT64_UINT_UINT:
34805 case UCHAR_FTYPE_UINT_UINT_UINT:
34806 case UCHAR_FTYPE_UINT64_UINT_UINT:
34807 nargs = 3;
34808 klass = load;
34809 memory = ARRAY_SIZE (args);
34810 last_arg_constant = true;
34811 break;
34812 default:
34813 gcc_unreachable ();
34814 }
34815
34816 gcc_assert (nargs <= ARRAY_SIZE (args));
34817
34818 if (klass == store)
34819 {
34820 arg = CALL_EXPR_ARG (exp, 0);
34821 op = expand_normal (arg);
34822 gcc_assert (target == 0);
34823 if (memory)
34824 {
34825 op = ix86_zero_extend_to_Pmode (op);
34826 target = gen_rtx_MEM (tmode, op);
34827 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34828 on it. Try to improve it using get_pointer_alignment,
34829 and if the special builtin is one that requires strict
34830 mode alignment, also from its GET_MODE_ALIGNMENT.
34831 Failure to do so could lead to ix86_legitimate_combined_insn
34832 rejecting all changes to such insns. */
34833 unsigned int align = get_pointer_alignment (arg);
34834 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34835 align = GET_MODE_ALIGNMENT (tmode);
34836 if (MEM_ALIGN (target) < align)
34837 set_mem_align (target, align);
34838 }
34839 else
34840 target = force_reg (tmode, op);
34841 arg_adjust = 1;
34842 }
34843 else
34844 {
34845 arg_adjust = 0;
34846 if (optimize
34847 || target == 0
34848 || !register_operand (target, tmode)
34849 || GET_MODE (target) != tmode)
34850 target = gen_reg_rtx (tmode);
34851 }
34852
34853 for (i = 0; i < nargs; i++)
34854 {
34855 enum machine_mode mode = insn_p->operand[i + 1].mode;
34856 bool match;
34857
34858 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34859 op = expand_normal (arg);
34860 match = insn_p->operand[i + 1].predicate (op, mode);
34861
34862 if (last_arg_constant && (i + 1) == nargs)
34863 {
34864 if (!match)
34865 {
34866 if (icode == CODE_FOR_lwp_lwpvalsi3
34867 || icode == CODE_FOR_lwp_lwpinssi3
34868 || icode == CODE_FOR_lwp_lwpvaldi3
34869 || icode == CODE_FOR_lwp_lwpinsdi3)
34870 error ("the last argument must be a 32-bit immediate");
34871 else
34872 error ("the last argument must be an 8-bit immediate");
34873 return const0_rtx;
34874 }
34875 }
34876 else
34877 {
34878 if (i == memory)
34879 {
34880 /* This must be the memory operand. */
34881 op = ix86_zero_extend_to_Pmode (op);
34882 op = gen_rtx_MEM (mode, op);
34883 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34884 on it. Try to improve it using get_pointer_alignment,
34885 and if the special builtin is one that requires strict
34886 mode alignment, also from its GET_MODE_ALIGNMENT.
34887 Failure to do so could lead to ix86_legitimate_combined_insn
34888 rejecting all changes to such insns. */
34889 unsigned int align = get_pointer_alignment (arg);
34890 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34891 align = GET_MODE_ALIGNMENT (mode);
34892 if (MEM_ALIGN (op) < align)
34893 set_mem_align (op, align);
34894 }
34895 else
34896 {
34897 /* This must be a register. */
34898 if (VECTOR_MODE_P (mode))
34899 op = safe_vector_operand (op, mode);
34900
34901 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34902 op = copy_to_mode_reg (mode, op);
34903 else
34904 {
34905 op = copy_to_reg (op);
34906 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34907 }
34908 }
34909 }
34910
34911 args[i].op = op;
34912 args[i].mode = mode;
34913 }
34914
34915 switch (nargs)
34916 {
34917 case 0:
34918 pat = GEN_FCN (icode) (target);
34919 break;
34920 case 1:
34921 pat = GEN_FCN (icode) (target, args[0].op);
34922 break;
34923 case 2:
34924 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34925 break;
34926 case 3:
34927 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34928 break;
34929 default:
34930 gcc_unreachable ();
34931 }
34932
34933 if (! pat)
34934 return 0;
34935 emit_insn (pat);
34936 return klass == store ? 0 : target;
34937 }
34938
34939 /* Return the integer constant in ARG. Constrain it to be in the range
34940 of the subparts of VEC_TYPE; issue an error if not. */
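/* For example, for a V4SF vector type TYPE_VECTOR_SUBPARTS is 4, so the
   only selectors accepted here are 0 through 3. */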
34941
34942 static int
34943 get_element_number (tree vec_type, tree arg)
34944 {
34945 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
34946
34947 if (!tree_fits_uhwi_p (arg)
34948 || (elt = tree_to_uhwi (arg), elt > max))
34949 {
34950 error ("selector must be an integer constant in the range 0..%wi", max);
34951 return 0;
34952 }
34953
34954 return elt;
34955 }
34956
34957 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34958 ix86_expand_vector_init. We DO have language-level syntax for this, in
34959 the form of (type){ init-list }. Except that since we can't place emms
34960 instructions from inside the compiler, we can't allow the use of MMX
34961 registers unless the user explicitly asks for it. So we do *not* define
34962 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
34963 we have builtins, invoked via mmintrin.h, that give us license to emit
34964 these sorts of instructions. */
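/* (Illustration, with builtin names as expected from mmintrin.h:
   _mm_setr_pi32, for instance, wraps __builtin_ia32_vec_init_v2si, which
   is expanded by the function below.) */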
34965
34966 static rtx
34967 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
34968 {
34969 enum machine_mode tmode = TYPE_MODE (type);
34970 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
34971 int i, n_elt = GET_MODE_NUNITS (tmode);
34972 rtvec v = rtvec_alloc (n_elt);
34973
34974 gcc_assert (VECTOR_MODE_P (tmode));
34975 gcc_assert (call_expr_nargs (exp) == n_elt);
34976
34977 for (i = 0; i < n_elt; ++i)
34978 {
34979 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
34980 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
34981 }
34982
34983 if (!target || !register_operand (target, tmode))
34984 target = gen_reg_rtx (tmode);
34985
34986 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
34987 return target;
34988 }
34989
34990 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34991 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
34992 had a language-level syntax for referencing vector elements. */
34993
34994 static rtx
34995 ix86_expand_vec_ext_builtin (tree exp, rtx target)
34996 {
34997 enum machine_mode tmode, mode0;
34998 tree arg0, arg1;
34999 int elt;
35000 rtx op0;
35001
35002 arg0 = CALL_EXPR_ARG (exp, 0);
35003 arg1 = CALL_EXPR_ARG (exp, 1);
35004
35005 op0 = expand_normal (arg0);
35006 elt = get_element_number (TREE_TYPE (arg0), arg1);
35007
35008 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35009 mode0 = TYPE_MODE (TREE_TYPE (arg0));
35010 gcc_assert (VECTOR_MODE_P (mode0));
35011
35012 op0 = force_reg (mode0, op0);
35013
35014 if (optimize || !target || !register_operand (target, tmode))
35015 target = gen_reg_rtx (tmode);
35016
35017 ix86_expand_vector_extract (true, target, op0, elt);
35018
35019 return target;
35020 }
35021
35022 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35023 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
35024 a language-level syntax for referencing vector elements. */
35025
35026 static rtx
35027 ix86_expand_vec_set_builtin (tree exp)
35028 {
35029 enum machine_mode tmode, mode1;
35030 tree arg0, arg1, arg2;
35031 int elt;
35032 rtx op0, op1, target;
35033
35034 arg0 = CALL_EXPR_ARG (exp, 0);
35035 arg1 = CALL_EXPR_ARG (exp, 1);
35036 arg2 = CALL_EXPR_ARG (exp, 2);
35037
35038 tmode = TYPE_MODE (TREE_TYPE (arg0));
35039 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35040 gcc_assert (VECTOR_MODE_P (tmode));
35041
35042 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
35043 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
35044 elt = get_element_number (TREE_TYPE (arg0), arg2);
35045
35046 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
35047 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
35048
35049 op0 = force_reg (tmode, op0);
35050 op1 = force_reg (mode1, op1);
35051
35052 /* OP0 is the source of these builtin functions and shouldn't be
35053 modified. Create a copy, use it and return it as target. */
35054 target = gen_reg_rtx (tmode);
35055 emit_move_insn (target, op0);
35056 ix86_expand_vector_set (true, target, op1, elt);
35057
35058 return target;
35059 }
35060
35061 /* Expand an expression EXP that calls a built-in function,
35062 with result going to TARGET if that's convenient
35063 (and in mode MODE if that's convenient).
35064 SUBTARGET may be used as the target for computing one of EXP's operands.
35065 IGNORE is nonzero if the value is to be ignored. */
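/* Roughly how a call flows through here: a user-level intrinsic such as
   _mm_add_ps reaches this function as a CALL_EXPR for the corresponding
   __builtin_ia32_* declaration; the switch below handles the irregular
   builtins directly, and everything else is looked up in the bdesc_*
   tables and expanded by one of the ix86_expand_*_builtin helpers above. */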
35066
35067 static rtx
35068 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
35069 enum machine_mode mode, int ignore)
35070 {
35071 const struct builtin_description *d;
35072 size_t i;
35073 enum insn_code icode;
35074 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
35075 tree arg0, arg1, arg2, arg3, arg4;
35076 rtx op0, op1, op2, op3, op4, pat, insn;
35077 enum machine_mode mode0, mode1, mode2, mode3, mode4;
35078 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
35079
35080 /* For CPU builtins that can be folded, fold first and expand the fold. */
35081 switch (fcode)
35082 {
35083 case IX86_BUILTIN_CPU_INIT:
35084 {
35085 /* Make it call __cpu_indicator_init in libgcc. */
35086 tree call_expr, fndecl, type;
35087 type = build_function_type_list (integer_type_node, NULL_TREE);
35088 fndecl = build_fn_decl ("__cpu_indicator_init", type);
35089 call_expr = build_call_expr (fndecl, 0);
35090 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
35091 }
35092 case IX86_BUILTIN_CPU_IS:
35093 case IX86_BUILTIN_CPU_SUPPORTS:
35094 {
35095 tree arg0 = CALL_EXPR_ARG (exp, 0);
35096 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
35097 gcc_assert (fold_expr != NULL_TREE);
35098 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
35099 }
35100 }
35101
35102 /* Determine whether the builtin function is available under the current ISA.
35103 Originally the builtin was not created if it wasn't applicable to the
35104 current ISA based on the command line switches. With function specific
35105 options, we need to check in the context of the function making the call
35106 whether it is supported. */
35107 if (ix86_builtins_isa[fcode].isa
35108 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
35109 {
35110 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
35111 NULL, (enum fpmath_unit) 0, false);
35112
35113 if (!opts)
35114 error ("%qE needs unknown isa option", fndecl);
35115 else
35116 {
35117 gcc_assert (opts != NULL);
35118 error ("%qE needs isa option %s", fndecl, opts);
35119 free (opts);
35120 }
35121 return const0_rtx;
35122 }
35123
35124 switch (fcode)
35125 {
35126 case IX86_BUILTIN_MASKMOVQ:
35127 case IX86_BUILTIN_MASKMOVDQU:
35128 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35129 ? CODE_FOR_mmx_maskmovq
35130 : CODE_FOR_sse2_maskmovdqu);
35131 /* Note the arg order is different from the operand order. */
35132 arg1 = CALL_EXPR_ARG (exp, 0);
35133 arg2 = CALL_EXPR_ARG (exp, 1);
35134 arg0 = CALL_EXPR_ARG (exp, 2);
35135 op0 = expand_normal (arg0);
35136 op1 = expand_normal (arg1);
35137 op2 = expand_normal (arg2);
35138 mode0 = insn_data[icode].operand[0].mode;
35139 mode1 = insn_data[icode].operand[1].mode;
35140 mode2 = insn_data[icode].operand[2].mode;
35141
35142 op0 = ix86_zero_extend_to_Pmode (op0);
35143 op0 = gen_rtx_MEM (mode1, op0);
35144
35145 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35146 op0 = copy_to_mode_reg (mode0, op0);
35147 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35148 op1 = copy_to_mode_reg (mode1, op1);
35149 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35150 op2 = copy_to_mode_reg (mode2, op2);
35151 pat = GEN_FCN (icode) (op0, op1, op2);
35152 if (! pat)
35153 return 0;
35154 emit_insn (pat);
35155 return 0;
35156
35157 case IX86_BUILTIN_LDMXCSR:
35158 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35159 target = assign_386_stack_local (SImode, SLOT_TEMP);
35160 emit_move_insn (target, op0);
35161 emit_insn (gen_sse_ldmxcsr (target));
35162 return 0;
35163
35164 case IX86_BUILTIN_STMXCSR:
35165 target = assign_386_stack_local (SImode, SLOT_TEMP);
35166 emit_insn (gen_sse_stmxcsr (target));
35167 return copy_to_mode_reg (SImode, target);
35168
35169 case IX86_BUILTIN_CLFLUSH:
35170 arg0 = CALL_EXPR_ARG (exp, 0);
35171 op0 = expand_normal (arg0);
35172 icode = CODE_FOR_sse2_clflush;
35173 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35174 op0 = ix86_zero_extend_to_Pmode (op0);
35175
35176 emit_insn (gen_sse2_clflush (op0));
35177 return 0;
35178
35179 case IX86_BUILTIN_CLFLUSHOPT:
35180 arg0 = CALL_EXPR_ARG (exp, 0);
35181 op0 = expand_normal (arg0);
35182 icode = CODE_FOR_clflushopt;
35183 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35184 op0 = ix86_zero_extend_to_Pmode (op0);
35185
35186 emit_insn (gen_clflushopt (op0));
35187 return 0;
35188
35189 case IX86_BUILTIN_MONITOR:
35190 arg0 = CALL_EXPR_ARG (exp, 0);
35191 arg1 = CALL_EXPR_ARG (exp, 1);
35192 arg2 = CALL_EXPR_ARG (exp, 2);
35193 op0 = expand_normal (arg0);
35194 op1 = expand_normal (arg1);
35195 op2 = expand_normal (arg2);
35196 if (!REG_P (op0))
35197 op0 = ix86_zero_extend_to_Pmode (op0);
35198 if (!REG_P (op1))
35199 op1 = copy_to_mode_reg (SImode, op1);
35200 if (!REG_P (op2))
35201 op2 = copy_to_mode_reg (SImode, op2);
35202 emit_insn (ix86_gen_monitor (op0, op1, op2));
35203 return 0;
35204
35205 case IX86_BUILTIN_MWAIT:
35206 arg0 = CALL_EXPR_ARG (exp, 0);
35207 arg1 = CALL_EXPR_ARG (exp, 1);
35208 op0 = expand_normal (arg0);
35209 op1 = expand_normal (arg1);
35210 if (!REG_P (op0))
35211 op0 = copy_to_mode_reg (SImode, op0);
35212 if (!REG_P (op1))
35213 op1 = copy_to_mode_reg (SImode, op1);
35214 emit_insn (gen_sse3_mwait (op0, op1));
35215 return 0;
35216
35217 case IX86_BUILTIN_VEC_INIT_V2SI:
35218 case IX86_BUILTIN_VEC_INIT_V4HI:
35219 case IX86_BUILTIN_VEC_INIT_V8QI:
35220 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35221
35222 case IX86_BUILTIN_VEC_EXT_V2DF:
35223 case IX86_BUILTIN_VEC_EXT_V2DI:
35224 case IX86_BUILTIN_VEC_EXT_V4SF:
35225 case IX86_BUILTIN_VEC_EXT_V4SI:
35226 case IX86_BUILTIN_VEC_EXT_V8HI:
35227 case IX86_BUILTIN_VEC_EXT_V2SI:
35228 case IX86_BUILTIN_VEC_EXT_V4HI:
35229 case IX86_BUILTIN_VEC_EXT_V16QI:
35230 return ix86_expand_vec_ext_builtin (exp, target);
35231
35232 case IX86_BUILTIN_VEC_SET_V2DI:
35233 case IX86_BUILTIN_VEC_SET_V4SF:
35234 case IX86_BUILTIN_VEC_SET_V4SI:
35235 case IX86_BUILTIN_VEC_SET_V8HI:
35236 case IX86_BUILTIN_VEC_SET_V4HI:
35237 case IX86_BUILTIN_VEC_SET_V16QI:
35238 return ix86_expand_vec_set_builtin (exp);
35239
35240 case IX86_BUILTIN_INFQ:
35241 case IX86_BUILTIN_HUGE_VALQ:
35242 {
35243 REAL_VALUE_TYPE inf;
35244 rtx tmp;
35245
35246 real_inf (&inf);
35247 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35248
35249 tmp = validize_mem (force_const_mem (mode, tmp));
35250
35251 if (target == 0)
35252 target = gen_reg_rtx (mode);
35253
35254 emit_move_insn (target, tmp);
35255 return target;
35256 }
35257
35258 case IX86_BUILTIN_RDPMC:
35259 case IX86_BUILTIN_RDTSC:
35260 case IX86_BUILTIN_RDTSCP:
35261
35262 op0 = gen_reg_rtx (DImode);
35263 op1 = gen_reg_rtx (DImode);
35264
35265 if (fcode == IX86_BUILTIN_RDPMC)
35266 {
35267 arg0 = CALL_EXPR_ARG (exp, 0);
35268 op2 = expand_normal (arg0);
35269 if (!register_operand (op2, SImode))
35270 op2 = copy_to_mode_reg (SImode, op2);
35271
35272 insn = (TARGET_64BIT
35273 ? gen_rdpmc_rex64 (op0, op1, op2)
35274 : gen_rdpmc (op0, op2));
35275 emit_insn (insn);
35276 }
35277 else if (fcode == IX86_BUILTIN_RDTSC)
35278 {
35279 insn = (TARGET_64BIT
35280 ? gen_rdtsc_rex64 (op0, op1)
35281 : gen_rdtsc (op0));
35282 emit_insn (insn);
35283 }
35284 else
35285 {
35286 op2 = gen_reg_rtx (SImode);
35287
35288 insn = (TARGET_64BIT
35289 ? gen_rdtscp_rex64 (op0, op1, op2)
35290 : gen_rdtscp (op0, op2));
35291 emit_insn (insn);
35292
35293 arg0 = CALL_EXPR_ARG (exp, 0);
35294 op4 = expand_normal (arg0);
35295 if (!address_operand (op4, VOIDmode))
35296 {
35297 op4 = convert_memory_address (Pmode, op4);
35298 op4 = copy_addr_to_reg (op4);
35299 }
35300 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35301 }
35302
35303 if (target == 0)
35304 {
35305 /* mode is VOIDmode if __builtin_rd* has been called
35306 without lhs. */
35307 if (mode == VOIDmode)
35308 return target;
35309 target = gen_reg_rtx (mode);
35310 }
35311
35312 if (TARGET_64BIT)
35313 {
35314 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35315 op1, 1, OPTAB_DIRECT);
35316 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35317 op0, 1, OPTAB_DIRECT);
35318 }
35319
35320 emit_move_insn (target, op0);
35321 return target;
35322
35323 case IX86_BUILTIN_FXSAVE:
35324 case IX86_BUILTIN_FXRSTOR:
35325 case IX86_BUILTIN_FXSAVE64:
35326 case IX86_BUILTIN_FXRSTOR64:
35327 case IX86_BUILTIN_FNSTENV:
35328 case IX86_BUILTIN_FLDENV:
35329 mode0 = BLKmode;
35330 switch (fcode)
35331 {
35332 case IX86_BUILTIN_FXSAVE:
35333 icode = CODE_FOR_fxsave;
35334 break;
35335 case IX86_BUILTIN_FXRSTOR:
35336 icode = CODE_FOR_fxrstor;
35337 break;
35338 case IX86_BUILTIN_FXSAVE64:
35339 icode = CODE_FOR_fxsave64;
35340 break;
35341 case IX86_BUILTIN_FXRSTOR64:
35342 icode = CODE_FOR_fxrstor64;
35343 break;
35344 case IX86_BUILTIN_FNSTENV:
35345 icode = CODE_FOR_fnstenv;
35346 break;
35347 case IX86_BUILTIN_FLDENV:
35348 icode = CODE_FOR_fldenv;
35349 break;
35350 default:
35351 gcc_unreachable ();
35352 }
35353
35354 arg0 = CALL_EXPR_ARG (exp, 0);
35355 op0 = expand_normal (arg0);
35356
35357 if (!address_operand (op0, VOIDmode))
35358 {
35359 op0 = convert_memory_address (Pmode, op0);
35360 op0 = copy_addr_to_reg (op0);
35361 }
35362 op0 = gen_rtx_MEM (mode0, op0);
35363
35364 pat = GEN_FCN (icode) (op0);
35365 if (pat)
35366 emit_insn (pat);
35367 return 0;
35368
35369 case IX86_BUILTIN_XSAVE:
35370 case IX86_BUILTIN_XRSTOR:
35371 case IX86_BUILTIN_XSAVE64:
35372 case IX86_BUILTIN_XRSTOR64:
35373 case IX86_BUILTIN_XSAVEOPT:
35374 case IX86_BUILTIN_XSAVEOPT64:
35375 case IX86_BUILTIN_XSAVES:
35376 case IX86_BUILTIN_XRSTORS:
35377 case IX86_BUILTIN_XSAVES64:
35378 case IX86_BUILTIN_XRSTORS64:
35379 case IX86_BUILTIN_XSAVEC:
35380 case IX86_BUILTIN_XSAVEC64:
35381 arg0 = CALL_EXPR_ARG (exp, 0);
35382 arg1 = CALL_EXPR_ARG (exp, 1);
35383 op0 = expand_normal (arg0);
35384 op1 = expand_normal (arg1);
35385
35386 if (!address_operand (op0, VOIDmode))
35387 {
35388 op0 = convert_memory_address (Pmode, op0);
35389 op0 = copy_addr_to_reg (op0);
35390 }
35391 op0 = gen_rtx_MEM (BLKmode, op0);
35392
35393 op1 = force_reg (DImode, op1);
35394
35395 if (TARGET_64BIT)
35396 {
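/* The xsave family takes the 64-bit requested-feature mask in EDX:EAX,
   so split OP1 into its two 32-bit halves before using the *_rex64
   patterns. */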
35397 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35398 NULL, 1, OPTAB_DIRECT);
35399 switch (fcode)
35400 {
35401 case IX86_BUILTIN_XSAVE:
35402 icode = CODE_FOR_xsave_rex64;
35403 break;
35404 case IX86_BUILTIN_XRSTOR:
35405 icode = CODE_FOR_xrstor_rex64;
35406 break;
35407 case IX86_BUILTIN_XSAVE64:
35408 icode = CODE_FOR_xsave64;
35409 break;
35410 case IX86_BUILTIN_XRSTOR64:
35411 icode = CODE_FOR_xrstor64;
35412 break;
35413 case IX86_BUILTIN_XSAVEOPT:
35414 icode = CODE_FOR_xsaveopt_rex64;
35415 break;
35416 case IX86_BUILTIN_XSAVEOPT64:
35417 icode = CODE_FOR_xsaveopt64;
35418 break;
35419 case IX86_BUILTIN_XSAVES:
35420 icode = CODE_FOR_xsaves_rex64;
35421 break;
35422 case IX86_BUILTIN_XRSTORS:
35423 icode = CODE_FOR_xrstors_rex64;
35424 break;
35425 case IX86_BUILTIN_XSAVES64:
35426 icode = CODE_FOR_xsaves64;
35427 break;
35428 case IX86_BUILTIN_XRSTORS64:
35429 icode = CODE_FOR_xrstors64;
35430 break;
35431 case IX86_BUILTIN_XSAVEC:
35432 icode = CODE_FOR_xsavec_rex64;
35433 break;
35434 case IX86_BUILTIN_XSAVEC64:
35435 icode = CODE_FOR_xsavec64;
35436 break;
35437 default:
35438 gcc_unreachable ();
35439 }
35440
35441 op2 = gen_lowpart (SImode, op2);
35442 op1 = gen_lowpart (SImode, op1);
35443 pat = GEN_FCN (icode) (op0, op1, op2);
35444 }
35445 else
35446 {
35447 switch (fcode)
35448 {
35449 case IX86_BUILTIN_XSAVE:
35450 icode = CODE_FOR_xsave;
35451 break;
35452 case IX86_BUILTIN_XRSTOR:
35453 icode = CODE_FOR_xrstor;
35454 break;
35455 case IX86_BUILTIN_XSAVEOPT:
35456 icode = CODE_FOR_xsaveopt;
35457 break;
35458 case IX86_BUILTIN_XSAVES:
35459 icode = CODE_FOR_xsaves;
35460 break;
35461 case IX86_BUILTIN_XRSTORS:
35462 icode = CODE_FOR_xrstors;
35463 break;
35464 case IX86_BUILTIN_XSAVEC:
35465 icode = CODE_FOR_xsavec;
35466 break;
35467 default:
35468 gcc_unreachable ();
35469 }
35470 pat = GEN_FCN (icode) (op0, op1);
35471 }
35472
35473 if (pat)
35474 emit_insn (pat);
35475 return 0;
35476
35477 case IX86_BUILTIN_LLWPCB:
35478 arg0 = CALL_EXPR_ARG (exp, 0);
35479 op0 = expand_normal (arg0);
35480 icode = CODE_FOR_lwp_llwpcb;
35481 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35482 op0 = ix86_zero_extend_to_Pmode (op0);
35483 emit_insn (gen_lwp_llwpcb (op0));
35484 return 0;
35485
35486 case IX86_BUILTIN_SLWPCB:
35487 icode = CODE_FOR_lwp_slwpcb;
35488 if (!target
35489 || !insn_data[icode].operand[0].predicate (target, Pmode))
35490 target = gen_reg_rtx (Pmode);
35491 emit_insn (gen_lwp_slwpcb (target));
35492 return target;
35493
35494 case IX86_BUILTIN_BEXTRI32:
35495 case IX86_BUILTIN_BEXTRI64:
35496 arg0 = CALL_EXPR_ARG (exp, 0);
35497 arg1 = CALL_EXPR_ARG (exp, 1);
35498 op0 = expand_normal (arg0);
35499 op1 = expand_normal (arg1);
35500 icode = (fcode == IX86_BUILTIN_BEXTRI32
35501 ? CODE_FOR_tbm_bextri_si
35502 : CODE_FOR_tbm_bextri_di);
35503 if (!CONST_INT_P (op1))
35504 {
35505 error ("last argument must be an immediate");
35506 return const0_rtx;
35507 }
35508 else
35509 {
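/* The BEXTRI control immediate packs the starting bit position in bits
   7:0 and the field length in bits 15:8; the tbm_bextri patterns want the
   two fields as separate operands, so unpack them here. */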
35510 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35511 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35512 op1 = GEN_INT (length);
35513 op2 = GEN_INT (lsb_index);
35514 pat = GEN_FCN (icode) (target, op0, op1, op2);
35515 if (pat)
35516 emit_insn (pat);
35517 return target;
35518 }
35519
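/* The RDRAND*_STEP builtins back the _rdrand{16,32,64}_step intrinsics
   (names as expected from immintrin.h): the random value is stored through
   the pointer argument, and the carry flag -- materialized by a
   conditional move below -- becomes the 0/1 return value. */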
35520 case IX86_BUILTIN_RDRAND16_STEP:
35521 icode = CODE_FOR_rdrandhi_1;
35522 mode0 = HImode;
35523 goto rdrand_step;
35524
35525 case IX86_BUILTIN_RDRAND32_STEP:
35526 icode = CODE_FOR_rdrandsi_1;
35527 mode0 = SImode;
35528 goto rdrand_step;
35529
35530 case IX86_BUILTIN_RDRAND64_STEP:
35531 icode = CODE_FOR_rdranddi_1;
35532 mode0 = DImode;
35533
35534 rdrand_step:
35535 op0 = gen_reg_rtx (mode0);
35536 emit_insn (GEN_FCN (icode) (op0));
35537
35538 arg0 = CALL_EXPR_ARG (exp, 0);
35539 op1 = expand_normal (arg0);
35540 if (!address_operand (op1, VOIDmode))
35541 {
35542 op1 = convert_memory_address (Pmode, op1);
35543 op1 = copy_addr_to_reg (op1);
35544 }
35545 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35546
35547 op1 = gen_reg_rtx (SImode);
35548 emit_move_insn (op1, CONST1_RTX (SImode));
35549
35550 /* Emit SImode conditional move: select the (zero-extended) rdrand
destination when the carry flag is clear -- rdrand is documented to clear
its destination on failure -- and the constant 1 when it is set, so the
builtin returns 1 on success and 0 otherwise. */
35551 if (mode0 == HImode)
35552 {
35553 op2 = gen_reg_rtx (SImode);
35554 emit_insn (gen_zero_extendhisi2 (op2, op0));
35555 }
35556 else if (mode0 == SImode)
35557 op2 = op0;
35558 else
35559 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35560
35561 if (target == 0
35562 || !register_operand (target, SImode))
35563 target = gen_reg_rtx (SImode);
35564
35565 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35566 const0_rtx);
35567 emit_insn (gen_rtx_SET (VOIDmode, target,
35568 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35569 return target;
35570
35571 case IX86_BUILTIN_RDSEED16_STEP:
35572 icode = CODE_FOR_rdseedhi_1;
35573 mode0 = HImode;
35574 goto rdseed_step;
35575
35576 case IX86_BUILTIN_RDSEED32_STEP:
35577 icode = CODE_FOR_rdseedsi_1;
35578 mode0 = SImode;
35579 goto rdseed_step;
35580
35581 case IX86_BUILTIN_RDSEED64_STEP:
35582 icode = CODE_FOR_rdseeddi_1;
35583 mode0 = DImode;
35584
35585 rdseed_step:
35586 op0 = gen_reg_rtx (mode0);
35587 emit_insn (GEN_FCN (icode) (op0));
35588
35589 arg0 = CALL_EXPR_ARG (exp, 0);
35590 op1 = expand_normal (arg0);
35591 if (!address_operand (op1, VOIDmode))
35592 {
35593 op1 = convert_memory_address (Pmode, op1);
35594 op1 = copy_addr_to_reg (op1);
35595 }
35596 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35597
35598 op2 = gen_reg_rtx (QImode);
35599
35600 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35601 const0_rtx);
35602 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35603
35604 if (target == 0
35605 || !register_operand (target, SImode))
35606 target = gen_reg_rtx (SImode);
35607
35608 emit_insn (gen_zero_extendqisi2 (target, op2));
35609 return target;
35610
35611 case IX86_BUILTIN_SBB32:
35612 icode = CODE_FOR_subsi3_carry;
35613 mode0 = SImode;
35614 goto addcarryx;
35615
35616 case IX86_BUILTIN_SBB64:
35617 icode = CODE_FOR_subdi3_carry;
35618 mode0 = DImode;
35619 goto addcarryx;
35620
35621 case IX86_BUILTIN_ADDCARRYX32:
35622 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35623 mode0 = SImode;
35624 goto addcarryx;
35625
35626 case IX86_BUILTIN_ADDCARRYX64:
35627 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35628 mode0 = DImode;
35629
35630 addcarryx:
35631 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35632 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35633 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35634 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35635
35636 op0 = gen_reg_rtx (QImode);
35637
35638 /* Generate CF from the input operand: adding 0xff to the byte sets
the carry flag exactly when c_in is nonzero. */
35639 op1 = expand_normal (arg0);
35640 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
35641 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
35642
35643 /* Generate the carry-propagating add (or subtract, for the SBB
builtins) that combines X, Y and CF. */
35644 op2 = expand_normal (arg1);
35645 op3 = expand_normal (arg2);
35646
35647 if (!REG_P (op2))
35648 op2 = copy_to_mode_reg (mode0, op2);
35649 if (!REG_P (op3))
35650 op3 = copy_to_mode_reg (mode0, op3);
35651
35652 op0 = gen_reg_rtx (mode0);
35653
35654 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35655 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35656 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35657
35658 /* Store the result. */
35659 op4 = expand_normal (arg3);
35660 if (!address_operand (op4, VOIDmode))
35661 {
35662 op4 = convert_memory_address (Pmode, op4);
35663 op4 = copy_addr_to_reg (op4);
35664 }
35665 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35666
35667 /* Return current CF value. */
35668 if (target == 0)
35669 target = gen_reg_rtx (QImode);
35670
35671 PUT_MODE (pat, QImode);
35672 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35673 return target;
35674
35675 case IX86_BUILTIN_READ_FLAGS:
35676 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35677
35678 if (optimize
35679 || target == NULL_RTX
35680 || !nonimmediate_operand (target, word_mode)
35681 || GET_MODE (target) != word_mode)
35682 target = gen_reg_rtx (word_mode);
35683
35684 emit_insn (gen_pop (target));
35685 return target;
35686
35687 case IX86_BUILTIN_WRITE_FLAGS:
35688
35689 arg0 = CALL_EXPR_ARG (exp, 0);
35690 op0 = expand_normal (arg0);
35691 if (!general_no_elim_operand (op0, word_mode))
35692 op0 = copy_to_mode_reg (word_mode, op0);
35693
35694 emit_insn (gen_push (op0));
35695 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35696 return 0;
35697
35698 case IX86_BUILTIN_KORTESTC16:
35699 icode = CODE_FOR_kortestchi;
35700 mode0 = HImode;
35701 mode1 = CCCmode;
35702 goto kortest;
35703
35704 case IX86_BUILTIN_KORTESTZ16:
35705 icode = CODE_FOR_kortestzhi;
35706 mode0 = HImode;
35707 mode1 = CCZmode;
35708
35709 kortest:
35710 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35711 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35712 op0 = expand_normal (arg0);
35713 op1 = expand_normal (arg1);
35714
35715 op0 = copy_to_reg (op0);
35716 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35717 op1 = copy_to_reg (op1);
35718 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35719
35720 target = gen_reg_rtx (QImode);
35721 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
35722
35723 /* Emit kortest. */
35724 emit_insn (GEN_FCN (icode) (op0, op1));
35725 /* And use setcc to return result from flags. */
35726 ix86_expand_setcc (target, EQ,
35727 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35728 return target;
35729
35730 case IX86_BUILTIN_GATHERSIV2DF:
35731 icode = CODE_FOR_avx2_gathersiv2df;
35732 goto gather_gen;
35733 case IX86_BUILTIN_GATHERSIV4DF:
35734 icode = CODE_FOR_avx2_gathersiv4df;
35735 goto gather_gen;
35736 case IX86_BUILTIN_GATHERDIV2DF:
35737 icode = CODE_FOR_avx2_gatherdiv2df;
35738 goto gather_gen;
35739 case IX86_BUILTIN_GATHERDIV4DF:
35740 icode = CODE_FOR_avx2_gatherdiv4df;
35741 goto gather_gen;
35742 case IX86_BUILTIN_GATHERSIV4SF:
35743 icode = CODE_FOR_avx2_gathersiv4sf;
35744 goto gather_gen;
35745 case IX86_BUILTIN_GATHERSIV8SF:
35746 icode = CODE_FOR_avx2_gathersiv8sf;
35747 goto gather_gen;
35748 case IX86_BUILTIN_GATHERDIV4SF:
35749 icode = CODE_FOR_avx2_gatherdiv4sf;
35750 goto gather_gen;
35751 case IX86_BUILTIN_GATHERDIV8SF:
35752 icode = CODE_FOR_avx2_gatherdiv8sf;
35753 goto gather_gen;
35754 case IX86_BUILTIN_GATHERSIV2DI:
35755 icode = CODE_FOR_avx2_gathersiv2di;
35756 goto gather_gen;
35757 case IX86_BUILTIN_GATHERSIV4DI:
35758 icode = CODE_FOR_avx2_gathersiv4di;
35759 goto gather_gen;
35760 case IX86_BUILTIN_GATHERDIV2DI:
35761 icode = CODE_FOR_avx2_gatherdiv2di;
35762 goto gather_gen;
35763 case IX86_BUILTIN_GATHERDIV4DI:
35764 icode = CODE_FOR_avx2_gatherdiv4di;
35765 goto gather_gen;
35766 case IX86_BUILTIN_GATHERSIV4SI:
35767 icode = CODE_FOR_avx2_gathersiv4si;
35768 goto gather_gen;
35769 case IX86_BUILTIN_GATHERSIV8SI:
35770 icode = CODE_FOR_avx2_gathersiv8si;
35771 goto gather_gen;
35772 case IX86_BUILTIN_GATHERDIV4SI:
35773 icode = CODE_FOR_avx2_gatherdiv4si;
35774 goto gather_gen;
35775 case IX86_BUILTIN_GATHERDIV8SI:
35776 icode = CODE_FOR_avx2_gatherdiv8si;
35777 goto gather_gen;
35778 case IX86_BUILTIN_GATHERALTSIV4DF:
35779 icode = CODE_FOR_avx2_gathersiv4df;
35780 goto gather_gen;
35781 case IX86_BUILTIN_GATHERALTDIV8SF:
35782 icode = CODE_FOR_avx2_gatherdiv8sf;
35783 goto gather_gen;
35784 case IX86_BUILTIN_GATHERALTSIV4DI:
35785 icode = CODE_FOR_avx2_gathersiv4di;
35786 goto gather_gen;
35787 case IX86_BUILTIN_GATHERALTDIV8SI:
35788 icode = CODE_FOR_avx2_gatherdiv8si;
35789 goto gather_gen;
35790 case IX86_BUILTIN_GATHER3SIV16SF:
35791 icode = CODE_FOR_avx512f_gathersiv16sf;
35792 goto gather_gen;
35793 case IX86_BUILTIN_GATHER3SIV8DF:
35794 icode = CODE_FOR_avx512f_gathersiv8df;
35795 goto gather_gen;
35796 case IX86_BUILTIN_GATHER3DIV16SF:
35797 icode = CODE_FOR_avx512f_gatherdiv16sf;
35798 goto gather_gen;
35799 case IX86_BUILTIN_GATHER3DIV8DF:
35800 icode = CODE_FOR_avx512f_gatherdiv8df;
35801 goto gather_gen;
35802 case IX86_BUILTIN_GATHER3SIV16SI:
35803 icode = CODE_FOR_avx512f_gathersiv16si;
35804 goto gather_gen;
35805 case IX86_BUILTIN_GATHER3SIV8DI:
35806 icode = CODE_FOR_avx512f_gathersiv8di;
35807 goto gather_gen;
35808 case IX86_BUILTIN_GATHER3DIV16SI:
35809 icode = CODE_FOR_avx512f_gatherdiv16si;
35810 goto gather_gen;
35811 case IX86_BUILTIN_GATHER3DIV8DI:
35812 icode = CODE_FOR_avx512f_gatherdiv8di;
35813 goto gather_gen;
35814 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35815 icode = CODE_FOR_avx512f_gathersiv8df;
35816 goto gather_gen;
35817 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35818 icode = CODE_FOR_avx512f_gatherdiv16sf;
35819 goto gather_gen;
35820 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35821 icode = CODE_FOR_avx512f_gathersiv8di;
35822 goto gather_gen;
35823 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35824 icode = CODE_FOR_avx512f_gatherdiv16si;
35825 goto gather_gen;
35826 case IX86_BUILTIN_SCATTERSIV16SF:
35827 icode = CODE_FOR_avx512f_scattersiv16sf;
35828 goto scatter_gen;
35829 case IX86_BUILTIN_SCATTERSIV8DF:
35830 icode = CODE_FOR_avx512f_scattersiv8df;
35831 goto scatter_gen;
35832 case IX86_BUILTIN_SCATTERDIV16SF:
35833 icode = CODE_FOR_avx512f_scatterdiv16sf;
35834 goto scatter_gen;
35835 case IX86_BUILTIN_SCATTERDIV8DF:
35836 icode = CODE_FOR_avx512f_scatterdiv8df;
35837 goto scatter_gen;
35838 case IX86_BUILTIN_SCATTERSIV16SI:
35839 icode = CODE_FOR_avx512f_scattersiv16si;
35840 goto scatter_gen;
35841 case IX86_BUILTIN_SCATTERSIV8DI:
35842 icode = CODE_FOR_avx512f_scattersiv8di;
35843 goto scatter_gen;
35844 case IX86_BUILTIN_SCATTERDIV16SI:
35845 icode = CODE_FOR_avx512f_scatterdiv16si;
35846 goto scatter_gen;
35847 case IX86_BUILTIN_SCATTERDIV8DI:
35848 icode = CODE_FOR_avx512f_scatterdiv8di;
35849 goto scatter_gen;
35850
35851 case IX86_BUILTIN_GATHERPFDPD:
35852 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
35853 goto vec_prefetch_gen;
35854 case IX86_BUILTIN_GATHERPFDPS:
35855 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
35856 goto vec_prefetch_gen;
35857 case IX86_BUILTIN_GATHERPFQPD:
35858 icode = CODE_FOR_avx512pf_gatherpfv8didf;
35859 goto vec_prefetch_gen;
35860 case IX86_BUILTIN_GATHERPFQPS:
35861 icode = CODE_FOR_avx512pf_gatherpfv8disf;
35862 goto vec_prefetch_gen;
35863 case IX86_BUILTIN_SCATTERPFDPD:
35864 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
35865 goto vec_prefetch_gen;
35866 case IX86_BUILTIN_SCATTERPFDPS:
35867 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
35868 goto vec_prefetch_gen;
35869 case IX86_BUILTIN_SCATTERPFQPD:
35870 icode = CODE_FOR_avx512pf_scatterpfv8didf;
35871 goto vec_prefetch_gen;
35872 case IX86_BUILTIN_SCATTERPFQPS:
35873 icode = CODE_FOR_avx512pf_scatterpfv8disf;
35874 goto vec_prefetch_gen;
35875
35876 gather_gen:
35877 rtx half;
35878 rtx (*gen) (rtx, rtx);
35879
35880 arg0 = CALL_EXPR_ARG (exp, 0);
35881 arg1 = CALL_EXPR_ARG (exp, 1);
35882 arg2 = CALL_EXPR_ARG (exp, 2);
35883 arg3 = CALL_EXPR_ARG (exp, 3);
35884 arg4 = CALL_EXPR_ARG (exp, 4);
35885 op0 = expand_normal (arg0);
35886 op1 = expand_normal (arg1);
35887 op2 = expand_normal (arg2);
35888 op3 = expand_normal (arg3);
35889 op4 = expand_normal (arg4);
35890 /* Note the arg order is different from the operand order. */
35891 mode0 = insn_data[icode].operand[1].mode;
35892 mode2 = insn_data[icode].operand[3].mode;
35893 mode3 = insn_data[icode].operand[4].mode;
35894 mode4 = insn_data[icode].operand[5].mode;
35895
35896 if (target == NULL_RTX
35897 || GET_MODE (target) != insn_data[icode].operand[0].mode
35898 || !insn_data[icode].operand[0].predicate (target,
35899 GET_MODE (target)))
35900 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
35901 else
35902 subtarget = target;
35903
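 /* The "ALT" gather variants pair a full-width operand with indexes of
    the other width, e.g. a V16SI index vector used to gather only eight
    DFmode elements, or a V16SF source/mask used with only eight DImode
    indexes.  Extract the low half of the over-wide operand before
    emitting the gather pattern.  */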
35904 switch (fcode)
35905 {
35906 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35907 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35908 half = gen_reg_rtx (V8SImode);
35909 if (!nonimmediate_operand (op2, V16SImode))
35910 op2 = copy_to_mode_reg (V16SImode, op2);
35911 emit_insn (gen_vec_extract_lo_v16si (half, op2));
35912 op2 = half;
35913 break;
35914 case IX86_BUILTIN_GATHERALTSIV4DF:
35915 case IX86_BUILTIN_GATHERALTSIV4DI:
35916 half = gen_reg_rtx (V4SImode);
35917 if (!nonimmediate_operand (op2, V8SImode))
35918 op2 = copy_to_mode_reg (V8SImode, op2);
35919 emit_insn (gen_vec_extract_lo_v8si (half, op2));
35920 op2 = half;
35921 break;
35922 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35923 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35924 half = gen_reg_rtx (mode0);
35925 if (mode0 == V8SFmode)
35926 gen = gen_vec_extract_lo_v16sf;
35927 else
35928 gen = gen_vec_extract_lo_v16si;
35929 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35930 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35931 emit_insn (gen (half, op0));
35932 op0 = half;
35933 if (GET_MODE (op3) != VOIDmode)
35934 {
35935 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35936 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35937 emit_insn (gen (half, op3));
35938 op3 = half;
35939 }
35940 break;
35941 case IX86_BUILTIN_GATHERALTDIV8SF:
35942 case IX86_BUILTIN_GATHERALTDIV8SI:
35943 half = gen_reg_rtx (mode0);
35944 if (mode0 == V4SFmode)
35945 gen = gen_vec_extract_lo_v8sf;
35946 else
35947 gen = gen_vec_extract_lo_v8si;
35948 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35949 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35950 emit_insn (gen (half, op0));
35951 op0 = half;
35952 if (GET_MODE (op3) != VOIDmode)
35953 {
35954 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35955 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35956 emit_insn (gen (half, op3));
35957 op3 = half;
35958 }
35959 break;
35960 default:
35961 break;
35962 }
35963
35964 /* Force the memory operand to use only a base register here.  We
35965 don't want to do this for the memory operands of other builtin
35966 functions. */
35967 op1 = ix86_zero_extend_to_Pmode (op1);
35968
35969 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35970 op0 = copy_to_mode_reg (mode0, op0);
35971 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
35972 op1 = copy_to_mode_reg (Pmode, op1);
35973 if (!insn_data[icode].operand[3].predicate (op2, mode2))
35974 op2 = copy_to_mode_reg (mode2, op2);
35975 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
35976 {
35977 if (!insn_data[icode].operand[4].predicate (op3, mode3))
35978 op3 = copy_to_mode_reg (mode3, op3);
35979 }
35980 else
35981 {
35982 op3 = copy_to_reg (op3);
35983 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
35984 }
35985 if (!insn_data[icode].operand[5].predicate (op4, mode4))
35986 {
35987 error ("the last argument must be scale 1, 2, 4, 8");
35988 return const0_rtx;
35989 }
35990
35991 /* Optimize. If mask is known to have all high bits set,
35992 replace op0 with pc_rtx to signal that the instruction
35993 overwrites the whole destination and doesn't use its
35994 previous contents. */
35995 if (optimize)
35996 {
35997 if (TREE_CODE (arg3) == INTEGER_CST)
35998 {
35999 if (integer_all_onesp (arg3))
36000 op0 = pc_rtx;
36001 }
36002 else if (TREE_CODE (arg3) == VECTOR_CST)
36003 {
36004 unsigned int negative = 0;
36005 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
36006 {
36007 tree cst = VECTOR_CST_ELT (arg3, i);
36008 if (TREE_CODE (cst) == INTEGER_CST
36009 && tree_int_cst_sign_bit (cst))
36010 negative++;
36011 else if (TREE_CODE (cst) == REAL_CST
36012 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
36013 negative++;
36014 }
36015 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
36016 op0 = pc_rtx;
36017 }
36018 else if (TREE_CODE (arg3) == SSA_NAME
36019 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
36020 {
36021 /* Recognize also when mask is like:
36022 __v2df src = _mm_setzero_pd ();
36023 __v2df mask = _mm_cmpeq_pd (src, src);
36024 or
36025 __v8sf src = _mm256_setzero_ps ();
36026 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
36027 as that is a cheaper way to load all ones into
36028 a register than having to load a constant from
36029 memory. */
36030 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
36031 if (is_gimple_call (def_stmt))
36032 {
36033 tree fndecl = gimple_call_fndecl (def_stmt);
36034 if (fndecl
36035 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
36036 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
36037 {
36038 case IX86_BUILTIN_CMPPD:
36039 case IX86_BUILTIN_CMPPS:
36040 case IX86_BUILTIN_CMPPD256:
36041 case IX86_BUILTIN_CMPPS256:
36042 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
36043 break;
36044 /* FALLTHRU */
36045 case IX86_BUILTIN_CMPEQPD:
36046 case IX86_BUILTIN_CMPEQPS:
36047 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
36048 && initializer_zerop (gimple_call_arg (def_stmt,
36049 1)))
36050 op0 = pc_rtx;
36051 break;
36052 default:
36053 break;
36054 }
36055 }
36056 }
36057 }
36058
36059 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
36060 if (! pat)
36061 return const0_rtx;
36062 emit_insn (pat);
36063
36064 switch (fcode)
36065 {
36066 case IX86_BUILTIN_GATHER3DIV16SF:
36067 if (target == NULL_RTX)
36068 target = gen_reg_rtx (V8SFmode);
36069 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
36070 break;
36071 case IX86_BUILTIN_GATHER3DIV16SI:
36072 if (target == NULL_RTX)
36073 target = gen_reg_rtx (V8SImode);
36074 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
36075 break;
36076 case IX86_BUILTIN_GATHERDIV8SF:
36077 if (target == NULL_RTX)
36078 target = gen_reg_rtx (V4SFmode);
36079 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
36080 break;
36081 case IX86_BUILTIN_GATHERDIV8SI:
36082 if (target == NULL_RTX)
36083 target = gen_reg_rtx (V4SImode);
36084 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
36085 break;
36086 default:
36087 target = subtarget;
36088 break;
36089 }
36090 return target;
36091
36092 scatter_gen:
36093 arg0 = CALL_EXPR_ARG (exp, 0);
36094 arg1 = CALL_EXPR_ARG (exp, 1);
36095 arg2 = CALL_EXPR_ARG (exp, 2);
36096 arg3 = CALL_EXPR_ARG (exp, 3);
36097 arg4 = CALL_EXPR_ARG (exp, 4);
36098 op0 = expand_normal (arg0);
36099 op1 = expand_normal (arg1);
36100 op2 = expand_normal (arg2);
36101 op3 = expand_normal (arg3);
36102 op4 = expand_normal (arg4);
36103 mode1 = insn_data[icode].operand[1].mode;
36104 mode2 = insn_data[icode].operand[2].mode;
36105 mode3 = insn_data[icode].operand[3].mode;
36106 mode4 = insn_data[icode].operand[4].mode;
36107
36108 /* Force the memory operand to use only a base register here.  We
36109 don't want to do this for the memory operands of other builtin
36110 functions. */
36111 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
36112
36113 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36114 op0 = copy_to_mode_reg (Pmode, op0);
36115
36116 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
36117 {
36118 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36119 op1 = copy_to_mode_reg (mode1, op1);
36120 }
36121 else
36122 {
36123 op1 = copy_to_reg (op1);
36124 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
36125 }
36126
36127 if (!insn_data[icode].operand[2].predicate (op2, mode2))
36128 op2 = copy_to_mode_reg (mode2, op2);
36129
36130 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36131 op3 = copy_to_mode_reg (mode3, op3);
36132
36133 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36134 {
36135 error ("the last argument must be scale 1, 2, 4, 8");
36136 return const0_rtx;
36137 }
36138
36139 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36140 if (! pat)
36141 return const0_rtx;
36142
36143 emit_insn (pat);
36144 return 0;
36145
36146 vec_prefetch_gen:
36147 arg0 = CALL_EXPR_ARG (exp, 0);
36148 arg1 = CALL_EXPR_ARG (exp, 1);
36149 arg2 = CALL_EXPR_ARG (exp, 2);
36150 arg3 = CALL_EXPR_ARG (exp, 3);
36151 arg4 = CALL_EXPR_ARG (exp, 4);
36152 op0 = expand_normal (arg0);
36153 op1 = expand_normal (arg1);
36154 op2 = expand_normal (arg2);
36155 op3 = expand_normal (arg3);
36156 op4 = expand_normal (arg4);
36157 mode0 = insn_data[icode].operand[0].mode;
36158 mode1 = insn_data[icode].operand[1].mode;
36159 mode3 = insn_data[icode].operand[3].mode;
36160 mode4 = insn_data[icode].operand[4].mode;
36161
36162 if (GET_MODE (op0) == mode0
36163 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
36164 {
36165 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36166 op0 = copy_to_mode_reg (mode0, op0);
36167 }
36168 else if (op0 != constm1_rtx)
36169 {
36170 op0 = copy_to_reg (op0);
36171 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36172 }
36173
36174 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36175 op1 = copy_to_mode_reg (mode1, op1);
36176
36177 /* Force the memory operand to use only a base register here.  We
36178 don't want to do this for the memory operands of other builtin
36179 functions. */
36180 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36181
36182 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36183 op2 = copy_to_mode_reg (Pmode, op2);
36184
36185 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36186 {
36187 error ("the forth argument must be scale 1, 2, 4, 8");
36188 return const0_rtx;
36189 }
36190
36191 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36192 {
36193 error ("incorrect hint operand");
36194 return const0_rtx;
36195 }
36196
36197 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36198 if (! pat)
36199 return const0_rtx;
36200
36201 emit_insn (pat);
36202
36203 return 0;
36204
36205 case IX86_BUILTIN_XABORT:
36206 icode = CODE_FOR_xabort;
36207 arg0 = CALL_EXPR_ARG (exp, 0);
36208 op0 = expand_normal (arg0);
36209 mode0 = insn_data[icode].operand[0].mode;
36210 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36211 {
36212 error ("the xabort's argument must be an 8-bit immediate");
36213 return const0_rtx;
36214 }
36215 emit_insn (gen_xabort (op0));
36216 return 0;
36217
36218 default:
36219 break;
36220 }
36221
36222 for (i = 0, d = bdesc_special_args;
36223 i < ARRAY_SIZE (bdesc_special_args);
36224 i++, d++)
36225 if (d->code == fcode)
36226 return ix86_expand_special_args_builtin (d, exp, target);
36227
36228 for (i = 0, d = bdesc_args;
36229 i < ARRAY_SIZE (bdesc_args);
36230 i++, d++)
36231 if (d->code == fcode)
36232 switch (fcode)
36233 {
36234 case IX86_BUILTIN_FABSQ:
36235 case IX86_BUILTIN_COPYSIGNQ:
36236 if (!TARGET_SSE)
36237 /* Emit a normal call if SSE isn't available. */
36238 return expand_call (exp, target, ignore);
36239 default:
36240 return ix86_expand_args_builtin (d, exp, target);
36241 }
36242
36243 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36244 if (d->code == fcode)
36245 return ix86_expand_sse_comi (d, exp, target);
36246
36247 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36248 if (d->code == fcode)
36249 return ix86_expand_round_builtin (d, exp, target);
36250
36251 for (i = 0, d = bdesc_pcmpestr;
36252 i < ARRAY_SIZE (bdesc_pcmpestr);
36253 i++, d++)
36254 if (d->code == fcode)
36255 return ix86_expand_sse_pcmpestr (d, exp, target);
36256
36257 for (i = 0, d = bdesc_pcmpistr;
36258 i < ARRAY_SIZE (bdesc_pcmpistr);
36259 i++, d++)
36260 if (d->code == fcode)
36261 return ix86_expand_sse_pcmpistr (d, exp, target);
36262
36263 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36264 if (d->code == fcode)
36265 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36266 (enum ix86_builtin_func_type)
36267 d->flag, d->comparison);
36268
36269 gcc_unreachable ();
36270 }
36271
36272 /* This returns the target-specific builtin with code CODE if
36273 current_function_decl has visibility on this builtin, which is checked
36274 using isa flags. Returns NULL_TREE otherwise. */
36275
36276 static tree ix86_get_builtin (enum ix86_builtins code)
36277 {
36278 struct cl_target_option *opts;
36279 tree target_tree = NULL_TREE;
36280
36281 /* Determine the isa flags of current_function_decl. */
36282
36283 if (current_function_decl)
36284 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36285
36286 if (target_tree == NULL)
36287 target_tree = target_option_default_node;
36288
36289 opts = TREE_TARGET_OPTION (target_tree);
36290
36291 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36292 return ix86_builtin_decl (code, true);
36293 else
36294 return NULL_TREE;
36295 }
36296
36297 /* Returns a function decl for a vectorized version of the builtin function
36298 FNDECL with result vector type TYPE_OUT and argument vector type TYPE_IN,
36299 or NULL_TREE if it is not available. */
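 /* For example, a loop calling sqrt on doubles that is vectorized with
    four-element vectors (-mavx) maps BUILT_IN_SQRT to
    IX86_BUILTIN_SQRTPD256 below, i.e. a single 256-bit vsqrtpd.  */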
36300
36301 static tree
36302 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36303 tree type_in)
36304 {
36305 enum machine_mode in_mode, out_mode;
36306 int in_n, out_n;
36307 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36308
36309 if (TREE_CODE (type_out) != VECTOR_TYPE
36310 || TREE_CODE (type_in) != VECTOR_TYPE
36311 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36312 return NULL_TREE;
36313
36314 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36315 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36316 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36317 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36318
36319 switch (fn)
36320 {
36321 case BUILT_IN_SQRT:
36322 if (out_mode == DFmode && in_mode == DFmode)
36323 {
36324 if (out_n == 2 && in_n == 2)
36325 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36326 else if (out_n == 4 && in_n == 4)
36327 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36328 else if (out_n == 8 && in_n == 8)
36329 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36330 }
36331 break;
36332
36333 case BUILT_IN_EXP2F:
36334 if (out_mode == SFmode && in_mode == SFmode)
36335 {
36336 if (out_n == 16 && in_n == 16)
36337 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36338 }
36339 break;
36340
36341 case BUILT_IN_SQRTF:
36342 if (out_mode == SFmode && in_mode == SFmode)
36343 {
36344 if (out_n == 4 && in_n == 4)
36345 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36346 else if (out_n == 8 && in_n == 8)
36347 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36348 else if (out_n == 16 && in_n == 16)
36349 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36350 }
36351 break;
36352
36353 case BUILT_IN_IFLOOR:
36354 case BUILT_IN_LFLOOR:
36355 case BUILT_IN_LLFLOOR:
36356 /* The round insn does not trap on denormals. */
36357 if (flag_trapping_math || !TARGET_ROUND)
36358 break;
36359
36360 if (out_mode == SImode && in_mode == DFmode)
36361 {
36362 if (out_n == 4 && in_n == 2)
36363 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36364 else if (out_n == 8 && in_n == 4)
36365 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36366 else if (out_n == 16 && in_n == 8)
36367 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36368 }
36369 break;
36370
36371 case BUILT_IN_IFLOORF:
36372 case BUILT_IN_LFLOORF:
36373 case BUILT_IN_LLFLOORF:
36374 /* The round insn does not trap on denormals. */
36375 if (flag_trapping_math || !TARGET_ROUND)
36376 break;
36377
36378 if (out_mode == SImode && in_mode == SFmode)
36379 {
36380 if (out_n == 4 && in_n == 4)
36381 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36382 else if (out_n == 8 && in_n == 8)
36383 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36384 }
36385 break;
36386
36387 case BUILT_IN_ICEIL:
36388 case BUILT_IN_LCEIL:
36389 case BUILT_IN_LLCEIL:
36390 /* The round insn does not trap on denormals. */
36391 if (flag_trapping_math || !TARGET_ROUND)
36392 break;
36393
36394 if (out_mode == SImode && in_mode == DFmode)
36395 {
36396 if (out_n == 4 && in_n == 2)
36397 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36398 else if (out_n == 8 && in_n == 4)
36399 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36400 else if (out_n == 16 && in_n == 8)
36401 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36402 }
36403 break;
36404
36405 case BUILT_IN_ICEILF:
36406 case BUILT_IN_LCEILF:
36407 case BUILT_IN_LLCEILF:
36408 /* The round insn does not trap on denormals. */
36409 if (flag_trapping_math || !TARGET_ROUND)
36410 break;
36411
36412 if (out_mode == SImode && in_mode == SFmode)
36413 {
36414 if (out_n == 4 && in_n == 4)
36415 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36416 else if (out_n == 8 && in_n == 8)
36417 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36418 }
36419 break;
36420
36421 case BUILT_IN_IRINT:
36422 case BUILT_IN_LRINT:
36423 case BUILT_IN_LLRINT:
36424 if (out_mode == SImode && in_mode == DFmode)
36425 {
36426 if (out_n == 4 && in_n == 2)
36427 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36428 else if (out_n == 8 && in_n == 4)
36429 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36430 }
36431 break;
36432
36433 case BUILT_IN_IRINTF:
36434 case BUILT_IN_LRINTF:
36435 case BUILT_IN_LLRINTF:
36436 if (out_mode == SImode && in_mode == SFmode)
36437 {
36438 if (out_n == 4 && in_n == 4)
36439 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36440 else if (out_n == 8 && in_n == 8)
36441 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36442 }
36443 break;
36444
36445 case BUILT_IN_IROUND:
36446 case BUILT_IN_LROUND:
36447 case BUILT_IN_LLROUND:
36448 /* The round insn does not trap on denormals. */
36449 if (flag_trapping_math || !TARGET_ROUND)
36450 break;
36451
36452 if (out_mode == SImode && in_mode == DFmode)
36453 {
36454 if (out_n == 4 && in_n == 2)
36455 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36456 else if (out_n == 8 && in_n == 4)
36457 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36458 else if (out_n == 16 && in_n == 8)
36459 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36460 }
36461 break;
36462
36463 case BUILT_IN_IROUNDF:
36464 case BUILT_IN_LROUNDF:
36465 case BUILT_IN_LLROUNDF:
36466 /* The round insn does not trap on denormals. */
36467 if (flag_trapping_math || !TARGET_ROUND)
36468 break;
36469
36470 if (out_mode == SImode && in_mode == SFmode)
36471 {
36472 if (out_n == 4 && in_n == 4)
36473 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36474 else if (out_n == 8 && in_n == 8)
36475 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36476 }
36477 break;
36478
36479 case BUILT_IN_COPYSIGN:
36480 if (out_mode == DFmode && in_mode == DFmode)
36481 {
36482 if (out_n == 2 && in_n == 2)
36483 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36484 else if (out_n == 4 && in_n == 4)
36485 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36486 else if (out_n == 8 && in_n == 8)
36487 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36488 }
36489 break;
36490
36491 case BUILT_IN_COPYSIGNF:
36492 if (out_mode == SFmode && in_mode == SFmode)
36493 {
36494 if (out_n == 4 && in_n == 4)
36495 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36496 else if (out_n == 8 && in_n == 8)
36497 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36498 else if (out_n == 16 && in_n == 16)
36499 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36500 }
36501 break;
36502
36503 case BUILT_IN_FLOOR:
36504 /* The round insn does not trap on denormals. */
36505 if (flag_trapping_math || !TARGET_ROUND)
36506 break;
36507
36508 if (out_mode == DFmode && in_mode == DFmode)
36509 {
36510 if (out_n == 2 && in_n == 2)
36511 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36512 else if (out_n == 4 && in_n == 4)
36513 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36514 }
36515 break;
36516
36517 case BUILT_IN_FLOORF:
36518 /* The round insn does not trap on denormals. */
36519 if (flag_trapping_math || !TARGET_ROUND)
36520 break;
36521
36522 if (out_mode == SFmode && in_mode == SFmode)
36523 {
36524 if (out_n == 4 && in_n == 4)
36525 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36526 else if (out_n == 8 && in_n == 8)
36527 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36528 }
36529 break;
36530
36531 case BUILT_IN_CEIL:
36532 /* The round insn does not trap on denormals. */
36533 if (flag_trapping_math || !TARGET_ROUND)
36534 break;
36535
36536 if (out_mode == DFmode && in_mode == DFmode)
36537 {
36538 if (out_n == 2 && in_n == 2)
36539 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36540 else if (out_n == 4 && in_n == 4)
36541 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36542 }
36543 break;
36544
36545 case BUILT_IN_CEILF:
36546 /* The round insn does not trap on denormals. */
36547 if (flag_trapping_math || !TARGET_ROUND)
36548 break;
36549
36550 if (out_mode == SFmode && in_mode == SFmode)
36551 {
36552 if (out_n == 4 && in_n == 4)
36553 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36554 else if (out_n == 8 && in_n == 8)
36555 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36556 }
36557 break;
36558
36559 case BUILT_IN_TRUNC:
36560 /* The round insn does not trap on denormals. */
36561 if (flag_trapping_math || !TARGET_ROUND)
36562 break;
36563
36564 if (out_mode == DFmode && in_mode == DFmode)
36565 {
36566 if (out_n == 2 && in_n == 2)
36567 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36568 else if (out_n == 4 && in_n == 4)
36569 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36570 }
36571 break;
36572
36573 case BUILT_IN_TRUNCF:
36574 /* The round insn does not trap on denormals. */
36575 if (flag_trapping_math || !TARGET_ROUND)
36576 break;
36577
36578 if (out_mode == SFmode && in_mode == SFmode)
36579 {
36580 if (out_n == 4 && in_n == 4)
36581 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36582 else if (out_n == 8 && in_n == 8)
36583 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36584 }
36585 break;
36586
36587 case BUILT_IN_RINT:
36588 /* The round insn does not trap on denormals. */
36589 if (flag_trapping_math || !TARGET_ROUND)
36590 break;
36591
36592 if (out_mode == DFmode && in_mode == DFmode)
36593 {
36594 if (out_n == 2 && in_n == 2)
36595 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36596 else if (out_n == 4 && in_n == 4)
36597 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36598 }
36599 break;
36600
36601 case BUILT_IN_RINTF:
36602 /* The round insn does not trap on denormals. */
36603 if (flag_trapping_math || !TARGET_ROUND)
36604 break;
36605
36606 if (out_mode == SFmode && in_mode == SFmode)
36607 {
36608 if (out_n == 4 && in_n == 4)
36609 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36610 else if (out_n == 8 && in_n == 8)
36611 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36612 }
36613 break;
36614
36615 case BUILT_IN_ROUND:
36616 /* The round insn does not trap on denormals. */
36617 if (flag_trapping_math || !TARGET_ROUND)
36618 break;
36619
36620 if (out_mode == DFmode && in_mode == DFmode)
36621 {
36622 if (out_n == 2 && in_n == 2)
36623 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36624 else if (out_n == 4 && in_n == 4)
36625 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36626 }
36627 break;
36628
36629 case BUILT_IN_ROUNDF:
36630 /* The round insn does not trap on denormals. */
36631 if (flag_trapping_math || !TARGET_ROUND)
36632 break;
36633
36634 if (out_mode == SFmode && in_mode == SFmode)
36635 {
36636 if (out_n == 4 && in_n == 4)
36637 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36638 else if (out_n == 8 && in_n == 8)
36639 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36640 }
36641 break;
36642
36643 case BUILT_IN_FMA:
36644 if (out_mode == DFmode && in_mode == DFmode)
36645 {
36646 if (out_n == 2 && in_n == 2)
36647 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36648 if (out_n == 4 && in_n == 4)
36649 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36650 }
36651 break;
36652
36653 case BUILT_IN_FMAF:
36654 if (out_mode == SFmode && in_mode == SFmode)
36655 {
36656 if (out_n == 4 && in_n == 4)
36657 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36658 if (out_n == 8 && in_n == 8)
36659 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36660 }
36661 break;
36662
36663 default:
36664 break;
36665 }
36666
36667 /* Dispatch to a handler for a vectorization library. */
36668 if (ix86_veclib_handler)
36669 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36670 type_in);
36671
36672 return NULL_TREE;
36673 }
36674
36675 /* Handler for an SVML-style interface to
36676 a library with vectorized intrinsics. */
36677
36678 static tree
36679 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36680 {
36681 char name[20];
36682 tree fntype, new_fndecl, args;
36683 unsigned arity;
36684 const char *bname;
36685 enum machine_mode el_mode, in_mode;
36686 int n, in_n;
36687
36688 /* SVML is suitable for unsafe math only. */
36689 if (!flag_unsafe_math_optimizations)
36690 return NULL_TREE;
36691
36692 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36693 n = TYPE_VECTOR_SUBPARTS (type_out);
36694 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36695 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36696 if (el_mode != in_mode
36697 || n != in_n)
36698 return NULL_TREE;
36699
36700 switch (fn)
36701 {
36702 case BUILT_IN_EXP:
36703 case BUILT_IN_LOG:
36704 case BUILT_IN_LOG10:
36705 case BUILT_IN_POW:
36706 case BUILT_IN_TANH:
36707 case BUILT_IN_TAN:
36708 case BUILT_IN_ATAN:
36709 case BUILT_IN_ATAN2:
36710 case BUILT_IN_ATANH:
36711 case BUILT_IN_CBRT:
36712 case BUILT_IN_SINH:
36713 case BUILT_IN_SIN:
36714 case BUILT_IN_ASINH:
36715 case BUILT_IN_ASIN:
36716 case BUILT_IN_COSH:
36717 case BUILT_IN_COS:
36718 case BUILT_IN_ACOSH:
36719 case BUILT_IN_ACOS:
36720 if (el_mode != DFmode || n != 2)
36721 return NULL_TREE;
36722 break;
36723
36724 case BUILT_IN_EXPF:
36725 case BUILT_IN_LOGF:
36726 case BUILT_IN_LOG10F:
36727 case BUILT_IN_POWF:
36728 case BUILT_IN_TANHF:
36729 case BUILT_IN_TANF:
36730 case BUILT_IN_ATANF:
36731 case BUILT_IN_ATAN2F:
36732 case BUILT_IN_ATANHF:
36733 case BUILT_IN_CBRTF:
36734 case BUILT_IN_SINHF:
36735 case BUILT_IN_SINF:
36736 case BUILT_IN_ASINHF:
36737 case BUILT_IN_ASINF:
36738 case BUILT_IN_COSHF:
36739 case BUILT_IN_COSF:
36740 case BUILT_IN_ACOSHF:
36741 case BUILT_IN_ACOSF:
36742 if (el_mode != SFmode || n != 4)
36743 return NULL_TREE;
36744 break;
36745
36746 default:
36747 return NULL_TREE;
36748 }
36749
36750 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36751
36752 if (fn == BUILT_IN_LOGF)
36753 strcpy (name, "vmlsLn4");
36754 else if (fn == BUILT_IN_LOG)
36755 strcpy (name, "vmldLn2");
36756 else if (n == 4)
36757 {
36758 sprintf (name, "vmls%s", bname+10);
36759 name[strlen (name)-1] = '4';
36760 }
36761 else
36762 sprintf (name, "vmld%s2", bname+10);
36763
36764 /* Convert to uppercase. */
36765 name[4] &= ~0x20;
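 /* E.g. BUILT_IN_SINF with n == 4 has bname "__builtin_sinf", so the
    code above builds "vmlssin4" and the uppercasing of name[4] turns it
    into "vmlsSin4"; BUILT_IN_SIN with n == 2 similarly becomes
    "vmldSin2".  */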
36766
36767 arity = 0;
36768 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36769 args;
36770 args = TREE_CHAIN (args))
36771 arity++;
36772
36773 if (arity == 1)
36774 fntype = build_function_type_list (type_out, type_in, NULL);
36775 else
36776 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36777
36778 /* Build a function declaration for the vectorized function. */
36779 new_fndecl = build_decl (BUILTINS_LOCATION,
36780 FUNCTION_DECL, get_identifier (name), fntype);
36781 TREE_PUBLIC (new_fndecl) = 1;
36782 DECL_EXTERNAL (new_fndecl) = 1;
36783 DECL_IS_NOVOPS (new_fndecl) = 1;
36784 TREE_READONLY (new_fndecl) = 1;
36785
36786 return new_fndecl;
36787 }
36788
36789 /* Handler for an ACML-style interface to
36790 a library with vectorized intrinsics. */
36791
36792 static tree
36793 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
36794 {
36795 char name[20] = "__vr.._";
36796 tree fntype, new_fndecl, args;
36797 unsigned arity;
36798 const char *bname;
36799 enum machine_mode el_mode, in_mode;
36800 int n, in_n;
36801
36802 /* ACML is 64-bit only and suitable for unsafe math only, as
36803 it does not correctly support parts of IEEE with the required
36804 precision such as denormals. */
36805 if (!TARGET_64BIT
36806 || !flag_unsafe_math_optimizations)
36807 return NULL_TREE;
36808
36809 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36810 n = TYPE_VECTOR_SUBPARTS (type_out);
36811 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36812 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36813 if (el_mode != in_mode
36814 || n != in_n)
36815 return NULL_TREE;
36816
36817 switch (fn)
36818 {
36819 case BUILT_IN_SIN:
36820 case BUILT_IN_COS:
36821 case BUILT_IN_EXP:
36822 case BUILT_IN_LOG:
36823 case BUILT_IN_LOG2:
36824 case BUILT_IN_LOG10:
36825 name[4] = 'd';
36826 name[5] = '2';
36827 if (el_mode != DFmode
36828 || n != 2)
36829 return NULL_TREE;
36830 break;
36831
36832 case BUILT_IN_SINF:
36833 case BUILT_IN_COSF:
36834 case BUILT_IN_EXPF:
36835 case BUILT_IN_POWF:
36836 case BUILT_IN_LOGF:
36837 case BUILT_IN_LOG2F:
36838 case BUILT_IN_LOG10F:
36839 name[4] = 's';
36840 name[5] = '4';
36841 if (el_mode != SFmode
36842 || n != 4)
36843 return NULL_TREE;
36844 break;
36845
36846 default:
36847 return NULL_TREE;
36848 }
36849
36850 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36851 sprintf (name + 7, "%s", bname+10);
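 /* E.g. BUILT_IN_SIN fills in the "__vrd2_" template to give
    "__vrd2_sin", while BUILT_IN_SINF gives "__vrs4_sinf".  */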
36852
36853 arity = 0;
36854 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36855 args;
36856 args = TREE_CHAIN (args))
36857 arity++;
36858
36859 if (arity == 1)
36860 fntype = build_function_type_list (type_out, type_in, NULL);
36861 else
36862 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36863
36864 /* Build a function declaration for the vectorized function. */
36865 new_fndecl = build_decl (BUILTINS_LOCATION,
36866 FUNCTION_DECL, get_identifier (name), fntype);
36867 TREE_PUBLIC (new_fndecl) = 1;
36868 DECL_EXTERNAL (new_fndecl) = 1;
36869 DECL_IS_NOVOPS (new_fndecl) = 1;
36870 TREE_READONLY (new_fndecl) = 1;
36871
36872 return new_fndecl;
36873 }
36874
36875 /* Returns a decl of a function that implements a gather load with
36876 memory vector type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
36877 Returns NULL_TREE if it is not available. */
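 /* For example, when vectorizing something like a[i] = b[idx[i]] with
    double b[] and int idx[] at a 256-bit vector size, the vectorizer
    asks for MEM_VECTYPE V4DF with an SImode index, and the switch below
    returns the decl for IX86_BUILTIN_GATHERALTSIV4DF.  */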
36878
36879 static tree
36880 ix86_vectorize_builtin_gather (const_tree mem_vectype,
36881 const_tree index_type, int scale)
36882 {
36883 bool si;
36884 enum ix86_builtins code;
36885
36886 if (! TARGET_AVX2)
36887 return NULL_TREE;
36888
36889 if ((TREE_CODE (index_type) != INTEGER_TYPE
36890 && !POINTER_TYPE_P (index_type))
36891 || (TYPE_MODE (index_type) != SImode
36892 && TYPE_MODE (index_type) != DImode))
36893 return NULL_TREE;
36894
36895 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
36896 return NULL_TREE;
36897
36898 /* The v*gather* insns sign-extend the index to pointer mode. */
36899 if (TYPE_PRECISION (index_type) < POINTER_SIZE
36900 && TYPE_UNSIGNED (index_type))
36901 return NULL_TREE;
36902
36903 if (scale <= 0
36904 || scale > 8
36905 || (scale & (scale - 1)) != 0)
36906 return NULL_TREE;
36907
36908 si = TYPE_MODE (index_type) == SImode;
36909 switch (TYPE_MODE (mem_vectype))
36910 {
36911 case V2DFmode:
36912 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
36913 break;
36914 case V4DFmode:
36915 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
36916 break;
36917 case V2DImode:
36918 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
36919 break;
36920 case V4DImode:
36921 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
36922 break;
36923 case V4SFmode:
36924 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
36925 break;
36926 case V8SFmode:
36927 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
36928 break;
36929 case V4SImode:
36930 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
36931 break;
36932 case V8SImode:
36933 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
36934 break;
36935 case V8DFmode:
36936 if (TARGET_AVX512F)
36937 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
36938 else
36939 return NULL_TREE;
36940 break;
36941 case V8DImode:
36942 if (TARGET_AVX512F)
36943 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
36944 else
36945 return NULL_TREE;
36946 break;
36947 case V16SFmode:
36948 if (TARGET_AVX512F)
36949 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
36950 else
36951 return NULL_TREE;
36952 break;
36953 case V16SImode:
36954 if (TARGET_AVX512F)
36955 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
36956 else
36957 return NULL_TREE;
36958 break;
36959 default:
36960 return NULL_TREE;
36961 }
36962
36963 return ix86_get_builtin (code);
36964 }
36965
36966 /* Returns a decl of a target-specific builtin that implements
36967 the reciprocal of the function, or NULL_TREE if not available. */
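 /* For example, under -ffast-math a 1.0f / sqrtf (x) computation can be
    rewritten via the IX86_BUILTIN_RSQRTF decl returned below, i.e. using
    the rsqrtss reciprocal square-root approximation instead of a divide
    plus a square root.  */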
36968
36969 static tree
36970 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
36971 {
36972 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
36973 && flag_finite_math_only && !flag_trapping_math
36974 && flag_unsafe_math_optimizations))
36975 return NULL_TREE;
36976
36977 if (md_fn)
36978 /* Machine dependent builtins. */
36979 switch (fn)
36980 {
36981 /* Vectorized version of sqrt to rsqrt conversion. */
36982 case IX86_BUILTIN_SQRTPS_NR:
36983 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
36984
36985 case IX86_BUILTIN_SQRTPS_NR256:
36986 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
36987
36988 default:
36989 return NULL_TREE;
36990 }
36991 else
36992 /* Normal builtins. */
36993 switch (fn)
36994 {
36995 /* Sqrt to rsqrt conversion. */
36996 case BUILT_IN_SQRTF:
36997 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
36998
36999 default:
37000 return NULL_TREE;
37001 }
37002 }
37003 \f
37004 /* Helper for avx_vpermilps256_operand et al. This is also used by
37005 the expansion functions to turn the parallel back into a mask.
37006 The return value is 0 for no match and the imm8+1 for a match. */
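 /* For example, in V4SFmode each element index is packed into a 2-bit
    field of the imm8, so the parallel [3 2 1 0] (reverse all four
    elements) gives mask = 3 | (2 << 2) | (1 << 4) | (0 << 6) = 0x1b
    and the function returns 0x1b + 1.  */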
37007
37008 int
37009 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
37010 {
37011 unsigned i, nelt = GET_MODE_NUNITS (mode);
37012 unsigned mask = 0;
37013 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
37014
37015 if (XVECLEN (par, 0) != (int) nelt)
37016 return 0;
37017
37018 /* Validate that all of the elements are constants, and not totally
37019 out of range. Copy the data into an integral array to make the
37020 subsequent checks easier. */
37021 for (i = 0; i < nelt; ++i)
37022 {
37023 rtx er = XVECEXP (par, 0, i);
37024 unsigned HOST_WIDE_INT ei;
37025
37026 if (!CONST_INT_P (er))
37027 return 0;
37028 ei = INTVAL (er);
37029 if (ei >= nelt)
37030 return 0;
37031 ipar[i] = ei;
37032 }
37033
37034 switch (mode)
37035 {
37036 case V8DFmode:
37037 /* In the 512-bit DFmode case, we can only move elements within
37038 a 128-bit lane. First fill the second part of the mask,
37039 then fallthru. */
37040 for (i = 4; i < 6; ++i)
37041 {
37042 if (ipar[i] < 4 || ipar[i] >= 6)
37043 return 0;
37044 mask |= (ipar[i] - 4) << i;
37045 }
37046 for (i = 6; i < 8; ++i)
37047 {
37048 if (ipar[i] < 6)
37049 return 0;
37050 mask |= (ipar[i] - 6) << i;
37051 }
37052 /* FALLTHRU */
37053
37054 case V4DFmode:
37055 /* In the 256-bit DFmode case, we can only move elements within
37056 a 128-bit lane. */
37057 for (i = 0; i < 2; ++i)
37058 {
37059 if (ipar[i] >= 2)
37060 return 0;
37061 mask |= ipar[i] << i;
37062 }
37063 for (i = 2; i < 4; ++i)
37064 {
37065 if (ipar[i] < 2)
37066 return 0;
37067 mask |= (ipar[i] - 2) << i;
37068 }
37069 break;
37070
37071 case V16SFmode:
37072 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
37073 must mirror the permutation in the lower 256 bits. */
37074 for (i = 0; i < 8; ++i)
37075 if (ipar[i] + 8 != ipar[i + 8])
37076 return 0;
37077 /* FALLTHRU */
37078
37079 case V8SFmode:
37080 /* In the 256-bit SFmode case, we have full freedom of
37081 movement within the low 128-bit lane, but the high 128-bit
37082 lane must mirror the exact same pattern. */
37083 for (i = 0; i < 4; ++i)
37084 if (ipar[i] + 4 != ipar[i + 4])
37085 return 0;
37086 nelt = 4;
37087 /* FALLTHRU */
37088
37089 case V2DFmode:
37090 case V4SFmode:
37091 /* In the 128-bit case, we have full freedom in the placement of
37092 the elements from the source operand. */
37093 for (i = 0; i < nelt; ++i)
37094 mask |= ipar[i] << (i * (nelt / 2));
37095 break;
37096
37097 default:
37098 gcc_unreachable ();
37099 }
37100
37101 /* Make sure success has a non-zero value by adding one. */
37102 return mask + 1;
37103 }
37104
37105 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
37106 the expansion functions to turn the parallel back into a mask.
37107 The return value is 0 for no match and the imm8+1 for a match. */
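 /* For example, in V8SFmode the parallel [8 9 10 11 0 1 2 3] selects the
    low 128-bit lane of the second operand followed by the low lane of
    the first, which reconstructs to imm8 = 0x02, so the function
    returns 0x03.  */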
37108
37109 int
37110 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
37111 {
37112 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
37113 unsigned mask = 0;
37114 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
37115
37116 if (XVECLEN (par, 0) != (int) nelt)
37117 return 0;
37118
37119 /* Validate that all of the elements are constants, and not totally
37120 out of range. Copy the data into an integral array to make the
37121 subsequent checks easier. */
37122 for (i = 0; i < nelt; ++i)
37123 {
37124 rtx er = XVECEXP (par, 0, i);
37125 unsigned HOST_WIDE_INT ei;
37126
37127 if (!CONST_INT_P (er))
37128 return 0;
37129 ei = INTVAL (er);
37130 if (ei >= 2 * nelt)
37131 return 0;
37132 ipar[i] = ei;
37133 }
37134
37135 /* Validate that the halves of the permute are halves. */
37136 for (i = 0; i < nelt2 - 1; ++i)
37137 if (ipar[i] + 1 != ipar[i + 1])
37138 return 0;
37139 for (i = nelt2; i < nelt - 1; ++i)
37140 if (ipar[i] + 1 != ipar[i + 1])
37141 return 0;
37142
37143 /* Reconstruct the mask. */
37144 for (i = 0; i < 2; ++i)
37145 {
37146 unsigned e = ipar[i * nelt2];
37147 if (e % nelt2)
37148 return 0;
37149 e /= nelt2;
37150 mask |= e << (i * 4);
37151 }
37152
37153 /* Make sure success has a non-zero value by adding one. */
37154 return mask + 1;
37155 }
37156 \f
37157 /* Return a register priority for hard reg REGNO. */
37158 static int
37159 ix86_register_priority (int hard_regno)
37160 {
37161 /* ebp and r13 as the base always want a displacement, and r12 as the
37162 base always wants an index.  So discourage their use in an
37163 address. */
37164 if (hard_regno == R12_REG || hard_regno == R13_REG)
37165 return 0;
37166 if (hard_regno == BP_REG)
37167 return 1;
37168 /* New x86-64 int registers result in bigger code size. Discourage
37169 them. */
37170 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37171 return 2;
37172 /* New x86-64 SSE registers result in bigger code size. Discourage
37173 them. */
37174 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37175 return 2;
37176 /* Usage of AX register results in smaller code. Prefer it. */
37177 if (hard_regno == 0)
37178 return 4;
37179 return 3;
37180 }
37181
37182 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37183
37184 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37185 QImode must go into class Q_REGS.
37186 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
37187 movdf to do mem-to-mem moves through integer regs. */
37188
37189 static reg_class_t
37190 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37191 {
37192 enum machine_mode mode = GET_MODE (x);
37193
37194 /* We're only allowed to return a subclass of CLASS. Many of the
37195 following checks fail for NO_REGS, so eliminate that early. */
37196 if (regclass == NO_REGS)
37197 return NO_REGS;
37198
37199 /* All classes can load zeros. */
37200 if (x == CONST0_RTX (mode))
37201 return regclass;
37202
37203 /* Force constants into memory if we are loading a (nonzero) constant into
37204 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37205 instructions to load from a constant. */
37206 if (CONSTANT_P (x)
37207 && (MAYBE_MMX_CLASS_P (regclass)
37208 || MAYBE_SSE_CLASS_P (regclass)
37209 || MAYBE_MASK_CLASS_P (regclass)))
37210 return NO_REGS;
37211
37212 /* Prefer SSE regs only, if we can use them for math. */
37213 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37214 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37215
37216 /* Floating-point constants need more complex checks. */
37217 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37218 {
37219 /* General regs can load everything. */
37220 if (reg_class_subset_p (regclass, GENERAL_REGS))
37221 return regclass;
37222
37223 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37224 zero above. We only want to wind up preferring 80387 registers if
37225 we plan on doing computation with them. */
37226 if (TARGET_80387
37227 && standard_80387_constant_p (x) > 0)
37228 {
37229 /* Limit class to non-sse. */
37230 if (regclass == FLOAT_SSE_REGS)
37231 return FLOAT_REGS;
37232 if (regclass == FP_TOP_SSE_REGS)
37233 return FP_TOP_REG;
37234 if (regclass == FP_SECOND_SSE_REGS)
37235 return FP_SECOND_REG;
37236 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37237 return regclass;
37238 }
37239
37240 return NO_REGS;
37241 }
37242
37243 /* Generally when we see PLUS here, it's the function invariant
37244 (plus soft-fp const_int). Which can only be computed into general
37245 regs. */
37246 if (GET_CODE (x) == PLUS)
37247 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37248
37249 /* QImode constants are easy to load, but non-constant QImode data
37250 must go into Q_REGS. */
37251 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37252 {
37253 if (reg_class_subset_p (regclass, Q_REGS))
37254 return regclass;
37255 if (reg_class_subset_p (Q_REGS, regclass))
37256 return Q_REGS;
37257 return NO_REGS;
37258 }
37259
37260 return regclass;
37261 }
37262
37263 /* Discourage putting floating-point values in SSE registers unless
37264 SSE math is being used, and likewise for the 387 registers. */
37265 static reg_class_t
37266 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37267 {
37268 enum machine_mode mode = GET_MODE (x);
37269
37270 /* Restrict the output reload class to the register bank that we are doing
37271 math on. If we would like not to return a subset of CLASS, reject this
37272 alternative: if reload cannot do this, it will still use its choice. */
37273 mode = GET_MODE (x);
37274 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37275 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37276
37277 if (X87_FLOAT_MODE_P (mode))
37278 {
37279 if (regclass == FP_TOP_SSE_REGS)
37280 return FP_TOP_REG;
37281 else if (regclass == FP_SECOND_SSE_REGS)
37282 return FP_SECOND_REG;
37283 else
37284 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37285 }
37286
37287 return regclass;
37288 }
37289
37290 static reg_class_t
37291 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37292 enum machine_mode mode, secondary_reload_info *sri)
37293 {
37294 /* Double-word spills from general registers to non-offsettable memory
37295 references (zero-extended addresses) require special handling. */
37296 if (TARGET_64BIT
37297 && MEM_P (x)
37298 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37299 && INTEGER_CLASS_P (rclass)
37300 && !offsettable_memref_p (x))
37301 {
37302 sri->icode = (in_p
37303 ? CODE_FOR_reload_noff_load
37304 : CODE_FOR_reload_noff_store);
37305 /* Add the cost of moving address to a temporary. */
37306 sri->extra_cost = 1;
37307
37308 return NO_REGS;
37309 }
37310
37311 /* QImode spills from non-QI registers require an
37312 intermediate register on 32-bit targets. */
37313 if (mode == QImode
37314 && (MAYBE_MASK_CLASS_P (rclass)
37315 || (!TARGET_64BIT && !in_p
37316 && INTEGER_CLASS_P (rclass)
37317 && MAYBE_NON_Q_CLASS_P (rclass))))
37318 {
37319 int regno;
37320
37321 if (REG_P (x))
37322 regno = REGNO (x);
37323 else
37324 regno = -1;
37325
37326 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37327 regno = true_regnum (x);
37328
37329 /* Return Q_REGS if the operand is in memory. */
37330 if (regno == -1)
37331 return Q_REGS;
37332 }
37333
37334 /* This condition handles corner case where an expression involving
37335 pointers gets vectorized. We're trying to use the address of a
37336 stack slot as a vector initializer.
37337
37338 (set (reg:V2DI 74 [ vect_cst_.2 ])
37339 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37340
37341 Eventually frame gets turned into sp+offset like this:
37342
37343 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37344 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37345 (const_int 392 [0x188]))))
37346
37347 That later gets turned into:
37348
37349 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37350 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37351 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37352
37353 We'll have the following reload recorded:
37354
37355 Reload 0: reload_in (DI) =
37356 (plus:DI (reg/f:DI 7 sp)
37357 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37358 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37359 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37360 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37361 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37362 reload_reg_rtx: (reg:V2DI 22 xmm1)
37363
37364 Which isn't going to work since SSE instructions can't handle scalar
37365 additions. Returning GENERAL_REGS forces the addition into integer
37366 register and reload can handle subsequent reloads without problems. */
37367
37368 if (in_p && GET_CODE (x) == PLUS
37369 && SSE_CLASS_P (rclass)
37370 && SCALAR_INT_MODE_P (mode))
37371 return GENERAL_REGS;
37372
37373 return NO_REGS;
37374 }
37375
37376 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37377
37378 static bool
37379 ix86_class_likely_spilled_p (reg_class_t rclass)
37380 {
37381 switch (rclass)
37382 {
37383 case AREG:
37384 case DREG:
37385 case CREG:
37386 case BREG:
37387 case AD_REGS:
37388 case SIREG:
37389 case DIREG:
37390 case SSE_FIRST_REG:
37391 case FP_TOP_REG:
37392 case FP_SECOND_REG:
37393 return true;
37394
37395 default:
37396 break;
37397 }
37398
37399 return false;
37400 }
37401
37402 /* If we are copying between general and FP registers, we need a memory
37403 location. The same is true for SSE and MMX registers.
37404
37405 To optimize register_move_cost performance, allow inline variant.
37406
37407 The macro can't work reliably when one of the CLASSES is a class containing
37408 registers from multiple units (SSE, MMX, integer).  We avoid this by never
37409 combining those units in a single alternative in the machine description.
37410 Ensure that this constraint holds to avoid unexpected surprises.
37411
37412 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37413 enforce these sanity checks. */
37414
37415 static inline bool
37416 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37417 enum machine_mode mode, int strict)
37418 {
37419 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37420 return false;
37421 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37422 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37423 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37424 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37425 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37426 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37427 {
37428 gcc_assert (!strict || lra_in_progress);
37429 return true;
37430 }
37431
37432 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37433 return true;
37434
37435 /* Between mask and general, we have moves no larger than word size. */
37436 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
37437 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
37438 return true;
37439
37440 /* ??? This is a lie.  We do have moves between mmx/general, and between
37441 mmx/sse2. But by saying we need secondary memory we discourage the
37442 register allocator from using the mmx registers unless needed. */
37443 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37444 return true;
37445
37446 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37447 {
37448 /* SSE1 doesn't have any direct moves from other classes. */
37449 if (!TARGET_SSE2)
37450 return true;
37451
37452 /* If the target says that inter-unit moves are more expensive
37453 than moving through memory, then don't generate them. */
37454 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37455 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37456 return true;
37457
37458 /* Between SSE and general, we have moves no larger than word size. */
37459 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37460 return true;
37461 }
37462
37463 return false;
37464 }
37465
37466 bool
37467 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37468 enum machine_mode mode, int strict)
37469 {
37470 return inline_secondary_memory_needed (class1, class2, mode, strict);
37471 }
37472
37473 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37474
37475 On the 80386, this is the size of MODE in words,
37476 except in the FP regs, where a single reg is always enough. */
37477
37478 static unsigned char
37479 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37480 {
37481 if (MAYBE_INTEGER_CLASS_P (rclass))
37482 {
37483 if (mode == XFmode)
37484 return (TARGET_64BIT ? 2 : 3);
37485 else if (mode == XCmode)
37486 return (TARGET_64BIT ? 4 : 6);
37487 else
37488 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37489 }
37490 else
37491 {
37492 if (COMPLEX_MODE_P (mode))
37493 return 2;
37494 else
37495 return 1;
37496 }
37497 }
37498
37499 /* Return true if the registers in CLASS cannot represent the change from
37500 modes FROM to TO. */
37501
37502 bool
37503 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37504 enum reg_class regclass)
37505 {
37506 if (from == to)
37507 return false;
37508
37509 /* x87 registers can't do subreg at all, as all values are reformatted
37510 to extended precision. */
37511 if (MAYBE_FLOAT_CLASS_P (regclass))
37512 return true;
37513
37514 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37515 {
37516 /* Vector registers do not support QI or HImode loads. If we don't
37517 disallow a change to these modes, reload will assume it's ok to
37518 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37519 the vec_dupv4hi pattern. */
37520 if (GET_MODE_SIZE (from) < 4)
37521 return true;
37522
37523 /* Vector registers do not support subreg with nonzero offsets, which
37524 are otherwise valid for integer registers. Since we can't see
37525 whether we have a nonzero offset from here, prohibit all
37526 nonparadoxical subregs changing size. */
37527 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
37528 return true;
37529 }
37530
37531 return false;
37532 }
37533
37534 /* Return the cost of moving data of mode M between a
37535 register and memory. A value of 2 is the default; this cost is
37536 relative to those in `REGISTER_MOVE_COST'.
37537
37538 This function is used extensively by register_move_cost, which is used to
37539 build tables at startup.  Make it inline in this case.
37540 When IN is 2, return the maximum of the in and out move costs.
37541
37542 If moving between registers and memory is more expensive than
37543 between two registers, you should define this macro to express the
37544 relative cost.
37545
37546 Also model the increased cost of moving QImode registers in non
37547 Q_REGS classes.
37548 */
37549 static inline int
37550 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37551 int in)
37552 {
37553 int cost;
37554 if (FLOAT_CLASS_P (regclass))
37555 {
37556 int index;
37557 switch (mode)
37558 {
37559 case SFmode:
37560 index = 0;
37561 break;
37562 case DFmode:
37563 index = 1;
37564 break;
37565 case XFmode:
37566 index = 2;
37567 break;
37568 default:
37569 return 100;
37570 }
37571 if (in == 2)
37572 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37573 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37574 }
37575 if (SSE_CLASS_P (regclass))
37576 {
37577 int index;
37578 switch (GET_MODE_SIZE (mode))
37579 {
37580 case 4:
37581 index = 0;
37582 break;
37583 case 8:
37584 index = 1;
37585 break;
37586 case 16:
37587 index = 2;
37588 break;
37589 default:
37590 return 100;
37591 }
37592 if (in == 2)
37593 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37594 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37595 }
37596 if (MMX_CLASS_P (regclass))
37597 {
37598 int index;
37599 switch (GET_MODE_SIZE (mode))
37600 {
37601 case 4:
37602 index = 0;
37603 break;
37604 case 8:
37605 index = 1;
37606 break;
37607 default:
37608 return 100;
37609 }
37610 if (in == 2)
37611 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37612 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37613 }
37614 switch (GET_MODE_SIZE (mode))
37615 {
37616 case 1:
37617 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37618 {
37619 if (!in)
37620 return ix86_cost->int_store[0];
37621 if (TARGET_PARTIAL_REG_DEPENDENCY
37622 && optimize_function_for_speed_p (cfun))
37623 cost = ix86_cost->movzbl_load;
37624 else
37625 cost = ix86_cost->int_load[0];
37626 if (in == 2)
37627 return MAX (cost, ix86_cost->int_store[0]);
37628 return cost;
37629 }
37630 else
37631 {
37632 if (in == 2)
37633 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37634 if (in)
37635 return ix86_cost->movzbl_load;
37636 else
37637 return ix86_cost->int_store[0] + 4;
37638 }
37639 break;
37640 case 2:
37641 if (in == 2)
37642 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37643 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37644 default:
37645 /* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode. */
37646 if (mode == TFmode)
37647 mode = XFmode;
37648 if (in == 2)
37649 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37650 else if (in)
37651 cost = ix86_cost->int_load[2];
37652 else
37653 cost = ix86_cost->int_store[2];
37654 return (cost * (((int) GET_MODE_SIZE (mode)
37655 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37656 }
37657 }
37658
37659 static int
37660 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37661 bool in)
37662 {
37663 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37664 }
37665
37666
37667 /* Return the cost of moving data from a register in class CLASS1 to
37668 one in class CLASS2.
37669
37670 It is not required that the cost always equal 2 when FROM is the same as TO;
37671 on some machines it is expensive to move between registers if they are not
37672 general registers. */
37673
37674 static int
37675 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37676 reg_class_t class2_i)
37677 {
37678 enum reg_class class1 = (enum reg_class) class1_i;
37679 enum reg_class class2 = (enum reg_class) class2_i;
37680
37681 /* In case we require secondary memory, compute the cost of the store followed
37682 by the load.  In order to avoid bad register allocation choices, we need
37683 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
37684
37685 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37686 {
37687 int cost = 1;
37688
37689 cost += inline_memory_move_cost (mode, class1, 2);
37690 cost += inline_memory_move_cost (mode, class2, 2);
37691
37692 /* When copying from a general purpose register we may emit multiple
37693 stores followed by a single load, causing a memory size mismatch stall.
37694 Count this as an arbitrarily high cost of 20. */
37695 if (targetm.class_max_nregs (class1, mode)
37696 > targetm.class_max_nregs (class2, mode))
37697 cost += 20;
37698
37699 /* In the case of FP/MMX moves, the registers actually overlap, and we
37700 have to switch modes in order to treat them differently. */
37701 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37702 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37703 cost += 20;
37704
37705 return cost;
37706 }
37707
37708 /* Moves between SSE/MMX and integer unit are expensive. */
37709 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37710 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37711
37712 /* ??? By keeping the returned value relatively high, we limit the number
37713 of moves between integer and MMX/SSE registers for all targets.
37714 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
37715 where integer modes in MMX/SSE registers are not tieable
37716 because of missing QImode and HImode moves to, from or between
37717 MMX/SSE registers. */
37718 return MAX (8, ix86_cost->mmxsse_to_integer);
37719
37720 if (MAYBE_FLOAT_CLASS_P (class1))
37721 return ix86_cost->fp_move;
37722 if (MAYBE_SSE_CLASS_P (class1))
37723 return ix86_cost->sse_move;
37724 if (MAYBE_MMX_CLASS_P (class1))
37725 return ix86_cost->mmx_move;
37726 return 2;
37727 }
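/* For example (assuming no secondary memory is needed above), an SImode copy
   between SSE_REGS and GENERAL_REGS costs at least
   MAX (8, ix86_cost->mmxsse_to_integer), while a copy between two general
   registers costs 2.  */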
37728
37729 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37730 MODE. */
37731
37732 bool
37733 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37734 {
37735 	  /* Flags, and only flags, can hold CCmode values.  */
37736 if (CC_REGNO_P (regno))
37737 return GET_MODE_CLASS (mode) == MODE_CC;
37738 if (GET_MODE_CLASS (mode) == MODE_CC
37739 || GET_MODE_CLASS (mode) == MODE_RANDOM
37740 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37741 return false;
37742 if (STACK_REGNO_P (regno))
37743 return VALID_FP_MODE_P (mode);
37744 if (MASK_REGNO_P (regno))
37745 return (VALID_MASK_REG_MODE (mode)
37746 || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
37747 if (SSE_REGNO_P (regno))
37748 {
37749 /* We implement the move patterns for all vector modes into and
37750 out of SSE registers, even when no operation instructions
37751 are available. */
37752
37753 	 /* For AVX-512 we allow, regardless of regno:
37754 	     - XImode
37755 	     - any 512-bit wide vector mode
37756 	     - any scalar mode.  */
37757 if (TARGET_AVX512F
37758 && (mode == XImode
37759 || VALID_AVX512F_REG_MODE (mode)
37760 || VALID_AVX512F_SCALAR_MODE (mode)))
37761 return true;
37762
37763 	  /* TODO: check for QI/HI scalars.  */
37764 	  /* AVX512VL allows SSE registers 16+ for 128/256-bit modes.  */
37765 if (TARGET_AVX512VL
37766 && (mode == OImode
37767 || mode == TImode
37768 || VALID_AVX256_REG_MODE (mode)
37769 || VALID_AVX512VL_128_REG_MODE (mode)))
37770 return true;
37771
37772 /* xmm16-xmm31 are only available for AVX-512. */
37773 if (EXT_REX_SSE_REGNO_P (regno))
37774 return false;
37775
37776 /* OImode and AVX modes are available only when AVX is enabled. */
37777 return ((TARGET_AVX
37778 && VALID_AVX256_REG_OR_OI_MODE (mode))
37779 || VALID_SSE_REG_MODE (mode)
37780 || VALID_SSE2_REG_MODE (mode)
37781 || VALID_MMX_REG_MODE (mode)
37782 || VALID_MMX_REG_MODE_3DNOW (mode));
37783 }
37784 if (MMX_REGNO_P (regno))
37785 {
37786 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37787 so if the register is available at all, then we can move data of
37788 the given mode into or out of it. */
37789 return (VALID_MMX_REG_MODE (mode)
37790 || VALID_MMX_REG_MODE_3DNOW (mode));
37791 }
37792
37793 if (mode == QImode)
37794 {
37795 	      /* Take care with QImode values - they can be in non-QI regs,
37796 	         but then they do cause partial register stalls.  */
37797 if (ANY_QI_REGNO_P (regno))
37798 return true;
37799 if (!TARGET_PARTIAL_REG_STALL)
37800 return true;
37801 /* LRA checks if the hard register is OK for the given mode.
37802 QImode values can live in non-QI regs, so we allow all
37803 registers here. */
37804 if (lra_in_progress)
37805 return true;
37806 return !can_create_pseudo_p ();
37807 }
37808 	  /* We handle both integers and floats in the general purpose registers.  */
37809 else if (VALID_INT_MODE_P (mode))
37810 return true;
37811 else if (VALID_FP_MODE_P (mode))
37812 return true;
37813 else if (VALID_DFP_MODE_P (mode))
37814 return true;
37815 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
37816 on to use that value in smaller contexts, this can easily force a
37817 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
37818 supporting DImode, allow it. */
37819 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
37820 return true;
37821
37822 return false;
37823 }
37824
37825 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
37826 tieable integer mode. */
37827
37828 static bool
37829 ix86_tieable_integer_mode_p (enum machine_mode mode)
37830 {
37831 switch (mode)
37832 {
37833 case HImode:
37834 case SImode:
37835 return true;
37836
37837 case QImode:
37838 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
37839
37840 case DImode:
37841 return TARGET_64BIT;
37842
37843 default:
37844 return false;
37845 }
37846 }
37847
37848 /* Return true if MODE1 is accessible in a register that can hold MODE2
37849 without copying. That is, all register classes that can hold MODE2
37850 can also hold MODE1. */
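/* For example, SFmode ties with DFmode and XFmode (every class that can hold
   the wider float mode can also hold SFmode), while XFmode does not tie with
   TFmode.  */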
37851
37852 bool
37853 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
37854 {
37855 if (mode1 == mode2)
37856 return true;
37857
37858 if (ix86_tieable_integer_mode_p (mode1)
37859 && ix86_tieable_integer_mode_p (mode2))
37860 return true;
37861
37862 /* MODE2 being XFmode implies fp stack or general regs, which means we
37863 can tie any smaller floating point modes to it. Note that we do not
37864 tie this with TFmode. */
37865 if (mode2 == XFmode)
37866 return mode1 == SFmode || mode1 == DFmode;
37867
37868 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
37869 that we can tie it with SFmode. */
37870 if (mode2 == DFmode)
37871 return mode1 == SFmode;
37872
37873 /* If MODE2 is only appropriate for an SSE register, then tie with
37874 any other mode acceptable to SSE registers. */
37875 if (GET_MODE_SIZE (mode2) == 32
37876 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37877 return (GET_MODE_SIZE (mode1) == 32
37878 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37879 if (GET_MODE_SIZE (mode2) == 16
37880 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37881 return (GET_MODE_SIZE (mode1) == 16
37882 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37883
37884 /* If MODE2 is appropriate for an MMX register, then tie
37885 with any other mode acceptable to MMX registers. */
37886 if (GET_MODE_SIZE (mode2) == 8
37887 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
37888 return (GET_MODE_SIZE (mode1) == 8
37889 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
37890
37891 return false;
37892 }
37893
37894 /* Return the cost of moving between two registers of mode MODE. */
37895
37896 static int
37897 ix86_set_reg_reg_cost (enum machine_mode mode)
37898 {
37899 unsigned int units = UNITS_PER_WORD;
37900
37901 switch (GET_MODE_CLASS (mode))
37902 {
37903 default:
37904 break;
37905
37906 case MODE_CC:
37907 units = GET_MODE_SIZE (CCmode);
37908 break;
37909
37910 case MODE_FLOAT:
37911 if ((TARGET_SSE && mode == TFmode)
37912 || (TARGET_80387 && mode == XFmode)
37913 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
37914 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
37915 units = GET_MODE_SIZE (mode);
37916 break;
37917
37918 case MODE_COMPLEX_FLOAT:
37919 if ((TARGET_SSE && mode == TCmode)
37920 || (TARGET_80387 && mode == XCmode)
37921 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
37922 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
37923 units = GET_MODE_SIZE (mode);
37924 break;
37925
37926 case MODE_VECTOR_INT:
37927 case MODE_VECTOR_FLOAT:
37928 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
37929 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
37930 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
37931 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
37932 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
37933 units = GET_MODE_SIZE (mode);
37934 }
37935
37936 /* Return the cost of moving between two registers of mode MODE,
37937 assuming that the move will be in pieces of at most UNITS bytes. */
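  /* As a worked example: on a 32-bit target with SSE2, a V4SImode copy moves
     in a single 16-byte piece, costing COSTS_N_INSNS (1), whereas a DImode
     copy in integer registers needs two word-sized pieces, costing
     COSTS_N_INSNS (2).  */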
37938 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
37939 }
37940
37941 /* Compute a (partial) cost for rtx X. Return true if the complete
37942 cost has been computed, and false if subexpressions should be
37943 scanned. In either case, *TOTAL contains the cost result. */
37944
37945 static bool
37946 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
37947 bool speed)
37948 {
37949 rtx mask;
37950 enum rtx_code code = (enum rtx_code) code_i;
37951 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
37952 enum machine_mode mode = GET_MODE (x);
37953 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
37954
37955 switch (code)
37956 {
37957 case SET:
37958 if (register_operand (SET_DEST (x), VOIDmode)
37959 && reg_or_0_operand (SET_SRC (x), VOIDmode))
37960 {
37961 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
37962 return true;
37963 }
37964 return false;
37965
37966 case CONST_INT:
37967 case CONST:
37968 case LABEL_REF:
37969 case SYMBOL_REF:
37970 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
37971 *total = 3;
37972 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
37973 *total = 2;
37974 else if (flag_pic && SYMBOLIC_CONST (x)
37975 && !(TARGET_64BIT
37976 && (GET_CODE (x) == LABEL_REF
37977 || (GET_CODE (x) == SYMBOL_REF
37978 && SYMBOL_REF_LOCAL_P (x)))))
37979 *total = 1;
37980 else
37981 *total = 0;
37982 return true;
37983
37984 case CONST_DOUBLE:
37985 if (mode == VOIDmode)
37986 {
37987 *total = 0;
37988 return true;
37989 }
37990 switch (standard_80387_constant_p (x))
37991 {
37992 case 1: /* 0.0 */
37993 *total = 1;
37994 return true;
37995 default: /* Other constants */
37996 *total = 2;
37997 return true;
37998 case 0:
37999 case -1:
38000 break;
38001 }
38002 if (SSE_FLOAT_MODE_P (mode))
38003 {
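	  /* Note that CONST_VECTOR jumps straight to the case label below from
	     the outer switch, sharing the standard_sse_constant_p handling
	     with CONST_DOUBLEs in SSE float modes.  */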
38004 case CONST_VECTOR:
38005 switch (standard_sse_constant_p (x))
38006 {
38007 case 0:
38008 break;
38009 case 1: /* 0: xor eliminates false dependency */
38010 *total = 0;
38011 return true;
38012 default: /* -1: cmp contains false dependency */
38013 *total = 1;
38014 return true;
38015 }
38016 }
38017 /* Fall back to (MEM (SYMBOL_REF)), since that's where
38018 it'll probably end up. Add a penalty for size. */
38019 *total = (COSTS_N_INSNS (1)
38020 + (flag_pic != 0 && !TARGET_64BIT)
38021 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
38022 return true;
38023
38024 case ZERO_EXTEND:
38025 	      /* The zero extension is often completely free on x86_64, so make
38026 	         it as cheap as possible.  */
38027 if (TARGET_64BIT && mode == DImode
38028 && GET_MODE (XEXP (x, 0)) == SImode)
38029 *total = 1;
38030 else if (TARGET_ZERO_EXTEND_WITH_AND)
38031 *total = cost->add;
38032 else
38033 *total = cost->movzx;
38034 return false;
38035
38036 case SIGN_EXTEND:
38037 *total = cost->movsx;
38038 return false;
38039
38040 case ASHIFT:
38041 if (SCALAR_INT_MODE_P (mode)
38042 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
38043 && CONST_INT_P (XEXP (x, 1)))
38044 {
38045 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38046 if (value == 1)
38047 {
38048 *total = cost->add;
38049 return false;
38050 }
38051 if ((value == 2 || value == 3)
38052 && cost->lea <= cost->shift_const)
38053 {
38054 *total = cost->lea;
38055 return false;
38056 }
38057 }
38058 /* FALLTHRU */
38059
38060 case ROTATE:
38061 case ASHIFTRT:
38062 case LSHIFTRT:
38063 case ROTATERT:
38064 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38065 {
38066 /* ??? Should be SSE vector operation cost. */
38067 /* At least for published AMD latencies, this really is the same
38068 as the latency for a simple fpu operation like fabs. */
38069 /* V*QImode is emulated with 1-11 insns. */
38070 if (mode == V16QImode || mode == V32QImode)
38071 {
38072 int count = 11;
38073 if (TARGET_XOP && mode == V16QImode)
38074 {
38075 	          /* For XOP we use vpshab, which requires a broadcast of the
38076 	             value to the variable shift insn.  For constants this
38077 	             means a V16QI constant in memory; even when we can perform
38078 	             the shift with one insn, set the cost to prefer paddb.  */
38079 if (CONSTANT_P (XEXP (x, 1)))
38080 {
38081 *total = (cost->fabs
38082 + rtx_cost (XEXP (x, 0), code, 0, speed)
38083 + (speed ? 2 : COSTS_N_BYTES (16)));
38084 return true;
38085 }
38086 count = 3;
38087 }
38088 else if (TARGET_SSSE3)
38089 count = 7;
38090 *total = cost->fabs * count;
38091 }
38092 else
38093 *total = cost->fabs;
38094 }
38095 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38096 {
38097 if (CONST_INT_P (XEXP (x, 1)))
38098 {
38099 if (INTVAL (XEXP (x, 1)) > 32)
38100 *total = cost->shift_const + COSTS_N_INSNS (2);
38101 else
38102 *total = cost->shift_const * 2;
38103 }
38104 else
38105 {
38106 if (GET_CODE (XEXP (x, 1)) == AND)
38107 *total = cost->shift_var * 2;
38108 else
38109 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
38110 }
38111 }
38112 else
38113 {
38114 if (CONST_INT_P (XEXP (x, 1)))
38115 *total = cost->shift_const;
38116 else if (GET_CODE (XEXP (x, 1)) == SUBREG
38117 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
38118 {
38119 /* Return the cost after shift-and truncation. */
38120 *total = cost->shift_var;
38121 return true;
38122 }
38123 else
38124 *total = cost->shift_var;
38125 }
38126 return false;
38127
38128 case FMA:
38129 {
38130 rtx sub;
38131
38132 gcc_assert (FLOAT_MODE_P (mode));
38133 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
38134
38135 /* ??? SSE scalar/vector cost should be used here. */
38136 /* ??? Bald assumption that fma has the same cost as fmul. */
38137 *total = cost->fmul;
38138 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
38139
38140 	    /* A negation in op0 or op2 is free: FMS, FNMA, FNMS.  */
38141 sub = XEXP (x, 0);
38142 if (GET_CODE (sub) == NEG)
38143 sub = XEXP (sub, 0);
38144 *total += rtx_cost (sub, FMA, 0, speed);
38145
38146 sub = XEXP (x, 2);
38147 if (GET_CODE (sub) == NEG)
38148 sub = XEXP (sub, 0);
38149 *total += rtx_cost (sub, FMA, 2, speed);
38150 return true;
38151 }
38152
38153 case MULT:
38154 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38155 {
38156 /* ??? SSE scalar cost should be used here. */
38157 *total = cost->fmul;
38158 return false;
38159 }
38160 else if (X87_FLOAT_MODE_P (mode))
38161 {
38162 *total = cost->fmul;
38163 return false;
38164 }
38165 else if (FLOAT_MODE_P (mode))
38166 {
38167 /* ??? SSE vector cost should be used here. */
38168 *total = cost->fmul;
38169 return false;
38170 }
38171 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38172 {
38173 /* V*QImode is emulated with 7-13 insns. */
38174 if (mode == V16QImode || mode == V32QImode)
38175 {
38176 int extra = 11;
38177 if (TARGET_XOP && mode == V16QImode)
38178 extra = 5;
38179 else if (TARGET_SSSE3)
38180 extra = 6;
38181 *total = cost->fmul * 2 + cost->fabs * extra;
38182 }
38183 /* V*DImode is emulated with 5-8 insns. */
38184 else if (mode == V2DImode || mode == V4DImode)
38185 {
38186 if (TARGET_XOP && mode == V2DImode)
38187 *total = cost->fmul * 2 + cost->fabs * 3;
38188 else
38189 *total = cost->fmul * 3 + cost->fabs * 5;
38190 }
38191 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38192 insns, including two PMULUDQ. */
38193 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38194 *total = cost->fmul * 2 + cost->fabs * 5;
38195 else
38196 *total = cost->fmul;
38197 return false;
38198 }
38199 else
38200 {
38201 rtx op0 = XEXP (x, 0);
38202 rtx op1 = XEXP (x, 1);
38203 int nbits;
38204 if (CONST_INT_P (XEXP (x, 1)))
38205 {
38206 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
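	      /* "value &= value - 1" clears the lowest set bit on each
	         iteration, so this loop counts the set bits of the constant;
	         NBITS feeds the per-bit multiply cost below.  */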
38207 for (nbits = 0; value != 0; value &= value - 1)
38208 nbits++;
38209 }
38210 else
38211 /* This is arbitrary. */
38212 nbits = 7;
38213
38214 /* Compute costs correctly for widening multiplication. */
38215 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38216 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38217 == GET_MODE_SIZE (mode))
38218 {
38219 int is_mulwiden = 0;
38220 enum machine_mode inner_mode = GET_MODE (op0);
38221
38222 if (GET_CODE (op0) == GET_CODE (op1))
38223 is_mulwiden = 1, op1 = XEXP (op1, 0);
38224 else if (CONST_INT_P (op1))
38225 {
38226 if (GET_CODE (op0) == SIGN_EXTEND)
38227 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38228 == INTVAL (op1);
38229 else
38230 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38231 }
38232
38233 if (is_mulwiden)
38234 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38235 }
38236
38237 *total = (cost->mult_init[MODE_INDEX (mode)]
38238 + nbits * cost->mult_bit
38239 + rtx_cost (op0, outer_code, opno, speed)
38240 + rtx_cost (op1, outer_code, opno, speed));
38241
38242 return true;
38243 }
38244
38245 case DIV:
38246 case UDIV:
38247 case MOD:
38248 case UMOD:
38249 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38250 /* ??? SSE cost should be used here. */
38251 *total = cost->fdiv;
38252 else if (X87_FLOAT_MODE_P (mode))
38253 *total = cost->fdiv;
38254 else if (FLOAT_MODE_P (mode))
38255 /* ??? SSE vector cost should be used here. */
38256 *total = cost->fdiv;
38257 else
38258 *total = cost->divide[MODE_INDEX (mode)];
38259 return false;
38260
38261 case PLUS:
38262 if (GET_MODE_CLASS (mode) == MODE_INT
38263 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38264 {
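	  /* The cases below match address-like forms such as
	     (plus (plus (mult A 4) B) C) that a single LEA can compute,
	     e.g. "leal 12(%ebx,%eax,4), %ecx", so we charge cost->lea plus
	     the operand costs instead of separate shifts and adds.  */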
38265 if (GET_CODE (XEXP (x, 0)) == PLUS
38266 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38267 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38268 && CONSTANT_P (XEXP (x, 1)))
38269 {
38270 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38271 if (val == 2 || val == 4 || val == 8)
38272 {
38273 *total = cost->lea;
38274 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38275 outer_code, opno, speed);
38276 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38277 outer_code, opno, speed);
38278 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38279 return true;
38280 }
38281 }
38282 else if (GET_CODE (XEXP (x, 0)) == MULT
38283 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38284 {
38285 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38286 if (val == 2 || val == 4 || val == 8)
38287 {
38288 *total = cost->lea;
38289 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38290 outer_code, opno, speed);
38291 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38292 return true;
38293 }
38294 }
38295 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38296 {
38297 *total = cost->lea;
38298 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38299 outer_code, opno, speed);
38300 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38301 outer_code, opno, speed);
38302 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38303 return true;
38304 }
38305 }
38306 /* FALLTHRU */
38307
38308 case MINUS:
38309 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38310 {
38311 /* ??? SSE cost should be used here. */
38312 *total = cost->fadd;
38313 return false;
38314 }
38315 else if (X87_FLOAT_MODE_P (mode))
38316 {
38317 *total = cost->fadd;
38318 return false;
38319 }
38320 else if (FLOAT_MODE_P (mode))
38321 {
38322 /* ??? SSE vector cost should be used here. */
38323 *total = cost->fadd;
38324 return false;
38325 }
38326 /* FALLTHRU */
38327
38328 case AND:
38329 case IOR:
38330 case XOR:
38331 if (GET_MODE_CLASS (mode) == MODE_INT
38332 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38333 {
38334 *total = (cost->add * 2
38335 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38336 << (GET_MODE (XEXP (x, 0)) != DImode))
38337 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38338 << (GET_MODE (XEXP (x, 1)) != DImode)));
38339 return true;
38340 }
38341 /* FALLTHRU */
38342
38343 case NEG:
38344 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38345 {
38346 /* ??? SSE cost should be used here. */
38347 *total = cost->fchs;
38348 return false;
38349 }
38350 else if (X87_FLOAT_MODE_P (mode))
38351 {
38352 *total = cost->fchs;
38353 return false;
38354 }
38355 else if (FLOAT_MODE_P (mode))
38356 {
38357 /* ??? SSE vector cost should be used here. */
38358 *total = cost->fchs;
38359 return false;
38360 }
38361 /* FALLTHRU */
38362
38363 case NOT:
38364 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38365 {
38366 /* ??? Should be SSE vector operation cost. */
38367 /* At least for published AMD latencies, this really is the same
38368 as the latency for a simple fpu operation like fabs. */
38369 *total = cost->fabs;
38370 }
38371 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38372 *total = cost->add * 2;
38373 else
38374 *total = cost->add;
38375 return false;
38376
38377 case COMPARE:
38378 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38379 && XEXP (XEXP (x, 0), 1) == const1_rtx
38380 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38381 && XEXP (x, 1) == const0_rtx)
38382 {
38383 /* This kind of construct is implemented using test[bwl].
38384 Treat it as if we had an AND. */
38385 *total = (cost->add
38386 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38387 + rtx_cost (const1_rtx, outer_code, opno, speed));
38388 return true;
38389 }
38390 return false;
38391
38392 case FLOAT_EXTEND:
38393 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38394 *total = 0;
38395 return false;
38396
38397 case ABS:
38398 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38399 /* ??? SSE cost should be used here. */
38400 *total = cost->fabs;
38401 else if (X87_FLOAT_MODE_P (mode))
38402 *total = cost->fabs;
38403 else if (FLOAT_MODE_P (mode))
38404 /* ??? SSE vector cost should be used here. */
38405 *total = cost->fabs;
38406 return false;
38407
38408 case SQRT:
38409 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38410 /* ??? SSE cost should be used here. */
38411 *total = cost->fsqrt;
38412 else if (X87_FLOAT_MODE_P (mode))
38413 *total = cost->fsqrt;
38414 else if (FLOAT_MODE_P (mode))
38415 /* ??? SSE vector cost should be used here. */
38416 *total = cost->fsqrt;
38417 return false;
38418
38419 case UNSPEC:
38420 if (XINT (x, 1) == UNSPEC_TP)
38421 *total = 0;
38422 return false;
38423
38424 case VEC_SELECT:
38425 case VEC_CONCAT:
38426 case VEC_DUPLICATE:
38427 	      /* ??? Assume all of these vector manipulation patterns are
38428 	         recognizable, in which case they all pretty much have the
38429 	         same cost.  */
38430 *total = cost->fabs;
38431 return true;
38432 case VEC_MERGE:
38433 mask = XEXP (x, 2);
38434 	      /* This is a masked instruction; assume the same cost
38435 	         as the non-masked variant.  */
38436 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38437 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38438 else
38439 *total = cost->fabs;
38440 return true;
38441
38442 default:
38443 return false;
38444 }
38445 }
38446
38447 #if TARGET_MACHO
38448
38449 static int current_machopic_label_num;
38450
38451 /* Given a symbol name and its associated stub, write out the
38452 definition of the stub. */
38453
38454 void
38455 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38456 {
38457 unsigned int length;
38458 char *binder_name, *symbol_name, lazy_ptr_name[32];
38459 int label = ++current_machopic_label_num;
38460
38461 /* For 64-bit we shouldn't get here. */
38462 gcc_assert (!TARGET_64BIT);
38463
38464 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38465 symb = targetm.strip_name_encoding (symb);
38466
38467 length = strlen (stub);
38468 binder_name = XALLOCAVEC (char, length + 32);
38469 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38470
38471 length = strlen (symb);
38472 symbol_name = XALLOCAVEC (char, length + 32);
38473 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38474
38475 sprintf (lazy_ptr_name, "L%d$lz", label);
38476
38477 if (MACHOPIC_ATT_STUB)
38478 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38479 else if (MACHOPIC_PURE)
38480 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38481 else
38482 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38483
38484 fprintf (file, "%s:\n", stub);
38485 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38486
38487 if (MACHOPIC_ATT_STUB)
38488 {
38489 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38490 }
38491 else if (MACHOPIC_PURE)
38492 {
38493 /* PIC stub. */
38494 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38495 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38496 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38497 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38498 label, lazy_ptr_name, label);
38499 fprintf (file, "\tjmp\t*%%ecx\n");
38500 }
38501 else
38502 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38503
38504 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38505 it needs no stub-binding-helper. */
38506 if (MACHOPIC_ATT_STUB)
38507 return;
38508
38509 fprintf (file, "%s:\n", binder_name);
38510
38511 if (MACHOPIC_PURE)
38512 {
38513 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38514 fprintf (file, "\tpushl\t%%ecx\n");
38515 }
38516 else
38517 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38518
38519 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38520
38521 /* N.B. Keep the correspondence of these
38522 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38523 old-pic/new-pic/non-pic stubs; altering this will break
38524 compatibility with existing dylibs. */
38525 if (MACHOPIC_PURE)
38526 {
38527 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38528 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38529 }
38530 else
38531 /* 16-byte -mdynamic-no-pic stub. */
38532 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
38533
38534 fprintf (file, "%s:\n", lazy_ptr_name);
38535 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38536 fprintf (file, ASM_LONG "%s\n", binder_name);
38537 }
38538 #endif /* TARGET_MACHO */
38539
38540 /* Order the registers for register allocator. */
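/* The resulting order is: call-clobbered general regs, call-saved general
   regs, the x87 regs (here when not doing SSE math, otherwise after the mask
   regs), xmm0-xmm15, xmm16-xmm31, the mask regs, then the MMX regs; the
   remaining slots are zero-filled.  */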
38541
38542 void
38543 x86_order_regs_for_local_alloc (void)
38544 {
38545 int pos = 0;
38546 int i;
38547
38548 /* First allocate the local general purpose registers. */
38549 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38550 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38551 reg_alloc_order [pos++] = i;
38552
38553 /* Global general purpose registers. */
38554 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38555 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38556 reg_alloc_order [pos++] = i;
38557
38558 /* x87 registers come first in case we are doing FP math
38559 using them. */
38560 if (!TARGET_SSE_MATH)
38561 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38562 reg_alloc_order [pos++] = i;
38563
38564 /* SSE registers. */
38565 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38566 reg_alloc_order [pos++] = i;
38567 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38568 reg_alloc_order [pos++] = i;
38569
38570 /* Extended REX SSE registers. */
38571 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38572 reg_alloc_order [pos++] = i;
38573
38574 	  /* Mask registers.  */
38575 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38576 reg_alloc_order [pos++] = i;
38577
38578 /* x87 registers. */
38579 if (TARGET_SSE_MATH)
38580 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38581 reg_alloc_order [pos++] = i;
38582
38583 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38584 reg_alloc_order [pos++] = i;
38585
38586 	  /* Initialize the rest of the array, as we do not allocate some registers
38587 	     at all.  */
38588 while (pos < FIRST_PSEUDO_REGISTER)
38589 reg_alloc_order [pos++] = 0;
38590 }
38591
38592 	/* Handle a "callee_pop_aggregate_return" attribute; arguments as
38593 	   in struct attribute_spec.handler.  */
38594 static tree
38595 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38596 tree args,
38597 int,
38598 bool *no_add_attrs)
38599 {
38600 if (TREE_CODE (*node) != FUNCTION_TYPE
38601 && TREE_CODE (*node) != METHOD_TYPE
38602 && TREE_CODE (*node) != FIELD_DECL
38603 && TREE_CODE (*node) != TYPE_DECL)
38604 {
38605 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38606 name);
38607 *no_add_attrs = true;
38608 return NULL_TREE;
38609 }
38610 if (TARGET_64BIT)
38611 {
38612 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38613 name);
38614 *no_add_attrs = true;
38615 return NULL_TREE;
38616 }
38617 if (is_attribute_p ("callee_pop_aggregate_return", name))
38618 {
38619 tree cst;
38620
38621 cst = TREE_VALUE (args);
38622 if (TREE_CODE (cst) != INTEGER_CST)
38623 {
38624 warning (OPT_Wattributes,
38625 "%qE attribute requires an integer constant argument",
38626 name);
38627 *no_add_attrs = true;
38628 }
38629 else if (compare_tree_int (cst, 0) != 0
38630 && compare_tree_int (cst, 1) != 0)
38631 {
38632 warning (OPT_Wattributes,
38633 "argument to %qE attribute is neither zero, nor one",
38634 name);
38635 *no_add_attrs = true;
38636 }
38637
38638 return NULL_TREE;
38639 }
38640
38641 return NULL_TREE;
38642 }
38643
38644 	/* Handle an "ms_abi" or "sysv_abi" attribute; arguments as in
38645 	   struct attribute_spec.handler.  */
38646 static tree
38647 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
38648 bool *no_add_attrs)
38649 {
38650 if (TREE_CODE (*node) != FUNCTION_TYPE
38651 && TREE_CODE (*node) != METHOD_TYPE
38652 && TREE_CODE (*node) != FIELD_DECL
38653 && TREE_CODE (*node) != TYPE_DECL)
38654 {
38655 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38656 name);
38657 *no_add_attrs = true;
38658 return NULL_TREE;
38659 }
38660
38661 /* Can combine regparm with all attributes but fastcall. */
38662 if (is_attribute_p ("ms_abi", name))
38663 {
38664 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38665 {
38666 error ("ms_abi and sysv_abi attributes are not compatible");
38667 }
38668
38669 return NULL_TREE;
38670 }
38671 else if (is_attribute_p ("sysv_abi", name))
38672 {
38673 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38674 {
38675 error ("ms_abi and sysv_abi attributes are not compatible");
38676 }
38677
38678 return NULL_TREE;
38679 }
38680
38681 return NULL_TREE;
38682 }
38683
38684 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38685 struct attribute_spec.handler. */
38686 static tree
38687 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
38688 bool *no_add_attrs)
38689 {
38690 tree *type = NULL;
38691 if (DECL_P (*node))
38692 {
38693 if (TREE_CODE (*node) == TYPE_DECL)
38694 type = &TREE_TYPE (*node);
38695 }
38696 else
38697 type = node;
38698
38699 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38700 {
38701 warning (OPT_Wattributes, "%qE attribute ignored",
38702 name);
38703 *no_add_attrs = true;
38704 }
38705
38706 else if ((is_attribute_p ("ms_struct", name)
38707 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38708 || ((is_attribute_p ("gcc_struct", name)
38709 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38710 {
38711 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38712 name);
38713 *no_add_attrs = true;
38714 }
38715
38716 return NULL_TREE;
38717 }
38718
38719 static tree
38720 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
38721 bool *no_add_attrs)
38722 {
38723 if (TREE_CODE (*node) != FUNCTION_DECL)
38724 {
38725 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38726 name);
38727 *no_add_attrs = true;
38728 }
38729 return NULL_TREE;
38730 }
38731
38732 static bool
38733 ix86_ms_bitfield_layout_p (const_tree record_type)
38734 {
38735 return ((TARGET_MS_BITFIELD_LAYOUT
38736 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38737 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38738 }
38739
38740 /* Returns an expression indicating where the this parameter is
38741 located on entry to the FUNCTION. */
38742
38743 static rtx
38744 x86_this_parameter (tree function)
38745 {
38746 tree type = TREE_TYPE (function);
38747 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38748 int nregs;
38749
38750 if (TARGET_64BIT)
38751 {
38752 const int *parm_regs;
38753
38754 if (ix86_function_type_abi (type) == MS_ABI)
38755 parm_regs = x86_64_ms_abi_int_parameter_registers;
38756 else
38757 parm_regs = x86_64_int_parameter_registers;
38758 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38759 }
38760
38761 nregs = ix86_function_regparm (type, function);
38762
38763 if (nregs > 0 && !stdarg_p (type))
38764 {
38765 int regno;
38766 unsigned int ccvt = ix86_get_callcvt (type);
38767
38768 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38769 regno = aggr ? DX_REG : CX_REG;
38770 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38771 {
38772 regno = CX_REG;
38773 if (aggr)
38774 return gen_rtx_MEM (SImode,
38775 plus_constant (Pmode, stack_pointer_rtx, 4));
38776 }
38777 else
38778 {
38779 regno = AX_REG;
38780 if (aggr)
38781 {
38782 regno = DX_REG;
38783 if (nregs == 1)
38784 return gen_rtx_MEM (SImode,
38785 plus_constant (Pmode,
38786 stack_pointer_rtx, 4));
38787 }
38788 }
38789 return gen_rtx_REG (SImode, regno);
38790 }
38791
38792 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38793 aggr ? 8 : 4));
38794 }
38795
38796 /* Determine whether x86_output_mi_thunk can succeed. */
38797
38798 static bool
38799 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
38800 const_tree function)
38801 {
38802 /* 64-bit can handle anything. */
38803 if (TARGET_64BIT)
38804 return true;
38805
38806 /* For 32-bit, everything's fine if we have one free register. */
38807 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
38808 return true;
38809
38810 /* Need a free register for vcall_offset. */
38811 if (vcall_offset)
38812 return false;
38813
38814 /* Need a free register for GOT references. */
38815 if (flag_pic && !targetm.binds_local_p (function))
38816 return false;
38817
38818 /* Otherwise ok. */
38819 return true;
38820 }
38821
38822 /* Output the assembler code for a thunk function. THUNK_DECL is the
38823 declaration for the thunk function itself, FUNCTION is the decl for
38824 the target function. DELTA is an immediate constant offset to be
38825 added to THIS. If VCALL_OFFSET is nonzero, the word at
38826 *(*this + vcall_offset) should be added to THIS. */
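/* Roughly, the emitted code performs
     this += DELTA;
     if (VCALL_OFFSET)
       this += *(*this + VCALL_OFFSET);
   and then tail-calls FUNCTION.  */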
38827
38828 static void
38829 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
38830 HOST_WIDE_INT vcall_offset, tree function)
38831 {
38832 rtx this_param = x86_this_parameter (function);
38833 rtx this_reg, tmp, fnaddr;
38834 unsigned int tmp_regno;
38835 rtx_insn *insn;
38836
38837 if (TARGET_64BIT)
38838 tmp_regno = R10_REG;
38839 else
38840 {
38841 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
38842 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38843 tmp_regno = AX_REG;
38844 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38845 tmp_regno = DX_REG;
38846 else
38847 tmp_regno = CX_REG;
38848 }
38849
38850 emit_note (NOTE_INSN_PROLOGUE_END);
38851
38852 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
38853 pull it in now and let DELTA benefit. */
38854 if (REG_P (this_param))
38855 this_reg = this_param;
38856 else if (vcall_offset)
38857 {
38858 /* Put the this parameter into %eax. */
38859 this_reg = gen_rtx_REG (Pmode, AX_REG);
38860 emit_move_insn (this_reg, this_param);
38861 }
38862 else
38863 this_reg = NULL_RTX;
38864
38865 /* Adjust the this parameter by a fixed constant. */
38866 if (delta)
38867 {
38868 rtx delta_rtx = GEN_INT (delta);
38869 rtx delta_dst = this_reg ? this_reg : this_param;
38870
38871 if (TARGET_64BIT)
38872 {
38873 if (!x86_64_general_operand (delta_rtx, Pmode))
38874 {
38875 tmp = gen_rtx_REG (Pmode, tmp_regno);
38876 emit_move_insn (tmp, delta_rtx);
38877 delta_rtx = tmp;
38878 }
38879 }
38880
38881 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
38882 }
38883
38884 /* Adjust the this parameter by a value stored in the vtable. */
38885 if (vcall_offset)
38886 {
38887 rtx vcall_addr, vcall_mem, this_mem;
38888
38889 tmp = gen_rtx_REG (Pmode, tmp_regno);
38890
38891 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
38892 if (Pmode != ptr_mode)
38893 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
38894 emit_move_insn (tmp, this_mem);
38895
38896 /* Adjust the this parameter. */
38897 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
38898 if (TARGET_64BIT
38899 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
38900 {
38901 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
38902 emit_move_insn (tmp2, GEN_INT (vcall_offset));
38903 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
38904 }
38905
38906 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
38907 if (Pmode != ptr_mode)
38908 emit_insn (gen_addsi_1_zext (this_reg,
38909 gen_rtx_REG (ptr_mode,
38910 REGNO (this_reg)),
38911 vcall_mem));
38912 else
38913 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
38914 }
38915
38916 /* If necessary, drop THIS back to its stack slot. */
38917 if (this_reg && this_reg != this_param)
38918 emit_move_insn (this_param, this_reg);
38919
38920 fnaddr = XEXP (DECL_RTL (function), 0);
38921 if (TARGET_64BIT)
38922 {
38923 if (!flag_pic || targetm.binds_local_p (function)
38924 || TARGET_PECOFF)
38925 ;
38926 else
38927 {
38928 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
38929 tmp = gen_rtx_CONST (Pmode, tmp);
38930 fnaddr = gen_const_mem (Pmode, tmp);
38931 }
38932 }
38933 else
38934 {
38935 if (!flag_pic || targetm.binds_local_p (function))
38936 ;
38937 #if TARGET_MACHO
38938 else if (TARGET_MACHO)
38939 {
38940 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
38941 fnaddr = XEXP (fnaddr, 0);
38942 }
38943 #endif /* TARGET_MACHO */
38944 else
38945 {
38946 tmp = gen_rtx_REG (Pmode, CX_REG);
38947 output_set_got (tmp, NULL_RTX);
38948
38949 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
38950 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
38951 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
38952 fnaddr = gen_const_mem (Pmode, fnaddr);
38953 }
38954 }
38955
38956 /* Our sibling call patterns do not allow memories, because we have no
38957 predicate that can distinguish between frame and non-frame memory.
38958 For our purposes here, we can get away with (ab)using a jump pattern,
38959 because we're going to do no optimization. */
38960 if (MEM_P (fnaddr))
38961 {
38962 if (sibcall_insn_operand (fnaddr, word_mode))
38963 {
38964 tmp = gen_rtx_CALL (VOIDmode, fnaddr, const0_rtx);
38965 tmp = emit_call_insn (tmp);
38966 SIBLING_CALL_P (tmp) = 1;
38967 }
38968 else
38969 emit_jump_insn (gen_indirect_jump (fnaddr));
38970 }
38971 else
38972 {
38973 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
38974 fnaddr = legitimize_pic_address (fnaddr,
38975 gen_rtx_REG (Pmode, tmp_regno));
38976
38977 if (!sibcall_insn_operand (fnaddr, word_mode))
38978 {
38979 tmp = gen_rtx_REG (word_mode, tmp_regno);
38980 if (GET_MODE (fnaddr) != word_mode)
38981 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
38982 emit_move_insn (tmp, fnaddr);
38983 fnaddr = tmp;
38984 }
38985
38986 tmp = gen_rtx_MEM (QImode, fnaddr);
38987 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
38988 tmp = emit_call_insn (tmp);
38989 SIBLING_CALL_P (tmp) = 1;
38990 }
38991 emit_barrier ();
38992
38993 /* Emit just enough of rest_of_compilation to get the insns emitted.
38994 Note that use_thunk calls assemble_start_function et al. */
38995 insn = get_insns ();
38996 shorten_branches (insn);
38997 final_start_function (insn, file, 1);
38998 final (insn, file, 1);
38999 final_end_function ();
39000 }
39001
39002 static void
39003 x86_file_start (void)
39004 {
39005 default_file_start ();
39006 if (TARGET_16BIT)
39007 fputs ("\t.code16gcc\n", asm_out_file);
39008 #if TARGET_MACHO
39009 darwin_file_start ();
39010 #endif
39011 if (X86_FILE_START_VERSION_DIRECTIVE)
39012 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
39013 if (X86_FILE_START_FLTUSED)
39014 fputs ("\t.global\t__fltused\n", asm_out_file);
39015 if (ix86_asm_dialect == ASM_INTEL)
39016 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
39017 }
39018
39019 int
39020 x86_field_alignment (tree field, int computed)
39021 {
39022 enum machine_mode mode;
39023 tree type = TREE_TYPE (field);
39024
39025 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
39026 return computed;
39027 mode = TYPE_MODE (strip_array_types (type));
39028 if (mode == DFmode || mode == DCmode
39029 || GET_MODE_CLASS (mode) == MODE_INT
39030 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
39031 return MIN (32, computed);
39032 return computed;
39033 }
39034
39035 /* Output assembler code to FILE to increment profiler label # LABELNO
39036 for profiling a function entry. */
39037 void
39038 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
39039 {
39040 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
39041 : MCOUNT_NAME);
39042
39043 if (TARGET_64BIT)
39044 {
39045 #ifndef NO_PROFILE_COUNTERS
39046 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
39047 #endif
39048
39049 if (!TARGET_PECOFF && flag_pic)
39050 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
39051 else
39052 fprintf (file, "\tcall\t%s\n", mcount_name);
39053 }
39054 else if (flag_pic)
39055 {
39056 #ifndef NO_PROFILE_COUNTERS
39057 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
39058 LPREFIX, labelno);
39059 #endif
39060 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
39061 }
39062 else
39063 {
39064 #ifndef NO_PROFILE_COUNTERS
39065 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
39066 LPREFIX, labelno);
39067 #endif
39068 fprintf (file, "\tcall\t%s\n", mcount_name);
39069 }
39070 }
39071
39072 /* We don't have exact information about the insn sizes, but we may assume
39073 quite safely that we are informed about all 1 byte insns and memory
39074 address sizes. This is enough to eliminate unnecessary padding in
39075 99% of cases. */
39076
39077 static int
39078 min_insn_size (rtx insn)
39079 {
39080 int l = 0, len;
39081
39082 if (!INSN_P (insn) || !active_insn_p (insn))
39083 return 0;
39084
39085 	  /* Discard alignments we've emitted, and jump instructions.  */
39086 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
39087 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
39088 return 0;
39089
39090 	  /* Important case - calls are always 5 bytes.
39091 	     It is common to have many calls in a row.  */
39092 if (CALL_P (insn)
39093 && symbolic_reference_mentioned_p (PATTERN (insn))
39094 && !SIBLING_CALL_P (insn))
39095 return 5;
39096 len = get_attr_length (insn);
39097 if (len <= 1)
39098 return 1;
39099
39100 /* For normal instructions we rely on get_attr_length being exact,
39101 with a few exceptions. */
39102 if (!JUMP_P (insn))
39103 {
39104 enum attr_type type = get_attr_type (insn);
39105
39106 switch (type)
39107 {
39108 case TYPE_MULTI:
39109 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
39110 || asm_noperands (PATTERN (insn)) >= 0)
39111 return 0;
39112 break;
39113 case TYPE_OTHER:
39114 case TYPE_FCMP:
39115 break;
39116 default:
39117 /* Otherwise trust get_attr_length. */
39118 return len;
39119 }
39120
39121 l = get_attr_length_address (insn);
39122 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
39123 l = 4;
39124 }
39125 if (l)
39126 return 1+l;
39127 else
39128 return 2;
39129 }
39130
39131 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39132
39133 	/* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
39134 	   16-byte window.  */
39135
39136 static void
39137 ix86_avoid_jump_mispredicts (void)
39138 {
39139 rtx_insn *insn, *start = get_insns ();
39140 int nbytes = 0, njumps = 0;
39141 int isjump = 0;
39142
39143 /* Look for all minimal intervals of instructions containing 4 jumps.
39144 The intervals are bounded by START and INSN. NBYTES is the total
39145 size of instructions in the interval including INSN and not including
39146 	   START.  When NBYTES is smaller than 16 bytes, it is possible
39147 	   that the end of START and INSN ends up in the same 16-byte page.
39148 
39149 	   The smallest offset in the page at which INSN can start is the case where
39150 	   START ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
39151 	   We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
39152 
39153 	   Don't consider an asm goto as a jump; while it can contain a jump, it doesn't
39154 	   have to, control transfer to its label(s) can be performed through other
39155 	   means, and also we estimate the minimum length of all asm stmts as 0.  */
39156 for (insn = start; insn; insn = NEXT_INSN (insn))
39157 {
39158 int min_size;
39159
39160 if (LABEL_P (insn))
39161 {
39162 int align = label_to_alignment (insn);
39163 int max_skip = label_to_max_skip (insn);
39164
39165 if (max_skip > 15)
39166 max_skip = 15;
39167 	      /* If align > 3, only up to 16 - max_skip - 1 bytes can be
39168 	         already in the current 16-byte page, because otherwise
39169 	         ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
39170 	         bytes to reach a 16-byte boundary.  */
39171 if (align <= 0
39172 || (align <= 3 && max_skip != (1 << align) - 1))
39173 max_skip = 0;
39174 if (dump_file)
39175 fprintf (dump_file, "Label %i with max_skip %i\n",
39176 INSN_UID (insn), max_skip);
39177 if (max_skip)
39178 {
39179 while (nbytes + max_skip >= 16)
39180 {
39181 start = NEXT_INSN (start);
39182 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39183 || CALL_P (start))
39184 njumps--, isjump = 1;
39185 else
39186 isjump = 0;
39187 nbytes -= min_insn_size (start);
39188 }
39189 }
39190 continue;
39191 }
39192
39193 min_size = min_insn_size (insn);
39194 nbytes += min_size;
39195 if (dump_file)
39196 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39197 INSN_UID (insn), min_size);
39198 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39199 || CALL_P (insn))
39200 njumps++;
39201 else
39202 continue;
39203
39204 while (njumps > 3)
39205 {
39206 start = NEXT_INSN (start);
39207 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39208 || CALL_P (start))
39209 njumps--, isjump = 1;
39210 else
39211 isjump = 0;
39212 nbytes -= min_insn_size (start);
39213 }
39214 gcc_assert (njumps >= 0);
39215 if (dump_file)
39216 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39217 INSN_UID (start), INSN_UID (insn), nbytes);
39218
39219 if (njumps == 3 && isjump && nbytes < 16)
39220 {
39221 int padsize = 15 - nbytes + min_insn_size (insn);
39222
39223 if (dump_file)
39224 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39225 INSN_UID (insn), padsize);
39226 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39227 }
39228 }
39229 }
39230 #endif
39231
39232 	/* AMD Athlon works faster
39233 	   when RET is not the destination of a conditional jump or directly preceded
39234 	   by another jump instruction.  We avoid the penalty by inserting a NOP just
39235 	   before the RET instructions in such cases.  */
39236 static void
39237 ix86_pad_returns (void)
39238 {
39239 edge e;
39240 edge_iterator ei;
39241
39242 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39243 {
39244 basic_block bb = e->src;
39245 rtx_insn *ret = BB_END (bb);
39246 rtx_insn *prev;
39247 bool replace = false;
39248
39249 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39250 || optimize_bb_for_size_p (bb))
39251 continue;
39252 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39253 if (active_insn_p (prev) || LABEL_P (prev))
39254 break;
39255 if (prev && LABEL_P (prev))
39256 {
39257 edge e;
39258 edge_iterator ei;
39259
39260 FOR_EACH_EDGE (e, ei, bb->preds)
39261 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39262 && !(e->flags & EDGE_FALLTHRU))
39263 {
39264 replace = true;
39265 break;
39266 }
39267 }
39268 if (!replace)
39269 {
39270 prev = prev_active_insn (ret);
39271 if (prev
39272 && ((JUMP_P (prev) && any_condjump_p (prev))
39273 || CALL_P (prev)))
39274 replace = true;
39275 	        /* Empty functions get a branch mispredict even when
39276 	           the jump destination is not visible to us.  */
39277 if (!prev && !optimize_function_for_size_p (cfun))
39278 replace = true;
39279 }
39280 if (replace)
39281 {
39282 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39283 delete_insn (ret);
39284 }
39285 }
39286 }
39287
39288 /* Count the minimum number of instructions in BB. Return 4 if the
39289 number of instructions >= 4. */
39290
39291 static int
39292 ix86_count_insn_bb (basic_block bb)
39293 {
39294 rtx_insn *insn;
39295 int insn_count = 0;
39296
39297 /* Count number of instructions in this block. Return 4 if the number
39298 of instructions >= 4. */
39299 FOR_BB_INSNS (bb, insn)
39300 {
39301 	      /* Only happens in exit blocks.  */
39302 if (JUMP_P (insn)
39303 && ANY_RETURN_P (PATTERN (insn)))
39304 break;
39305
39306 if (NONDEBUG_INSN_P (insn)
39307 && GET_CODE (PATTERN (insn)) != USE
39308 && GET_CODE (PATTERN (insn)) != CLOBBER)
39309 {
39310 insn_count++;
39311 if (insn_count >= 4)
39312 return insn_count;
39313 }
39314 }
39315
39316 return insn_count;
39317 }
39318
39319
39320 /* Count the minimum number of instructions in code path in BB.
39321 Return 4 if the number of instructions >= 4. */
39322
39323 static int
39324 ix86_count_insn (basic_block bb)
39325 {
39326 edge e;
39327 edge_iterator ei;
39328 int min_prev_count;
39329
39330 /* Only bother counting instructions along paths with no
39331 more than 2 basic blocks between entry and exit. Given
39332 that BB has an edge to exit, determine if a predecessor
39333 of BB has an edge from entry. If so, compute the number
39334 of instructions in the predecessor block. If there
39335 happen to be multiple such blocks, compute the minimum. */
39336 min_prev_count = 4;
39337 FOR_EACH_EDGE (e, ei, bb->preds)
39338 {
39339 edge prev_e;
39340 edge_iterator prev_ei;
39341
39342 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39343 {
39344 min_prev_count = 0;
39345 break;
39346 }
39347 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39348 {
39349 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39350 {
39351 int count = ix86_count_insn_bb (e->src);
39352 if (count < min_prev_count)
39353 min_prev_count = count;
39354 break;
39355 }
39356 }
39357 }
39358
39359 if (min_prev_count < 4)
39360 min_prev_count += ix86_count_insn_bb (bb);
39361
39362 return min_prev_count;
39363 }
39364
39365 /* Pad short function to 4 instructions. */
39366
39367 static void
39368 ix86_pad_short_function (void)
39369 {
39370 edge e;
39371 edge_iterator ei;
39372
39373 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39374 {
39375 rtx_insn *ret = BB_END (e->src);
39376 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39377 {
39378 int insn_count = ix86_count_insn (e->src);
39379
39380 /* Pad short function. */
39381 if (insn_count < 4)
39382 {
39383 rtx_insn *insn = ret;
39384
39385 /* Find epilogue. */
39386 while (insn
39387 && (!NOTE_P (insn)
39388 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39389 insn = PREV_INSN (insn);
39390
39391 if (!insn)
39392 insn = ret;
39393
39394 /* Two NOPs count as one instruction. */
39395 insn_count = 2 * (4 - insn_count);
39396 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
39397 }
39398 }
39399 }
39400 }
39401
39402 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39403 the epilogue, the Windows system unwinder will apply epilogue logic and
39404 produce incorrect offsets. This can be avoided by adding a nop between
39405 the last insn that can throw and the first insn of the epilogue. */
39406
39407 static void
39408 ix86_seh_fixup_eh_fallthru (void)
39409 {
39410 edge e;
39411 edge_iterator ei;
39412
39413 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39414 {
39415 rtx_insn *insn, *next;
39416
39417 /* Find the beginning of the epilogue. */
39418 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39419 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39420 break;
39421 if (insn == NULL)
39422 continue;
39423
39424 /* We only care about preceding insns that can throw. */
39425 insn = prev_active_insn (insn);
39426 if (insn == NULL || !can_throw_internal (insn))
39427 continue;
39428
39429 /* Do not separate calls from their debug information. */
39430 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39431 if (NOTE_P (next)
39432 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39433 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39434 insn = next;
39435 else
39436 break;
39437
39438 emit_insn_after (gen_nops (const1_rtx), insn);
39439 }
39440 }
39441
39442 	/* Implement machine specific optimizations.  We implement padding of returns
39443 	   for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window.  */
39444 static void
39445 ix86_reorg (void)
39446 {
39447 /* We are freeing block_for_insn in the toplev to keep compatibility
39448 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39449 compute_bb_for_insn ();
39450
39451 if (TARGET_SEH && current_function_has_exception_handlers ())
39452 ix86_seh_fixup_eh_fallthru ();
39453
39454 if (optimize && optimize_function_for_speed_p (cfun))
39455 {
39456 if (TARGET_PAD_SHORT_FUNCTION)
39457 ix86_pad_short_function ();
39458 else if (TARGET_PAD_RETURNS)
39459 ix86_pad_returns ();
39460 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39461 if (TARGET_FOUR_JUMP_LIMIT)
39462 ix86_avoid_jump_mispredicts ();
39463 #endif
39464 }
39465 }
39466
39467 	/* Return nonzero when a QImode register that must be represented via a REX
39468 	   prefix is used.  */
39469 bool
39470 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
39471 {
39472 int i;
39473 extract_insn_cached (insn);
39474 for (i = 0; i < recog_data.n_operands; i++)
39475 if (GENERAL_REG_P (recog_data.operand[i])
39476 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39477 return true;
39478 return false;
39479 }
39480
39481 	/* Return nonzero when P points to a register encoded via a REX prefix.
39482 	   Called via for_each_rtx.  */
39483 static int
39484 extended_reg_mentioned_1 (rtx *p, void *)
39485 {
39486 unsigned int regno;
39487 if (!REG_P (*p))
39488 return 0;
39489 regno = REGNO (*p);
39490 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39491 }
39492
39493 	/* Return true when INSN mentions a register that must be encoded using a
39494 	   REX prefix.  */
39495 bool
39496 x86_extended_reg_mentioned_p (rtx insn)
39497 {
39498 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39499 extended_reg_mentioned_1, NULL);
39500 }
39501
39502 	/* If profitable, negate (without causing overflow) the integer constant
39503 	   of mode MODE at location LOC.  Return true in this case.  */
39504 bool
39505 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39506 {
39507 HOST_WIDE_INT val;
39508
39509 if (!CONST_INT_P (*loc))
39510 return false;
39511
39512 switch (mode)
39513 {
39514 case DImode:
39515 /* DImode x86_64 constants must fit in 32 bits. */
39516 gcc_assert (x86_64_immediate_operand (*loc, mode));
39517
39518 mode = SImode;
39519 break;
39520
39521 case SImode:
39522 case HImode:
39523 case QImode:
39524 break;
39525
39526 default:
39527 gcc_unreachable ();
39528 }
39529
39530 /* Avoid overflows. */
39531 if (mode_signbit_p (mode, *loc))
39532 return false;
39533
39534 val = INTVAL (*loc);
39535
39536 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
39537 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
39538 if ((val < 0 && val != -128)
39539 || val == 128)
39540 {
39541 *loc = GEN_INT (-val);
39542 return true;
39543 }
39544
39545 return false;
39546 }
39547
39548 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39549 optabs would emit if we didn't have TFmode patterns. */
39550
39551 void
39552 x86_emit_floatuns (rtx operands[2])
39553 {
39554 rtx_code_label *neglab, *donelab;
39555 rtx i0, i1, f0, in, out;
39556 enum machine_mode mode, inmode;
39557
39558 inmode = GET_MODE (operands[1]);
39559 gcc_assert (inmode == SImode || inmode == DImode);
39560
39561 out = operands[0];
39562 in = force_reg (inmode, operands[1]);
39563 mode = GET_MODE (out);
39564 neglab = gen_label_rtx ();
39565 donelab = gen_label_rtx ();
39566 f0 = gen_reg_rtx (mode);
39567
39568 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39569
39570 expand_float (out, in, 0);
39571
39572 emit_jump_insn (gen_jump (donelab));
39573 emit_barrier ();
39574
39575 emit_label (neglab);
39576
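  /* IN has its sign bit set, so a signed conversion would come out negative.
     Compute (IN >> 1) | (IN & 1), convert that, and double the result;
     OR-ing in the low bit keeps the final rounding correct.  */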
39577 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39578 1, OPTAB_DIRECT);
39579 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39580 1, OPTAB_DIRECT);
39581 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39582
39583 expand_float (f0, i0, 0);
39584
39585 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39586
39587 emit_label (donelab);
39588 }
39589 \f
39590 /* AVX512F does support 64-byte integer vector operations,
39591 thus the longest vector we are faced with is V64QImode. */
39592 #define MAX_VECT_LEN 64
39593
39594 struct expand_vec_perm_d
39595 {
39596 rtx target, op0, op1;
39597 unsigned char perm[MAX_VECT_LEN];
39598 enum machine_mode vmode;
39599 unsigned char nelt;
39600 bool one_operand_p;
39601 bool testing_p;
39602 };
39603
39604 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39605 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39606 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39607
39608 /* Get a vector mode of the same size as the original but with elements
39609 twice as wide. This is only guaranteed to apply to integral vectors. */
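/* For example, V16QImode maps to V8HImode and V8SImode to V4DImode:
the byte size stays the same while the element count halves. */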
39610
39611 static inline enum machine_mode
39612 get_mode_wider_vector (enum machine_mode o)
39613 {
39614 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39615 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39616 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39617 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39618 return n;
39619 }
39620
39621 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39622 fill target with val via vec_duplicate. */
39623
39624 static bool
39625 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39626 {
39627 bool ok;
39628 rtx_insn *insn;
39629 rtx dup;
39630
39631 /* First attempt to recognize VAL as-is. */
39632 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39633 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39634 if (recog_memoized (insn) < 0)
39635 {
39636 rtx_insn *seq;
39637 /* If that fails, force VAL into a register. */
39638
39639 start_sequence ();
39640 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39641 seq = get_insns ();
39642 end_sequence ();
39643 if (seq)
39644 emit_insn_before (seq, insn);
39645
39646 ok = recog_memoized (insn) >= 0;
39647 gcc_assert (ok);
39648 }
39649 return true;
39650 }
39651
39652 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39653 with all elements equal to VAR. Return true if successful. */
39654
39655 static bool
39656 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39657 rtx target, rtx val)
39658 {
39659 bool ok;
39660
39661 switch (mode)
39662 {
39663 case V2SImode:
39664 case V2SFmode:
39665 if (!mmx_ok)
39666 return false;
39667 /* FALLTHRU */
39668
39669 case V4DFmode:
39670 case V4DImode:
39671 case V8SFmode:
39672 case V8SImode:
39673 case V2DFmode:
39674 case V2DImode:
39675 case V4SFmode:
39676 case V4SImode:
39677 case V16SImode:
39678 case V8DImode:
39679 case V16SFmode:
39680 case V8DFmode:
39681 return ix86_vector_duplicate_value (mode, target, val);
39682
39683 case V4HImode:
39684 if (!mmx_ok)
39685 return false;
39686 if (TARGET_SSE || TARGET_3DNOW_A)
39687 {
39688 rtx x;
39689
39690 val = gen_lowpart (SImode, val);
39691 x = gen_rtx_TRUNCATE (HImode, val);
39692 x = gen_rtx_VEC_DUPLICATE (mode, x);
39693 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39694 return true;
39695 }
39696 goto widen;
39697
39698 case V8QImode:
39699 if (!mmx_ok)
39700 return false;
39701 goto widen;
39702
39703 case V8HImode:
39704 if (TARGET_SSE2)
39705 {
39706 struct expand_vec_perm_d dperm;
39707 rtx tmp1, tmp2;
39708
39709 permute:
39710 memset (&dperm, 0, sizeof (dperm));
39711 dperm.target = target;
39712 dperm.vmode = mode;
39713 dperm.nelt = GET_MODE_NUNITS (mode);
39714 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39715 dperm.one_operand_p = true;
39716
39717 /* Extend to SImode using a paradoxical SUBREG. */
39718 tmp1 = gen_reg_rtx (SImode);
39719 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39720
39721 /* Insert the SImode value as low element of a V4SImode vector. */
39722 tmp2 = gen_reg_rtx (V4SImode);
39723 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39724 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39725
39726 ok = (expand_vec_perm_1 (&dperm)
39727 || expand_vec_perm_broadcast_1 (&dperm));
39728 gcc_assert (ok);
39729 return ok;
39730 }
39731 goto widen;
39732
39733 case V16QImode:
39734 if (TARGET_SSE2)
39735 goto permute;
39736 goto widen;
39737
39738 widen:
39739 /* Replicate the value once into the next wider mode and recurse. */
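/* For example, for V16QImode with element value v this builds the HImode
value (v << 8) | v, broadcasts it as V8HImode, and copies the result to
the V16QImode target via a lowpart move; the bit pattern is identical. */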
39740 {
39741 enum machine_mode smode, wsmode, wvmode;
39742 rtx x;
39743
39744 smode = GET_MODE_INNER (mode);
39745 wvmode = get_mode_wider_vector (mode);
39746 wsmode = GET_MODE_INNER (wvmode);
39747
39748 val = convert_modes (wsmode, smode, val, true);
39749 x = expand_simple_binop (wsmode, ASHIFT, val,
39750 GEN_INT (GET_MODE_BITSIZE (smode)),
39751 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39752 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39753
39754 x = gen_reg_rtx (wvmode);
39755 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39756 gcc_assert (ok);
39757 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39758 return ok;
39759 }
39760
39761 case V16HImode:
39762 case V32QImode:
39763 {
39764 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39765 rtx x = gen_reg_rtx (hvmode);
39766
39767 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39768 gcc_assert (ok);
39769
39770 x = gen_rtx_VEC_CONCAT (mode, x, x);
39771 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39772 }
39773 return true;
39774
39775 default:
39776 return false;
39777 }
39778 }
39779
39780 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39781 whose ONE_VAR element is VAR, and other elements are zero. Return true
39782 if successful. */
39783
39784 static bool
39785 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
39786 rtx target, rtx var, int one_var)
39787 {
39788 enum machine_mode vsimode;
39789 rtx new_target;
39790 rtx x, tmp;
39791 bool use_vector_set = false;
39792
39793 switch (mode)
39794 {
39795 case V2DImode:
39796 /* For SSE4.1, we normally use vector set. But if the second
39797 element is zero and inter-unit moves are OK, we use movq
39798 instead. */
39799 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
39800 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
39801 && one_var == 0));
39802 break;
39803 case V16QImode:
39804 case V4SImode:
39805 case V4SFmode:
39806 use_vector_set = TARGET_SSE4_1;
39807 break;
39808 case V8HImode:
39809 use_vector_set = TARGET_SSE2;
39810 break;
39811 case V4HImode:
39812 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
39813 break;
39814 case V32QImode:
39815 case V16HImode:
39816 case V8SImode:
39817 case V8SFmode:
39818 case V4DFmode:
39819 use_vector_set = TARGET_AVX;
39820 break;
39821 case V4DImode:
39822 /* Use ix86_expand_vector_set in 64bit mode only. */
39823 use_vector_set = TARGET_AVX && TARGET_64BIT;
39824 break;
39825 default:
39826 break;
39827 }
39828
39829 if (use_vector_set)
39830 {
39831 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
39832 var = force_reg (GET_MODE_INNER (mode), var);
39833 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39834 return true;
39835 }
39836
39837 switch (mode)
39838 {
39839 case V2SFmode:
39840 case V2SImode:
39841 if (!mmx_ok)
39842 return false;
39843 /* FALLTHRU */
39844
39845 case V2DFmode:
39846 case V2DImode:
39847 if (one_var != 0)
39848 return false;
39849 var = force_reg (GET_MODE_INNER (mode), var);
39850 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
39851 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39852 return true;
39853
39854 case V4SFmode:
39855 case V4SImode:
39856 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
39857 new_target = gen_reg_rtx (mode);
39858 else
39859 new_target = target;
39860 var = force_reg (GET_MODE_INNER (mode), var);
39861 x = gen_rtx_VEC_DUPLICATE (mode, var);
39862 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
39863 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
39864 if (one_var != 0)
39865 {
39866 /* We need to shuffle the value to the correct position, so
39867 create a new pseudo to store the intermediate result. */
39868
39869 /* With SSE2, we can use the integer shuffle insns. */
39870 if (mode != V4SFmode && TARGET_SSE2)
39871 {
39872 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
39873 const1_rtx,
39874 GEN_INT (one_var == 1 ? 0 : 1),
39875 GEN_INT (one_var == 2 ? 0 : 1),
39876 GEN_INT (one_var == 3 ? 0 : 1)));
39877 if (target != new_target)
39878 emit_move_insn (target, new_target);
39879 return true;
39880 }
39881
39882 /* Otherwise convert the intermediate result to V4SFmode and
39883 use the SSE1 shuffle instructions. */
39884 if (mode != V4SFmode)
39885 {
39886 tmp = gen_reg_rtx (V4SFmode);
39887 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
39888 }
39889 else
39890 tmp = new_target;
39891
39892 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
39893 const1_rtx,
39894 GEN_INT (one_var == 1 ? 0 : 1),
39895 GEN_INT (one_var == 2 ? 0+4 : 1+4),
39896 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
39897
39898 if (mode != V4SFmode)
39899 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
39900 else if (tmp != target)
39901 emit_move_insn (target, tmp);
39902 }
39903 else if (target != new_target)
39904 emit_move_insn (target, new_target);
39905 return true;
39906
39907 case V8HImode:
39908 case V16QImode:
39909 vsimode = V4SImode;
39910 goto widen;
39911 case V4HImode:
39912 case V8QImode:
39913 if (!mmx_ok)
39914 return false;
39915 vsimode = V2SImode;
39916 goto widen;
39917 widen:
39918 if (one_var != 0)
39919 return false;
39920
39921 /* Zero extend the variable element to SImode and recurse. */
39922 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
39923
39924 x = gen_reg_rtx (vsimode);
39925 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
39926 var, one_var))
39927 gcc_unreachable ();
39928
39929 emit_move_insn (target, gen_lowpart (mode, x));
39930 return true;
39931
39932 default:
39933 return false;
39934 }
39935 }
39936
39937 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39938 consisting of the values in VALS. It is known that all elements
39939 except ONE_VAR are constants. Return true if successful. */
39940
39941 static bool
39942 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
39943 rtx target, rtx vals, int one_var)
39944 {
39945 rtx var = XVECEXP (vals, 0, one_var);
39946 enum machine_mode wmode;
39947 rtx const_vec, x;
39948
39949 const_vec = copy_rtx (vals);
39950 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
39951 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
39952
39953 switch (mode)
39954 {
39955 case V2DFmode:
39956 case V2DImode:
39957 case V2SFmode:
39958 case V2SImode:
39959 /* For the two element vectors, it's just as easy to use
39960 the general case. */
39961 return false;
39962
39963 case V4DImode:
39964 /* Use ix86_expand_vector_set in 64bit mode only. */
39965 if (!TARGET_64BIT)
39966 return false;
39967 case V4DFmode:
39968 case V8SFmode:
39969 case V8SImode:
39970 case V16HImode:
39971 case V32QImode:
39972 case V4SFmode:
39973 case V4SImode:
39974 case V8HImode:
39975 case V4HImode:
39976 break;
39977
39978 case V16QImode:
39979 if (TARGET_SSE4_1)
39980 break;
39981 wmode = V8HImode;
39982 goto widen;
39983 case V8QImode:
39984 wmode = V4HImode;
39985 goto widen;
39986 widen:
39987 /* There's no way to set one QImode entry easily. Combine
39988 the variable value with its adjacent constant value, and
39989 promote to an HImode set. */
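/* If ONE_VAR has an odd index, the variable value becomes the high byte
of its HImode element: var' = (var << 8) | (const & 0xff); otherwise
var' = var | (const << 8). */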
39990 x = XVECEXP (vals, 0, one_var ^ 1);
39991 if (one_var & 1)
39992 {
39993 var = convert_modes (HImode, QImode, var, true);
39994 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
39995 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39996 x = GEN_INT (INTVAL (x) & 0xff);
39997 }
39998 else
39999 {
40000 var = convert_modes (HImode, QImode, var, true);
40001 x = gen_int_mode (INTVAL (x) << 8, HImode);
40002 }
40003 if (x != const0_rtx)
40004 var = expand_simple_binop (HImode, IOR, var, x, var,
40005 1, OPTAB_LIB_WIDEN);
40006
40007 x = gen_reg_rtx (wmode);
40008 emit_move_insn (x, gen_lowpart (wmode, const_vec));
40009 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
40010
40011 emit_move_insn (target, gen_lowpart (mode, x));
40012 return true;
40013
40014 default:
40015 return false;
40016 }
40017
40018 emit_move_insn (target, const_vec);
40019 ix86_expand_vector_set (mmx_ok, target, var, one_var);
40020 return true;
40021 }
40022
40023 /* A subroutine of ix86_expand_vector_init_general. Use vector
40024 concatenate to handle the most general case: all values variable,
40025 and none identical. */
40026
40027 static void
40028 ix86_expand_vector_init_concat (enum machine_mode mode,
40029 rtx target, rtx *ops, int n)
40030 {
40031 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
40032 rtx first[16], second[8], third[4];
40033 rtvec v;
40034 int i, j;
40035
40036 switch (n)
40037 {
40038 case 2:
40039 switch (mode)
40040 {
40041 case V16SImode:
40042 cmode = V8SImode;
40043 break;
40044 case V16SFmode:
40045 cmode = V8SFmode;
40046 break;
40047 case V8DImode:
40048 cmode = V4DImode;
40049 break;
40050 case V8DFmode:
40051 cmode = V4DFmode;
40052 break;
40053 case V8SImode:
40054 cmode = V4SImode;
40055 break;
40056 case V8SFmode:
40057 cmode = V4SFmode;
40058 break;
40059 case V4DImode:
40060 cmode = V2DImode;
40061 break;
40062 case V4DFmode:
40063 cmode = V2DFmode;
40064 break;
40065 case V4SImode:
40066 cmode = V2SImode;
40067 break;
40068 case V4SFmode:
40069 cmode = V2SFmode;
40070 break;
40071 case V2DImode:
40072 cmode = DImode;
40073 break;
40074 case V2SImode:
40075 cmode = SImode;
40076 break;
40077 case V2DFmode:
40078 cmode = DFmode;
40079 break;
40080 case V2SFmode:
40081 cmode = SFmode;
40082 break;
40083 default:
40084 gcc_unreachable ();
40085 }
40086
40087 if (!register_operand (ops[1], cmode))
40088 ops[1] = force_reg (cmode, ops[1]);
40089 if (!register_operand (ops[0], cmode))
40090 ops[0] = force_reg (cmode, ops[0]);
40091 emit_insn (gen_rtx_SET (VOIDmode, target,
40092 gen_rtx_VEC_CONCAT (mode, ops[0],
40093 ops[1])));
40094 break;
40095
40096 case 4:
40097 switch (mode)
40098 {
40099 case V4DImode:
40100 cmode = V2DImode;
40101 break;
40102 case V4DFmode:
40103 cmode = V2DFmode;
40104 break;
40105 case V4SImode:
40106 cmode = V2SImode;
40107 break;
40108 case V4SFmode:
40109 cmode = V2SFmode;
40110 break;
40111 default:
40112 gcc_unreachable ();
40113 }
40114 goto half;
40115
40116 case 8:
40117 switch (mode)
40118 {
40119 case V8DImode:
40120 cmode = V2DImode;
40121 hmode = V4DImode;
40122 break;
40123 case V8DFmode:
40124 cmode = V2DFmode;
40125 hmode = V4DFmode;
40126 break;
40127 case V8SImode:
40128 cmode = V2SImode;
40129 hmode = V4SImode;
40130 break;
40131 case V8SFmode:
40132 cmode = V2SFmode;
40133 hmode = V4SFmode;
40134 break;
40135 default:
40136 gcc_unreachable ();
40137 }
40138 goto half;
40139
40140 case 16:
40141 switch (mode)
40142 {
40143 case V16SImode:
40144 cmode = V2SImode;
40145 hmode = V4SImode;
40146 gmode = V8SImode;
40147 break;
40148 case V16SFmode:
40149 cmode = V2SFmode;
40150 hmode = V4SFmode;
40151 gmode = V8SFmode;
40152 break;
40153 default:
40154 gcc_unreachable ();
40155 }
40156 goto half;
40157
40158 half:
40159 /* FIXME: We process inputs backward to help RA. PR 36222. */
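/* For n == 8, pairs of OPS are first concatenated into four CMODE
vectors, then into two HMODE vectors, and finally into the MODE result;
n == 16 adds one more level through GMODE. */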
40160 i = n - 1;
40161 j = (n >> 1) - 1;
40162 for (; i > 0; i -= 2, j--)
40163 {
40164 first[j] = gen_reg_rtx (cmode);
40165 v = gen_rtvec (2, ops[i - 1], ops[i]);
40166 ix86_expand_vector_init (false, first[j],
40167 gen_rtx_PARALLEL (cmode, v));
40168 }
40169
40170 n >>= 1;
40171 if (n > 4)
40172 {
40173 gcc_assert (hmode != VOIDmode);
40174 gcc_assert (gmode != VOIDmode);
40175 for (i = j = 0; i < n; i += 2, j++)
40176 {
40177 second[j] = gen_reg_rtx (hmode);
40178 ix86_expand_vector_init_concat (hmode, second [j],
40179 &first [i], 2);
40180 }
40181 n >>= 1;
40182 for (i = j = 0; i < n; i += 2, j++)
40183 {
40184 third[j] = gen_reg_rtx (gmode);
40185 ix86_expand_vector_init_concat (gmode, third[j],
40186 &second[i], 2);
40187 }
40188 n >>= 1;
40189 ix86_expand_vector_init_concat (mode, target, third, n);
40190 }
40191 else if (n > 2)
40192 {
40193 gcc_assert (hmode != VOIDmode);
40194 for (i = j = 0; i < n; i += 2, j++)
40195 {
40196 second[j] = gen_reg_rtx (hmode);
40197 ix86_expand_vector_init_concat (hmode, second [j],
40198 &first [i], 2);
40199 }
40200 n >>= 1;
40201 ix86_expand_vector_init_concat (mode, target, second, n);
40202 }
40203 else
40204 ix86_expand_vector_init_concat (mode, target, first, n);
40205 break;
40206
40207 default:
40208 gcc_unreachable ();
40209 }
40210 }
40211
40212 /* A subroutine of ix86_expand_vector_init_general. Use vector
40213 interleave to handle the most general case: all values variable,
40214 and none identical. */
40215
40216 static void
40217 ix86_expand_vector_init_interleave (enum machine_mode mode,
40218 rtx target, rtx *ops, int n)
40219 {
40220 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40221 int i, j;
40222 rtx op0, op1;
40223 rtx (*gen_load_even) (rtx, rtx, rtx);
40224 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40225 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40226
40227 switch (mode)
40228 {
40229 case V8HImode:
40230 gen_load_even = gen_vec_setv8hi;
40231 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40232 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40233 inner_mode = HImode;
40234 first_imode = V4SImode;
40235 second_imode = V2DImode;
40236 third_imode = VOIDmode;
40237 break;
40238 case V16QImode:
40239 gen_load_even = gen_vec_setv16qi;
40240 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40241 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40242 inner_mode = QImode;
40243 first_imode = V8HImode;
40244 second_imode = V4SImode;
40245 third_imode = V2DImode;
40246 break;
40247 default:
40248 gcc_unreachable ();
40249 }
40250
40251 for (i = 0; i < n; i++)
40252 {
40253 /* Extend the odd element to SImode using a paradoxical SUBREG. */
40254 op0 = gen_reg_rtx (SImode);
40255 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40256
40257 /* Insert the SImode value as low element of V4SImode vector. */
40258 op1 = gen_reg_rtx (V4SImode);
40259 op0 = gen_rtx_VEC_MERGE (V4SImode,
40260 gen_rtx_VEC_DUPLICATE (V4SImode,
40261 op0),
40262 CONST0_RTX (V4SImode),
40263 const1_rtx);
40264 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40265
40266 /* Cast the V4SImode vector back to a vector in the original mode. */
40267 op0 = gen_reg_rtx (mode);
40268 emit_move_insn (op0, gen_lowpart (mode, op1));
40269
40270 /* Load even elements into the second position. */
40271 emit_insn (gen_load_even (op0,
40272 force_reg (inner_mode,
40273 ops [i + i + 1]),
40274 const1_rtx));
40275
40276 /* Cast vector to FIRST_IMODE vector. */
40277 ops[i] = gen_reg_rtx (first_imode);
40278 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40279 }
40280
40281 /* Interleave low FIRST_IMODE vectors. */
40282 for (i = j = 0; i < n; i += 2, j++)
40283 {
40284 op0 = gen_reg_rtx (first_imode);
40285 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40286
40287 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40288 ops[j] = gen_reg_rtx (second_imode);
40289 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40290 }
40291
40292 /* Interleave low SECOND_IMODE vectors. */
40293 switch (second_imode)
40294 {
40295 case V4SImode:
40296 for (i = j = 0; i < n / 2; i += 2, j++)
40297 {
40298 op0 = gen_reg_rtx (second_imode);
40299 emit_insn (gen_interleave_second_low (op0, ops[i],
40300 ops[i + 1]));
40301
40302 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40303 vector. */
40304 ops[j] = gen_reg_rtx (third_imode);
40305 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40306 }
40307 second_imode = V2DImode;
40308 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40309 /* FALLTHRU */
40310
40311 case V2DImode:
40312 op0 = gen_reg_rtx (second_imode);
40313 emit_insn (gen_interleave_second_low (op0, ops[0],
40314 ops[1]));
40315
40316 /* Cast the SECOND_IMODE vector back to a vector in the original
40317 mode. */
40318 emit_insn (gen_rtx_SET (VOIDmode, target,
40319 gen_lowpart (mode, op0)));
40320 break;
40321
40322 default:
40323 gcc_unreachable ();
40324 }
40325 }
40326
40327 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40328 all values variable, and none identical. */
40329
40330 static void
40331 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40332 rtx target, rtx vals)
40333 {
40334 rtx ops[64], op0, op1;
40335 enum machine_mode half_mode = VOIDmode;
40336 int n, i;
40337
40338 switch (mode)
40339 {
40340 case V2SFmode:
40341 case V2SImode:
40342 if (!mmx_ok && !TARGET_SSE)
40343 break;
40344 /* FALLTHRU */
40345
40346 case V16SImode:
40347 case V16SFmode:
40348 case V8DFmode:
40349 case V8DImode:
40350 case V8SFmode:
40351 case V8SImode:
40352 case V4DFmode:
40353 case V4DImode:
40354 case V4SFmode:
40355 case V4SImode:
40356 case V2DFmode:
40357 case V2DImode:
40358 n = GET_MODE_NUNITS (mode);
40359 for (i = 0; i < n; i++)
40360 ops[i] = XVECEXP (vals, 0, i);
40361 ix86_expand_vector_init_concat (mode, target, ops, n);
40362 return;
40363
40364 case V32QImode:
40365 half_mode = V16QImode;
40366 goto half;
40367
40368 case V16HImode:
40369 half_mode = V8HImode;
40370 goto half;
40371
40372 half:
40373 n = GET_MODE_NUNITS (mode);
40374 for (i = 0; i < n; i++)
40375 ops[i] = XVECEXP (vals, 0, i);
40376 op0 = gen_reg_rtx (half_mode);
40377 op1 = gen_reg_rtx (half_mode);
40378 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40379 n >> 2);
40380 ix86_expand_vector_init_interleave (half_mode, op1,
40381 &ops [n >> 1], n >> 2);
40382 emit_insn (gen_rtx_SET (VOIDmode, target,
40383 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40384 return;
40385
40386 case V16QImode:
40387 if (!TARGET_SSE4_1)
40388 break;
40389 /* FALLTHRU */
40390
40391 case V8HImode:
40392 if (!TARGET_SSE2)
40393 break;
40394
40395 /* Don't use ix86_expand_vector_init_interleave if we can't
40396 move from GPR to SSE register directly. */
40397 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40398 break;
40399
40400 n = GET_MODE_NUNITS (mode);
40401 for (i = 0; i < n; i++)
40402 ops[i] = XVECEXP (vals, 0, i);
40403 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40404 return;
40405
40406 case V4HImode:
40407 case V8QImode:
40408 break;
40409
40410 default:
40411 gcc_unreachable ();
40412 }
40413
40414 {
40415 int i, j, n_elts, n_words, n_elt_per_word;
40416 enum machine_mode inner_mode;
40417 rtx words[4], shift;
40418
40419 inner_mode = GET_MODE_INNER (mode);
40420 n_elts = GET_MODE_NUNITS (mode);
40421 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40422 n_elt_per_word = n_elts / n_words;
40423 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
40424
40425 for (i = 0; i < n_words; ++i)
40426 {
40427 rtx word = NULL_RTX;
40428
40429 for (j = 0; j < n_elt_per_word; ++j)
40430 {
40431 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40432 elt = convert_modes (word_mode, inner_mode, elt, true);
40433
40434 if (j == 0)
40435 word = elt;
40436 else
40437 {
40438 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40439 word, 1, OPTAB_LIB_WIDEN);
40440 word = expand_simple_binop (word_mode, IOR, word, elt,
40441 word, 1, OPTAB_LIB_WIDEN);
40442 }
40443 }
40444
40445 words[i] = word;
40446 }
40447
40448 if (n_words == 1)
40449 emit_move_insn (target, gen_lowpart (mode, words[0]));
40450 else if (n_words == 2)
40451 {
40452 rtx tmp = gen_reg_rtx (mode);
40453 emit_clobber (tmp);
40454 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40455 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40456 emit_move_insn (target, tmp);
40457 }
40458 else if (n_words == 4)
40459 {
40460 rtx tmp = gen_reg_rtx (V4SImode);
40461 gcc_assert (word_mode == SImode);
40462 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40463 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40464 emit_move_insn (target, gen_lowpart (mode, tmp));
40465 }
40466 else
40467 gcc_unreachable ();
40468 }
40469 }
40470
40471 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40472 instructions unless MMX_OK is true. */
40473
40474 void
40475 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40476 {
40477 enum machine_mode mode = GET_MODE (target);
40478 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40479 int n_elts = GET_MODE_NUNITS (mode);
40480 int n_var = 0, one_var = -1;
40481 bool all_same = true, all_const_zero = true;
40482 int i;
40483 rtx x;
40484
40485 for (i = 0; i < n_elts; ++i)
40486 {
40487 x = XVECEXP (vals, 0, i);
40488 if (!(CONST_INT_P (x)
40489 || GET_CODE (x) == CONST_DOUBLE
40490 || GET_CODE (x) == CONST_FIXED))
40491 n_var++, one_var = i;
40492 else if (x != CONST0_RTX (inner_mode))
40493 all_const_zero = false;
40494 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40495 all_same = false;
40496 }
40497
40498 /* Constants are best loaded from the constant pool. */
40499 if (n_var == 0)
40500 {
40501 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40502 return;
40503 }
40504
40505 /* If all values are identical, broadcast the value. */
40506 if (all_same
40507 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40508 XVECEXP (vals, 0, 0)))
40509 return;
40510
40511 /* Values where only one field is non-constant are best loaded from
40512 the pool and overwritten via move later. */
40513 if (n_var == 1)
40514 {
40515 if (all_const_zero
40516 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40517 XVECEXP (vals, 0, one_var),
40518 one_var))
40519 return;
40520
40521 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40522 return;
40523 }
40524
40525 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40526 }
40527
40528 void
40529 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40530 {
40531 enum machine_mode mode = GET_MODE (target);
40532 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40533 enum machine_mode half_mode;
40534 bool use_vec_merge = false;
40535 rtx tmp;
40536 static rtx (*gen_extract[6][2]) (rtx, rtx)
40537 = {
40538 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40539 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40540 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40541 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40542 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40543 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40544 };
40545 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40546 = {
40547 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40548 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40549 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40550 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40551 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40552 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40553 };
40554 int i, j, n;
40555
40556 switch (mode)
40557 {
40558 case V2SFmode:
40559 case V2SImode:
40560 if (mmx_ok)
40561 {
40562 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40563 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40564 if (elt == 0)
40565 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40566 else
40567 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40568 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40569 return;
40570 }
40571 break;
40572
40573 case V2DImode:
40574 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40575 if (use_vec_merge)
40576 break;
40577
40578 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40579 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40580 if (elt == 0)
40581 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40582 else
40583 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40584 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40585 return;
40586
40587 case V2DFmode:
40588 {
40589 rtx op0, op1;
40590
40591 /* For the two element vectors, we implement a VEC_CONCAT with
40592 the extraction of the other element. */
40593
40594 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40595 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40596
40597 if (elt == 0)
40598 op0 = val, op1 = tmp;
40599 else
40600 op0 = tmp, op1 = val;
40601
40602 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40603 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40604 }
40605 return;
40606
40607 case V4SFmode:
40608 use_vec_merge = TARGET_SSE4_1;
40609 if (use_vec_merge)
40610 break;
40611
40612 switch (elt)
40613 {
40614 case 0:
40615 use_vec_merge = true;
40616 break;
40617
40618 case 1:
40619 /* tmp = target = A B C D */
40620 tmp = copy_to_reg (target);
40621 /* target = A A B B */
40622 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40623 /* target = X A B B */
40624 ix86_expand_vector_set (false, target, val, 0);
40625 /* target = A X C D */
40626 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40627 const1_rtx, const0_rtx,
40628 GEN_INT (2+4), GEN_INT (3+4)));
40629 return;
40630
40631 case 2:
40632 /* tmp = target = A B C D */
40633 tmp = copy_to_reg (target);
40634 /* tmp = X B C D */
40635 ix86_expand_vector_set (false, tmp, val, 0);
40636 /* target = A B X D */
40637 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40638 const0_rtx, const1_rtx,
40639 GEN_INT (0+4), GEN_INT (3+4)));
40640 return;
40641
40642 case 3:
40643 /* tmp = target = A B C D */
40644 tmp = copy_to_reg (target);
40645 /* tmp = X B C D */
40646 ix86_expand_vector_set (false, tmp, val, 0);
40647 /* target = A B C X */
40648 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40649 const0_rtx, const1_rtx,
40650 GEN_INT (2+4), GEN_INT (0+4)));
40651 return;
40652
40653 default:
40654 gcc_unreachable ();
40655 }
40656 break;
40657
40658 case V4SImode:
40659 use_vec_merge = TARGET_SSE4_1;
40660 if (use_vec_merge)
40661 break;
40662
40663 /* Element 0 handled by vec_merge below. */
40664 if (elt == 0)
40665 {
40666 use_vec_merge = true;
40667 break;
40668 }
40669
40670 if (TARGET_SSE2)
40671 {
40672 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40673 store into element 0, then shuffle them back. */
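/* The permutation below is a transposition of elements 0 and ELT, so
applying it a second time after the insertion restores the original
element order with VAL now in position ELT. */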
40674
40675 rtx order[4];
40676
40677 order[0] = GEN_INT (elt);
40678 order[1] = const1_rtx;
40679 order[2] = const2_rtx;
40680 order[3] = GEN_INT (3);
40681 order[elt] = const0_rtx;
40682
40683 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40684 order[1], order[2], order[3]));
40685
40686 ix86_expand_vector_set (false, target, val, 0);
40687
40688 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40689 order[1], order[2], order[3]));
40690 }
40691 else
40692 {
40693 /* For SSE1, we have to reuse the V4SF code. */
40694 rtx t = gen_reg_rtx (V4SFmode);
40695 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40696 emit_move_insn (target, gen_lowpart (mode, t));
40697 }
40698 return;
40699
40700 case V8HImode:
40701 use_vec_merge = TARGET_SSE2;
40702 break;
40703 case V4HImode:
40704 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40705 break;
40706
40707 case V16QImode:
40708 use_vec_merge = TARGET_SSE4_1;
40709 break;
40710
40711 case V8QImode:
40712 break;
40713
40714 case V32QImode:
40715 half_mode = V16QImode;
40716 j = 0;
40717 n = 16;
40718 goto half;
40719
40720 case V16HImode:
40721 half_mode = V8HImode;
40722 j = 1;
40723 n = 8;
40724 goto half;
40725
40726 case V8SImode:
40727 half_mode = V4SImode;
40728 j = 2;
40729 n = 4;
40730 goto half;
40731
40732 case V4DImode:
40733 half_mode = V2DImode;
40734 j = 3;
40735 n = 2;
40736 goto half;
40737
40738 case V8SFmode:
40739 half_mode = V4SFmode;
40740 j = 4;
40741 n = 4;
40742 goto half;
40743
40744 case V4DFmode:
40745 half_mode = V2DFmode;
40746 j = 5;
40747 n = 2;
40748 goto half;
40749
40750 half:
40751 /* Compute offset. */
40752 i = elt / n;
40753 elt %= n;
40754
40755 gcc_assert (i <= 1);
40756
40757 /* Extract the half. */
40758 tmp = gen_reg_rtx (half_mode);
40759 emit_insn (gen_extract[j][i] (tmp, target));
40760
40761 /* Put val in tmp at elt. */
40762 ix86_expand_vector_set (false, tmp, val, elt);
40763
40764 /* Put it back. */
40765 emit_insn (gen_insert[j][i] (target, target, tmp));
40766 return;
40767
40768 default:
40769 break;
40770 }
40771
40772 if (use_vec_merge)
40773 {
40774 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
40775 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
40776 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40777 }
40778 else
40779 {
40780 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40781
40782 emit_move_insn (mem, target);
40783
40784 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40785 emit_move_insn (tmp, val);
40786
40787 emit_move_insn (target, mem);
40788 }
40789 }
40790
40791 void
40792 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
40793 {
40794 enum machine_mode mode = GET_MODE (vec);
40795 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40796 bool use_vec_extr = false;
40797 rtx tmp;
40798
40799 switch (mode)
40800 {
40801 case V2SImode:
40802 case V2SFmode:
40803 if (!mmx_ok)
40804 break;
40805 /* FALLTHRU */
40806
40807 case V2DFmode:
40808 case V2DImode:
40809 use_vec_extr = true;
40810 break;
40811
40812 case V4SFmode:
40813 use_vec_extr = TARGET_SSE4_1;
40814 if (use_vec_extr)
40815 break;
40816
40817 switch (elt)
40818 {
40819 case 0:
40820 tmp = vec;
40821 break;
40822
40823 case 1:
40824 case 3:
40825 tmp = gen_reg_rtx (mode);
40826 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
40827 GEN_INT (elt), GEN_INT (elt),
40828 GEN_INT (elt+4), GEN_INT (elt+4)));
40829 break;
40830
40831 case 2:
40832 tmp = gen_reg_rtx (mode);
40833 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
40834 break;
40835
40836 default:
40837 gcc_unreachable ();
40838 }
40839 vec = tmp;
40840 use_vec_extr = true;
40841 elt = 0;
40842 break;
40843
40844 case V4SImode:
40845 use_vec_extr = TARGET_SSE4_1;
40846 if (use_vec_extr)
40847 break;
40848
40849 if (TARGET_SSE2)
40850 {
40851 switch (elt)
40852 {
40853 case 0:
40854 tmp = vec;
40855 break;
40856
40857 case 1:
40858 case 3:
40859 tmp = gen_reg_rtx (mode);
40860 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
40861 GEN_INT (elt), GEN_INT (elt),
40862 GEN_INT (elt), GEN_INT (elt)));
40863 break;
40864
40865 case 2:
40866 tmp = gen_reg_rtx (mode);
40867 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
40868 break;
40869
40870 default:
40871 gcc_unreachable ();
40872 }
40873 vec = tmp;
40874 use_vec_extr = true;
40875 elt = 0;
40876 }
40877 else
40878 {
40879 /* For SSE1, we have to reuse the V4SF code. */
40880 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
40881 gen_lowpart (V4SFmode, vec), elt);
40882 return;
40883 }
40884 break;
40885
40886 case V8HImode:
40887 use_vec_extr = TARGET_SSE2;
40888 break;
40889 case V4HImode:
40890 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40891 break;
40892
40893 case V16QImode:
40894 use_vec_extr = TARGET_SSE4_1;
40895 break;
40896
40897 case V8SFmode:
40898 if (TARGET_AVX)
40899 {
40900 tmp = gen_reg_rtx (V4SFmode);
40901 if (elt < 4)
40902 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
40903 else
40904 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
40905 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40906 return;
40907 }
40908 break;
40909
40910 case V4DFmode:
40911 if (TARGET_AVX)
40912 {
40913 tmp = gen_reg_rtx (V2DFmode);
40914 if (elt < 2)
40915 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
40916 else
40917 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
40918 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40919 return;
40920 }
40921 break;
40922
40923 case V32QImode:
40924 if (TARGET_AVX)
40925 {
40926 tmp = gen_reg_rtx (V16QImode);
40927 if (elt < 16)
40928 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
40929 else
40930 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
40931 ix86_expand_vector_extract (false, target, tmp, elt & 15);
40932 return;
40933 }
40934 break;
40935
40936 case V16HImode:
40937 if (TARGET_AVX)
40938 {
40939 tmp = gen_reg_rtx (V8HImode);
40940 if (elt < 8)
40941 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
40942 else
40943 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
40944 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40945 return;
40946 }
40947 break;
40948
40949 case V8SImode:
40950 if (TARGET_AVX)
40951 {
40952 tmp = gen_reg_rtx (V4SImode);
40953 if (elt < 4)
40954 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
40955 else
40956 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
40957 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40958 return;
40959 }
40960 break;
40961
40962 case V4DImode:
40963 if (TARGET_AVX)
40964 {
40965 tmp = gen_reg_rtx (V2DImode);
40966 if (elt < 2)
40967 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
40968 else
40969 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
40970 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40971 return;
40972 }
40973 break;
40974
40975 case V16SFmode:
40976 tmp = gen_reg_rtx (V8SFmode);
40977 if (elt < 8)
40978 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
40979 else
40980 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
40981 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40982 return;
40983
40984 case V8DFmode:
40985 tmp = gen_reg_rtx (V4DFmode);
40986 if (elt < 4)
40987 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
40988 else
40989 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
40990 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40991 return;
40992
40993 case V16SImode:
40994 tmp = gen_reg_rtx (V8SImode);
40995 if (elt < 8)
40996 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
40997 else
40998 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
40999 ix86_expand_vector_extract (false, target, tmp, elt & 7);
41000 return;
41001
41002 case V8DImode:
41003 tmp = gen_reg_rtx (V4DImode);
41004 if (elt < 4)
41005 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
41006 else
41007 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
41008 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41009 return;
41010
41011 case V8QImode:
41012 /* ??? Could extract the appropriate HImode element and shift. */
41013 default:
41014 break;
41015 }
41016
41017 if (use_vec_extr)
41018 {
41019 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
41020 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
41021
41022 /* Let the rtl optimizers know about the zero extension performed. */
41023 if (inner_mode == QImode || inner_mode == HImode)
41024 {
41025 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
41026 target = gen_lowpart (SImode, target);
41027 }
41028
41029 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
41030 }
41031 else
41032 {
41033 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
41034
41035 emit_move_insn (mem, vec);
41036
41037 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
41038 emit_move_insn (target, tmp);
41039 }
41040 }
41041
41042 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
41043 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
41044 The upper bits of DEST are undefined, though they shouldn't cause
41045 exceptions (some bits from src or all zeros are ok). */
41046
41047 static void
41048 emit_reduc_half (rtx dest, rtx src, int i)
41049 {
41050 rtx tem, d = dest;
41051 switch (GET_MODE (src))
41052 {
41053 case V4SFmode:
41054 if (i == 128)
41055 tem = gen_sse_movhlps (dest, src, src);
41056 else
41057 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
41058 GEN_INT (1 + 4), GEN_INT (1 + 4));
41059 break;
41060 case V2DFmode:
41061 tem = gen_vec_interleave_highv2df (dest, src, src);
41062 break;
41063 case V16QImode:
41064 case V8HImode:
41065 case V4SImode:
41066 case V2DImode:
41067 d = gen_reg_rtx (V1TImode);
41068 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
41069 GEN_INT (i / 2));
41070 break;
41071 case V8SFmode:
41072 if (i == 256)
41073 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
41074 else
41075 tem = gen_avx_shufps256 (dest, src, src,
41076 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
41077 break;
41078 case V4DFmode:
41079 if (i == 256)
41080 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
41081 else
41082 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
41083 break;
41084 case V32QImode:
41085 case V16HImode:
41086 case V8SImode:
41087 case V4DImode:
41088 if (i == 256)
41089 {
41090 if (GET_MODE (dest) != V4DImode)
41091 d = gen_reg_rtx (V4DImode);
41092 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
41093 gen_lowpart (V4DImode, src),
41094 const1_rtx);
41095 }
41096 else
41097 {
41098 d = gen_reg_rtx (V2TImode);
41099 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
41100 GEN_INT (i / 2));
41101 }
41102 break;
41103 case V16SImode:
41104 case V16SFmode:
41105 case V8DImode:
41106 case V8DFmode:
41107 if (i > 128)
41108 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
41109 gen_lowpart (V16SImode, src),
41110 gen_lowpart (V16SImode, src),
41111 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
41112 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
41113 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
41114 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
41115 GEN_INT (0xC), GEN_INT (0xD),
41116 GEN_INT (0xE), GEN_INT (0xF),
41117 GEN_INT (0x10), GEN_INT (0x11),
41118 GEN_INT (0x12), GEN_INT (0x13),
41119 GEN_INT (0x14), GEN_INT (0x15),
41120 GEN_INT (0x16), GEN_INT (0x17));
41121 else
41122 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
41123 gen_lowpart (V16SImode, src),
41124 GEN_INT (i == 128 ? 0x2 : 0x1),
41125 GEN_INT (0x3),
41126 GEN_INT (0x3),
41127 GEN_INT (0x3),
41128 GEN_INT (i == 128 ? 0x6 : 0x5),
41129 GEN_INT (0x7),
41130 GEN_INT (0x7),
41131 GEN_INT (0x7),
41132 GEN_INT (i == 128 ? 0xA : 0x9),
41133 GEN_INT (0xB),
41134 GEN_INT (0xB),
41135 GEN_INT (0xB),
41136 GEN_INT (i == 128 ? 0xE : 0xD),
41137 GEN_INT (0xF),
41138 GEN_INT (0xF),
41139 GEN_INT (0xF));
41140 break;
41141 default:
41142 gcc_unreachable ();
41143 }
41144 emit_insn (tem);
41145 if (d != dest)
41146 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
41147 }
41148
41149 /* Expand a vector reduction. FN is the binary pattern to reduce;
41150 DEST is the destination; IN is the input vector. */
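/* The reduction takes log2(nelts) steps: each emit_reduc_half call moves
the upper half of the live elements into the low half of a fresh vector,
FN combines it with the previous vector, and the final result ends up in
element 0 of DEST. */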
41151
41152 void
41153 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
41154 {
41155 rtx half, dst, vec = in;
41156 enum machine_mode mode = GET_MODE (in);
41157 int i;
41158
41159 /* SSE4.1 has a special instruction for V8HImode UMIN reduction. */
41160 if (TARGET_SSE4_1
41161 && mode == V8HImode
41162 && fn == gen_uminv8hi3)
41163 {
41164 emit_insn (gen_sse4_1_phminposuw (dest, in));
41165 return;
41166 }
41167
41168 for (i = GET_MODE_BITSIZE (mode);
41169 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
41170 i >>= 1)
41171 {
41172 half = gen_reg_rtx (mode);
41173 emit_reduc_half (half, vec, i);
41174 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
41175 dst = dest;
41176 else
41177 dst = gen_reg_rtx (mode);
41178 emit_insn (fn (dst, half, vec));
41179 vec = dst;
41180 }
41181 }
41182 \f
41183 /* Target hook for scalar_mode_supported_p. */
41184 static bool
41185 ix86_scalar_mode_supported_p (enum machine_mode mode)
41186 {
41187 if (DECIMAL_FLOAT_MODE_P (mode))
41188 return default_decimal_float_supported_p ();
41189 else if (mode == TFmode)
41190 return true;
41191 else
41192 return default_scalar_mode_supported_p (mode);
41193 }
41194
41195 /* Implements target hook vector_mode_supported_p. */
41196 static bool
41197 ix86_vector_mode_supported_p (enum machine_mode mode)
41198 {
41199 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41200 return true;
41201 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41202 return true;
41203 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41204 return true;
41205 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41206 return true;
41207 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41208 return true;
41209 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41210 return true;
41211 return false;
41212 }
41213
41214 /* Target hook for c_mode_for_suffix. */
41215 static enum machine_mode
41216 ix86_c_mode_for_suffix (char suffix)
41217 {
41218 if (suffix == 'q')
41219 return TFmode;
41220 if (suffix == 'w')
41221 return XFmode;
41222
41223 return VOIDmode;
41224 }
41225
41226 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41227
41228 We do this in the new i386 backend to maintain source compatibility
41229 with the old cc0-based compiler. */
41230
41231 static tree
41232 ix86_md_asm_clobbers (tree, tree, tree clobbers)
41233 {
41234 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41235 clobbers);
41236 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41237 clobbers);
41238 return clobbers;
41239 }
41240
41241 /* Implements target vector targetm.asm.encode_section_info. */
41242
41243 static void ATTRIBUTE_UNUSED
41244 ix86_encode_section_info (tree decl, rtx rtl, int first)
41245 {
41246 default_encode_section_info (decl, rtl, first);
41247
41248 if (TREE_CODE (decl) == VAR_DECL
41249 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
41250 && ix86_in_large_data_p (decl))
41251 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41252 }
41253
41254 /* Worker function for REVERSE_CONDITION. */
41255
41256 enum rtx_code
41257 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41258 {
41259 return (mode != CCFPmode && mode != CCFPUmode
41260 ? reverse_condition (code)
41261 : reverse_condition_maybe_unordered (code));
41262 }
41263
41264 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41265 to OPERANDS[0]. */
41266
41267 const char *
41268 output_387_reg_move (rtx insn, rtx *operands)
41269 {
41270 if (REG_P (operands[0]))
41271 {
41272 if (REG_P (operands[1])
41273 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41274 {
41275 if (REGNO (operands[0]) == FIRST_STACK_REG)
41276 return output_387_ffreep (operands, 0);
41277 return "fstp\t%y0";
41278 }
41279 if (STACK_TOP_P (operands[0]))
41280 return "fld%Z1\t%y1";
41281 return "fst\t%y0";
41282 }
41283 else if (MEM_P (operands[0]))
41284 {
41285 gcc_assert (REG_P (operands[1]));
41286 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41287 return "fstp%Z0\t%y0";
41288 else
41289 {
41290 /* There is no non-popping store to memory for XFmode.
41291 So if we need one, follow the store with a load. */
41292 if (GET_MODE (operands[0]) == XFmode)
41293 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41294 else
41295 return "fst%Z0\t%y0";
41296 }
41297 }
41298 else
41299 gcc_unreachable();
41300 }
41301
41302 /* Output code to perform a conditional jump to LABEL, if the C2 flag in the
41303 FP status register is set. */
41304
41305 void
41306 ix86_emit_fp_unordered_jump (rtx label)
41307 {
41308 rtx reg = gen_reg_rtx (HImode);
41309 rtx temp;
41310
41311 emit_insn (gen_x86_fnstsw_1 (reg));
41312
41313 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41314 {
41315 emit_insn (gen_x86_sahf_1 (reg));
41316
41317 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41318 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41319 }
41320 else
41321 {
41322 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41323
41324 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41325 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41326 }
41327
41328 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41329 gen_rtx_LABEL_REF (VOIDmode, label),
41330 pc_rtx);
41331 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41332
41333 emit_jump_insn (temp);
41334 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41335 }
41336
41337 /* Output code to perform a log1p XFmode calculation. */
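/* fyl2xp1 is only specified for arguments with magnitude below about
1 - sqrt(2)/2 (~0.29289, the constant tested below); for larger
magnitudes compute log2(1 + op1) with fyl2x instead. */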
41338
41339 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41340 {
41341 rtx_code_label *label1 = gen_label_rtx ();
41342 rtx_code_label *label2 = gen_label_rtx ();
41343
41344 rtx tmp = gen_reg_rtx (XFmode);
41345 rtx tmp2 = gen_reg_rtx (XFmode);
41346 rtx test;
41347
41348 emit_insn (gen_absxf2 (tmp, op1));
41349 test = gen_rtx_GE (VOIDmode, tmp,
41350 CONST_DOUBLE_FROM_REAL_VALUE (
41351 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41352 XFmode));
41353 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41354
41355 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41356 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41357 emit_jump (label2);
41358
41359 emit_label (label1);
41360 emit_move_insn (tmp, CONST1_RTX (XFmode));
41361 emit_insn (gen_addxf3 (tmp, op1, tmp));
41362 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41363 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41364
41365 emit_label (label2);
41366 }
41367
41368 /* Emit code for round calculation. */
41369 void ix86_emit_i387_round (rtx op0, rtx op1)
41370 {
41371 enum machine_mode inmode = GET_MODE (op1);
41372 enum machine_mode outmode = GET_MODE (op0);
41373 rtx e1, e2, res, tmp, tmp1, half;
41374 rtx scratch = gen_reg_rtx (HImode);
41375 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41376 rtx_code_label *jump_label = gen_label_rtx ();
41377 rtx insn;
41378 rtx (*gen_abs) (rtx, rtx);
41379 rtx (*gen_neg) (rtx, rtx);
41380
41381 switch (inmode)
41382 {
41383 case SFmode:
41384 gen_abs = gen_abssf2;
41385 break;
41386 case DFmode:
41387 gen_abs = gen_absdf2;
41388 break;
41389 case XFmode:
41390 gen_abs = gen_absxf2;
41391 break;
41392 default:
41393 gcc_unreachable ();
41394 }
41395
41396 switch (outmode)
41397 {
41398 case SFmode:
41399 gen_neg = gen_negsf2;
41400 break;
41401 case DFmode:
41402 gen_neg = gen_negdf2;
41403 break;
41404 case XFmode:
41405 gen_neg = gen_negxf2;
41406 break;
41407 case HImode:
41408 gen_neg = gen_neghi2;
41409 break;
41410 case SImode:
41411 gen_neg = gen_negsi2;
41412 break;
41413 case DImode:
41414 gen_neg = gen_negdi2;
41415 break;
41416 default:
41417 gcc_unreachable ();
41418 }
41419
41420 e1 = gen_reg_rtx (inmode);
41421 e2 = gen_reg_rtx (inmode);
41422 res = gen_reg_rtx (outmode);
41423
41424 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41425
41426 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
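/* For example, round(2.5) = floor(2.5 + 0.5) = 3 and
round(-2.5) = -floor(2.5 + 0.5) = -3: halfway cases round away from zero. */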
41427
41428 /* scratch = fxam(op1) */
41429 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41430 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41431 UNSPEC_FXAM)));
41432 /* e1 = fabs(op1) */
41433 emit_insn (gen_abs (e1, op1));
41434
41435 /* e2 = e1 + 0.5 */
41436 half = force_reg (inmode, half);
41437 emit_insn (gen_rtx_SET (VOIDmode, e2,
41438 gen_rtx_PLUS (inmode, e1, half)));
41439
41440 /* res = floor(e2) */
41441 if (inmode != XFmode)
41442 {
41443 tmp1 = gen_reg_rtx (XFmode);
41444
41445 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41446 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41447 }
41448 else
41449 tmp1 = e2;
41450
41451 switch (outmode)
41452 {
41453 case SFmode:
41454 case DFmode:
41455 {
41456 rtx tmp0 = gen_reg_rtx (XFmode);
41457
41458 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41459
41460 emit_insn (gen_rtx_SET (VOIDmode, res,
41461 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41462 UNSPEC_TRUNC_NOOP)));
41463 }
41464 break;
41465 case XFmode:
41466 emit_insn (gen_frndintxf2_floor (res, tmp1));
41467 break;
41468 case HImode:
41469 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41470 break;
41471 case SImode:
41472 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41473 break;
41474 case DImode:
41475 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41476 break;
41477 default:
41478 gcc_unreachable ();
41479 }
41480
41481 /* flags = signbit(a) */
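/* fxam reported the operand's sign in C1 (bit 9 of the FP status word),
which is bit 1 of the high byte tested here. */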
41482 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41483
41484 /* if (flags) then res = -res */
41485 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41486 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41487 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41488 pc_rtx);
41489 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41490 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41491 JUMP_LABEL (insn) = jump_label;
41492
41493 emit_insn (gen_neg (res, res));
41494
41495 emit_label (jump_label);
41496 LABEL_NUSES (jump_label) = 1;
41497
41498 emit_move_insn (op0, res);
41499 }
41500
41501 /* Output code to perform a Newton-Raphson approximation of a single precision
41502 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41503
41504 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41505 {
41506 rtx x0, x1, e0, e1;
41507
41508 x0 = gen_reg_rtx (mode);
41509 e0 = gen_reg_rtx (mode);
41510 e1 = gen_reg_rtx (mode);
41511 x1 = gen_reg_rtx (mode);
41512
41513 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp(b))) */
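/* This is one Newton-Raphson step for 1/b: with x0 ~= rcp(b),
x1 = x0 * (2 - b * x0) = (x0 + x0) - b * x0 * x0, and a / b ~= a * x1. */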
41514
41515 b = force_reg (mode, b);
41516
41517 /* x0 = rcp(b) estimate */
41518 if (mode == V16SFmode || mode == V8DFmode)
41519 emit_insn (gen_rtx_SET (VOIDmode, x0,
41520 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41521 UNSPEC_RCP14)));
41522 else
41523 emit_insn (gen_rtx_SET (VOIDmode, x0,
41524 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41525 UNSPEC_RCP)));
41526
41527 /* e0 = x0 * b */
41528 emit_insn (gen_rtx_SET (VOIDmode, e0,
41529 gen_rtx_MULT (mode, x0, b)));
41530
41531 /* e0 = x0 * e0 */
41532 emit_insn (gen_rtx_SET (VOIDmode, e0,
41533 gen_rtx_MULT (mode, x0, e0)));
41534
41535 /* e1 = x0 + x0 */
41536 emit_insn (gen_rtx_SET (VOIDmode, e1,
41537 gen_rtx_PLUS (mode, x0, x0)));
41538
41539 /* x1 = e1 - e0 */
41540 emit_insn (gen_rtx_SET (VOIDmode, x1,
41541 gen_rtx_MINUS (mode, e1, e0)));
41542
41543 /* res = a * x1 */
41544 emit_insn (gen_rtx_SET (VOIDmode, res,
41545 gen_rtx_MULT (mode, a, x1)));
41546 }
41547
41548 /* Output code to perform a Newton-Raphson approximation of a
41549 single precision floating point [reciprocal] square root. */
41550
41551 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41552 bool recip)
41553 {
41554 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41555 REAL_VALUE_TYPE r;
41556 int unspec;
41557
41558 x0 = gen_reg_rtx (mode);
41559 e0 = gen_reg_rtx (mode);
41560 e1 = gen_reg_rtx (mode);
41561 e2 = gen_reg_rtx (mode);
41562 e3 = gen_reg_rtx (mode);
41563
41564 real_from_integer (&r, VOIDmode, -3, SIGNED);
41565 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41566
41567 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41568 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41569 unspec = UNSPEC_RSQRT;
41570
41571 if (VECTOR_MODE_P (mode))
41572 {
41573 mthree = ix86_build_const_vector (mode, true, mthree);
41574 mhalf = ix86_build_const_vector (mode, true, mhalf);
41575 /* There is no 512-bit rsqrt; there is, however, rsqrt14. */
41576 if (GET_MODE_SIZE (mode) == 64)
41577 unspec = UNSPEC_RSQRT14;
41578 }
41579
41580 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41581 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
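/* Both follow from one Newton-Raphson step for 1/sqrt(a): with
x0 ~= rsqrt(a), x1 = 0.5 * x0 * (3 - a * x0 * x0)
= -0.5 * x0 * (a * x0 * x0 - 3); multiplying by a gives the sqrt form. */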
41582
41583 a = force_reg (mode, a);
41584
41585 /* x0 = rsqrt(a) estimate */
41586 emit_insn (gen_rtx_SET (VOIDmode, x0,
41587 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41588 unspec)));
41589
41590 /* If a == 0.0, mask x0 to 0.0 so the infinity from rsqrt(0.0) does not turn sqrt(0.0) into a NaN. */
41591 if (!recip)
41592 {
41593 rtx zero, mask;
41594
41595 zero = gen_reg_rtx (mode);
41596 mask = gen_reg_rtx (mode);
41597
41598 zero = force_reg (mode, CONST0_RTX(mode));
41599
41600 /* Handle masked compare. */
41601 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41602 {
41603 mask = gen_reg_rtx (HImode);
41604 /* Imm value 0x4 corresponds to not-equal comparison. */
41605 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41606 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41607 }
41608 else
41609 {
41610 emit_insn (gen_rtx_SET (VOIDmode, mask,
41611 gen_rtx_NE (mode, zero, a)));
41612
41613 emit_insn (gen_rtx_SET (VOIDmode, x0,
41614 gen_rtx_AND (mode, x0, mask)));
41615 }
41616 }
41617
41618 /* e0 = x0 * a */
41619 emit_insn (gen_rtx_SET (VOIDmode, e0,
41620 gen_rtx_MULT (mode, x0, a)));
41621 /* e1 = e0 * x0 */
41622 emit_insn (gen_rtx_SET (VOIDmode, e1,
41623 gen_rtx_MULT (mode, e0, x0)));
41624
41625 /* e2 = e1 - 3. */
41626 mthree = force_reg (mode, mthree);
41627 emit_insn (gen_rtx_SET (VOIDmode, e2,
41628 gen_rtx_PLUS (mode, e1, mthree)));
41629
41630 mhalf = force_reg (mode, mhalf);
41631 if (recip)
41632 /* e3 = -.5 * x0 */
41633 emit_insn (gen_rtx_SET (VOIDmode, e3,
41634 gen_rtx_MULT (mode, x0, mhalf)));
41635 else
41636 /* e3 = -.5 * e0 */
41637 emit_insn (gen_rtx_SET (VOIDmode, e3,
41638 gen_rtx_MULT (mode, e0, mhalf)));
41639 /* ret = e2 * e3 */
41640 emit_insn (gen_rtx_SET (VOIDmode, res,
41641 gen_rtx_MULT (mode, e2, e3)));
41642 }
41643
41644 #ifdef TARGET_SOLARIS
41645 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
41646
41647 static void
41648 i386_solaris_elf_named_section (const char *name, unsigned int flags,
41649 tree decl)
41650 {
41651 /* With Binutils 2.15, the "@unwind" marker must be specified on
41652 every occurrence of the ".eh_frame" section, not just the first
41653 one. */
41654 if (TARGET_64BIT
41655 && strcmp (name, ".eh_frame") == 0)
41656 {
41657 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
41658 flags & SECTION_WRITE ? "aw" : "a");
41659 return;
41660 }
41661
41662 #ifndef USE_GAS
41663 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
41664 {
41665 solaris_elf_asm_comdat_section (name, flags, decl);
41666 return;
41667 }
41668 #endif
41669
41670 default_elf_asm_named_section (name, flags, decl);
41671 }
41672 #endif /* TARGET_SOLARIS */
41673
41674 /* Return the mangling of TYPE if it is an extended fundamental type. */
41675
41676 static const char *
41677 ix86_mangle_type (const_tree type)
41678 {
41679 type = TYPE_MAIN_VARIANT (type);
41680
41681 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
41682 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
41683 return NULL;
41684
41685 switch (TYPE_MODE (type))
41686 {
41687 case TFmode:
41688 /* __float128 is "g". */
41689 return "g";
41690 case XFmode:
41691 /* "long double" or __float80 is "e". */
41692 return "e";
41693 default:
41694 return NULL;
41695 }
41696 }
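/* For reference, under the Itanium C++ ABI this makes e.g.
   foo(__float128) mangle as _Z3foog and foo(long double) as _Z3fooe.  */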
41697
41698 /* For 32-bit code we can save PIC register setup by using
41699 __stack_chk_fail_local hidden function instead of calling
41700 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
41701 register, so it is better to call __stack_chk_fail directly. */
41702
41703 static tree ATTRIBUTE_UNUSED
41704 ix86_stack_protect_fail (void)
41705 {
41706 return TARGET_64BIT
41707 ? default_external_stack_protect_fail ()
41708 : default_hidden_stack_protect_fail ();
41709 }
41710
41711 /* Select a format to encode pointers in exception handling data. CODE
41712 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
41713 true if the symbol may be affected by dynamic relocations.
41714
41715 ??? All x86 object file formats are capable of representing this.
41716 After all, the relocation needed is the same as for the call insn.
41717 Whether or not a particular assembler allows us to enter such, I
41718 guess we'll have to see. */
41719 int
41720 asm_preferred_eh_data_format (int code, int global)
41721 {
41722 if (flag_pic)
41723 {
41724 int type = DW_EH_PE_sdata8;
41725 if (!TARGET_64BIT
41726 || ix86_cmodel == CM_SMALL_PIC
41727 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
41728 type = DW_EH_PE_sdata4;
41729 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
41730 }
41731 if (ix86_cmodel == CM_SMALL
41732 || (ix86_cmodel == CM_MEDIUM && code))
41733 return DW_EH_PE_udata4;
41734 return DW_EH_PE_absptr;
41735 }
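/* For example (a sketch, derived from the logic above): for -fpic
   x86-64 small-model code, a code label is encoded as DW_EH_PE_pcrel
   | DW_EH_PE_sdata4 and a global data reference additionally gets
   DW_EH_PE_indirect; non-PIC small-model code uses DW_EH_PE_udata4.  */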
41736 \f
41737 /* Expand copysign: apply the sign of SIGN to the positive value ABS_VALUE
41738 and store the result in RESULT. If MASK is non-null, it shall be a mask
41739 that masks out the sign-bit. */
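/* A worked SFmode example of the bit arithmetic below (assuming the
   usual IEEE single-precision encodings): abs_value = 2.5 = 0x40200000,
   sign = -1.0 = 0xBF800000, sign-bit mask = 0x80000000; then
   sgn = 0xBF800000 & 0x80000000 = 0x80000000 and
   result = 0x40200000 | 0x80000000 = 0xC0200000 = -2.5.  */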
41740 static void
41741 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
41742 {
41743 enum machine_mode mode = GET_MODE (sign);
41744 rtx sgn = gen_reg_rtx (mode);
41745 if (mask == NULL_RTX)
41746 {
41747 enum machine_mode vmode;
41748
41749 if (mode == SFmode)
41750 vmode = V4SFmode;
41751 else if (mode == DFmode)
41752 vmode = V2DFmode;
41753 else
41754 vmode = mode;
41755
41756 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
41757 if (!VECTOR_MODE_P (mode))
41758 {
41759 /* We need to generate a scalar mode mask in this case. */
41760 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41761 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41762 mask = gen_reg_rtx (mode);
41763 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41764 }
41765 }
41766 else
41767 mask = gen_rtx_NOT (mode, mask);
41768 emit_insn (gen_rtx_SET (VOIDmode, sgn,
41769 gen_rtx_AND (mode, mask, sign)));
41770 emit_insn (gen_rtx_SET (VOIDmode, result,
41771 gen_rtx_IOR (mode, abs_value, sgn)));
41772 }
41773
41774 /* Expand fabs (OP0) and return a new rtx that holds the result. The
41775 mask for masking out the sign-bit is stored in *SMASK, if that is
41776 non-null. */
41777 static rtx
41778 ix86_expand_sse_fabs (rtx op0, rtx *smask)
41779 {
41780 enum machine_mode vmode, mode = GET_MODE (op0);
41781 rtx xa, mask;
41782
41783 xa = gen_reg_rtx (mode);
41784 if (mode == SFmode)
41785 vmode = V4SFmode;
41786 else if (mode == DFmode)
41787 vmode = V2DFmode;
41788 else
41789 vmode = mode;
41790 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
41791 if (!VECTOR_MODE_P (mode))
41792 {
41793 /* We need to generate a scalar mode mask in this case. */
41794 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41795 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41796 mask = gen_reg_rtx (mode);
41797 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41798 }
41799 emit_insn (gen_rtx_SET (VOIDmode, xa,
41800 gen_rtx_AND (mode, op0, mask)));
41801
41802 if (smask)
41803 *smask = mask;
41804
41805 return xa;
41806 }
41807
41808 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
41809 swapping the operands if SWAP_OPERANDS is true. The expanded
41810 code is a forward jump to a newly created label in case the
41811 comparison is true. The generated label rtx is returned. */
41812 static rtx_code_label *
41813 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
41814 bool swap_operands)
41815 {
41816 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
41817 rtx_code_label *label;
41818 rtx tmp;
41819
41820 if (swap_operands)
41821 {
41822 tmp = op0;
41823 op0 = op1;
41824 op1 = tmp;
41825 }
41826
41827 label = gen_label_rtx ();
41828 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
41829 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41830 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
41831 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
41832 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
41833 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
41834 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41835 JUMP_LABEL (tmp) = label;
41836
41837 return label;
41838 }
41839
41840 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
41841 using comparison code CODE. Operands are swapped for the comparison if
41842 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
41843 static rtx
41844 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
41845 bool swap_operands)
41846 {
41847 rtx (*insn)(rtx, rtx, rtx, rtx);
41848 enum machine_mode mode = GET_MODE (op0);
41849 rtx mask = gen_reg_rtx (mode);
41850
41851 if (swap_operands)
41852 {
41853 rtx tmp = op0;
41854 op0 = op1;
41855 op1 = tmp;
41856 }
41857
41858 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
41859
41860 emit_insn (insn (mask, op0, op1,
41861 gen_rtx_fmt_ee (code, mode, op0, op1)));
41862 return mask;
41863 }
41864
41865 /* Generate and return a rtx of mode MODE for 2**n where n is the number
41866 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
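/* The value 2**52 (2**23 for SFmode) is the rounding constant used by
   the expanders below: once |x| + 2**52 is computed, the sum's ulp is
   1.0, so the addition itself rounds |x| to an integer. E.g. (a sketch,
   in round-to-nearest): 3.7 + 2**52 rounds to 2**52 + 4.0, and
   subtracting 2**52 again yields 4.0.  */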
41867 static rtx
41868 ix86_gen_TWO52 (enum machine_mode mode)
41869 {
41870 REAL_VALUE_TYPE TWO52r;
41871 rtx TWO52;
41872
41873 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
41874 TWO52 = const_double_from_real_value (TWO52r, mode);
41875 TWO52 = force_reg (mode, TWO52);
41876
41877 return TWO52;
41878 }
41879
41880 /* Expand SSE sequence for computing lround from OP1 storing
41881 into OP0. */
41882 void
41883 ix86_expand_lround (rtx op0, rtx op1)
41884 {
41885 /* C code for the stuff we're doing below:
41886 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
41887 return (long)tmp;
41888 */
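/* Using nextafter (0.5, 0.0) rather than 0.5 avoids a double-rounding
   error for values just below 0.5. E.g. for DFmode, with x = 0.5 - 2**-54
   (the largest double below 0.5): x + 0.5 would round up to 1.0 and
   truncate to 1, whereas x + (0.5 - 2**-54) = 1 - 2**-53 is exact and
   truncates to 0, the correct lround result.  */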
41889 enum machine_mode mode = GET_MODE (op1);
41890 const struct real_format *fmt;
41891 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
41892 rtx adj;
41893
41894 /* load nextafter (0.5, 0.0) */
41895 fmt = REAL_MODE_FORMAT (mode);
41896 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
41897 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
41898
41899 /* adj = copysign (0.5, op1) */
41900 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
41901 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
41902
41903 /* adj = op1 + adj */
41904 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
41905
41906 /* op0 = (imode)adj */
41907 expand_fix (op0, adj, 0);
41908 }
41909
41910 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
41911 storing into OPERAND0. */
41912 void
41913 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
41914 {
41915 /* C code for the stuff we're doing below (for do_floor):
41916 xi = (long)op1;
41917 xi -= (double)xi > op1 ? 1 : 0;
41918 return xi;
41919 */
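/* For instance, lfloor (-1.5): xi = (long) -1.5 = -1 (truncation toward
   zero), (double) -1 = -1.0 > -1.5, so xi is decremented to -2, the
   correct floor. For lceil the comparison is swapped and 1 is added.  */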
41920 enum machine_mode fmode = GET_MODE (op1);
41921 enum machine_mode imode = GET_MODE (op0);
41922 rtx ireg, freg, tmp;
41923 rtx_code_label *label;
41924
41925 /* reg = (long)op1 */
41926 ireg = gen_reg_rtx (imode);
41927 expand_fix (ireg, op1, 0);
41928
41929 /* freg = (double)reg */
41930 freg = gen_reg_rtx (fmode);
41931 expand_float (freg, ireg, 0);
41932
41933 /* ireg = (freg > op1) ? ireg - 1 : ireg */
41934 label = ix86_expand_sse_compare_and_jump (UNLE,
41935 freg, op1, !do_floor);
41936 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
41937 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
41938 emit_move_insn (ireg, tmp);
41939
41940 emit_label (label);
41941 LABEL_NUSES (label) = 1;
41942
41943 emit_move_insn (op0, ireg);
41944 }
41945
41946 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
41947 result in OPERAND0. */
41948 void
41949 ix86_expand_rint (rtx operand0, rtx operand1)
41950 {
41951 /* C code for the stuff we're doing below:
41952 xa = fabs (operand1);
41953 if (!isless (xa, 2**52))
41954 return operand1;
41955 xa = xa + 2**52 - 2**52;
41956 return copysign (xa, operand1);
41957 */
41958 enum machine_mode mode = GET_MODE (operand0);
41959 rtx res, xa, TWO52, mask;
41960 rtx_code_label *label;
41961
41962 res = gen_reg_rtx (mode);
41963 emit_move_insn (res, operand1);
41964
41965 /* xa = abs (operand1) */
41966 xa = ix86_expand_sse_fabs (res, &mask);
41967
41968 /* if (!isless (xa, TWO52)) goto label; */
41969 TWO52 = ix86_gen_TWO52 (mode);
41970 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41971
41972 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41973 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41974
41975 ix86_sse_copysign_to_positive (res, xa, res, mask);
41976
41977 emit_label (label);
41978 LABEL_NUSES (label) = 1;
41979
41980 emit_move_insn (operand0, res);
41981 }
41982
41983 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41984 into OPERAND0. */
41985 void
41986 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
41987 {
41988 /* C code for the stuff we expand below.
41989 double xa = fabs (x), x2;
41990 if (!isless (xa, TWO52))
41991 return x;
41992 xa = xa + TWO52 - TWO52;
41993 x2 = copysign (xa, x);
41994 Compensate. Floor:
41995 if (x2 > x)
41996 x2 -= 1;
41997 Compensate. Ceil:
41998 if (x2 < x)
41999 x2 -= -1;
42000 return x2;
42001 */
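/* For example, floor (-2.3): xa = 2.3, xa + 2**52 - 2**52 = 2.0,
   copysign gives x2 = -2.0; since -2.0 > -2.3 the compensation
   subtracts 1.0, yielding -3.0 = floor (-2.3).  */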
42002 enum machine_mode mode = GET_MODE (operand0);
42003 rtx xa, TWO52, tmp, one, res, mask;
42004 rtx_code_label *label;
42005
42006 TWO52 = ix86_gen_TWO52 (mode);
42007
42008 /* Temporary for holding the result, initialized to the input
42009 operand to ease control flow. */
42010 res = gen_reg_rtx (mode);
42011 emit_move_insn (res, operand1);
42012
42013 /* xa = abs (operand1) */
42014 xa = ix86_expand_sse_fabs (res, &mask);
42015
42016 /* if (!isless (xa, TWO52)) goto label; */
42017 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42018
42019 /* xa = xa + TWO52 - TWO52; */
42020 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42021 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
42022
42023 /* xa = copysign (xa, operand1) */
42024 ix86_sse_copysign_to_positive (xa, xa, res, mask);
42025
42026 /* generate 1.0 or -1.0 */
42027 one = force_reg (mode,
42028 const_double_from_real_value (do_floor
42029 ? dconst1 : dconstm1, mode));
42030
42031 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42032 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42033 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42034 gen_rtx_AND (mode, one, tmp)));
42035 /* We always need to subtract here to preserve signed zero. */
42036 tmp = expand_simple_binop (mode, MINUS,
42037 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42038 emit_move_insn (res, tmp);
42039
42040 emit_label (label);
42041 LABEL_NUSES (label) = 1;
42042
42043 emit_move_insn (operand0, res);
42044 }
42045
42046 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
42047 into OPERAND0. */
42048 void
42049 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
42050 {
42051 /* C code for the stuff we expand below.
42052 double xa = fabs (x), x2;
42053 if (!isless (xa, TWO52))
42054 return x;
42055 x2 = (double)(long)x;
42056 Compensate. Floor:
42057 if (x2 > x)
42058 x2 -= 1;
42059 Compensate. Ceil:
42060 if (x2 < x)
42061 x2 += 1;
42062 if (HONOR_SIGNED_ZEROS (mode))
42063 return copysign (x2, x);
42064 return x2;
42065 */
42066 enum machine_mode mode = GET_MODE (operand0);
42067 rtx xa, xi, TWO52, tmp, one, res, mask;
42068 rtx_code_label *label;
42069
42070 TWO52 = ix86_gen_TWO52 (mode);
42071
42072 /* Temporary for holding the result, initialized to the input
42073 operand to ease control flow. */
42074 res = gen_reg_rtx (mode);
42075 emit_move_insn (res, operand1);
42076
42077 /* xa = abs (operand1) */
42078 xa = ix86_expand_sse_fabs (res, &mask);
42079
42080 /* if (!isless (xa, TWO52)) goto label; */
42081 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42082
42083 /* xa = (double)(long)x */
42084 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42085 expand_fix (xi, res, 0);
42086 expand_float (xa, xi, 0);
42087
42088 /* generate 1.0 */
42089 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42090
42091 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42092 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42093 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42094 gen_rtx_AND (mode, one, tmp)));
42095 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
42096 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42097 emit_move_insn (res, tmp);
42098
42099 if (HONOR_SIGNED_ZEROS (mode))
42100 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42101
42102 emit_label (label);
42103 LABEL_NUSES (label) = 1;
42104
42105 emit_move_insn (operand0, res);
42106 }
42107
42108 /* Expand SSE sequence for computing round from OPERAND1 storing
42109 into OPERAND0. Sequence that works without relying on DImode truncation
42110 via cvttsd2siq that is only available on 64bit targets. */
42111 void
42112 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
42113 {
42114 /* C code for the stuff we expand below.
42115 double xa = fabs (x), xa2, x2;
42116 if (!isless (xa, TWO52))
42117 return x;
42118 Using the absolute value and copying back sign makes
42119 -0.0 -> -0.0 correct.
42120 xa2 = xa + TWO52 - TWO52;
42121 Compensate.
42122 dxa = xa2 - xa;
42123 if (dxa <= -0.5)
42124 xa2 += 1;
42125 else if (dxa > 0.5)
42126 xa2 -= 1;
42127 x2 = copysign (xa2, x);
42128 return x2;
42129 */
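/* For example, round (2.5): xa2 = 2.5 + 2**52 - 2**52 = 2.0 under
   round-to-nearest-even, dxa = 2.0 - 2.5 = -0.5 <= -0.5, so the
   compensation adds 1.0 back, giving 3.0 as required by round's
   halfway-away-from-zero semantics.  */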
42130 enum machine_mode mode = GET_MODE (operand0);
42131 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
42132 rtx_code_label *label;
42133
42134 TWO52 = ix86_gen_TWO52 (mode);
42135
42136 /* Temporary for holding the result, initialized to the input
42137 operand to ease control flow. */
42138 res = gen_reg_rtx (mode);
42139 emit_move_insn (res, operand1);
42140
42141 /* xa = abs (operand1) */
42142 xa = ix86_expand_sse_fabs (res, &mask);
42143
42144 /* if (!isless (xa, TWO52)) goto label; */
42145 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42146
42147 /* xa2 = xa + TWO52 - TWO52; */
42148 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42149 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
42150
42151 /* dxa = xa2 - xa; */
42152 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
42153
42154 /* generate 0.5, 1.0 and -0.5 */
42155 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
42156 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
42157 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
42158 0, OPTAB_DIRECT);
42159
42160 /* Compensate. */
42161 tmp = gen_reg_rtx (mode);
42162 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
42163 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
42164 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42165 gen_rtx_AND (mode, one, tmp)));
42166 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42167 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
42168 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
42169 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42170 gen_rtx_AND (mode, one, tmp)));
42171 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42172
42173 /* res = copysign (xa2, operand1) */
42174 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
42175
42176 emit_label (label);
42177 LABEL_NUSES (label) = 1;
42178
42179 emit_move_insn (operand0, res);
42180 }
42181
42182 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42183 into OPERAND0. */
42184 void
42185 ix86_expand_trunc (rtx operand0, rtx operand1)
42186 {
42187 /* C code for SSE variant we expand below.
42188 double xa = fabs (x), x2;
42189 if (!isless (xa, TWO52))
42190 return x;
42191 x2 = (double)(long)x;
42192 if (HONOR_SIGNED_ZEROS (mode))
42193 return copysign (x2, x);
42194 return x2;
42195 */
42196 enum machine_mode mode = GET_MODE (operand0);
42197 rtx xa, xi, TWO52, res, mask;
42198 rtx_code_label *label;
42199
42200 TWO52 = ix86_gen_TWO52 (mode);
42201
42202 /* Temporary for holding the result, initialized to the input
42203 operand to ease control flow. */
42204 res = gen_reg_rtx (mode);
42205 emit_move_insn (res, operand1);
42206
42207 /* xa = abs (operand1) */
42208 xa = ix86_expand_sse_fabs (res, &mask);
42209
42210 /* if (!isless (xa, TWO52)) goto label; */
42211 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42212
42213 /* x = (double)(long)x */
42214 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42215 expand_fix (xi, res, 0);
42216 expand_float (res, xi, 0);
42217
42218 if (HONOR_SIGNED_ZEROS (mode))
42219 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42220
42221 emit_label (label);
42222 LABEL_NUSES (label) = 1;
42223
42224 emit_move_insn (operand0, res);
42225 }
42226
42227 /* Expand SSE sequence for computing trunc from OPERAND1 storing into
42228 OPERAND0, without relying on 64-bit-only DImode truncation (cvttsd2siq). */
42229 void
42230 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42231 {
42232 enum machine_mode mode = GET_MODE (operand0);
42233 rtx xa, mask, TWO52, one, res, smask, tmp;
42234 rtx_code_label *label;
42235
42236 /* C code for SSE variant we expand below.
42237 double xa = fabs (x), x2;
42238 if (!isless (xa, TWO52))
42239 return x;
42240 xa2 = xa + TWO52 - TWO52;
42241 Compensate:
42242 if (xa2 > xa)
42243 xa2 -= 1.0;
42244 x2 = copysign (xa2, x);
42245 return x2;
42246 */
42247
42248 TWO52 = ix86_gen_TWO52 (mode);
42249
42250 /* Temporary for holding the result, initialized to the input
42251 operand to ease control flow. */
42252 res = gen_reg_rtx (mode);
42253 emit_move_insn (res, operand1);
42254
42255 /* xa = abs (operand1) */
42256 xa = ix86_expand_sse_fabs (res, &smask);
42257
42258 /* if (!isless (xa, TWO52)) goto label; */
42259 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42260
42261 /* res = xa + TWO52 - TWO52; */
42262 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42263 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42264 emit_move_insn (res, tmp);
42265
42266 /* generate 1.0 */
42267 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42268
42269 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42270 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42271 emit_insn (gen_rtx_SET (VOIDmode, mask,
42272 gen_rtx_AND (mode, mask, one)));
42273 tmp = expand_simple_binop (mode, MINUS,
42274 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42275 emit_move_insn (res, tmp);
42276
42277 /* res = copysign (res, operand1) */
42278 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42279
42280 emit_label (label);
42281 LABEL_NUSES (label) = 1;
42282
42283 emit_move_insn (operand0, res);
42284 }
42285
42286 /* Expand SSE sequence for computing round from OPERAND1 storing
42287 into OPERAND0. */
42288 void
42289 ix86_expand_round (rtx operand0, rtx operand1)
42290 {
42291 /* C code for the stuff we're doing below:
42292 double xa = fabs (x);
42293 if (!isless (xa, TWO52))
42294 return x;
42295 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42296 return copysign (xa, x);
42297 */
42298 enum machine_mode mode = GET_MODE (operand0);
42299 rtx res, TWO52, xa, xi, half, mask;
42300 rtx_code_label *label;
42301 const struct real_format *fmt;
42302 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42303
42304 /* Temporary for holding the result, initialized to the input
42305 operand to ease control flow. */
42306 res = gen_reg_rtx (mode);
42307 emit_move_insn (res, operand1);
42308
42309 TWO52 = ix86_gen_TWO52 (mode);
42310 xa = ix86_expand_sse_fabs (res, &mask);
42311 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42312
42313 /* load nextafter (0.5, 0.0) */
42314 fmt = REAL_MODE_FORMAT (mode);
42315 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42316 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42317
42318 /* xa = xa + 0.5 */
42319 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42320 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42321
42322 /* xa = (double)(int64_t)xa */
42323 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42324 expand_fix (xi, xa, 0);
42325 expand_float (xa, xi, 0);
42326
42327 /* res = copysign (xa, operand1) */
42328 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42329
42330 emit_label (label);
42331 LABEL_NUSES (label) = 1;
42332
42333 emit_move_insn (operand0, res);
42334 }
42335
42336 /* Expand SSE sequence for computing round
42337 from OP1 storing into OP0 using sse4 round insn. */
42338 void
42339 ix86_expand_round_sse4 (rtx op0, rtx op1)
42340 {
42341 enum machine_mode mode = GET_MODE (op0);
42342 rtx e1, e2, res, half;
42343 const struct real_format *fmt;
42344 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42345 rtx (*gen_copysign) (rtx, rtx, rtx);
42346 rtx (*gen_round) (rtx, rtx, rtx);
42347
42348 switch (mode)
42349 {
42350 case SFmode:
42351 gen_copysign = gen_copysignsf3;
42352 gen_round = gen_sse4_1_roundsf2;
42353 break;
42354 case DFmode:
42355 gen_copysign = gen_copysigndf3;
42356 gen_round = gen_sse4_1_rounddf2;
42357 break;
42358 default:
42359 gcc_unreachable ();
42360 }
42361
42362 /* round (a) = trunc (a + copysign (0.5, a)) */
42363
42364 /* load nextafter (0.5, 0.0) */
42365 fmt = REAL_MODE_FORMAT (mode);
42366 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42367 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42368 half = const_double_from_real_value (pred_half, mode);
42369
42370 /* e1 = copysign (0.5, op1) */
42371 e1 = gen_reg_rtx (mode);
42372 emit_insn (gen_copysign (e1, half, op1));
42373
42374 /* e2 = op1 + e1 */
42375 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42376
42377 /* res = trunc (e2) */
42378 res = gen_reg_rtx (mode);
42379 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42380
42381 emit_move_insn (op0, res);
42382 }
42383 \f
42384
42385 /* Table of valid machine attributes. */
42386 static const struct attribute_spec ix86_attribute_table[] =
42387 {
42388 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42389 affects_type_identity } */
42390 /* Stdcall attribute says callee is responsible for popping arguments
42391 if they are not variable. */
42392 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42393 true },
42394 /* Fastcall attribute says callee is responsible for popping arguments
42395 if they are not variable. */
42396 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42397 true },
42398 /* Thiscall attribute says callee is responsible for popping arguments
42399 if they are not variable. */
42400 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42401 true },
42402 /* Cdecl attribute says the callee is a normal C declaration */
42403 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42404 true },
42405 /* Regparm attribute specifies how many integer arguments are to be
42406 passed in registers. */
42407 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42408 true },
42409 /* Sseregparm attribute says we are using x86_64 calling conventions
42410 for FP arguments. */
42411 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42412 true },
42413 /* The transactional memory builtins are implicitly regparm or fastcall
42414 depending on the ABI. Override the generic do-nothing attribute that
42415 these builtins were declared with. */
42416 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42417 true },
42418 /* force_align_arg_pointer says this function realigns the stack at entry. */
42419 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42420 false, true, true, ix86_handle_cconv_attribute, false },
42421 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42422 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42423 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42424 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42425 false },
42426 #endif
42427 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42428 false },
42429 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42430 false },
42431 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42432 SUBTARGET_ATTRIBUTE_TABLE,
42433 #endif
42434 /* ms_abi and sysv_abi calling convention function attributes. */
42435 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42436 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42437 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42438 false },
42439 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42440 ix86_handle_callee_pop_aggregate_return, true },
42441 /* End element. */
42442 { NULL, 0, 0, false, false, false, NULL, false }
42443 };
42444
42445 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42446 static int
42447 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42448 tree vectype, int)
42449 {
42450 unsigned elements;
42451
42452 switch (type_of_cost)
42453 {
42454 case scalar_stmt:
42455 return ix86_cost->scalar_stmt_cost;
42456
42457 case scalar_load:
42458 return ix86_cost->scalar_load_cost;
42459
42460 case scalar_store:
42461 return ix86_cost->scalar_store_cost;
42462
42463 case vector_stmt:
42464 return ix86_cost->vec_stmt_cost;
42465
42466 case vector_load:
42467 return ix86_cost->vec_align_load_cost;
42468
42469 case vector_store:
42470 return ix86_cost->vec_store_cost;
42471
42472 case vec_to_scalar:
42473 return ix86_cost->vec_to_scalar_cost;
42474
42475 case scalar_to_vec:
42476 return ix86_cost->scalar_to_vec_cost;
42477
42478 case unaligned_load:
42479 case unaligned_store:
42480 return ix86_cost->vec_unalign_load_cost;
42481
42482 case cond_branch_taken:
42483 return ix86_cost->cond_taken_branch_cost;
42484
42485 case cond_branch_not_taken:
42486 return ix86_cost->cond_not_taken_branch_cost;
42487
42488 case vec_perm:
42489 case vec_promote_demote:
42490 return ix86_cost->vec_stmt_cost;
42491
42492 case vec_construct:
42493 elements = TYPE_VECTOR_SUBPARTS (vectype);
42494 return elements / 2 + 1;
42495
42496 default:
42497 gcc_unreachable ();
42498 }
42499 }
42500
42501 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42502 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42503 insn every time. */
42504
42505 static GTY(()) rtx_insn *vselect_insn;
42506
42507 /* Initialize vselect_insn. */
42508
42509 static void
42510 init_vselect_insn (void)
42511 {
42512 unsigned i;
42513 rtx x;
42514
42515 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42516 for (i = 0; i < MAX_VECT_LEN; ++i)
42517 XVECEXP (x, 0, i) = const0_rtx;
42518 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42519 const0_rtx), x);
42520 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42521 start_sequence ();
42522 vselect_insn = emit_insn (x);
42523 end_sequence ();
42524 }
42525
42526 /* Construct (set target (vec_select op0 (parallel perm))) and
42527 return true if that's a valid instruction in the active ISA. */
42528
42529 static bool
42530 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42531 unsigned nelt, bool testing_p)
42532 {
42533 unsigned int i;
42534 rtx x, save_vconcat;
42535 int icode;
42536
42537 if (vselect_insn == NULL_RTX)
42538 init_vselect_insn ();
42539
42540 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42541 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42542 for (i = 0; i < nelt; ++i)
42543 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42544 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42545 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42546 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42547 SET_DEST (PATTERN (vselect_insn)) = target;
42548 icode = recog_memoized (vselect_insn);
42549
42550 if (icode >= 0 && !testing_p)
42551 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42552
42553 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42554 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42555 INSN_CODE (vselect_insn) = -1;
42556
42557 return icode >= 0;
42558 }
42559
42560 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42561
42562 static bool
42563 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42564 const unsigned char *perm, unsigned nelt,
42565 bool testing_p)
42566 {
42567 enum machine_mode v2mode;
42568 rtx x;
42569 bool ok;
42570
42571 if (vselect_insn == NULL_RTX)
42572 init_vselect_insn ();
42573
42574 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42575 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42576 PUT_MODE (x, v2mode);
42577 XEXP (x, 0) = op0;
42578 XEXP (x, 1) = op1;
42579 ok = expand_vselect (target, x, perm, nelt, testing_p);
42580 XEXP (x, 0) = const0_rtx;
42581 XEXP (x, 1) = const0_rtx;
42582 return ok;
42583 }
42584
42585 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42586 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42587
42588 static bool
42589 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42590 {
42591 enum machine_mode vmode = d->vmode;
42592 unsigned i, mask, nelt = d->nelt;
42593 rtx target, op0, op1, x;
42594 rtx rperm[32], vperm;
42595
42596 if (d->one_operand_p)
42597 return false;
42598 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42599 ;
42600 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42601 ;
42602 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42603 ;
42604 else
42605 return false;
42606
42607 /* This is a blend, not a permute. Elements must stay in their
42608 respective lanes. */
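/* For instance, for V4SFmode the permutation {0, 5, 2, 7} is a blend
   (each element keeps its position, choosing op0 or op1), giving
   mask = 0b1010 below, whereas {1, 5, 2, 7} is rejected here.  */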
42609 for (i = 0; i < nelt; ++i)
42610 {
42611 unsigned e = d->perm[i];
42612 if (!(e == i || e == i + nelt))
42613 return false;
42614 }
42615
42616 if (d->testing_p)
42617 return true;
42618
42619 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
42620 decision should be extracted elsewhere, so that we only try that
42621 sequence once all budget==3 options have been tried. */
42622 target = d->target;
42623 op0 = d->op0;
42624 op1 = d->op1;
42625 mask = 0;
42626
42627 switch (vmode)
42628 {
42629 case V4DFmode:
42630 case V8SFmode:
42631 case V2DFmode:
42632 case V4SFmode:
42633 case V8HImode:
42634 case V8SImode:
42635 for (i = 0; i < nelt; ++i)
42636 mask |= (d->perm[i] >= nelt) << i;
42637 break;
42638
42639 case V2DImode:
42640 for (i = 0; i < 2; ++i)
42641 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
42642 vmode = V8HImode;
42643 goto do_subreg;
42644
42645 case V4SImode:
42646 for (i = 0; i < 4; ++i)
42647 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42648 vmode = V8HImode;
42649 goto do_subreg;
42650
42651 case V16QImode:
42652 /* See if bytes move in pairs so we can use pblendw with
42653 an immediate argument, rather than pblendvb with a vector
42654 argument. */
42655 for (i = 0; i < 16; i += 2)
42656 if (d->perm[i] + 1 != d->perm[i + 1])
42657 {
42658 use_pblendvb:
42659 for (i = 0; i < nelt; ++i)
42660 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
42661
42662 finish_pblendvb:
42663 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
42664 vperm = force_reg (vmode, vperm);
42665
42666 if (GET_MODE_SIZE (vmode) == 16)
42667 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
42668 else
42669 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
42670 if (target != d->target)
42671 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42672 return true;
42673 }
42674
42675 for (i = 0; i < 8; ++i)
42676 mask |= (d->perm[i * 2] >= 16) << i;
42677 vmode = V8HImode;
42678 /* FALLTHRU */
42679
42680 do_subreg:
42681 target = gen_reg_rtx (vmode);
42682 op0 = gen_lowpart (vmode, op0);
42683 op1 = gen_lowpart (vmode, op1);
42684 break;
42685
42686 case V32QImode:
42687 /* See if bytes move in pairs. If not, vpblendvb must be used. */
42688 for (i = 0; i < 32; i += 2)
42689 if (d->perm[i] + 1 != d->perm[i + 1])
42690 goto use_pblendvb;
42691 /* See if bytes move in quadruplets. If yes, vpblendd
42692 with immediate can be used. */
42693 for (i = 0; i < 32; i += 4)
42694 if (d->perm[i] + 2 != d->perm[i + 2])
42695 break;
42696 if (i < 32)
42697 {
42698 /* See if bytes move the same in both lanes. If yes,
42699 vpblendw with immediate can be used. */
42700 for (i = 0; i < 16; i += 2)
42701 if (d->perm[i] + 16 != d->perm[i + 16])
42702 goto use_pblendvb;
42703
42704 /* Use vpblendw. */
42705 for (i = 0; i < 16; ++i)
42706 mask |= (d->perm[i * 2] >= 32) << i;
42707 vmode = V16HImode;
42708 goto do_subreg;
42709 }
42710
42711 /* Use vpblendd. */
42712 for (i = 0; i < 8; ++i)
42713 mask |= (d->perm[i * 4] >= 32) << i;
42714 vmode = V8SImode;
42715 goto do_subreg;
42716
42717 case V16HImode:
42718 /* See if words move in pairs. If yes, vpblendd can be used. */
42719 for (i = 0; i < 16; i += 2)
42720 if (d->perm[i] + 1 != d->perm[i + 1])
42721 break;
42722 if (i < 16)
42723 {
42724 /* See if words move the same in both lanes. If not,
42725 vpblendvb must be used. */
42726 for (i = 0; i < 8; i++)
42727 if (d->perm[i] + 8 != d->perm[i + 8])
42728 {
42729 /* Use vpblendvb. */
42730 for (i = 0; i < 32; ++i)
42731 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
42732
42733 vmode = V32QImode;
42734 nelt = 32;
42735 target = gen_reg_rtx (vmode);
42736 op0 = gen_lowpart (vmode, op0);
42737 op1 = gen_lowpart (vmode, op1);
42738 goto finish_pblendvb;
42739 }
42740
42741 /* Use vpblendw. */
42742 for (i = 0; i < 16; ++i)
42743 mask |= (d->perm[i] >= 16) << i;
42744 break;
42745 }
42746
42747 /* Use vpblendd. */
42748 for (i = 0; i < 8; ++i)
42749 mask |= (d->perm[i * 2] >= 16) << i;
42750 vmode = V8SImode;
42751 goto do_subreg;
42752
42753 case V4DImode:
42754 /* Use vpblendd. */
42755 for (i = 0; i < 4; ++i)
42756 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42757 vmode = V8SImode;
42758 goto do_subreg;
42759
42760 default:
42761 gcc_unreachable ();
42762 }
42763
42764 /* This matches five different patterns with the different modes. */
42765 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
42766 x = gen_rtx_SET (VOIDmode, target, x);
42767 emit_insn (x);
42768 if (target != d->target)
42769 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42770
42771 return true;
42772 }
42773
42774 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42775 in terms of the variable form of vpermilps.
42776
42777 Note that we will have already failed the immediate input vpermilps,
42778 which requires that the high and low part shuffle be identical; the
42779 variable form doesn't require that. */
42780
42781 static bool
42782 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
42783 {
42784 rtx rperm[8], vperm;
42785 unsigned i;
42786
42787 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
42788 return false;
42789
42790 /* We can only permute within the 128-bit lane. */
42791 for (i = 0; i < 8; ++i)
42792 {
42793 unsigned e = d->perm[i];
42794 if (i < 4 ? e >= 4 : e < 4)
42795 return false;
42796 }
42797
42798 if (d->testing_p)
42799 return true;
42800
42801 for (i = 0; i < 8; ++i)
42802 {
42803 unsigned e = d->perm[i];
42804
42805 /* Within each 128-bit lane, the elements of op0 are numbered
42806 from 0 and the elements of op1 are numbered from 4. */
42807 if (e >= 8 + 4)
42808 e -= 8;
42809 else if (e >= 4)
42810 e -= 4;
42811
42812 rperm[i] = GEN_INT (e);
42813 }
42814
42815 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
42816 vperm = force_reg (V8SImode, vperm);
42817 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
42818
42819 return true;
42820 }
42821
42822 /* Return true if permutation D can be performed as VMODE permutation
42823 instead. */
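/* E.g. a V16QImode permutation { 4 5 6 7 0 1 2 3 12 13 14 15 8 9 10 11 }
   moves bytes in aligned groups of four, so it is also expressible as the
   V4SImode permutation { 1 0 3 2 } and this function returns true for
   vmode == V4SImode.  */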
42824
42825 static bool
42826 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
42827 {
42828 unsigned int i, j, chunk;
42829
42830 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
42831 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
42832 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
42833 return false;
42834
42835 if (GET_MODE_NUNITS (vmode) >= d->nelt)
42836 return true;
42837
42838 chunk = d->nelt / GET_MODE_NUNITS (vmode);
42839 for (i = 0; i < d->nelt; i += chunk)
42840 if (d->perm[i] & (chunk - 1))
42841 return false;
42842 else
42843 for (j = 1; j < chunk; ++j)
42844 if (d->perm[i] + j != d->perm[i + j])
42845 return false;
42846
42847 return true;
42848 }
42849
42850 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42851 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
42852
42853 static bool
42854 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
42855 {
42856 unsigned i, nelt, eltsz, mask;
42857 unsigned char perm[32];
42858 enum machine_mode vmode = V16QImode;
42859 rtx rperm[32], vperm, target, op0, op1;
42860
42861 nelt = d->nelt;
42862
42863 if (!d->one_operand_p)
42864 {
42865 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
42866 {
42867 if (TARGET_AVX2
42868 && valid_perm_using_mode_p (V2TImode, d))
42869 {
42870 if (d->testing_p)
42871 return true;
42872
42873 /* Use vperm2i128 insn. The pattern uses
42874 V4DImode instead of V2TImode. */
42875 target = d->target;
42876 if (d->vmode != V4DImode)
42877 target = gen_reg_rtx (V4DImode);
42878 op0 = gen_lowpart (V4DImode, d->op0);
42879 op1 = gen_lowpart (V4DImode, d->op1);
42880 rperm[0]
42881 = GEN_INT ((d->perm[0] / (nelt / 2))
42882 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
42883 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
42884 if (target != d->target)
42885 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42886 return true;
42887 }
42888 return false;
42889 }
42890 }
42891 else
42892 {
42893 if (GET_MODE_SIZE (d->vmode) == 16)
42894 {
42895 if (!TARGET_SSSE3)
42896 return false;
42897 }
42898 else if (GET_MODE_SIZE (d->vmode) == 32)
42899 {
42900 if (!TARGET_AVX2)
42901 return false;
42902
42903 /* V4DImode should be already handled through
42904 expand_vselect by vpermq instruction. */
42905 gcc_assert (d->vmode != V4DImode);
42906
42907 vmode = V32QImode;
42908 if (d->vmode == V8SImode
42909 || d->vmode == V16HImode
42910 || d->vmode == V32QImode)
42911 {
42912 /* First see if vpermq can be used for
42913 V8SImode/V16HImode/V32QImode. */
42914 if (valid_perm_using_mode_p (V4DImode, d))
42915 {
42916 for (i = 0; i < 4; i++)
42917 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
42918 if (d->testing_p)
42919 return true;
42920 target = gen_reg_rtx (V4DImode);
42921 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
42922 perm, 4, false))
42923 {
42924 emit_move_insn (d->target,
42925 gen_lowpart (d->vmode, target));
42926 return true;
42927 }
42928 return false;
42929 }
42930
42931 /* Next see if vpermd can be used. */
42932 if (valid_perm_using_mode_p (V8SImode, d))
42933 vmode = V8SImode;
42934 }
42935 /* Or if vpermps can be used. */
42936 else if (d->vmode == V8SFmode)
42937 vmode = V8SImode;
42938
42939 if (vmode == V32QImode)
42940 {
42941 /* vpshufb only works within 128-bit lanes; it is not
42942 possible to shuffle bytes between the lanes. */
42943 for (i = 0; i < nelt; ++i)
42944 if ((d->perm[i] ^ i) & (nelt / 2))
42945 return false;
42946 }
42947 }
42948 else
42949 return false;
42950 }
42951
42952 if (d->testing_p)
42953 return true;
42954
42955 if (vmode == V8SImode)
42956 for (i = 0; i < 8; ++i)
42957 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
42958 else
42959 {
42960 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
42961 if (!d->one_operand_p)
42962 mask = 2 * nelt - 1;
42963 else if (vmode == V16QImode)
42964 mask = nelt - 1;
42965 else
42966 mask = nelt / 2 - 1;
42967
42968 for (i = 0; i < nelt; ++i)
42969 {
42970 unsigned j, e = d->perm[i] & mask;
42971 for (j = 0; j < eltsz; ++j)
42972 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
42973 }
42974 }
42975
42976 vperm = gen_rtx_CONST_VECTOR (vmode,
42977 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
42978 vperm = force_reg (vmode, vperm);
42979
42980 target = d->target;
42981 if (d->vmode != vmode)
42982 target = gen_reg_rtx (vmode);
42983 op0 = gen_lowpart (vmode, d->op0);
42984 if (d->one_operand_p)
42985 {
42986 if (vmode == V16QImode)
42987 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
42988 else if (vmode == V32QImode)
42989 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
42990 else if (vmode == V8SFmode)
42991 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
42992 else
42993 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
42994 }
42995 else
42996 {
42997 op1 = gen_lowpart (vmode, d->op1);
42998 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
42999 }
43000 if (target != d->target)
43001 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
43002
43003 return true;
43004 }
43005
43006 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
43007 in a single instruction. */
43008
43009 static bool
43010 expand_vec_perm_1 (struct expand_vec_perm_d *d)
43011 {
43012 unsigned i, nelt = d->nelt;
43013 unsigned char perm2[MAX_VECT_LEN];
43014
43015 /* Check plain VEC_SELECT first, because AVX has instructions that could
43016 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
43017 input where SEL+CONCAT may not. */
43018 if (d->one_operand_p)
43019 {
43020 int mask = nelt - 1;
43021 bool identity_perm = true;
43022 bool broadcast_perm = true;
43023
43024 for (i = 0; i < nelt; i++)
43025 {
43026 perm2[i] = d->perm[i] & mask;
43027 if (perm2[i] != i)
43028 identity_perm = false;
43029 if (perm2[i])
43030 broadcast_perm = false;
43031 }
43032
43033 if (identity_perm)
43034 {
43035 if (!d->testing_p)
43036 emit_move_insn (d->target, d->op0);
43037 return true;
43038 }
43039 else if (broadcast_perm && TARGET_AVX2)
43040 {
43041 /* Use vpbroadcast{b,w,d}. */
43042 rtx (*gen) (rtx, rtx) = NULL;
43043 switch (d->vmode)
43044 {
43045 case V32QImode:
43046 gen = gen_avx2_pbroadcastv32qi_1;
43047 break;
43048 case V16HImode:
43049 gen = gen_avx2_pbroadcastv16hi_1;
43050 break;
43051 case V8SImode:
43052 gen = gen_avx2_pbroadcastv8si_1;
43053 break;
43054 case V16QImode:
43055 gen = gen_avx2_pbroadcastv16qi;
43056 break;
43057 case V8HImode:
43058 gen = gen_avx2_pbroadcastv8hi;
43059 break;
43060 case V8SFmode:
43061 gen = gen_avx2_vec_dupv8sf_1;
43062 break;
43063 /* For other modes, prefer the other shuffles this function creates. */
43064 default: break;
43065 }
43066 if (gen != NULL)
43067 {
43068 if (!d->testing_p)
43069 emit_insn (gen (d->target, d->op0));
43070 return true;
43071 }
43072 }
43073
43074 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
43075 return true;
43076
43077 /* There are plenty of patterns in sse.md that are written for
43078 SEL+CONCAT and are not replicated for a single op. Perhaps
43079 that should be changed, to avoid the nastiness here. */
43080
43081 /* Recognize interleave style patterns, which means incrementing
43082 every other permutation operand. */
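/* E.g. for V4SFmode with d->perm = { 0, 0, 1, 1 } this builds
   perm2 = { 0, 4, 1, 5 }, which expand_vselect_vconcat matches as
   unpcklps of op0 with itself.  */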
43083 for (i = 0; i < nelt; i += 2)
43084 {
43085 perm2[i] = d->perm[i] & mask;
43086 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
43087 }
43088 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43089 d->testing_p))
43090 return true;
43091
43092 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
43093 if (nelt >= 4)
43094 {
43095 for (i = 0; i < nelt; i += 4)
43096 {
43097 perm2[i + 0] = d->perm[i + 0] & mask;
43098 perm2[i + 1] = d->perm[i + 1] & mask;
43099 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
43100 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
43101 }
43102
43103 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43104 d->testing_p))
43105 return true;
43106 }
43107 }
43108
43109 /* Finally, try the fully general two operand permute. */
43110 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
43111 d->testing_p))
43112 return true;
43113
43114 /* Recognize interleave style patterns with reversed operands. */
43115 if (!d->one_operand_p)
43116 {
43117 for (i = 0; i < nelt; ++i)
43118 {
43119 unsigned e = d->perm[i];
43120 if (e >= nelt)
43121 e -= nelt;
43122 else
43123 e += nelt;
43124 perm2[i] = e;
43125 }
43126
43127 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
43128 d->testing_p))
43129 return true;
43130 }
43131
43132 /* Try the SSE4.1 blend variable merge instructions. */
43133 if (expand_vec_perm_blend (d))
43134 return true;
43135
43136 /* Try one of the AVX vpermil variable permutations. */
43137 if (expand_vec_perm_vpermil (d))
43138 return true;
43139
43140 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
43141 vpshufb, vpermd, vpermps or vpermq variable permutation. */
43142 if (expand_vec_perm_pshufb (d))
43143 return true;
43144
43145 /* Try the AVX512F vpermi2 instructions. */
43146 rtx vec[64];
43147 enum machine_mode mode = d->vmode;
43148 if (mode == V8DFmode)
43149 mode = V8DImode;
43150 else if (mode == V16SFmode)
43151 mode = V16SImode;
43152 for (i = 0; i < nelt; ++i)
43153 vec[i] = GEN_INT (d->perm[i]);
43154 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
43155 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
43156 return true;
43157
43158 return false;
43159 }
43160
43161 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43162 in terms of a pair of pshuflw + pshufhw instructions. */
43163
43164 static bool
43165 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
43166 {
43167 unsigned char perm2[MAX_VECT_LEN];
43168 unsigned i;
43169 bool ok;
43170
43171 if (d->vmode != V8HImode || !d->one_operand_p)
43172 return false;
43173
43174 /* The two permutations only operate in 64-bit lanes. */
43175 for (i = 0; i < 4; ++i)
43176 if (d->perm[i] >= 4)
43177 return false;
43178 for (i = 4; i < 8; ++i)
43179 if (d->perm[i] < 4)
43180 return false;
43181
43182 if (d->testing_p)
43183 return true;
43184
43185 /* Emit the pshuflw. */
43186 memcpy (perm2, d->perm, 4);
43187 for (i = 4; i < 8; ++i)
43188 perm2[i] = i;
43189 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
43190 gcc_assert (ok);
43191
43192 /* Emit the pshufhw. */
43193 memcpy (perm2 + 4, d->perm + 4, 4);
43194 for (i = 0; i < 4; ++i)
43195 perm2[i] = i;
43196 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43197 gcc_assert (ok);
43198
43199 return true;
43200 }
43201
43202 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43203 the permutation using the SSSE3 palignr instruction. This succeeds
43204 when all of the elements in PERM fit within one vector and we merely
43205 need to shift them down so that a single vector permutation has a
43206 chance to succeed. */
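/* E.g. for a V16QImode permutation selecting bytes 3..18 of the two
   inputs, min = 3, so the palignr below shifts the op1:op0 concatenation
   right by 3 bytes; the remaining single-operand permutation is then the
   identity and the in_order fast path applies.  */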
43207
43208 static bool
43209 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
43210 {
43211 unsigned i, nelt = d->nelt;
43212 unsigned min, max;
43213 bool in_order, ok;
43214 rtx shift, target;
43215 struct expand_vec_perm_d dcopy;
43216
43217 /* Even with AVX, palignr only operates on 128-bit vectors. */
43218 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43219 return false;
43220
43221 min = nelt, max = 0;
43222 for (i = 0; i < nelt; ++i)
43223 {
43224 unsigned e = d->perm[i];
43225 if (e < min)
43226 min = e;
43227 if (e > max)
43228 max = e;
43229 }
43230 if (min == 0 || max - min >= nelt)
43231 return false;
43232
43233 /* Given that we have SSSE3, we know we'll be able to implement the
43234 single operand permutation after the palignr with pshufb. */
43235 if (d->testing_p)
43236 return true;
43237
43238 dcopy = *d;
43239 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43240 target = gen_reg_rtx (TImode);
43241 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
43242 gen_lowpart (TImode, d->op0), shift));
43243
43244 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43245 dcopy.one_operand_p = true;
43246
43247 in_order = true;
43248 for (i = 0; i < nelt; ++i)
43249 {
43250 unsigned e = dcopy.perm[i] - min;
43251 if (e != i)
43252 in_order = false;
43253 dcopy.perm[i] = e;
43254 }
43255
43256 /* Test for the degenerate case where the alignment by itself
43257 produces the desired permutation. */
43258 if (in_order)
43259 {
43260 emit_move_insn (d->target, dcopy.op0);
43261 return true;
43262 }
43263
43264 ok = expand_vec_perm_1 (&dcopy);
43265 gcc_assert (ok);
43266
43267 return ok;
43268 }
43269
43270 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
43271 the permutation using the SSE4.1 pblendv instruction. Potentially
43272 reduces the permutation from 2 pshufb insns and an or to 1 pshufb and 1 pblendv. */
43273
43274 static bool
43275 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
43276 {
43277 unsigned i, which, nelt = d->nelt;
43278 struct expand_vec_perm_d dcopy, dcopy1;
43279 enum machine_mode vmode = d->vmode;
43280 bool ok;
43281
43282 /* Use the same checks as in expand_vec_perm_blend, but skipping
43283 AVX and AVX2 as they require more than 2 instructions. */
43284 if (d->one_operand_p)
43285 return false;
43286 if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
43287 ;
43288 else
43289 return false;
43290
43291 /* Figure out which permutation elements do not stay in their
43292 respective lanes. */
43293 for (i = 0, which = 0; i < nelt; ++i)
43294 {
43295 unsigned e = d->perm[i];
43296 if (e != i)
43297 which |= (e < nelt ? 1 : 2);
43298 }
43299 /* We can pblend the part whose elements do not stay in their
43300 respective lanes only when these elements all come from one
43301 half of the permutation (i.e. from the same input vector).
43302 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not at their respective
43303 lanes, but both are >= 8.
43304 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not at their
43305 respective lanes, and 8 >= 8 but 2 is not. */
43306 if (which != 1 && which != 2)
43307 return false;
43308 if (d->testing_p)
43309 return true;
43310
43311 /* First we apply a one-operand permutation to the part whose
43312 elements do not stay in their respective lanes. */
43313 dcopy = *d;
43314 if (which == 2)
43315 dcopy.op0 = dcopy.op1 = d->op1;
43316 else
43317 dcopy.op0 = dcopy.op1 = d->op0;
43318 dcopy.one_operand_p = true;
43319
43320 for (i = 0; i < nelt; ++i)
43321 dcopy.perm[i] = d->perm[i] & (nelt - 1);
43322
43323 ok = expand_vec_perm_1 (&dcopy);
43324 gcc_assert (ok);
43325
43326 /* Next we put permuted elements into their positions. */
43327 dcopy1 = *d;
43328 if (which == 2)
43329 dcopy1.op1 = dcopy.target;
43330 else
43331 dcopy1.op0 = dcopy.target;
43332
43333 for (i = 0; i < nelt; ++i)
43334 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
43335
43336 ok = expand_vec_perm_blend (&dcopy1);
43337 gcc_assert (ok);
43338
43339 return true;
43340 }
43341
43342 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43343
43344 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43345 a two vector permutation into a single vector permutation by using
43346 an interleave operation to merge the vectors. */
43347
43348 static bool
43349 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43350 {
43351 struct expand_vec_perm_d dremap, dfinal;
43352 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43353 unsigned HOST_WIDE_INT contents;
43354 unsigned char remap[2 * MAX_VECT_LEN];
43355 rtx_insn *seq;
43356 bool ok, same_halves = false;
43357
43358 if (GET_MODE_SIZE (d->vmode) == 16)
43359 {
43360 if (d->one_operand_p)
43361 return false;
43362 }
43363 else if (GET_MODE_SIZE (d->vmode) == 32)
43364 {
43365 if (!TARGET_AVX)
43366 return false;
43367 /* For 32-byte modes allow even d->one_operand_p.
43368 The lack of cross-lane shuffling in some instructions
43369 might prevent a single insn shuffle. */
43370 dfinal = *d;
43371 dfinal.testing_p = true;
43372 /* If expand_vec_perm_interleave3 can expand this into
43373 a 3 insn sequence, give up and let it be expanded as
43374 a 3 insn sequence. While that is one insn longer,
43375 it doesn't need a memory operand, and in the common
43376 case where the interleave low and interleave high
43377 permutations of the same operands are adjacent, it
43378 needs only 4 insns for both after CSE. */
43379 if (expand_vec_perm_interleave3 (&dfinal))
43380 return false;
43381 }
43382 else
43383 return false;
43384
43385 /* Examine from whence the elements come. */
43386 contents = 0;
43387 for (i = 0; i < nelt; ++i)
43388 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
43389
43390 memset (remap, 0xff, sizeof (remap));
43391 dremap = *d;
43392
43393 if (GET_MODE_SIZE (d->vmode) == 16)
43394 {
43395 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43396
43397 /* Split the two input vectors into 4 halves. */
43398 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43399 h2 = h1 << nelt2;
43400 h3 = h2 << nelt2;
43401 h4 = h3 << nelt2;
43402
43403 /* If the elements all come from the low halves, use interleave low;
43404 similarly for interleave high. If the elements are from mis-matched
43405 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
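/* E.g. for V4SImode with d->perm = { 0, 4, 1, 5 } both sources use
   only their low halves, so dremap becomes the punpckldq permutation
   { 0, 4, 1, 5 } itself and the final single-operand remap reduces to
   the identity.  */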
43406 if ((contents & (h1 | h3)) == contents)
43407 {
43408 /* punpckl* */
43409 for (i = 0; i < nelt2; ++i)
43410 {
43411 remap[i] = i * 2;
43412 remap[i + nelt] = i * 2 + 1;
43413 dremap.perm[i * 2] = i;
43414 dremap.perm[i * 2 + 1] = i + nelt;
43415 }
43416 if (!TARGET_SSE2 && d->vmode == V4SImode)
43417 dremap.vmode = V4SFmode;
43418 }
43419 else if ((contents & (h2 | h4)) == contents)
43420 {
43421 /* punpckh* */
43422 for (i = 0; i < nelt2; ++i)
43423 {
43424 remap[i + nelt2] = i * 2;
43425 remap[i + nelt + nelt2] = i * 2 + 1;
43426 dremap.perm[i * 2] = i + nelt2;
43427 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43428 }
43429 if (!TARGET_SSE2 && d->vmode == V4SImode)
43430 dremap.vmode = V4SFmode;
43431 }
43432 else if ((contents & (h1 | h4)) == contents)
43433 {
43434 /* shufps */
43435 for (i = 0; i < nelt2; ++i)
43436 {
43437 remap[i] = i;
43438 remap[i + nelt + nelt2] = i + nelt2;
43439 dremap.perm[i] = i;
43440 dremap.perm[i + nelt2] = i + nelt + nelt2;
43441 }
43442 if (nelt != 4)
43443 {
43444 /* shufpd */
43445 dremap.vmode = V2DImode;
43446 dremap.nelt = 2;
43447 dremap.perm[0] = 0;
43448 dremap.perm[1] = 3;
43449 }
43450 }
43451 else if ((contents & (h2 | h3)) == contents)
43452 {
43453 /* shufps */
43454 for (i = 0; i < nelt2; ++i)
43455 {
43456 remap[i + nelt2] = i;
43457 remap[i + nelt] = i + nelt2;
43458 dremap.perm[i] = i + nelt2;
43459 dremap.perm[i + nelt2] = i + nelt;
43460 }
43461 if (nelt != 4)
43462 {
43463 /* shufpd */
43464 dremap.vmode = V2DImode;
43465 dremap.nelt = 2;
43466 dremap.perm[0] = 1;
43467 dremap.perm[1] = 2;
43468 }
43469 }
43470 else
43471 return false;
43472 }
43473 else
43474 {
43475 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43476 unsigned HOST_WIDE_INT q[8];
43477 unsigned int nonzero_halves[4];
43478
43479 /* Split the two input vectors into 8 quarters. */
43480 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43481 for (i = 1; i < 8; ++i)
43482 q[i] = q[0] << (nelt4 * i);
43483 for (i = 0; i < 4; ++i)
43484 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43485 {
43486 nonzero_halves[nzcnt] = i;
43487 ++nzcnt;
43488 }
43489
43490 if (nzcnt == 1)
43491 {
43492 gcc_assert (d->one_operand_p);
43493 nonzero_halves[1] = nonzero_halves[0];
43494 same_halves = true;
43495 }
43496 else if (d->one_operand_p)
43497 {
43498 gcc_assert (nonzero_halves[0] == 0);
43499 gcc_assert (nonzero_halves[1] == 1);
43500 }
43501
43502 if (nzcnt <= 2)
43503 {
43504 if (d->perm[0] / nelt2 == nonzero_halves[1])
43505 {
43506 /* Attempt to increase the likelihood that dfinal
43507 shuffle will be intra-lane. */
43508 char tmph = nonzero_halves[0];
43509 nonzero_halves[0] = nonzero_halves[1];
43510 nonzero_halves[1] = tmph;
43511 }
43512
43513 /* vperm2f128 or vperm2i128. */
43514 for (i = 0; i < nelt2; ++i)
43515 {
43516 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
43517 remap[i + nonzero_halves[0] * nelt2] = i;
43518 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
43519 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
43520 }
43521
43522 if (d->vmode != V8SFmode
43523 && d->vmode != V4DFmode
43524 && d->vmode != V8SImode)
43525 {
43526 dremap.vmode = V8SImode;
43527 dremap.nelt = 8;
43528 for (i = 0; i < 4; ++i)
43529 {
43530 dremap.perm[i] = i + nonzero_halves[0] * 4;
43531 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
43532 }
43533 }
43534 }
43535 else if (d->one_operand_p)
43536 return false;
43537 else if (TARGET_AVX2
43538 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
43539 {
43540 /* vpunpckl* */
43541 for (i = 0; i < nelt4; ++i)
43542 {
43543 remap[i] = i * 2;
43544 remap[i + nelt] = i * 2 + 1;
43545 remap[i + nelt2] = i * 2 + nelt2;
43546 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
43547 dremap.perm[i * 2] = i;
43548 dremap.perm[i * 2 + 1] = i + nelt;
43549 dremap.perm[i * 2 + nelt2] = i + nelt2;
43550 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
43551 }
43552 }
43553 else if (TARGET_AVX2
43554 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
43555 {
43556 /* vpunpckh* */
43557 for (i = 0; i < nelt4; ++i)
43558 {
43559 remap[i + nelt4] = i * 2;
43560 remap[i + nelt + nelt4] = i * 2 + 1;
43561 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
43562 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
43563 dremap.perm[i * 2] = i + nelt4;
43564 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
43565 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
43566 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
43567 }
43568 }
43569 else
43570 return false;
43571 }
43572
43573 /* Use the remapping array set up above to move the elements from their
43574 swizzled locations into their final destinations. */
43575 dfinal = *d;
43576 for (i = 0; i < nelt; ++i)
43577 {
43578 unsigned e = remap[d->perm[i]];
43579 gcc_assert (e < nelt);
43580 /* If same_halves is true, both halves of the remapped vector are the
43581 same. Avoid cross-lane accesses if possible. */
43582 if (same_halves && i >= nelt2)
43583 {
43584 gcc_assert (e < nelt2);
43585 dfinal.perm[i] = e + nelt2;
43586 }
43587 else
43588 dfinal.perm[i] = e;
43589 }
43590 if (!d->testing_p)
43591 {
43592 dremap.target = gen_reg_rtx (dremap.vmode);
43593 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43594 }
43595 dfinal.op1 = dfinal.op0;
43596 dfinal.one_operand_p = true;
43597
43598 /* Test if the final remap can be done with a single insn. For V4SFmode or
43599 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
43600 start_sequence ();
43601 ok = expand_vec_perm_1 (&dfinal);
43602 seq = get_insns ();
43603 end_sequence ();
43604
43605 if (!ok)
43606 return false;
43607
43608 if (d->testing_p)
43609 return true;
43610
43611 if (dremap.vmode != dfinal.vmode)
43612 {
43613 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
43614 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
43615 }
43616
43617 ok = expand_vec_perm_1 (&dremap);
43618 gcc_assert (ok);
43619
43620 emit_insn (seq);
43621 return true;
43622 }
43623
43624 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43625 a single vector cross-lane permutation into vpermq followed
43626 by any of the single insn permutations. */
43627
43628 static bool
43629 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
43630 {
43631 struct expand_vec_perm_d dremap, dfinal;
43632 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
43633 unsigned contents[2];
43634 bool ok;
43635
43636 if (!(TARGET_AVX2
43637 && (d->vmode == V32QImode || d->vmode == V16HImode)
43638 && d->one_operand_p))
43639 return false;
43640
43641 contents[0] = 0;
43642 contents[1] = 0;
43643 for (i = 0; i < nelt2; ++i)
43644 {
43645 contents[0] |= 1u << (d->perm[i] / nelt4);
43646 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
43647 }
43648
43649 for (i = 0; i < 2; ++i)
43650 {
43651 unsigned int cnt = 0;
43652 for (j = 0; j < 4; ++j)
43653 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
43654 return false;
43655 }
43656
43657 if (d->testing_p)
43658 return true;
43659
43660 dremap = *d;
43661 dremap.vmode = V4DImode;
43662 dremap.nelt = 4;
43663 dremap.target = gen_reg_rtx (V4DImode);
43664 dremap.op0 = gen_lowpart (V4DImode, d->op0);
43665 dremap.op1 = dremap.op0;
43666 dremap.one_operand_p = true;
43667 for (i = 0; i < 2; ++i)
43668 {
43669 unsigned int cnt = 0;
43670 for (j = 0; j < 4; ++j)
43671 if ((contents[i] & (1u << j)) != 0)
43672 dremap.perm[2 * i + cnt++] = j;
43673 for (; cnt < 2; ++cnt)
43674 dremap.perm[2 * i + cnt] = 0;
43675 }
43676
43677 dfinal = *d;
43678 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43679 dfinal.op1 = dfinal.op0;
43680 dfinal.one_operand_p = true;
43681 for (i = 0, j = 0; i < nelt; ++i)
43682 {
43683 if (i == nelt2)
43684 j = 2;
43685 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
43686 if ((d->perm[i] / nelt4) == dremap.perm[j])
43687 ;
43688 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
43689 dfinal.perm[i] |= nelt4;
43690 else
43691 gcc_unreachable ();
43692 }
43693
43694 ok = expand_vec_perm_1 (&dremap);
43695 gcc_assert (ok);
43696
43697 ok = expand_vec_perm_1 (&dfinal);
43698 gcc_assert (ok);
43699
43700 return true;
43701 }
43702
43703 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
43704 a vector permutation using two instructions: vperm2f128 (or
43705 vperm2i128) followed by any single in-lane permutation. */
43706
43707 static bool
43708 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
43709 {
43710 struct expand_vec_perm_d dfirst, dsecond;
43711 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
43712 bool ok;
43713
43714 if (!TARGET_AVX
43715 || GET_MODE_SIZE (d->vmode) != 32
43716 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
43717 return false;
43718
43719 dsecond = *d;
43720 dsecond.one_operand_p = false;
43721 dsecond.testing_p = true;
43722
43723 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
43724 immediate. For perm < 16 the second permutation uses
43725 d->op0 as first operand, for perm >= 16 it uses d->op1
43726 as first operand. The second operand is the result of
43727 vperm2[fi]128. */
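/* For illustration: in the vperm2[fi]128 immediate, bits 0-1 select the
   source of the result's low 128-bit lane and bits 4-5 the source of its
   high lane, where 0/1 are the low/high lanes of the first operand and
   2/3 those of the second. E.g. perm == 6 (0b0110) gives the immediate
   0x12: low lane from the second operand's low lane, high lane from the
   first operand's high lane. */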
43728 for (perm = 0; perm < 32; perm++)
43729 {
43730 /* Ignore permutations which do not move anything cross-lane. */
43731 if (perm < 16)
43732 {
43733 /* The second shuffle for e.g. V4DFmode has
43734 0123 and ABCD operands.
43735 Ignore AB23, as 23 is already in the second lane
43736 of the first operand. */
43737 if ((perm & 0xc) == (1 << 2)) continue;
43738 /* And 01CD, as 01 is in the first lane of the first
43739 operand. */
43740 if ((perm & 3) == 0) continue;
43741 /* And 4567, as then the vperm2[fi]128 doesn't change
43742 anything on the original 4567 second operand. */
43743 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
43744 }
43745 else
43746 {
43747 /* The second shuffle for e.g. V4DFmode has
43748 4567 and ABCD operands.
43749 Ignore AB67, as 67 is already in the second lane
43750 of the first operand. */
43751 if ((perm & 0xc) == (3 << 2)) continue;
43752 /* And 45CD, as 45 is in the first lane of the first
43753 operand. */
43754 if ((perm & 3) == 2) continue;
43755 /* And 0123, as then the vperm2[fi]128 doesn't change
43756 anything on the original 0123 first operand. */
43757 if ((perm & 0xf) == (1 << 2)) continue;
43758 }
43759
43760 for (i = 0; i < nelt; i++)
43761 {
43762 j = d->perm[i] / nelt2;
43763 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
43764 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
43765 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
43766 dsecond.perm[i] = d->perm[i] & (nelt - 1);
43767 else
43768 break;
43769 }
43770
43771 if (i == nelt)
43772 {
43773 start_sequence ();
43774 ok = expand_vec_perm_1 (&dsecond);
43775 end_sequence ();
43776 }
43777 else
43778 ok = false;
43779
43780 if (ok)
43781 {
43782 if (d->testing_p)
43783 return true;
43784
43785 /* Found a usable second shuffle. dfirst will be
43786 vperm2f128 on d->op0 and d->op1. */
43787 dsecond.testing_p = false;
43788 dfirst = *d;
43789 dfirst.target = gen_reg_rtx (d->vmode);
43790 for (i = 0; i < nelt; i++)
43791 dfirst.perm[i] = (i & (nelt2 - 1))
43792 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
43793
43794 ok = expand_vec_perm_1 (&dfirst);
43795 gcc_assert (ok);
43796
43797 /* And dsecond is some single insn shuffle, taking
43798 d->op0 and result of vperm2f128 (if perm < 16) or
43799 d->op1 and result of vperm2f128 (otherwise). */
43800 dsecond.op1 = dfirst.target;
43801 if (perm >= 16)
43802 dsecond.op0 = dfirst.op1;
43803
43804 ok = expand_vec_perm_1 (&dsecond);
43805 gcc_assert (ok);
43806
43807 return true;
43808 }
43809
43810 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
43811 if (d->one_operand_p)
43812 return false;
43813 }
43814
43815 return false;
43816 }
43817
43818 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43819 a two-vector permutation into 2 intra-lane interleave insns
43820 and a cross-lane shuffle for 32-byte vectors. */
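/* For illustration, the selectors this matches have the form
   { p, p+nelt, p+1, p+1+nelt, ... } with p either 0 or nelt/2, e.g. the
   V8SImode interleave-low selector { 0, 8, 1, 9, 2, 10, 3, 11 }. */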
43821
43822 static bool
43823 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
43824 {
43825 unsigned i, nelt;
43826 rtx (*gen) (rtx, rtx, rtx);
43827
43828 if (d->one_operand_p)
43829 return false;
43830 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
43831 ;
43832 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
43833 ;
43834 else
43835 return false;
43836
43837 nelt = d->nelt;
43838 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
43839 return false;
43840 for (i = 0; i < nelt; i += 2)
43841 if (d->perm[i] != d->perm[0] + i / 2
43842 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
43843 return false;
43844
43845 if (d->testing_p)
43846 return true;
43847
43848 switch (d->vmode)
43849 {
43850 case V32QImode:
43851 if (d->perm[0])
43852 gen = gen_vec_interleave_highv32qi;
43853 else
43854 gen = gen_vec_interleave_lowv32qi;
43855 break;
43856 case V16HImode:
43857 if (d->perm[0])
43858 gen = gen_vec_interleave_highv16hi;
43859 else
43860 gen = gen_vec_interleave_lowv16hi;
43861 break;
43862 case V8SImode:
43863 if (d->perm[0])
43864 gen = gen_vec_interleave_highv8si;
43865 else
43866 gen = gen_vec_interleave_lowv8si;
43867 break;
43868 case V4DImode:
43869 if (d->perm[0])
43870 gen = gen_vec_interleave_highv4di;
43871 else
43872 gen = gen_vec_interleave_lowv4di;
43873 break;
43874 case V8SFmode:
43875 if (d->perm[0])
43876 gen = gen_vec_interleave_highv8sf;
43877 else
43878 gen = gen_vec_interleave_lowv8sf;
43879 break;
43880 case V4DFmode:
43881 if (d->perm[0])
43882 gen = gen_vec_interleave_highv4df;
43883 else
43884 gen = gen_vec_interleave_lowv4df;
43885 break;
43886 default:
43887 gcc_unreachable ();
43888 }
43889
43890 emit_insn (gen (d->target, d->op0, d->op1));
43891 return true;
43892 }
43893
43894 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
43895 a single vector permutation using a single intra-lane vector
43896 permutation, vperm2f128 swapping the lanes and vblend* insn blending
43897 the non-swapped and swapped vectors together. */
43898
43899 static bool
43900 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
43901 {
43902 struct expand_vec_perm_d dfirst, dsecond;
43903 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
43904 rtx_insn *seq;
43905 bool ok;
43906 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
43907
43908 if (!TARGET_AVX
43909 || TARGET_AVX2
43910 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
43911 || !d->one_operand_p)
43912 return false;
43913
43914 dfirst = *d;
43915 for (i = 0; i < nelt; i++)
43916 dfirst.perm[i] = 0xff;
43917 for (i = 0, msk = 0; i < nelt; i++)
43918 {
43919 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
43920 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
43921 return false;
43922 dfirst.perm[j] = d->perm[i];
43923 if (j != i)
43924 msk |= (1 << i);
43925 }
43926 for (i = 0; i < nelt; i++)
43927 if (dfirst.perm[i] == 0xff)
43928 dfirst.perm[i] = i;
43929
43930 if (!d->testing_p)
43931 dfirst.target = gen_reg_rtx (dfirst.vmode);
43932
43933 start_sequence ();
43934 ok = expand_vec_perm_1 (&dfirst);
43935 seq = get_insns ();
43936 end_sequence ();
43937
43938 if (!ok)
43939 return false;
43940
43941 if (d->testing_p)
43942 return true;
43943
43944 emit_insn (seq);
43945
43946 dsecond = *d;
43947 dsecond.op0 = dfirst.target;
43948 dsecond.op1 = dfirst.target;
43949 dsecond.one_operand_p = true;
43950 dsecond.target = gen_reg_rtx (dsecond.vmode);
43951 for (i = 0; i < nelt; i++)
43952 dsecond.perm[i] = i ^ nelt2;
43953
43954 ok = expand_vec_perm_1 (&dsecond);
43955 gcc_assert (ok);
43956
43957 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
43958 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
43959 return true;
43960 }
43961
43962 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
43963 permutation using two vperm2f128, followed by a vshufpd insn blending
43964 the two vectors together. */
43965
43966 static bool
43967 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
43968 {
43969 struct expand_vec_perm_d dfirst, dsecond, dthird;
43970 bool ok;
43971
43972 if (!TARGET_AVX || (d->vmode != V4DFmode))
43973 return false;
43974
43975 if (d->testing_p)
43976 return true;
43977
43978 dfirst = *d;
43979 dsecond = *d;
43980 dthird = *d;
43981
43982 dfirst.perm[0] = (d->perm[0] & ~1);
43983 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
43984 dfirst.perm[2] = (d->perm[2] & ~1);
43985 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
43986 dsecond.perm[0] = (d->perm[1] & ~1);
43987 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
43988 dsecond.perm[2] = (d->perm[3] & ~1);
43989 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
43990 dthird.perm[0] = (d->perm[0] % 2);
43991 dthird.perm[1] = (d->perm[1] % 2) + 4;
43992 dthird.perm[2] = (d->perm[2] % 2) + 2;
43993 dthird.perm[3] = (d->perm[3] % 2) + 6;
43994
43995 dfirst.target = gen_reg_rtx (dfirst.vmode);
43996 dsecond.target = gen_reg_rtx (dsecond.vmode);
43997 dthird.op0 = dfirst.target;
43998 dthird.op1 = dsecond.target;
43999 dthird.one_operand_p = false;
44000
44001 canonicalize_perm (&dfirst);
44002 canonicalize_perm (&dsecond);
44003
44004 ok = expand_vec_perm_1 (&dfirst)
44005 && expand_vec_perm_1 (&dsecond)
44006 && expand_vec_perm_1 (&dthird);
44007
44008 gcc_assert (ok);
44009
44010 return true;
44011 }
44012
44013 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
44014 permutation with two pshufb insns and an ior. We should have already
44015 failed all two-instruction sequences. */
44016
44017 static bool
44018 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
44019 {
44020 rtx rperm[2][16], vperm, l, h, op, m128;
44021 unsigned int i, nelt, eltsz;
44022
44023 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
44024 return false;
44025 gcc_assert (!d->one_operand_p);
44026
44027 if (d->testing_p)
44028 return true;
44029
44030 nelt = d->nelt;
44031 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44032
44033 /* Generate two permutation masks. If the required element is within
44034 the given vector it is shuffled into the proper lane. If the required
44035 element is in the other vector, force a zero into the lane by setting
44036 bit 7 in the permutation mask. */
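/* A worked example: for a V8HImode extract-even permutation
   ({ 0, 2, 4, 6, 8, 10, 12, 14 }, eltsz == 2) the first mask becomes
   { 0, 1, 4, 5, 8, 9, 12, 13, -128 x 8 } and the second
   { -128 x 8, 0, 1, 4, 5, 8, 9, 12, 13 }; or-ing the two pshufb
   results then yields the final vector. */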
44037 m128 = GEN_INT (-128);
44038 for (i = 0; i < nelt; ++i)
44039 {
44040 unsigned j, e = d->perm[i];
44041 unsigned which = (e >= nelt);
44042 if (e >= nelt)
44043 e -= nelt;
44044
44045 for (j = 0; j < eltsz; ++j)
44046 {
44047 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
44048 rperm[1-which][i*eltsz + j] = m128;
44049 }
44050 }
44051
44052 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
44053 vperm = force_reg (V16QImode, vperm);
44054
44055 l = gen_reg_rtx (V16QImode);
44056 op = gen_lowpart (V16QImode, d->op0);
44057 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
44058
44059 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
44060 vperm = force_reg (V16QImode, vperm);
44061
44062 h = gen_reg_rtx (V16QImode);
44063 op = gen_lowpart (V16QImode, d->op1);
44064 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
44065
44066 op = d->target;
44067 if (d->vmode != V16QImode)
44068 op = gen_reg_rtx (V16QImode);
44069 emit_insn (gen_iorv16qi3 (op, l, h));
44070 if (op != d->target)
44071 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44072
44073 return true;
44074 }
44075
44076 /* Implement an arbitrary permutation of one V32QImode or V16HImode operand
44077 with two vpshufb insns, vpermq and vpor. We should have already failed
44078 all two- or three-instruction sequences. */
44079
44080 static bool
44081 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
44082 {
44083 rtx rperm[2][32], vperm, l, h, hp, op, m128;
44084 unsigned int i, nelt, eltsz;
44085
44086 if (!TARGET_AVX2
44087 || !d->one_operand_p
44088 || (d->vmode != V32QImode && d->vmode != V16HImode))
44089 return false;
44090
44091 if (d->testing_p)
44092 return true;
44093
44094 nelt = d->nelt;
44095 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44096
44097 /* Generate two permutation masks. If the required element is within
44098 the same lane, it is shuffled in. If the required element is in the
44099 other lane, force a zero by setting bit 7 in the permutation mask.
44100 The other mask has a non-negative element wherever an element is
44101 requested from the other lane, but that element is also moved to the
44102 other lane, so that the result of vpshufb can have its two V2TImode
44103 halves swapped. */
44104 m128 = GEN_INT (-128);
44105 for (i = 0; i < nelt; ++i)
44106 {
44107 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44108 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44109
44110 for (j = 0; j < eltsz; ++j)
44111 {
44112 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
44113 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
44114 }
44115 }
44116
44117 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44118 vperm = force_reg (V32QImode, vperm);
44119
44120 h = gen_reg_rtx (V32QImode);
44121 op = gen_lowpart (V32QImode, d->op0);
44122 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44123
44124 /* Swap the 128-bit lanes of h into hp. */
44125 hp = gen_reg_rtx (V4DImode);
44126 op = gen_lowpart (V4DImode, h);
44127 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
44128 const1_rtx));
44129
44130 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44131 vperm = force_reg (V32QImode, vperm);
44132
44133 l = gen_reg_rtx (V32QImode);
44134 op = gen_lowpart (V32QImode, d->op0);
44135 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44136
44137 op = d->target;
44138 if (d->vmode != V32QImode)
44139 op = gen_reg_rtx (V32QImode);
44140 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
44141 if (op != d->target)
44142 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44143
44144 return true;
44145 }
44146
44147 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
44148 and extract-odd permutations of two V32QImode or V16HImode operands
44149 with two vpshufb insns, vpor and vpermq. We should have already
44150 failed all two- or three-instruction sequences. */
44151
44152 static bool
44153 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
44154 {
44155 rtx rperm[2][32], vperm, l, h, ior, op, m128;
44156 unsigned int i, nelt, eltsz;
44157
44158 if (!TARGET_AVX2
44159 || d->one_operand_p
44160 || (d->vmode != V32QImode && d->vmode != V16HImode))
44161 return false;
44162
44163 for (i = 0; i < d->nelt; ++i)
44164 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
44165 return false;
44166
44167 if (d->testing_p)
44168 return true;
44169
44170 nelt = d->nelt;
44171 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44172
44173 /* Generate two permutation masks. In the first permutation mask
44174 the first quarter will contain indexes for the first half
44175 of op0, the second quarter will contain bit 7 set, the third quarter
44176 will contain indexes for the second half of op0 and the
44177 last quarter bit 7 set. In the second permutation mask
44178 the first quarter will contain bit 7 set, the second quarter
44179 indexes for the first half of op1, the third quarter bit 7 set
44180 and the last quarter indexes for the second half of op1.
44181 I.e. the first mask e.g. for V32QImode extract even will be:
44182 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
44183 (all values masked with 0xf except for -128) and the second mask
44184 for extract even will be
44185 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
44186 m128 = GEN_INT (-128);
44187 for (i = 0; i < nelt; ++i)
44188 {
44189 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44190 unsigned which = d->perm[i] >= nelt;
44191 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
44192
44193 for (j = 0; j < eltsz; ++j)
44194 {
44195 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
44196 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
44197 }
44198 }
44199
44200 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44201 vperm = force_reg (V32QImode, vperm);
44202
44203 l = gen_reg_rtx (V32QImode);
44204 op = gen_lowpart (V32QImode, d->op0);
44205 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44206
44207 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44208 vperm = force_reg (V32QImode, vperm);
44209
44210 h = gen_reg_rtx (V32QImode);
44211 op = gen_lowpart (V32QImode, d->op1);
44212 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44213
44214 ior = gen_reg_rtx (V32QImode);
44215 emit_insn (gen_iorv32qi3 (ior, l, h));
44216
44217 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
44218 op = gen_reg_rtx (V4DImode);
44219 ior = gen_lowpart (V4DImode, ior);
44220 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
44221 const1_rtx, GEN_INT (3)));
44222 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44223
44224 return true;
44225 }
44226
44227 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
44228 and extract-odd permutations. */
44229
44230 static bool
44231 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
44232 {
44233 rtx t1, t2, t3, t4, t5;
44234
44235 switch (d->vmode)
44236 {
44237 case V4DFmode:
44238 if (d->testing_p)
44239 break;
44240 t1 = gen_reg_rtx (V4DFmode);
44241 t2 = gen_reg_rtx (V4DFmode);
44242
44243 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44244 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
44245 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
44246
44247 /* Now an unpck[lh]pd will produce the result required. */
44248 if (odd)
44249 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
44250 else
44251 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
44252 emit_insn (t3);
44253 break;
44254
44255 case V8SFmode:
44256 {
44257 int mask = odd ? 0xdd : 0x88;
44258
44259 if (d->testing_p)
44260 break;
44261 t1 = gen_reg_rtx (V8SFmode);
44262 t2 = gen_reg_rtx (V8SFmode);
44263 t3 = gen_reg_rtx (V8SFmode);
44264
44265 /* Shuffle within the 128-bit lanes to produce:
44266 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
44267 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
44268 GEN_INT (mask)));
44269
44270 /* Shuffle the lanes around to produce:
44271 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44272 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44273 GEN_INT (0x3)));
44274
44275 /* Shuffle within the 128-bit lanes to produce:
44276 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44277 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44278
44279 /* Shuffle within the 128-bit lanes to produce:
44280 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44281 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44282
44283 /* Shuffle the lanes around to produce:
44284 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44285 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44286 GEN_INT (0x20)));
44287 }
44288 break;
44289
44290 case V2DFmode:
44291 case V4SFmode:
44292 case V2DImode:
44293 case V4SImode:
44294 /* These are always directly implementable by expand_vec_perm_1. */
44295 gcc_unreachable ();
44296
44297 case V8HImode:
44298 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44299 return expand_vec_perm_pshufb2 (d);
44300 else
44301 {
44302 if (d->testing_p)
44303 break;
44304 /* We need 2*log2(N)-1 operations to achieve odd/even
44305 with interleave. */
44306 t1 = gen_reg_rtx (V8HImode);
44307 t2 = gen_reg_rtx (V8HImode);
44308 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44309 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44310 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44311 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44312 if (odd)
44313 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44314 else
44315 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44316 emit_insn (t3);
44317 }
44318 break;
44319
44320 case V16QImode:
44321 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44322 return expand_vec_perm_pshufb2 (d);
44323 else
44324 {
44325 if (d->testing_p)
44326 break;
44327 t1 = gen_reg_rtx (V16QImode);
44328 t2 = gen_reg_rtx (V16QImode);
44329 t3 = gen_reg_rtx (V16QImode);
44330 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44331 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44332 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44333 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44334 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44335 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44336 if (odd)
44337 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44338 else
44339 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44340 emit_insn (t3);
44341 }
44342 break;
44343
44344 case V16HImode:
44345 case V32QImode:
44346 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44347
44348 case V4DImode:
44349 if (!TARGET_AVX2)
44350 {
44351 struct expand_vec_perm_d d_copy = *d;
44352 d_copy.vmode = V4DFmode;
44353 if (d->testing_p)
44354 d_copy.target = gen_lowpart (V4DFmode, d->target);
44355 else
44356 d_copy.target = gen_reg_rtx (V4DFmode);
44357 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44358 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44359 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44360 {
44361 if (!d->testing_p)
44362 emit_move_insn (d->target,
44363 gen_lowpart (V4DImode, d_copy.target));
44364 return true;
44365 }
44366 return false;
44367 }
44368
44369 if (d->testing_p)
44370 break;
44371
44372 t1 = gen_reg_rtx (V4DImode);
44373 t2 = gen_reg_rtx (V4DImode);
44374
44375 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44376 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44377 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44378
44379 /* Now an vpunpck[lh]qdq will produce the result required. */
44380 if (odd)
44381 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44382 else
44383 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44384 emit_insn (t3);
44385 break;
44386
44387 case V8SImode:
44388 if (!TARGET_AVX2)
44389 {
44390 struct expand_vec_perm_d d_copy = *d;
44391 d_copy.vmode = V8SFmode;
44392 if (d->testing_p)
44393 d_copy.target = gen_lowpart (V8SFmode, d->target);
44394 else
44395 d_copy.target = gen_reg_rtx (V8SFmode);
44396 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44397 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44398 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44399 {
44400 if (!d->testing_p)
44401 emit_move_insn (d->target,
44402 gen_lowpart (V8SImode, d_copy.target));
44403 return true;
44404 }
44405 return false;
44406 }
44407
44408 if (d->testing_p)
44409 break;
44410
44411 t1 = gen_reg_rtx (V8SImode);
44412 t2 = gen_reg_rtx (V8SImode);
44413 t3 = gen_reg_rtx (V4DImode);
44414 t4 = gen_reg_rtx (V4DImode);
44415 t5 = gen_reg_rtx (V4DImode);
44416
44417 /* Shuffle the lanes around into
44418 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44419 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44420 gen_lowpart (V4DImode, d->op1),
44421 GEN_INT (0x20)));
44422 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44423 gen_lowpart (V4DImode, d->op1),
44424 GEN_INT (0x31)));
44425
44426 /* Swap the 2nd and 3rd position in each lane into
44427 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
44428 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44429 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44430 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44431 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44432
44433 /* Now an vpunpck[lh]qdq will produce
44434 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
44435 if (odd)
44436 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44437 gen_lowpart (V4DImode, t2));
44438 else
44439 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44440 gen_lowpart (V4DImode, t2));
44441 emit_insn (t3);
44442 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44443 break;
44444
44445 default:
44446 gcc_unreachable ();
44447 }
44448
44449 return true;
44450 }
44451
44452 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44453 extract-even and extract-odd permutations. */
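/* E.g. for V8SImode the extract-even selector is
   { 0, 2, 4, 6, 8, 10, 12, 14 } and the extract-odd selector is
   { 1, 3, 5, 7, 9, 11, 13, 15 }. */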
44454
44455 static bool
44456 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44457 {
44458 unsigned i, odd, nelt = d->nelt;
44459
44460 odd = d->perm[0];
44461 if (odd != 0 && odd != 1)
44462 return false;
44463
44464 for (i = 1; i < nelt; ++i)
44465 if (d->perm[i] != 2 * i + odd)
44466 return false;
44467
44468 return expand_vec_perm_even_odd_1 (d, odd);
44469 }
44470
44471 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
44472 permutations. We assume that expand_vec_perm_1 has already failed. */
44473
44474 static bool
44475 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44476 {
44477 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44478 enum machine_mode vmode = d->vmode;
44479 unsigned char perm2[4];
44480 rtx op0 = d->op0, dest;
44481 bool ok;
44482
44483 switch (vmode)
44484 {
44485 case V4DFmode:
44486 case V8SFmode:
44487 /* These are special-cased in sse.md so that we can optionally
44488 use the vbroadcast instruction. They expand to two insns
44489 if the input happens to be in a register. */
44490 gcc_unreachable ();
44491
44492 case V2DFmode:
44493 case V2DImode:
44494 case V4SFmode:
44495 case V4SImode:
44496 /* These are always implementable using standard shuffle patterns. */
44497 gcc_unreachable ();
44498
44499 case V8HImode:
44500 case V16QImode:
44501 /* These can be implemented via interleave. We save one insn by
44502 stopping once we have promoted to V4SImode and then using pshufd. */
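/* For instance, broadcasting element 3 of a V8HImode vector: one
   punpcklwd duplicates each of the low four elements, dword 3 of the
   V4SImode view then holds { e3, e3 }, and a pshufd with selector
   { 3, 3, 3, 3 } replicates it across the whole vector. */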
44503 if (d->testing_p)
44504 return true;
44505 do
44506 {
44507 rtx dest;
44508 rtx (*gen) (rtx, rtx, rtx)
44509 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
44510 : gen_vec_interleave_lowv8hi;
44511
44512 if (elt >= nelt2)
44513 {
44514 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
44515 : gen_vec_interleave_highv8hi;
44516 elt -= nelt2;
44517 }
44518 nelt2 /= 2;
44519
44520 dest = gen_reg_rtx (vmode);
44521 emit_insn (gen (dest, op0, op0));
44522 vmode = get_mode_wider_vector (vmode);
44523 op0 = gen_lowpart (vmode, dest);
44524 }
44525 while (vmode != V4SImode);
44526
44527 memset (perm2, elt, 4);
44528 dest = gen_reg_rtx (V4SImode);
44529 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
44530 gcc_assert (ok);
44531 if (!d->testing_p)
44532 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
44533 return true;
44534
44535 case V32QImode:
44536 case V16HImode:
44537 case V8SImode:
44538 case V4DImode:
44539 /* For AVX2, broadcasts of the first element should already have been
44540 handled by expand_vec_perm_1 via vpbroadcast* or vpermq. */
44541 gcc_assert (!TARGET_AVX2 || d->perm[0]);
44542 return false;
44543
44544 default:
44545 gcc_unreachable ();
44546 }
44547 }
44548
44549 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44550 broadcast permutations. */
44551
44552 static bool
44553 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
44554 {
44555 unsigned i, elt, nelt = d->nelt;
44556
44557 if (!d->one_operand_p)
44558 return false;
44559
44560 elt = d->perm[0];
44561 for (i = 1; i < nelt; ++i)
44562 if (d->perm[i] != elt)
44563 return false;
44564
44565 return expand_vec_perm_broadcast_1 (d);
44566 }
44567
44568 /* Implement an arbitrary permutation of two V32QImode or V16HImode operands
44569 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
44570 all the shorter instruction sequences. */
44571
44572 static bool
44573 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
44574 {
44575 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
44576 unsigned int i, nelt, eltsz;
44577 bool used[4];
44578
44579 if (!TARGET_AVX2
44580 || d->one_operand_p
44581 || (d->vmode != V32QImode && d->vmode != V16HImode))
44582 return false;
44583
44584 if (d->testing_p)
44585 return true;
44586
44587 nelt = d->nelt;
44588 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44589
44590 /* Generate 4 permutation masks. If the required element is within
44591 the same lane, it is shuffled in. If the required element is in the
44592 other lane, force a zero by setting bit 7 in the permutation mask.
44593 The other mask has a non-negative element wherever an element is
44594 requested from the other lane, but that element is also moved to the
44595 other lane, so that the result of vpshufb can have its two V2TImode
44596 halves swapped. */
44597 m128 = GEN_INT (-128);
44598 for (i = 0; i < 32; ++i)
44599 {
44600 rperm[0][i] = m128;
44601 rperm[1][i] = m128;
44602 rperm[2][i] = m128;
44603 rperm[3][i] = m128;
44604 }
44605 used[0] = false;
44606 used[1] = false;
44607 used[2] = false;
44608 used[3] = false;
44609 for (i = 0; i < nelt; ++i)
44610 {
44611 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44612 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44613 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
44614
44615 for (j = 0; j < eltsz; ++j)
44616 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
44617 used[which] = true;
44618 }
44619
44620 for (i = 0; i < 2; ++i)
44621 {
44622 if (!used[2 * i + 1])
44623 {
44624 h[i] = NULL_RTX;
44625 continue;
44626 }
44627 vperm = gen_rtx_CONST_VECTOR (V32QImode,
44628 gen_rtvec_v (32, rperm[2 * i + 1]));
44629 vperm = force_reg (V32QImode, vperm);
44630 h[i] = gen_reg_rtx (V32QImode);
44631 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44632 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
44633 }
44634
44635 /* Swap the 128-bit lanes of h[X]. */
44636 for (i = 0; i < 2; ++i)
44637 {
44638 if (h[i] == NULL_RTX)
44639 continue;
44640 op = gen_reg_rtx (V4DImode);
44641 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
44642 const2_rtx, GEN_INT (3), const0_rtx,
44643 const1_rtx));
44644 h[i] = gen_lowpart (V32QImode, op);
44645 }
44646
44647 for (i = 0; i < 2; ++i)
44648 {
44649 if (!used[2 * i])
44650 {
44651 l[i] = NULL_RTX;
44652 continue;
44653 }
44654 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
44655 vperm = force_reg (V32QImode, vperm);
44656 l[i] = gen_reg_rtx (V32QImode);
44657 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44658 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
44659 }
44660
44661 for (i = 0; i < 2; ++i)
44662 {
44663 if (h[i] && l[i])
44664 {
44665 op = gen_reg_rtx (V32QImode);
44666 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
44667 l[i] = op;
44668 }
44669 else if (h[i])
44670 l[i] = h[i];
44671 }
44672
44673 gcc_assert (l[0] && l[1]);
44674 op = d->target;
44675 if (d->vmode != V32QImode)
44676 op = gen_reg_rtx (V32QImode);
44677 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
44678 if (op != d->target)
44679 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44680 return true;
44681 }
44682
44683 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
44684 With all of the interface bits taken care of, perform the expansion
44685 in D and return true on success. */
44686
44687 static bool
44688 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
44689 {
44690 /* Try a single instruction expansion. */
44691 if (expand_vec_perm_1 (d))
44692 return true;
44693
44694 /* Try sequences of two instructions. */
44695
44696 if (expand_vec_perm_pshuflw_pshufhw (d))
44697 return true;
44698
44699 if (expand_vec_perm_palignr (d))
44700 return true;
44701
44702 if (expand_vec_perm_interleave2 (d))
44703 return true;
44704
44705 if (expand_vec_perm_broadcast (d))
44706 return true;
44707
44708 if (expand_vec_perm_vpermq_perm_1 (d))
44709 return true;
44710
44711 if (expand_vec_perm_vperm2f128 (d))
44712 return true;
44713
44714 if (expand_vec_perm_pblendv (d))
44715 return true;
44716
44717 /* Try sequences of three instructions. */
44718
44719 if (expand_vec_perm_2vperm2f128_vshuf (d))
44720 return true;
44721
44722 if (expand_vec_perm_pshufb2 (d))
44723 return true;
44724
44725 if (expand_vec_perm_interleave3 (d))
44726 return true;
44727
44728 if (expand_vec_perm_vperm2f128_vblend (d))
44729 return true;
44730
44731 /* Try sequences of four instructions. */
44732
44733 if (expand_vec_perm_vpshufb2_vpermq (d))
44734 return true;
44735
44736 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
44737 return true;
44738
44739 /* ??? Look for narrow permutations whose element orderings would
44740 allow the promotion to a wider mode. */
44741
44742 /* ??? Look for sequences of interleave or a wider permute that place
44743 the data into the correct lanes for a half-vector shuffle like
44744 pshuf[lh]w or vpermilps. */
44745
44746 /* ??? Look for sequences of interleave that produce the desired results.
44747 The combinatorics of punpck[lh] get pretty ugly... */
44748
44749 if (expand_vec_perm_even_odd (d))
44750 return true;
44751
44752 /* Even longer sequences. */
44753 if (expand_vec_perm_vpshufb4_vpermq2 (d))
44754 return true;
44755
44756 return false;
44757 }
44758
44759 /* If a permutation only uses one operand, make it clear. Returns true
44760 if the permutation references both operands. */
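/* For instance, a V4SImode selector { 4, 5, 6, 7 } references only the
   second operand (which == 2); it is rewritten to { 0, 1, 2, 3 } with
   op0 replaced by op1, and false is returned. */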
44761
44762 static bool
44763 canonicalize_perm (struct expand_vec_perm_d *d)
44764 {
44765 int i, which, nelt = d->nelt;
44766
44767 for (i = which = 0; i < nelt; ++i)
44768 which |= (d->perm[i] < nelt ? 1 : 2);
44769
44770 d->one_operand_p = true;
44771 switch (which)
44772 {
44773 default:
44774 gcc_unreachable();
44775
44776 case 3:
44777 if (!rtx_equal_p (d->op0, d->op1))
44778 {
44779 d->one_operand_p = false;
44780 break;
44781 }
44782 /* The elements of PERM do not suggest that only the first operand
44783 is used, but both operands are identical. Allow easier matching
44784 of the permutation by folding the permutation into the single
44785 input vector. */
44786 /* FALLTHRU */
44787
44788 case 2:
44789 for (i = 0; i < nelt; ++i)
44790 d->perm[i] &= nelt - 1;
44791 d->op0 = d->op1;
44792 break;
44793
44794 case 1:
44795 d->op1 = d->op0;
44796 break;
44797 }
44798
44799 return (which == 3);
44800 }
44801
44802 bool
44803 ix86_expand_vec_perm_const (rtx operands[4])
44804 {
44805 struct expand_vec_perm_d d;
44806 unsigned char perm[MAX_VECT_LEN];
44807 int i, nelt;
44808 bool two_args;
44809 rtx sel;
44810
44811 d.target = operands[0];
44812 d.op0 = operands[1];
44813 d.op1 = operands[2];
44814 sel = operands[3];
44815
44816 d.vmode = GET_MODE (d.target);
44817 gcc_assert (VECTOR_MODE_P (d.vmode));
44818 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44819 d.testing_p = false;
44820
44821 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
44822 gcc_assert (XVECLEN (sel, 0) == nelt);
44823 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
44824
44825 for (i = 0; i < nelt; ++i)
44826 {
44827 rtx e = XVECEXP (sel, 0, i);
44828 int ei = INTVAL (e) & (2 * nelt - 1);
44829 d.perm[i] = ei;
44830 perm[i] = ei;
44831 }
44832
44833 two_args = canonicalize_perm (&d);
44834
44835 if (ix86_expand_vec_perm_const_1 (&d))
44836 return true;
44837
44838 /* If the selector says both arguments are needed, but the operands are the
44839 same, the above tried to expand with one_operand_p and flattened selector.
44840 If that didn't work, retry without one_operand_p; we succeeded with that
44841 during testing. */
44842 if (two_args && d.one_operand_p)
44843 {
44844 d.one_operand_p = false;
44845 memcpy (d.perm, perm, sizeof (perm));
44846 return ix86_expand_vec_perm_const_1 (&d);
44847 }
44848
44849 return false;
44850 }
44851
44852 /* Implement targetm.vectorize.vec_perm_const_ok. */
44853
44854 static bool
44855 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
44856 const unsigned char *sel)
44857 {
44858 struct expand_vec_perm_d d;
44859 unsigned int i, nelt, which;
44860 bool ret;
44861
44862 d.vmode = vmode;
44863 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44864 d.testing_p = true;
44865
44866 /* Given sufficient ISA support we can just return true here
44867 for selected vector modes. */
44868 if (d.vmode == V16SImode || d.vmode == V16SFmode
44869 || d.vmode == V8DFmode || d.vmode == V8DImode)
44870 /* All implementable with a single vpermi2 insn. */
44871 return true;
44872 if (GET_MODE_SIZE (d.vmode) == 16)
44873 {
44874 /* All implementable with a single vpperm insn. */
44875 if (TARGET_XOP)
44876 return true;
44877 /* All implementable with 2 pshufb + 1 ior. */
44878 if (TARGET_SSSE3)
44879 return true;
44880 /* All implementable with shufpd or unpck[lh]pd. */
44881 if (d.nelt == 2)
44882 return true;
44883 }
44884
44885 /* Copy the selector values from SEL into the permutation
44886 array in D. */
44887 memcpy (d.perm, sel, nelt);
44888 for (i = which = 0; i < nelt; ++i)
44889 {
44890 unsigned char e = d.perm[i];
44891 gcc_assert (e < 2 * nelt);
44892 which |= (e < nelt ? 1 : 2);
44893 }
44894
44895 /* If all elements are from the second vector, fold them to the first one. */
44896 if (which == 2)
44897 for (i = 0; i < nelt; ++i)
44898 d.perm[i] -= nelt;
44899
44900 /* Check whether the mask can be applied to the vector type. */
44901 d.one_operand_p = (which != 3);
44902
44903 /* Implementable with shufps or pshufd. */
44904 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
44905 return true;
44906
44907 /* Otherwise we have to go through the motions and see if we can
44908 figure out how to generate the requested permutation. */
44909 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
44910 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
44911 if (!d.one_operand_p)
44912 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
44913
44914 start_sequence ();
44915 ret = ix86_expand_vec_perm_const_1 (&d);
44916 end_sequence ();
44917
44918 return ret;
44919 }
44920
44921 void
44922 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
44923 {
44924 struct expand_vec_perm_d d;
44925 unsigned i, nelt;
44926
44927 d.target = targ;
44928 d.op0 = op0;
44929 d.op1 = op1;
44930 d.vmode = GET_MODE (targ);
44931 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44932 d.one_operand_p = false;
44933 d.testing_p = false;
44934
44935 for (i = 0; i < nelt; ++i)
44936 d.perm[i] = i * 2 + odd;
44937
44938 /* We'll either be able to implement the permutation directly... */
44939 if (expand_vec_perm_1 (&d))
44940 return;
44941
44942 /* ... or we use the special-case patterns. */
44943 expand_vec_perm_even_odd_1 (&d, odd);
44944 }
44945
44946 static void
44947 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
44948 {
44949 struct expand_vec_perm_d d;
44950 unsigned i, nelt, base;
44951 bool ok;
44952
44953 d.target = targ;
44954 d.op0 = op0;
44955 d.op1 = op1;
44956 d.vmode = GET_MODE (targ);
44957 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44958 d.one_operand_p = false;
44959 d.testing_p = false;
44960
44961 base = high_p ? nelt / 2 : 0;
44962 for (i = 0; i < nelt / 2; ++i)
44963 {
44964 d.perm[i * 2] = i + base;
44965 d.perm[i * 2 + 1] = i + base + nelt;
44966 }
44967
44968 /* Note that for AVX this isn't one instruction. */
44969 ok = ix86_expand_vec_perm_const_1 (&d);
44970 gcc_assert (ok);
44971 }
44972
44973
44974 /* Expand a vector operation CODE for a V*QImode in terms of the
44975 same operation on V*HImode. */
44976
44977 void
44978 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
44979 {
44980 enum machine_mode qimode = GET_MODE (dest);
44981 enum machine_mode himode;
44982 rtx (*gen_il) (rtx, rtx, rtx);
44983 rtx (*gen_ih) (rtx, rtx, rtx);
44984 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
44985 struct expand_vec_perm_d d;
44986 bool ok, full_interleave;
44987 bool uns_p = false;
44988 int i;
44989
44990 switch (qimode)
44991 {
44992 case V16QImode:
44993 himode = V8HImode;
44994 gen_il = gen_vec_interleave_lowv16qi;
44995 gen_ih = gen_vec_interleave_highv16qi;
44996 break;
44997 case V32QImode:
44998 himode = V16HImode;
44999 gen_il = gen_avx2_interleave_lowv32qi;
45000 gen_ih = gen_avx2_interleave_highv32qi;
45001 break;
45002 default:
45003 gcc_unreachable ();
45004 }
45005
45006 op2_l = op2_h = op2;
45007 switch (code)
45008 {
45009 case MULT:
45010 /* Unpack data such that we've got a source byte in each low byte of
45011 each word. We don't care what goes into the high byte of each word.
45012 Rather than trying to get zero in there, it is most convenient to
45013 let it be a copy of the low byte. */
45014 op2_l = gen_reg_rtx (qimode);
45015 op2_h = gen_reg_rtx (qimode);
45016 emit_insn (gen_il (op2_l, op2, op2));
45017 emit_insn (gen_ih (op2_h, op2, op2));
45018 /* Unpack op1 the same way. */
45019
45020 op1_l = gen_reg_rtx (qimode);
45021 op1_h = gen_reg_rtx (qimode);
45022 emit_insn (gen_il (op1_l, op1, op1));
45023 emit_insn (gen_ih (op1_h, op1, op1));
45024 full_interleave = qimode == V16QImode;
45025 break;
45026
45027 case ASHIFT:
45028 case LSHIFTRT:
45029 uns_p = true;
45030 /* FALLTHRU */
45031 case ASHIFTRT:
45032 op1_l = gen_reg_rtx (himode);
45033 op1_h = gen_reg_rtx (himode);
45034 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
45035 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
45036 full_interleave = true;
45037 break;
45038 default:
45039 gcc_unreachable ();
45040 }
45041
45042 /* Perform the operation. */
45043 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
45044 1, OPTAB_DIRECT);
45045 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
45046 1, OPTAB_DIRECT);
45047 gcc_assert (res_l && res_h);
45048
45049 /* Merge the data back into the right place. */
45050 d.target = dest;
45051 d.op0 = gen_lowpart (qimode, res_l);
45052 d.op1 = gen_lowpart (qimode, res_h);
45053 d.vmode = qimode;
45054 d.nelt = GET_MODE_NUNITS (qimode);
45055 d.one_operand_p = false;
45056 d.testing_p = false;
45057
45058 if (full_interleave)
45059 {
45060 /* For SSE2, we used a full interleave, so the desired
45061 results are in the even elements. */
45062 for (i = 0; i < 32; ++i)
45063 d.perm[i] = i * 2;
45064 }
45065 else
45066 {
45067 /* For AVX, the interleave used above was not cross-lane. So we extract
45068 the even elements, but with the second and third quarters swapped.
45069 Happily, that is even one insn shorter than plain even extraction. */
45070 for (i = 0; i < 32; ++i)
45071 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
45072 }
45073
45074 ok = ix86_expand_vec_perm_const_1 (&d);
45075 gcc_assert (ok);
45076
45077 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45078 gen_rtx_fmt_ee (code, qimode, op1, op2));
45079 }
45080
45081 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
45082 if op is CONST_VECTOR with all odd elements equal to their
45083 preceding element. */
45084
45085 static bool
45086 const_vector_equal_evenodd_p (rtx op)
45087 {
45088 enum machine_mode mode = GET_MODE (op);
45089 int i, nunits = GET_MODE_NUNITS (mode);
45090 if (GET_CODE (op) != CONST_VECTOR
45091 || nunits != CONST_VECTOR_NUNITS (op))
45092 return false;
45093 for (i = 0; i < nunits; i += 2)
45094 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
45095 return false;
45096 return true;
45097 }
45098
45099 void
45100 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
45101 bool uns_p, bool odd_p)
45102 {
45103 enum machine_mode mode = GET_MODE (op1);
45104 enum machine_mode wmode = GET_MODE (dest);
45105 rtx x;
45106 rtx orig_op1 = op1, orig_op2 = op2;
45107
45108 if (!nonimmediate_operand (op1, mode))
45109 op1 = force_reg (mode, op1);
45110 if (!nonimmediate_operand (op2, mode))
45111 op2 = force_reg (mode, op2);
45112
45113 /* We only play even/odd games with vectors of SImode. */
45114 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
45115
45116 /* If we're looking for the odd results, shift those members down to
45117 the even slots. For some cpus this is faster than a PSHUFD. */
45118 if (odd_p)
45119 {
45120 /* For XOP use vpmacsdqh, but only for smult, as it is only
45121 signed. */
45122 if (TARGET_XOP && mode == V4SImode && !uns_p)
45123 {
45124 x = force_reg (wmode, CONST0_RTX (wmode));
45125 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
45126 return;
45127 }
45128
45129 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
45130 if (!const_vector_equal_evenodd_p (orig_op1))
45131 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
45132 x, NULL, 1, OPTAB_DIRECT);
45133 if (!const_vector_equal_evenodd_p (orig_op2))
45134 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
45135 x, NULL, 1, OPTAB_DIRECT);
45136 op1 = gen_lowpart (mode, op1);
45137 op2 = gen_lowpart (mode, op2);
45138 }
45139
45140 if (mode == V16SImode)
45141 {
45142 if (uns_p)
45143 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
45144 else
45145 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
45146 }
45147 else if (mode == V8SImode)
45148 {
45149 if (uns_p)
45150 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
45151 else
45152 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
45153 }
45154 else if (uns_p)
45155 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
45156 else if (TARGET_SSE4_1)
45157 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
45158 else
45159 {
45160 rtx s1, s2, t0, t1, t2;
45161
45162 /* The easiest way to implement this without PMULDQ is to go through
45163 the motions as if we are performing a full 64-bit multiply, except
45164 that we need to do less shuffling of the elements. */
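/* A sketch of the identity used: sign-extending a 32-bit element A to
   64 bits gives LO(A) + (S(A) << 32), where S(A) is 0 or 0xffffffff.
   Hence, modulo 2^64,
   A * B = LO(A)*LO(B) + ((S(A)*LO(B) + S(B)*LO(A)) << 32),
   which is exactly what the unsigned even multiplies below compute. */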
45165
45166 /* Compute the sign-extension, aka highparts, of the two operands. */
45167 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45168 op1, pc_rtx, pc_rtx);
45169 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45170 op2, pc_rtx, pc_rtx);
45171
45172 /* Multiply LO(A) * HI(B), and vice-versa. */
45173 t1 = gen_reg_rtx (wmode);
45174 t2 = gen_reg_rtx (wmode);
45175 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
45176 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
45177
45178 /* Multiply LO(A) * LO(B). */
45179 t0 = gen_reg_rtx (wmode);
45180 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
45181
45182 /* Combine and shift the highparts into place. */
45183 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
45184 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
45185 1, OPTAB_DIRECT);
45186
45187 /* Combine high and low parts. */
45188 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
45189 return;
45190 }
45191 emit_insn (x);
45192 }
45193
45194 void
45195 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
45196 bool uns_p, bool high_p)
45197 {
45198 enum machine_mode wmode = GET_MODE (dest);
45199 enum machine_mode mode = GET_MODE (op1);
45200 rtx t1, t2, t3, t4, mask;
45201
45202 switch (mode)
45203 {
45204 case V4SImode:
45205 t1 = gen_reg_rtx (mode);
45206 t2 = gen_reg_rtx (mode);
45207 if (TARGET_XOP && !uns_p)
45208 {
45209 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
45210 shuffle the elements once so that all elements are in the right
45211 place for immediate use: { A C B D }. */
45212 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
45213 const1_rtx, GEN_INT (3)));
45214 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
45215 const1_rtx, GEN_INT (3)));
45216 }
45217 else
45218 {
45219 /* Put the elements into place for the multiply. */
45220 ix86_expand_vec_interleave (t1, op1, op1, high_p);
45221 ix86_expand_vec_interleave (t2, op2, op2, high_p);
45222 high_p = false;
45223 }
45224 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
45225 break;
45226
45227 case V8SImode:
45228 /* Shuffle the elements between the lanes. After this we
45229 have { A B E F | C D G H } for each operand. */
45230 t1 = gen_reg_rtx (V4DImode);
45231 t2 = gen_reg_rtx (V4DImode);
45232 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
45233 const0_rtx, const2_rtx,
45234 const1_rtx, GEN_INT (3)));
45235 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
45236 const0_rtx, const2_rtx,
45237 const1_rtx, GEN_INT (3)));
45238
45239 /* Shuffle the elements within the lanes. After this we
45240 have { A A B B | C C D D } or { E E F F | G G H H }. */
45241 t3 = gen_reg_rtx (V8SImode);
45242 t4 = gen_reg_rtx (V8SImode);
45243 mask = GEN_INT (high_p
45244 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
45245 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
45246 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
45247 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
45248
45249 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
45250 break;
45251
45252 case V8HImode:
45253 case V16HImode:
45254 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
45255 uns_p, OPTAB_DIRECT);
45256 t2 = expand_binop (mode,
45257 uns_p ? umul_highpart_optab : smul_highpart_optab,
45258 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
45259 gcc_assert (t1 && t2);
45260
45261 t3 = gen_reg_rtx (mode);
45262 ix86_expand_vec_interleave (t3, t1, t2, high_p);
45263 emit_move_insn (dest, gen_lowpart (wmode, t3));
45264 break;
45265
45266 case V16QImode:
45267 case V32QImode:
45268 t1 = gen_reg_rtx (wmode);
45269 t2 = gen_reg_rtx (wmode);
45270 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
45271 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45272
45273 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45274 break;
45275
45276 default:
45277 gcc_unreachable ();
45278 }
45279 }
45280
45281 void
45282 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45283 {
45284 rtx res_1, res_2, res_3, res_4;
45285
45286 res_1 = gen_reg_rtx (V4SImode);
45287 res_2 = gen_reg_rtx (V4SImode);
45288 res_3 = gen_reg_rtx (V2DImode);
45289 res_4 = gen_reg_rtx (V2DImode);
45290 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45291 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45292
45293 /* Move the results in element 2 down to element 1; we don't care
45294 what goes in elements 2 and 3. Then we can merge the parts
45295 back together with an interleave.
45296
45297 Note that two other sequences were tried:
45298 (1) Use interleaves at the start instead of psrldq, which allows
45299 us to use a single shufps to merge things back at the end.
45300 (2) Use shufps here to combine the two vectors, then pshufd to
45301 put the elements in the correct order.
45302 In both cases the cost of the reformatting stall was too high
45303 and the overall sequence slower. */
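/* A worked example: with op1 = { a0, a1, a2, a3 } and op2 =
   { b0, b1, b2, b3 }, res_3 = { a0*b0, a2*b2 } and res_4 =
   { a1*b1, a3*b3 } as V2DImode. The two pshufds below move the low
   halves of those products into elements 0 and 1, and the final
   interleave-low produces { lo(a0*b0), lo(a1*b1), lo(a2*b2), lo(a3*b3) }. */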
45304
45305 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45306 const0_rtx, const2_rtx,
45307 const0_rtx, const0_rtx));
45308 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45309 const0_rtx, const2_rtx,
45310 const0_rtx, const0_rtx));
45311 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45312
45313 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45314 }
45315
45316 void
45317 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45318 {
45319 enum machine_mode mode = GET_MODE (op0);
45320 rtx t1, t2, t3, t4, t5, t6;
45321
45322 if (TARGET_XOP && mode == V2DImode)
45323 {
45324 /* op1: A,B,C,D, op2: E,F,G,H */
45325 op1 = gen_lowpart (V4SImode, op1);
45326 op2 = gen_lowpart (V4SImode, op2);
45327
45328 t1 = gen_reg_rtx (V4SImode);
45329 t2 = gen_reg_rtx (V4SImode);
45330 t3 = gen_reg_rtx (V2DImode);
45331 t4 = gen_reg_rtx (V2DImode);
45332
45333 /* t1: B,A,D,C */
45334 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45335 GEN_INT (1),
45336 GEN_INT (0),
45337 GEN_INT (3),
45338 GEN_INT (2)));
45339
45340 /* t2: (B*E),(A*F),(D*G),(C*H) */
45341 emit_insn (gen_mulv4si3 (t2, t1, op2));
45342
45343 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45344 emit_insn (gen_xop_phadddq (t3, t2));
45345
45346 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45347 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45348
45349 /* Multiply lower parts and add all */
45350 t5 = gen_reg_rtx (V2DImode);
45351 emit_insn (gen_vec_widen_umult_even_v4si (t5,
45352 gen_lowpart (V4SImode, op1),
45353 gen_lowpart (V4SImode, op2)));
45354 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
45355
45356 }
45357 else
45358 {
45359 enum machine_mode nmode;
45360 rtx (*umul) (rtx, rtx, rtx);
45361
45362 if (mode == V2DImode)
45363 {
45364 umul = gen_vec_widen_umult_even_v4si;
45365 nmode = V4SImode;
45366 }
45367 else if (mode == V4DImode)
45368 {
45369 umul = gen_vec_widen_umult_even_v8si;
45370 nmode = V8SImode;
45371 }
45372 else if (mode == V8DImode)
45373 {
45374 umul = gen_vec_widen_umult_even_v16si;
45375 nmode = V16SImode;
45376 }
45377 else
45378 gcc_unreachable ();
45379
45380
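/* The sequence below computes the low 64 bits of each lane product from
   32-bit halves: writing a = ah*2^32 + al and b = bh*2^32 + bl, we have
   a*b mod 2^64 = al*bl + ((ah*bl + al*bh) << 32). t1 holds al*bl, t4 and
   t5 the cross products, which are summed, shifted and added back in. */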
45381 /* Multiply low parts. */
45382 t1 = gen_reg_rtx (mode);
45383 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45384
45385 /* Shift input vectors right 32 bits so we can multiply high parts. */
45386 t6 = GEN_INT (32);
45387 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45388 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45389
45390 /* Multiply high parts by low parts. */
45391 t4 = gen_reg_rtx (mode);
45392 t5 = gen_reg_rtx (mode);
45393 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45394 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45395
45396 /* Combine and shift the highparts back. */
45397 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45398 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45399
45400 /* Combine high and low parts. */
45401 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45402 }
45403
45404 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45405 gen_rtx_MULT (mode, op1, op2));
45406 }
45407
45408 /* Calculate integer abs() using only SSE2 instructions. */
45409
45410 void
45411 ix86_expand_sse2_abs (rtx target, rtx input)
45412 {
45413 enum machine_mode mode = GET_MODE (target);
45414 rtx tmp0, tmp1, x;
45415
45416 switch (mode)
45417 {
45418 /* For 32-bit signed integer X, the best way to calculate the absolute
45419 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
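/* For instance, with X = -7 and W = 32: T = X >> 31 = -1, and
   (X ^ T) - T = 6 - (-1) = 7. */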
45420 case V4SImode:
45421 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45422 GEN_INT (GET_MODE_BITSIZE
45423 (GET_MODE_INNER (mode)) - 1),
45424 NULL, 0, OPTAB_DIRECT);
45425 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45426 NULL, 0, OPTAB_DIRECT);
45427 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45428 target, 0, OPTAB_DIRECT);
45429 break;
45430
45431 /* For 16-bit signed integer X, the best way to calculate the absolute
45432 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45433 case V8HImode:
45434 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45435
45436 x = expand_simple_binop (mode, SMAX, tmp0, input,
45437 target, 0, OPTAB_DIRECT);
45438 break;
45439
45440 /* For 8-bit signed integer X, the best way to calculate the absolute
45441 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45442 as SSE2 provides the PMINUB insn. */
45443 case V16QImode:
45444 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45445
45446 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
45447 target, 0, OPTAB_DIRECT);
45448 break;
45449
45450 default:
45451 gcc_unreachable ();
45452 }
45453
45454 if (x != target)
45455 emit_move_insn (target, x);
45456 }
45457
45458 /* Expand an insert into a vector register through pinsr insn.
45459 Return true if successful. */
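/* As a worked example of the index computation below: inserting a 16-bit
   value at bit offset 32 gives SIZE = 16 and POS = 32, so POS /= SIZE
   selects element 2 and the merge mask passed to pinsrw is 1 << 2 = 4. */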
45460
45461 bool
45462 ix86_expand_pinsr (rtx *operands)
45463 {
45464 rtx dst = operands[0];
45465 rtx src = operands[3];
45466
45467 unsigned int size = INTVAL (operands[1]);
45468 unsigned int pos = INTVAL (operands[2]);
45469
45470 if (GET_CODE (dst) == SUBREG)
45471 {
45472 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
45473 dst = SUBREG_REG (dst);
45474 }
45475
45476 if (GET_CODE (src) == SUBREG)
45477 src = SUBREG_REG (src);
45478
45479 switch (GET_MODE (dst))
45480 {
45481 case V16QImode:
45482 case V8HImode:
45483 case V4SImode:
45484 case V2DImode:
45485 {
45486 enum machine_mode srcmode, dstmode;
45487 rtx (*pinsr)(rtx, rtx, rtx, rtx);
45488
45489 srcmode = mode_for_size (size, MODE_INT, 0);
45490
45491 switch (srcmode)
45492 {
45493 case QImode:
45494 if (!TARGET_SSE4_1)
45495 return false;
45496 dstmode = V16QImode;
45497 pinsr = gen_sse4_1_pinsrb;
45498 break;
45499
45500 case HImode:
45501 if (!TARGET_SSE2)
45502 return false;
45503 dstmode = V8HImode;
45504 pinsr = gen_sse2_pinsrw;
45505 break;
45506
45507 case SImode:
45508 if (!TARGET_SSE4_1)
45509 return false;
45510 dstmode = V4SImode;
45511 pinsr = gen_sse4_1_pinsrd;
45512 break;
45513
45514 case DImode:
45515 gcc_assert (TARGET_64BIT);
45516 if (!TARGET_SSE4_1)
45517 return false;
45518 dstmode = V2DImode;
45519 pinsr = gen_sse4_1_pinsrq;
45520 break;
45521
45522 default:
45523 return false;
45524 }
45525
45526 rtx d = dst;
45527 if (GET_MODE (dst) != dstmode)
45528 d = gen_reg_rtx (dstmode);
45529 src = gen_lowpart (srcmode, src);
45530
45531 pos /= size;
45532
45533 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
45534 GEN_INT (1 << pos)));
45535 if (d != dst)
45536 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
45537 return true;
45538 }
45539
45540 default:
45541 return false;
45542 }
45543 }
45544 \f
45545 /* This function returns the calling ABI specific va_list type node.
45546 It returns the FNDECL specific va_list type. */
45547
45548 static tree
45549 ix86_fn_abi_va_list (tree fndecl)
45550 {
45551 if (!TARGET_64BIT)
45552 return va_list_type_node;
45553 gcc_assert (fndecl != NULL_TREE);
45554
45555 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
45556 return ms_va_list_type_node;
45557 else
45558 return sysv_va_list_type_node;
45559 }
45560
45561 /* Returns the canonical va_list type specified by TYPE. If there
45562 is no valid TYPE provided, it returns NULL_TREE. */
45563
45564 static tree
45565 ix86_canonical_va_list_type (tree type)
45566 {
45567 tree wtype, htype;
45568
45569 /* Resolve references and pointers to va_list type. */
45570 if (TREE_CODE (type) == MEM_REF)
45571 type = TREE_TYPE (type);
45572 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
45573 type = TREE_TYPE (type);
45574 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
45575 type = TREE_TYPE (type);
45576
45577 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
45578 {
45579 wtype = va_list_type_node;
45580 gcc_assert (wtype != NULL_TREE);
45581 htype = type;
45582 if (TREE_CODE (wtype) == ARRAY_TYPE)
45583 {
45584 /* If va_list is an array type, the argument may have decayed
45585 to a pointer type, e.g. by being passed to another function.
45586 In that case, unwrap both types so that we can compare the
45587 underlying records. */
45588 if (TREE_CODE (htype) == ARRAY_TYPE
45589 || POINTER_TYPE_P (htype))
45590 {
45591 wtype = TREE_TYPE (wtype);
45592 htype = TREE_TYPE (htype);
45593 }
45594 }
45595 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45596 return va_list_type_node;
45597 wtype = sysv_va_list_type_node;
45598 gcc_assert (wtype != NULL_TREE);
45599 htype = type;
45600 if (TREE_CODE (wtype) == ARRAY_TYPE)
45601 {
45602 /* If va_list is an array type, the argument may have decayed
45603 to a pointer type, e.g. by being passed to another function.
45604 In that case, unwrap both types so that we can compare the
45605 underlying records. */
45606 if (TREE_CODE (htype) == ARRAY_TYPE
45607 || POINTER_TYPE_P (htype))
45608 {
45609 wtype = TREE_TYPE (wtype);
45610 htype = TREE_TYPE (htype);
45611 }
45612 }
45613 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45614 return sysv_va_list_type_node;
45615 wtype = ms_va_list_type_node;
45616 gcc_assert (wtype != NULL_TREE);
45617 htype = type;
45618 if (TREE_CODE (wtype) == ARRAY_TYPE)
45619 {
45620 /* If va_list is an array type, the argument may have decayed
45621 to a pointer type, e.g. by being passed to another function.
45622 In that case, unwrap both types so that we can compare the
45623 underlying records. */
45624 if (TREE_CODE (htype) == ARRAY_TYPE
45625 || POINTER_TYPE_P (htype))
45626 {
45627 wtype = TREE_TYPE (wtype);
45628 htype = TREE_TYPE (htype);
45629 }
45630 }
45631 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45632 return ms_va_list_type_node;
45633 return NULL_TREE;
45634 }
45635 return std_canonical_va_list_type (type);
45636 }
45637
45638 /* Iterate through the target-specific builtin types for va_list.
45639 IDX denotes the iterator, *PTREE is set to the result type of
45640 the va_list builtin, and *PNAME to its internal type.
45641 Returns zero if there is no element for this index, otherwise
45642 IDX should be increased upon the next call.
45643 Note, do not iterate a base builtin's name like __builtin_va_list.
45644 Used from c_common_nodes_and_builtins. */
45645
45646 static int
45647 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
45648 {
45649 if (TARGET_64BIT)
45650 {
45651 switch (idx)
45652 {
45653 default:
45654 break;
45655
45656 case 0:
45657 *ptree = ms_va_list_type_node;
45658 *pname = "__builtin_ms_va_list";
45659 return 1;
45660
45661 case 1:
45662 *ptree = sysv_va_list_type_node;
45663 *pname = "__builtin_sysv_va_list";
45664 return 1;
45665 }
45666 }
45667
45668 return 0;
45669 }
45670
45671 #undef TARGET_SCHED_DISPATCH
45672 #define TARGET_SCHED_DISPATCH has_dispatch
45673 #undef TARGET_SCHED_DISPATCH_DO
45674 #define TARGET_SCHED_DISPATCH_DO do_dispatch
45675 #undef TARGET_SCHED_REASSOCIATION_WIDTH
45676 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
45677 #undef TARGET_SCHED_REORDER
45678 #define TARGET_SCHED_REORDER ix86_sched_reorder
45679 #undef TARGET_SCHED_ADJUST_PRIORITY
45680 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
45681 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
45682 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
45683 ix86_dependencies_evaluation_hook
45684
45685 /* The size of the dispatch window is the total number of bytes of
45686 object code allowed in a window. */
45687 #define DISPATCH_WINDOW_SIZE 16
45688
45689 /* Number of dispatch windows considered for scheduling. */
45690 #define MAX_DISPATCH_WINDOWS 3
45691
45692 /* Maximum number of instructions in a window. */
45693 #define MAX_INSN 4
45694
45695 /* Maximum number of immediate operands in a window. */
45696 #define MAX_IMM 4
45697
45698 /* Maximum number of immediate bits allowed in a window. */
45699 #define MAX_IMM_SIZE 128
45700
45701 /* Maximum number of 32 bit immediates allowed in a window. */
45702 #define MAX_IMM_32 4
45703
45704 /* Maximum number of 64 bit immediates allowed in a window. */
45705 #define MAX_IMM_64 2
45706
45707 /* Maximum total of loads or prefetches allowed in a window. */
45708 #define MAX_LOAD 2
45709
45710 /* Maximum total of stores allowed in a window. */
45711 #define MAX_STORE 1
45712
45713 #undef BIG
45714 #define BIG 100
45715
45716
45717 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
45718 enum dispatch_group {
45719 disp_no_group = 0,
45720 disp_load,
45721 disp_store,
45722 disp_load_store,
45723 disp_prefetch,
45724 disp_imm,
45725 disp_imm_32,
45726 disp_imm_64,
45727 disp_branch,
45728 disp_cmp,
45729 disp_jcc,
45730 disp_last
45731 };
45732
45733 /* Number of allowable groups in a dispatch window. It is an array
45734 indexed by dispatch_group enum. 100 is used as a big number,
45735 because the number of these kinds of operations does not have any
45736 effect in a dispatch window, but we need them for other reasons in
45737 the table. */
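/* Read as: at most 2 loads, 1 store, 1 load+store, 2 prefetches,
   4 immediates (of which at most 4 may be 32-bit and 2 may be 64-bit)
   and 1 branch per window; compares and jccs are effectively unbounded. */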
45738 static unsigned int num_allowable_groups[disp_last] = {
45739 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
45740 };
45741
45742 char group_name[disp_last + 1][16] = {
45743 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
45744 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
45745 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
45746 };
45747
45748 /* Instruction path. */
45749 enum insn_path {
45750 no_path = 0,
45751 path_single, /* Single micro op. */
45752 path_double, /* Double micro op. */
45753 path_multi, /* Instructions with more than 2 micro ops. */
45754 last_path
45755 };
45756
45757 /* sched_insn_info defines a window to the instructions scheduled in
45758 the basic block. It contains a pointer to the insn_info table and
45759 the instruction scheduled.
45760
45761 Windows are allocated for each basic block and are linked
45762 together. */
45763 typedef struct sched_insn_info_s {
45764 rtx insn;
45765 enum dispatch_group group;
45766 enum insn_path path;
45767 int byte_len;
45768 int imm_bytes;
45769 } sched_insn_info;
45770
45771 /* Linked list of dispatch windows. This is a two-way list of
45772 dispatch windows of a basic block. It contains information about
45773 the number of uops in the window and the total number of
45774 instructions and of bytes in the object code for this dispatch
45775 window. */
45776 typedef struct dispatch_windows_s {
45777 int num_insn; /* Number of insn in the window. */
45778 int num_uops; /* Number of uops in the window. */
45779 int window_size; /* Number of bytes in the window. */
45780 int window_num; /* Window number, either 0 or 1. */
45781 int num_imm; /* Number of immediates in the window. */
45782 int num_imm_32; /* Number of 32 bit immediates in the window. */
45783 int num_imm_64; /* Number of 64 bit immediates in the window. */
45784 int imm_size; /* Total size in bytes of immediates in the window. */
45785 int num_loads; /* Total memory loads in the window. */
45786 int num_stores; /* Total memory stores in the window. */
45787 int violation; /* Violation exists in window. */
45788 sched_insn_info *window; /* Pointer to the window. */
45789 struct dispatch_windows_s *next;
45790 struct dispatch_windows_s *prev;
45791 } dispatch_windows;
45792
45793 /* Immediate values used in an insn. */
45794 typedef struct imm_info_s
45795 {
45796 int imm;
45797 int imm32;
45798 int imm64;
45799 } imm_info;
45800
45801 static dispatch_windows *dispatch_window_list;
45802 static dispatch_windows *dispatch_window_list1;
45803
45804 /* Get dispatch group of insn. */
45805
45806 static enum dispatch_group
45807 get_mem_group (rtx insn)
45808 {
45809 enum attr_memory memory;
45810
45811 if (INSN_CODE (insn) < 0)
45812 return disp_no_group;
45813 memory = get_attr_memory (insn);
45814 if (memory == MEMORY_STORE)
45815 return disp_store;
45816
45817 if (memory == MEMORY_LOAD)
45818 return disp_load;
45819
45820 if (memory == MEMORY_BOTH)
45821 return disp_load_store;
45822
45823 return disp_no_group;
45824 }
45825
45826 /* Return true if insn is a compare instruction. */
45827
45828 static bool
45829 is_cmp (rtx insn)
45830 {
45831 enum attr_type type;
45832
45833 type = get_attr_type (insn);
45834 return (type == TYPE_TEST
45835 || type == TYPE_ICMP
45836 || type == TYPE_FCMP
45837 || GET_CODE (PATTERN (insn)) == COMPARE);
45838 }
45839
45840 /* Return true if a dispatch violation was encountered. */
45841
45842 static bool
45843 dispatch_violation (void)
45844 {
45845 if (dispatch_window_list->next)
45846 return dispatch_window_list->next->violation;
45847 return dispatch_window_list->violation;
45848 }
45849
45850 /* Return true if insn is a branch instruction. */
45851
45852 static bool
45853 is_branch (rtx insn)
45854 {
45855 return (CALL_P (insn) || JUMP_P (insn));
45856 }
45857
45858 /* Return true if insn is a prefetch instruction. */
45859
45860 static bool
45861 is_prefetch (rtx insn)
45862 {
45863 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
45864 }
45865
45866 /* This function initializes a dispatch window and the list container holding a
45867 pointer to the window. */
45868
45869 static void
45870 init_window (int window_num)
45871 {
45872 int i;
45873 dispatch_windows *new_list;
45874
45875 if (window_num == 0)
45876 new_list = dispatch_window_list;
45877 else
45878 new_list = dispatch_window_list1;
45879
45880 new_list->num_insn = 0;
45881 new_list->num_uops = 0;
45882 new_list->window_size = 0;
45883 new_list->next = NULL;
45884 new_list->prev = NULL;
45885 new_list->window_num = window_num;
45886 new_list->num_imm = 0;
45887 new_list->num_imm_32 = 0;
45888 new_list->num_imm_64 = 0;
45889 new_list->imm_size = 0;
45890 new_list->num_loads = 0;
45891 new_list->num_stores = 0;
45892 new_list->violation = false;
45893
45894 for (i = 0; i < MAX_INSN; i++)
45895 {
45896 new_list->window[i].insn = NULL;
45897 new_list->window[i].group = disp_no_group;
45898 new_list->window[i].path = no_path;
45899 new_list->window[i].byte_len = 0;
45900 new_list->window[i].imm_bytes = 0;
45901 }
45902 return;
45903 }
45904
45905 /* This function allocates and initializes a dispatch window and the
45906 list container holding a pointer to the window. */
45907
45908 static dispatch_windows *
45909 allocate_window (void)
45910 {
45911 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
45912 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
45913
45914 return new_list;
45915 }
45916
45917 /* This routine initializes the dispatch scheduling information. It
45918 initiates building dispatch scheduler tables and constructs the
45919 first dispatch window. */
45920
45921 static void
45922 init_dispatch_sched (void)
45923 {
45924 /* Allocate a dispatch list and a window. */
45925 dispatch_window_list = allocate_window ();
45926 dispatch_window_list1 = allocate_window ();
45927 init_window (0);
45928 init_window (1);
45929 }
45930
45931 /* This function returns true if a branch is detected. The end of a basic block
45932 does not have to be a branch, but here we assume only branches end a
45933 window. */
45934
45935 static bool
45936 is_end_basic_block (enum dispatch_group group)
45937 {
45938 return group == disp_branch;
45939 }
45940
45941 /* This function is called when the end of a window processing is reached. */
45942
45943 static void
45944 process_end_window (void)
45945 {
45946 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
45947 if (dispatch_window_list->next)
45948 {
45949 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
45950 gcc_assert (dispatch_window_list->window_size
45951 + dispatch_window_list1->window_size <= 48);
45952 init_window (1);
45953 }
45954 init_window (0);
45955 }
45956
45957 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
45958 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
45959 for 48 bytes of instructions. Note that these windows are not dispatch
45960 windows whose sizes are DISPATCH_WINDOW_SIZE. */
45961
45962 static dispatch_windows *
45963 allocate_next_window (int window_num)
45964 {
45965 if (window_num == 0)
45966 {
45967 if (dispatch_window_list->next)
45968 init_window (1);
45969 init_window (0);
45970 return dispatch_window_list;
45971 }
45972
45973 dispatch_window_list->next = dispatch_window_list1;
45974 dispatch_window_list1->prev = dispatch_window_list;
45975
45976 return dispatch_window_list1;
45977 }
45978
45979 /* Increment the number of immediate operands of an instruction. */
45980
45981 static int
45982 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
45983 {
45984 if (*in_rtx == 0)
45985 return 0;
45986
45987 switch (GET_CODE (*in_rtx))
45988 {
45989 case CONST:
45990 case SYMBOL_REF:
45991 case CONST_INT:
45992 (imm_values->imm)++;
45993 if (x86_64_immediate_operand (*in_rtx, SImode))
45994 (imm_values->imm32)++;
45995 else
45996 (imm_values->imm64)++;
45997 break;
45998
45999 case CONST_DOUBLE:
46000 (imm_values->imm)++;
46001 (imm_values->imm64)++;
46002 break;
46003
46004 case CODE_LABEL:
46005 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
46006 {
46007 (imm_values->imm)++;
46008 (imm_values->imm32)++;
46009 }
46010 break;
46011
46012 default:
46013 break;
46014 }
46015
46016 return 0;
46017 }
46018
46019 /* Compute number of immediate operands of an instruction. */
46020
46021 static void
46022 find_constant (rtx in_rtx, imm_info *imm_values)
46023 {
46024 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
46025 (rtx_function) find_constant_1, (void *) imm_values);
46026 }
46027
46028 /* Return the total size of immediate operands of an instruction along with
46029 the number of corresponding immediate operands. It initializes its parameters
46030 to zero before calling FIND_CONSTANT.
46031 INSN is the input instruction. IMM is the total of immediates.
46032 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
46033 bit immediates. */
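/* For instance, an insn with one 32-bit and one 64-bit immediate yields
   *IMM = 2, *IMM32 = 1, *IMM64 = 1 and a return value of 4 + 8 = 12 bytes. */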
46034
46035 static int
46036 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
46037 {
46038 imm_info imm_values = {0, 0, 0};
46039
46040 find_constant (insn, &imm_values);
46041 *imm = imm_values.imm;
46042 *imm32 = imm_values.imm32;
46043 *imm64 = imm_values.imm64;
46044 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
46045 }
46046
46047 /* This function indicates whether an instruction has any immediate
46048 operands. */
46049
46050 static bool
46051 has_immediate (rtx insn)
46052 {
46053 int num_imm_operand;
46054 int num_imm32_operand;
46055 int num_imm64_operand;
46056
46057 if (insn)
46058 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46059 &num_imm64_operand);
46060 return false;
46061 }
46062
46063 /* Return single or double path for instructions. */
46064
46065 static enum insn_path
46066 get_insn_path (rtx insn)
46067 {
46068 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
46069
46070 if ((int)path == 0)
46071 return path_single;
46072
46073 if ((int)path == 1)
46074 return path_double;
46075
46076 return path_multi;
46077 }
46078
46079 /* Return insn dispatch group. */
46080
46081 static enum dispatch_group
46082 get_insn_group (rtx insn)
46083 {
46084 enum dispatch_group group = get_mem_group (insn);
46085 if (group)
46086 return group;
46087
46088 if (is_branch (insn))
46089 return disp_branch;
46090
46091 if (is_cmp (insn))
46092 return disp_cmp;
46093
46094 if (has_immediate (insn))
46095 return disp_imm;
46096
46097 if (is_prefetch (insn))
46098 return disp_prefetch;
46099
46100 return disp_no_group;
46101 }
46102
46103 /* Count number of GROUP restricted instructions in a dispatch
46104 window WINDOW_LIST. */
46105
46106 static int
46107 count_num_restricted (rtx insn, dispatch_windows *window_list)
46108 {
46109 enum dispatch_group group = get_insn_group (insn);
46110 int imm_size;
46111 int num_imm_operand;
46112 int num_imm32_operand;
46113 int num_imm64_operand;
46114
46115 if (group == disp_no_group)
46116 return 0;
46117
46118 if (group == disp_imm)
46119 {
46120 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46121 &num_imm64_operand);
46122 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
46123 || num_imm_operand + window_list->num_imm > MAX_IMM
46124 || (num_imm32_operand > 0
46125 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
46126 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
46127 || (num_imm64_operand > 0
46128 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
46129 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
46130 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
46131 && num_imm64_operand > 0
46132 && ((window_list->num_imm_64 > 0
46133 && window_list->num_insn >= 2)
46134 || window_list->num_insn >= 3)))
46135 return BIG;
46136
46137 return 1;
46138 }
46139
46140 if ((group == disp_load_store
46141 && (window_list->num_loads >= MAX_LOAD
46142 || window_list->num_stores >= MAX_STORE))
46143 || ((group == disp_load
46144 || group == disp_prefetch)
46145 && window_list->num_loads >= MAX_LOAD)
46146 || (group == disp_store
46147 && window_list->num_stores >= MAX_STORE))
46148 return BIG;
46149
46150 return 1;
46151 }
46152
46153 /* This function returns true if insn satisfies dispatch rules on the
46154 last window scheduled. */
46155
46156 static bool
46157 fits_dispatch_window (rtx insn)
46158 {
46159 dispatch_windows *window_list = dispatch_window_list;
46160 dispatch_windows *window_list_next = dispatch_window_list->next;
46161 unsigned int num_restrict;
46162 enum dispatch_group group = get_insn_group (insn);
46163 enum insn_path path = get_insn_path (insn);
46164 int sum;
46165
46166 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
46167 instructions should be given the lowest priority in the
46168 scheduling process in the Haifa scheduler to make sure they will be
46169 scheduled in the same dispatch window as the reference to them. */
46170 if (group == disp_jcc || group == disp_cmp)
46171 return false;
46172
46173 /* Check nonrestricted. */
46174 if (group == disp_no_group || group == disp_branch)
46175 return true;
46176
46177 /* Get last dispatch window. */
46178 if (window_list_next)
46179 window_list = window_list_next;
46180
46181 if (window_list->window_num == 1)
46182 {
46183 sum = window_list->prev->window_size + window_list->window_size;
46184
46185 if (sum == 32
46186 || (min_insn_size (insn) + sum) >= 48)
46187 /* Window 1 is full. Go for next window. */
46188 return true;
46189 }
46190
46191 num_restrict = count_num_restricted (insn, window_list);
46192
46193 if (num_restrict > num_allowable_groups[group])
46194 return false;
46195
46196 /* See if it fits in the first window. */
46197 if (window_list->window_num == 0)
46198 {
46199 /* The first window should have only single and double path
46200 uops. */
46201 if (path == path_double
46202 && (window_list->num_uops + 2) > MAX_INSN)
46203 return false;
46204 else if (path != path_single)
46205 return false;
46206 }
46207 return true;
46208 }
46209
46210 /* Add an instruction INSN with NUM_UOPS micro-operations to the
46211 dispatch window WINDOW_LIST. */
46212
46213 static void
46214 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
46215 {
46216 int byte_len = min_insn_size (insn);
46217 int num_insn = window_list->num_insn;
46218 int imm_size;
46219 sched_insn_info *window = window_list->window;
46220 enum dispatch_group group = get_insn_group (insn);
46221 enum insn_path path = get_insn_path (insn);
46222 int num_imm_operand;
46223 int num_imm32_operand;
46224 int num_imm64_operand;
46225
46226 if (!window_list->violation && group != disp_cmp
46227 && !fits_dispatch_window (insn))
46228 window_list->violation = true;
46229
46230 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46231 &num_imm64_operand);
46232
46233 /* Initialize window with new instruction. */
46234 window[num_insn].insn = insn;
46235 window[num_insn].byte_len = byte_len;
46236 window[num_insn].group = group;
46237 window[num_insn].path = path;
46238 window[num_insn].imm_bytes = imm_size;
46239
46240 window_list->window_size += byte_len;
46241 window_list->num_insn = num_insn + 1;
46242 window_list->num_uops = window_list->num_uops + num_uops;
46243 window_list->imm_size += imm_size;
46244 window_list->num_imm += num_imm_operand;
46245 window_list->num_imm_32 += num_imm32_operand;
46246 window_list->num_imm_64 += num_imm64_operand;
46247
46248 if (group == disp_store)
46249 window_list->num_stores += 1;
46250 else if (group == disp_load
46251 || group == disp_prefetch)
46252 window_list->num_loads += 1;
46253 else if (group == disp_load_store)
46254 {
46255 window_list->num_stores += 1;
46256 window_list->num_loads += 1;
46257 }
46258 }
46259
46260 /* Adds a scheduled instruction, INSN, to the current dispatch window.
46261 If the total bytes of instructions or the number of instructions in
46262 the window exceeds the allowable limit, it allocates a new window. */
46263
46264 static void
46265 add_to_dispatch_window (rtx insn)
46266 {
46267 int byte_len;
46268 dispatch_windows *window_list;
46269 dispatch_windows *next_list;
46270 dispatch_windows *window0_list;
46271 enum insn_path path;
46272 enum dispatch_group insn_group;
46273 bool insn_fits;
46274 int num_insn;
46275 int num_uops;
46276 int window_num;
46277 int insn_num_uops;
46278 int sum;
46279
46280 if (INSN_CODE (insn) < 0)
46281 return;
46282
46283 byte_len = min_insn_size (insn);
46284 window_list = dispatch_window_list;
46285 next_list = window_list->next;
46286 path = get_insn_path (insn);
46287 insn_group = get_insn_group (insn);
46288
46289 /* Get the last dispatch window. */
46290 if (next_list)
46291 window_list = dispatch_window_list->next;
46292
46293 if (path == path_single)
46294 insn_num_uops = 1;
46295 else if (path == path_double)
46296 insn_num_uops = 2;
46297 else
46298 insn_num_uops = (int) path;
46299
46300 /* If current window is full, get a new window.
46301 Window number zero is full, if MAX_INSN uops are scheduled in it.
46302 Window number one is full, if window zero's bytes plus window
46303 one's bytes is 32, or if the bytes of the new instruction added
46304 to the total makes it greater than 48, or it has already MAX_INSN
46305 instructions in it. */
46306 num_insn = window_list->num_insn;
46307 num_uops = window_list->num_uops;
46308 window_num = window_list->window_num;
46309 insn_fits = fits_dispatch_window (insn);
46310
46311 if (num_insn >= MAX_INSN
46312 || num_uops + insn_num_uops > MAX_INSN
46313 || !(insn_fits))
46314 {
46315 window_num = ~window_num & 1;
46316 window_list = allocate_next_window (window_num);
46317 }
46318
46319 if (window_num == 0)
46320 {
46321 add_insn_window (insn, window_list, insn_num_uops);
46322 if (window_list->num_insn >= MAX_INSN
46323 && insn_group == disp_branch)
46324 {
46325 process_end_window ();
46326 return;
46327 }
46328 }
46329 else if (window_num == 1)
46330 {
46331 window0_list = window_list->prev;
46332 sum = window0_list->window_size + window_list->window_size;
46333 if (sum == 32
46334 || (byte_len + sum) >= 48)
46335 {
46336 process_end_window ();
46337 window_list = dispatch_window_list;
46338 }
46339
46340 add_insn_window (insn, window_list, insn_num_uops);
46341 }
46342 else
46343 gcc_unreachable ();
46344
46345 if (is_end_basic_block (insn_group))
46346 {
46347 /* End of basic block is reached; do end-basic-block processing. */
46348 process_end_window ();
46349 return;
46350 }
46351 }
46352
46353 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46354
46355 DEBUG_FUNCTION static void
46356 debug_dispatch_window_file (FILE *file, int window_num)
46357 {
46358 dispatch_windows *list;
46359 int i;
46360
46361 if (window_num == 0)
46362 list = dispatch_window_list;
46363 else
46364 list = dispatch_window_list1;
46365
46366 fprintf (file, "Window #%d:\n", list->window_num);
46367 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46368 list->num_insn, list->num_uops, list->window_size);
46369 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46370 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46371
46372 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46373 list->num_stores);
46374 fprintf (file, " insn info:\n");
46375
46376 for (i = 0; i < MAX_INSN; i++)
46377 {
46378 if (!list->window[i].insn)
46379 break;
46380 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46381 i, group_name[list->window[i].group],
46382 i, (void *)list->window[i].insn,
46383 i, list->window[i].path,
46384 i, list->window[i].byte_len,
46385 i, list->window[i].imm_bytes);
46386 }
46387 }
46388
46389 /* Print to stdout a dispatch window. */
46390
46391 DEBUG_FUNCTION void
46392 debug_dispatch_window (int window_num)
46393 {
46394 debug_dispatch_window_file (stdout, window_num);
46395 }
46396
46397 /* Print INSN dispatch information to FILE. */
46398
46399 DEBUG_FUNCTION static void
46400 debug_insn_dispatch_info_file (FILE *file, rtx insn)
46401 {
46402 int byte_len;
46403 enum insn_path path;
46404 enum dispatch_group group;
46405 int imm_size;
46406 int num_imm_operand;
46407 int num_imm32_operand;
46408 int num_imm64_operand;
46409
46410 if (INSN_CODE (insn) < 0)
46411 return;
46412
46413 byte_len = min_insn_size (insn);
46414 path = get_insn_path (insn);
46415 group = get_insn_group (insn);
46416 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46417 &num_imm64_operand);
46418
46419 fprintf (file, " insn info:\n");
46420 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46421 group_name[group], path, byte_len);
46422 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46423 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46424 }
46425
46426 /* Print to STDERR the status of the ready list with respect to
46427 dispatch windows. */
46428
46429 DEBUG_FUNCTION void
46430 debug_ready_dispatch (void)
46431 {
46432 int i;
46433 int no_ready = number_in_ready ();
46434
46435 fprintf (stdout, "Number of ready: %d\n", no_ready);
46436
46437 for (i = 0; i < no_ready; i++)
46438 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46439 }
46440
46441 /* This routine is the driver of the dispatch scheduler. */
46442
46443 static void
46444 do_dispatch (rtx_insn *insn, int mode)
46445 {
46446 if (mode == DISPATCH_INIT)
46447 init_dispatch_sched ();
46448 else if (mode == ADD_TO_DISPATCH_WINDOW)
46449 add_to_dispatch_window (insn);
46450 }
46451
46452 /* Return TRUE if Dispatch Scheduling is supported. */
46453
46454 static bool
46455 has_dispatch (rtx_insn *insn, int action)
46456 {
46457 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
46458 && flag_dispatch_scheduler)
46459 switch (action)
46460 {
46461 default:
46462 return false;
46463
46464 case IS_DISPATCH_ON:
46465 return true;
46466 break;
46467
46468 case IS_CMP:
46469 return is_cmp (insn);
46470
46471 case DISPATCH_VIOLATION:
46472 return dispatch_violation ();
46473
46474 case FITS_DISPATCH_WINDOW:
46475 return fits_dispatch_window (insn);
46476 }
46477
46478 return false;
46479 }
46480
46481 /* Implementation of reassociation_width target hook used by
46482 reassoc phase to identify parallelism level in reassociated
46483 tree. The statement's tree_code is passed in OPC. The argument type
46484 is passed in MODE.
46485
46486 Currently parallel reassociation is enabled for Atom
46487 processors only and we set reassociation width to be 2
46488 because Atom may issue up to 2 instructions per cycle.
46489
46490 Return value should be fixed if parallel reassociation is
46491 enabled for other processors. */
46492
46493 static int
46494 ix86_reassociation_width (unsigned int, enum machine_mode mode)
46495 {
46496 int res = 1;
46497
46498 /* Vector part. */
46499 if (VECTOR_MODE_P (mode))
46500 {
46501 if (TARGET_VECTOR_PARALLEL_EXECUTION)
46502 return 2;
46503 else
46504 return 1;
46505 }
46506
46507 /* Scalar part. */
46508 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
46509 res = 2;
46510 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
46511 res = 2;
46512
46513 return res;
46514 }
46515
46516 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
46517 place emms and femms instructions. */
46518
46519 static enum machine_mode
46520 ix86_preferred_simd_mode (enum machine_mode mode)
46521 {
46522 if (!TARGET_SSE)
46523 return word_mode;
46524
46525 switch (mode)
46526 {
46527 case QImode:
46528 return TARGET_AVX512BW ? V64QImode :
46529 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
46530 case HImode:
46531 return TARGET_AVX512BW ? V32HImode :
46532 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
46533 case SImode:
46534 return TARGET_AVX512F ? V16SImode :
46535 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
46536 case DImode:
46537 return TARGET_AVX512F ? V8DImode :
46538 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
46539
46540 case SFmode:
46541 if (TARGET_AVX512F)
46542 return V16SFmode;
46543 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46544 return V8SFmode;
46545 else
46546 return V4SFmode;
46547
46548 case DFmode:
46549 if (!TARGET_VECTORIZE_DOUBLE)
46550 return word_mode;
46551 else if (TARGET_AVX512F)
46552 return V8DFmode;
46553 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46554 return V4DFmode;
46555 else if (TARGET_SSE2)
46556 return V2DFmode;
46557 /* FALLTHRU */
46558
46559 default:
46560 return word_mode;
46561 }
46562 }
46563
46564 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
46565 vectors. If AVX512F is enabled then try vectorizing with 512bit,
46566 256bit and 128bit vectors. */
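/* For example, with AVX512F the return value is 64 | 32 | 16 = 0x70, so
   the vectorizer may try 64-, 32- and 16-byte vector sizes. */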
46567
46568 static unsigned int
46569 ix86_autovectorize_vector_sizes (void)
46570 {
46571 return TARGET_AVX512F ? 64 | 32 | 16 :
46572 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
46573 }
46574
46575 \f
46576
46577 /* Return class of registers which could be used for pseudo of MODE
46578 and of class RCLASS for spilling instead of memory. Return NO_REGS
46579 if it is not possible or not profitable. */
46580 static reg_class_t
46581 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
46582 {
46583 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
46584 && (mode == SImode || (TARGET_64BIT && mode == DImode))
46585 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
46586 return ALL_SSE_REGS;
46587 return NO_REGS;
46588 }
46589
46590 /* Implement targetm.vectorize.init_cost. */
46591
46592 static void *
46593 ix86_init_cost (struct loop *)
46594 {
46595 unsigned *cost = XNEWVEC (unsigned, 3);
46596 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
46597 return cost;
46598 }
46599
46600 /* Implement targetm.vectorize.add_stmt_cost. */
46601
46602 static unsigned
46603 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
46604 struct _stmt_vec_info *stmt_info, int misalign,
46605 enum vect_cost_model_location where)
46606 {
46607 unsigned *cost = (unsigned *) data;
46608 unsigned retval = 0;
46609
46610 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
46611 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
46612
46613 /* Statements in an inner loop relative to the loop being
46614 vectorized are weighted more heavily. The value here is
46615 arbitrary and could potentially be improved with analysis. */
46616 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
46617 count *= 50; /* FIXME. */
46618
46619 retval = (unsigned) (count * stmt_cost);
46620
46621 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
46622 for Silvermont, as it has an out-of-order integer pipeline and can execute
46623 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
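/* E.g. a statement cost of 4 becomes (4 * 17) / 10 = 6 under this scaling. */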
46624 if (TARGET_SILVERMONT || TARGET_INTEL)
46625 if (stmt_info && stmt_info->stmt)
46626 {
46627 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
46628 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
46629 retval = (retval * 17) / 10;
46630 }
46631
46632 cost[where] += retval;
46633
46634 return retval;
46635 }
46636
46637 /* Implement targetm.vectorize.finish_cost. */
46638
46639 static void
46640 ix86_finish_cost (void *data, unsigned *prologue_cost,
46641 unsigned *body_cost, unsigned *epilogue_cost)
46642 {
46643 unsigned *cost = (unsigned *) data;
46644 *prologue_cost = cost[vect_prologue];
46645 *body_cost = cost[vect_body];
46646 *epilogue_cost = cost[vect_epilogue];
46647 }
46648
46649 /* Implement targetm.vectorize.destroy_cost_data. */
46650
46651 static void
46652 ix86_destroy_cost_data (void *data)
46653 {
46654 free (data);
46655 }
46656
46657 /* Validate target specific memory model bits in VAL. */
46658
46659 static unsigned HOST_WIDE_INT
46660 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
46661 {
46662 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
46663 bool strong;
46664
46665 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
46666 |MEMMODEL_MASK)
46667 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
46668 {
46669 warning (OPT_Winvalid_memory_model,
46670 "Unknown architecture specific memory model");
46671 return MEMMODEL_SEQ_CST;
46672 }
46673 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
46674 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
46675 {
46676 warning (OPT_Winvalid_memory_model,
46677 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
46678 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
46679 }
46680 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
46681 {
46682 warning (OPT_Winvalid_memory_model,
46683 "HLE_RELEASE not used with RELEASE or stronger memory model");
46684 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
46685 }
46686 return val;
46687 }
46688
46689 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
46690 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
46691 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
46692 or number of vecsize_mangle variants that should be emitted. */
46693
46694 static int
46695 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
46696 struct cgraph_simd_clone *clonei,
46697 tree base_type, int num)
46698 {
46699 int ret = 1;
46700
46701 if (clonei->simdlen
46702 && (clonei->simdlen < 2
46703 || clonei->simdlen > 16
46704 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
46705 {
46706 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46707 "unsupported simdlen %d", clonei->simdlen);
46708 return 0;
46709 }
46710
46711 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
46712 if (TREE_CODE (ret_type) != VOID_TYPE)
46713 switch (TYPE_MODE (ret_type))
46714 {
46715 case QImode:
46716 case HImode:
46717 case SImode:
46718 case DImode:
46719 case SFmode:
46720 case DFmode:
46721 /* case SCmode: */
46722 /* case DCmode: */
46723 break;
46724 default:
46725 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46726 "unsupported return type %qT for simd\n", ret_type);
46727 return 0;
46728 }
46729
46730 tree t;
46731 int i;
46732
46733 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
46734 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
46735 switch (TYPE_MODE (TREE_TYPE (t)))
46736 {
46737 case QImode:
46738 case HImode:
46739 case SImode:
46740 case DImode:
46741 case SFmode:
46742 case DFmode:
46743 /* case SCmode: */
46744 /* case DCmode: */
46745 break;
46746 default:
46747 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46748 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
46749 return 0;
46750 }
46751
46752 if (clonei->cilk_elemental)
46753 {
46754 /* Parse the processor clause here. If not present, default to 'b'. */
46755 clonei->vecsize_mangle = 'b';
46756 }
46757 else if (!TREE_PUBLIC (node->decl))
46758 {
46759 /* If the function isn't exported, we can pick up just one ISA
46760 for the clones. */
46761 if (TARGET_AVX2)
46762 clonei->vecsize_mangle = 'd';
46763 else if (TARGET_AVX)
46764 clonei->vecsize_mangle = 'c';
46765 else
46766 clonei->vecsize_mangle = 'b';
46767 ret = 1;
46768 }
46769 else
46770 {
46771 clonei->vecsize_mangle = "bcd"[num];
46772 ret = 3;
46773 }
46774 switch (clonei->vecsize_mangle)
46775 {
46776 case 'b':
46777 clonei->vecsize_int = 128;
46778 clonei->vecsize_float = 128;
46779 break;
46780 case 'c':
46781 clonei->vecsize_int = 128;
46782 clonei->vecsize_float = 256;
46783 break;
46784 case 'd':
46785 clonei->vecsize_int = 256;
46786 clonei->vecsize_float = 256;
46787 break;
46788 }
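/* Worked example of the default simdlen computed below: for mangle 'b'
   (128-bit vectors) and a double base type, simdlen = 128 / 64 = 2; for
   mangle 'd' (256-bit) and a char base type, 256 / 8 = 32 is capped to 16. */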
46789 if (clonei->simdlen == 0)
46790 {
46791 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
46792 clonei->simdlen = clonei->vecsize_int;
46793 else
46794 clonei->simdlen = clonei->vecsize_float;
46795 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
46796 if (clonei->simdlen > 16)
46797 clonei->simdlen = 16;
46798 }
46799 return ret;
46800 }
46801
46802 /* Add target attribute to SIMD clone NODE if needed. */
46803
46804 static void
46805 ix86_simd_clone_adjust (struct cgraph_node *node)
46806 {
46807 const char *str = NULL;
46808 gcc_assert (node->decl == cfun->decl);
46809 switch (node->simdclone->vecsize_mangle)
46810 {
46811 case 'b':
46812 if (!TARGET_SSE2)
46813 str = "sse2";
46814 break;
46815 case 'c':
46816 if (!TARGET_AVX)
46817 str = "avx";
46818 break;
46819 case 'd':
46820 if (!TARGET_AVX2)
46821 str = "avx2";
46822 break;
46823 default:
46824 gcc_unreachable ();
46825 }
46826 if (str == NULL)
46827 return;
46828 push_cfun (NULL);
46829 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
46830 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
46831 gcc_assert (ok);
46832 pop_cfun ();
46833 ix86_previous_fndecl = NULL_TREE;
46834 ix86_set_current_function (node->decl);
46835 }
46836
46837 /* If SIMD clone NODE can't be used in a vectorized loop
46838 in current function, return -1, otherwise return a badness of using it
46839 (0 if it is most desirable from vecsize_mangle point of view, 1
46840 slightly less desirable, etc.). */
46841
46842 static int
46843 ix86_simd_clone_usable (struct cgraph_node *node)
46844 {
46845 switch (node->simdclone->vecsize_mangle)
46846 {
46847 case 'b':
46848 if (!TARGET_SSE2)
46849 return -1;
46850 if (!TARGET_AVX)
46851 return 0;
46852 return TARGET_AVX2 ? 2 : 1;
46853 case 'c':
46854 if (!TARGET_AVX)
46855 return -1;
46856 return TARGET_AVX2 ? 1 : 0;
46857 break;
46858 case 'd':
46859 if (!TARGET_AVX2)
46860 return -1;
46861 return 0;
46862 default:
46863 gcc_unreachable ();
46864 }
46865 }
46866
46867 /* This function counts the number of memory references.
46868 This value determines the unrolling factor for
46869 bdver3 and bdver4 architectures. */
46870
46871 static int
46872 ix86_loop_memcount (rtx *x, unsigned *mem_count)
46873 {
46874 if (*x != NULL_RTX && MEM_P (*x))
46875 {
46876 enum machine_mode mode;
46877 unsigned int n_words;
46878
46879 mode = GET_MODE (*x);
46880 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46881
46882 if (n_words > 4)
46883 (*mem_count) += 2;
46884 else
46885 (*mem_count) += 1;
46886 }
46887 return 0;
46888 }
46889
46890 /* This function adjusts the unroll factor based on
46891 the hardware capabilities. For example, bdver3 has
46892 a loop buffer which makes unrolling of smaller
46893 loops less important. This function decides the
46894 unroll factor using the number of memory references
46895 (value 32 is used) as a heuristic. */
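/* E.g. a loop body with 8 counted memory references gets an unroll factor
   of 32 / 8 = 4, while more than 32 references leave NUNROLL unchanged. */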
46896
46897 static unsigned
46898 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
46899 {
46900 basic_block *bbs;
46901 rtx_insn *insn;
46902 unsigned i;
46903 unsigned mem_count = 0;
46904
46905 if (!TARGET_ADJUST_UNROLL)
46906 return nunroll;
46907
46908 /* Count the number of memory references within the loop body. */
46909 bbs = get_loop_body (loop);
46910 for (i = 0; i < loop->num_nodes; i++)
46911 {
46912 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
46913 if (NONDEBUG_INSN_P (insn))
46914 for_each_rtx_in_insn (&insn, (rtx_function) ix86_loop_memcount,
46915 &mem_count);
46916 }
46917 free (bbs);
46918
46919 if (mem_count && mem_count <= 32)
46920 return 32 / mem_count;
46921
46922 return nunroll;
46923 }
46924
46925
46926 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
46927
46928 static bool
46929 ix86_float_exceptions_rounding_supported_p (void)
46930 {
46931 /* For x87 floating point with standard excess precision handling,
46932 there is no adddf3 pattern (since x87 floating point only has
46933 XFmode operations) so the default hook implementation gets this
46934 wrong. */
46935 return TARGET_80387 || TARGET_SSE_MATH;
46936 }
46937
46938 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
46939
46940 static void
46941 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
46942 {
46943 if (!TARGET_80387 && !TARGET_SSE_MATH)
46944 return;
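/* The trees built below follow the hold/clear/update protocol of
   TARGET_ATOMIC_ASSIGN_EXPAND_FENV: *HOLD saves the FP environment and
   clears pending exceptions, *CLEAR re-clears the exception flags, and
   *UPDATE collects the newly raised flags, restores the saved environment
   and passes the flags to __atomic_feraiseexcept. */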
46945 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
46946 if (TARGET_80387)
46947 {
46948 tree fenv_index_type = build_index_type (size_int (6));
46949 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
46950 tree fenv_var = create_tmp_var (fenv_type, NULL);
46951 mark_addressable (fenv_var);
46952 tree fenv_ptr = build_pointer_type (fenv_type);
46953 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
46954 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
46955 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
46956 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
46957 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
46958 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
46959 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
46960 tree hold_fnclex = build_call_expr (fnclex, 0);
46961 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
46962 hold_fnclex);
46963 *clear = build_call_expr (fnclex, 0);
46964 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
46965 tree fnstsw_call = build_call_expr (fnstsw, 0);
46966 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
46967 sw_var, fnstsw_call);
46968 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
46969 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
46970 exceptions_var, exceptions_x87);
46971 *update = build2 (COMPOUND_EXPR, integer_type_node,
46972 sw_mod, update_mod);
46973 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
46974 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
46975 }
46976 if (TARGET_SSE_MATH)
46977 {
46978 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
46979 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
46980 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
46981 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
46982 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
46983 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
46984 mxcsr_orig_var, stmxcsr_hold_call);
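/* In the MXCSR copy used while the environment is held, set all exception
   mask bits (0x1f80, bits 7-12) and clear the exception flag bits
   (bits 0-5, hence the mask 0xffffffc0). */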
46985 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
46986 mxcsr_orig_var,
46987 build_int_cst (unsigned_type_node, 0x1f80));
46988 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
46989 build_int_cst (unsigned_type_node, 0xffffffc0));
46990 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
46991 mxcsr_mod_var, hold_mod_val);
46992 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46993 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
46994 hold_assign_orig, hold_assign_mod);
46995 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
46996 ldmxcsr_hold_call);
46997 if (*hold)
46998 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
46999 else
47000 *hold = hold_all;
47001 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
47002 if (*clear)
47003 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
47004 ldmxcsr_clear_call);
47005 else
47006 *clear = ldmxcsr_clear_call;
47007 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
47008 tree exceptions_sse = fold_convert (integer_type_node,
47009 stxmcsr_update_call);
47010 if (*update)
47011 {
47012 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
47013 exceptions_var, exceptions_sse);
47014 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
47015 exceptions_var, exceptions_mod);
47016 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
47017 exceptions_assign);
47018 }
47019 else
47020 *update = build2 (MODIFY_EXPR, integer_type_node,
47021 exceptions_var, exceptions_sse);
47022 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
47023 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
47024 ldmxcsr_update_call);
47025 }
47026 tree atomic_feraiseexcept
47027 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
47028 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
47029 1, exceptions_var);
47030 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
47031 atomic_feraiseexcept_call);
47032 }
47033
47034 /* Initialize the GCC target structure. */
47035 #undef TARGET_RETURN_IN_MEMORY
47036 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
47037
47038 #undef TARGET_LEGITIMIZE_ADDRESS
47039 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
47040
47041 #undef TARGET_ATTRIBUTE_TABLE
47042 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
47043 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
47044 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
47045 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47046 # undef TARGET_MERGE_DECL_ATTRIBUTES
47047 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
47048 #endif
47049
47050 #undef TARGET_COMP_TYPE_ATTRIBUTES
47051 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
47052
47053 #undef TARGET_INIT_BUILTINS
47054 #define TARGET_INIT_BUILTINS ix86_init_builtins
47055 #undef TARGET_BUILTIN_DECL
47056 #define TARGET_BUILTIN_DECL ix86_builtin_decl
47057 #undef TARGET_EXPAND_BUILTIN
47058 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
47059
47060 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
47061 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
47062 ix86_builtin_vectorized_function
47063
47064 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
47065 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
47066
47067 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
47068 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
47069
47070 #undef TARGET_VECTORIZE_BUILTIN_GATHER
47071 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
47072
47073 #undef TARGET_BUILTIN_RECIPROCAL
47074 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
47075
47076 #undef TARGET_ASM_FUNCTION_EPILOGUE
47077 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
47078
47079 #undef TARGET_ENCODE_SECTION_INFO
47080 #ifndef SUBTARGET_ENCODE_SECTION_INFO
47081 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
47082 #else
47083 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
47084 #endif
47085
47086 #undef TARGET_ASM_OPEN_PAREN
47087 #define TARGET_ASM_OPEN_PAREN ""
47088 #undef TARGET_ASM_CLOSE_PAREN
47089 #define TARGET_ASM_CLOSE_PAREN ""
47090
47091 #undef TARGET_ASM_BYTE_OP
47092 #define TARGET_ASM_BYTE_OP ASM_BYTE
47093
47094 #undef TARGET_ASM_ALIGNED_HI_OP
47095 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
47096 #undef TARGET_ASM_ALIGNED_SI_OP
47097 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
47098 #ifdef ASM_QUAD
47099 #undef TARGET_ASM_ALIGNED_DI_OP
47100 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
47101 #endif
47102
47103 #undef TARGET_PROFILE_BEFORE_PROLOGUE
47104 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
47105
47106 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
47107 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
47108
47109 #undef TARGET_ASM_UNALIGNED_HI_OP
47110 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
47111 #undef TARGET_ASM_UNALIGNED_SI_OP
47112 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
47113 #undef TARGET_ASM_UNALIGNED_DI_OP
47114 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
47115
47116 #undef TARGET_PRINT_OPERAND
47117 #define TARGET_PRINT_OPERAND ix86_print_operand
47118 #undef TARGET_PRINT_OPERAND_ADDRESS
47119 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
47120 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
47121 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
47122 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
47123 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
47124
47125 #undef TARGET_SCHED_INIT_GLOBAL
47126 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
47127 #undef TARGET_SCHED_ADJUST_COST
47128 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
47129 #undef TARGET_SCHED_ISSUE_RATE
47130 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
47131 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
47132 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
47133 ia32_multipass_dfa_lookahead
47134 #undef TARGET_SCHED_MACRO_FUSION_P
47135 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
47136 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
47137 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
47138
47139 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
47140 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
47141
47142 #undef TARGET_MEMMODEL_CHECK
47143 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
47144
47145 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
47146 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
47147
47148 #ifdef HAVE_AS_TLS
47149 #undef TARGET_HAVE_TLS
47150 #define TARGET_HAVE_TLS true
47151 #endif
47152 #undef TARGET_CANNOT_FORCE_CONST_MEM
47153 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
47154 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
47155 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
47156
47157 #undef TARGET_DELEGITIMIZE_ADDRESS
47158 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
47159
47160 #undef TARGET_MS_BITFIELD_LAYOUT_P
47161 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
47162
47163 #if TARGET_MACHO
47164 #undef TARGET_BINDS_LOCAL_P
47165 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
47166 #endif
47167 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47168 #undef TARGET_BINDS_LOCAL_P
47169 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
47170 #endif
47171
47172 #undef TARGET_ASM_OUTPUT_MI_THUNK
47173 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
47174 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
47175 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
47176
47177 #undef TARGET_ASM_FILE_START
47178 #define TARGET_ASM_FILE_START x86_file_start
47179
47180 #undef TARGET_OPTION_OVERRIDE
47181 #define TARGET_OPTION_OVERRIDE ix86_option_override
47182
47183 #undef TARGET_REGISTER_MOVE_COST
47184 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
47185 #undef TARGET_MEMORY_MOVE_COST
47186 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
47187 #undef TARGET_RTX_COSTS
47188 #define TARGET_RTX_COSTS ix86_rtx_costs
47189 #undef TARGET_ADDRESS_COST
47190 #define TARGET_ADDRESS_COST ix86_address_cost
47191
47192 #undef TARGET_FIXED_CONDITION_CODE_REGS
47193 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
47194 #undef TARGET_CC_MODES_COMPATIBLE
47195 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
47196
47197 #undef TARGET_MACHINE_DEPENDENT_REORG
47198 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
47199
47200 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
47201 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
47202
47203 #undef TARGET_BUILD_BUILTIN_VA_LIST
47204 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
47205
47206 #undef TARGET_FOLD_BUILTIN
47207 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
47208
47209 #undef TARGET_COMPARE_VERSION_PRIORITY
47210 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
47211
47212 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
47213 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
47214 ix86_generate_version_dispatcher_body
47215
47216 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
47217 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
47218 ix86_get_function_versions_dispatcher
47219
47220 #undef TARGET_ENUM_VA_LIST_P
47221 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
47222
47223 #undef TARGET_FN_ABI_VA_LIST
47224 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
47225
47226 #undef TARGET_CANONICAL_VA_LIST_TYPE
47227 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
47228
47229 #undef TARGET_EXPAND_BUILTIN_VA_START
47230 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
47231
47232 #undef TARGET_MD_ASM_CLOBBERS
47233 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
47234
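/* Calling-convention hooks: prototype promotion, varargs setup, argument
   passing and alignment, incoming stack boundary, static chain, trampoline
   initialization and callee stack popping.  */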
47235 #undef TARGET_PROMOTE_PROTOTYPES
47236 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
47237 #undef TARGET_SETUP_INCOMING_VARARGS
47238 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
47239 #undef TARGET_MUST_PASS_IN_STACK
47240 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
47241 #undef TARGET_FUNCTION_ARG_ADVANCE
47242 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
47243 #undef TARGET_FUNCTION_ARG
47244 #define TARGET_FUNCTION_ARG ix86_function_arg
47245 #undef TARGET_FUNCTION_ARG_BOUNDARY
47246 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
47247 #undef TARGET_PASS_BY_REFERENCE
47248 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
47249 #undef TARGET_INTERNAL_ARG_POINTER
47250 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
47251 #undef TARGET_UPDATE_STACK_BOUNDARY
47252 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
47253 #undef TARGET_GET_DRAP_RTX
47254 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
47255 #undef TARGET_STRICT_ARGUMENT_NAMING
47256 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
47257 #undef TARGET_STATIC_CHAIN
47258 #define TARGET_STATIC_CHAIN ix86_static_chain
47259 #undef TARGET_TRAMPOLINE_INIT
47260 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
47261 #undef TARGET_RETURN_POPS_ARGS
47262 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
47263
47264 #undef TARGET_LEGITIMATE_COMBINED_INSN
47265 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
47266
47267 #undef TARGET_ASAN_SHADOW_OFFSET
47268 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
47269
47270 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
47271 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
47272
47273 #undef TARGET_SCALAR_MODE_SUPPORTED_P
47274 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
47275
47276 #undef TARGET_VECTOR_MODE_SUPPORTED_P
47277 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
47278
47279 #undef TARGET_C_MODE_FOR_SUFFIX
47280 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
47281
47282 #ifdef HAVE_AS_TLS
47283 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
47284 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
47285 #endif
47286
47287 #ifdef SUBTARGET_INSERT_ATTRIBUTES
47288 #undef TARGET_INSERT_ATTRIBUTES
47289 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
47290 #endif
47291
47292 #undef TARGET_MANGLE_TYPE
47293 #define TARGET_MANGLE_TYPE ix86_mangle_type
47294
47295 #if !TARGET_MACHO
47296 #undef TARGET_STACK_PROTECT_FAIL
47297 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
47298 #endif
47299
47300 #undef TARGET_FUNCTION_VALUE
47301 #define TARGET_FUNCTION_VALUE ix86_function_value
47302
47303 #undef TARGET_FUNCTION_VALUE_REGNO_P
47304 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
47305
47306 #undef TARGET_PROMOTE_FUNCTION_MODE
47307 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
47308
47309 #undef TARGET_MEMBER_TYPE_FORCES_BLK
47310 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
47311
47312 #undef TARGET_INSTANTIATE_DECLS
47313 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
47314
47315 #undef TARGET_SECONDARY_RELOAD
47316 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
47317
47318 #undef TARGET_CLASS_MAX_NREGS
47319 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
47320
47321 #undef TARGET_PREFERRED_RELOAD_CLASS
47322 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
47323 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
47324 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
47325 #undef TARGET_CLASS_LIKELY_SPILLED_P
47326 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
47327
47328 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
47329 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
47330 ix86_builtin_vectorization_cost
47331 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
47332 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
47333 ix86_vectorize_vec_perm_const_ok
47334 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
47335 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
47336 ix86_preferred_simd_mode
47337 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
47338 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
47339 ix86_autovectorize_vector_sizes
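/* The next four hooks implement the vectorizer's cost model: init_cost
   creates per-loop cost data, add_stmt_cost accumulates the cost of each
   candidate statement, finish_cost reports the accumulated prologue, body
   and epilogue costs, and destroy_cost_data releases the data.  */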
47340 #undef TARGET_VECTORIZE_INIT_COST
47341 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
47342 #undef TARGET_VECTORIZE_ADD_STMT_COST
47343 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
47344 #undef TARGET_VECTORIZE_FINISH_COST
47345 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
47346 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
47347 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
47348
47349 #undef TARGET_SET_CURRENT_FUNCTION
47350 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
47351
47352 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
47353 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
47354
47355 #undef TARGET_OPTION_SAVE
47356 #define TARGET_OPTION_SAVE ix86_function_specific_save
47357
47358 #undef TARGET_OPTION_RESTORE
47359 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
47360
47361 #undef TARGET_OPTION_PRINT
47362 #define TARGET_OPTION_PRINT ix86_function_specific_print
47363
47364 #undef TARGET_OPTION_FUNCTION_VERSIONS
47365 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
47366
47367 #undef TARGET_CAN_INLINE_P
47368 #define TARGET_CAN_INLINE_P ix86_can_inline_p
47369
47370 #undef TARGET_EXPAND_TO_RTL_HOOK
47371 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
47372
47373 #undef TARGET_LEGITIMATE_ADDRESS_P
47374 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
47375
47376 #undef TARGET_LRA_P
47377 #define TARGET_LRA_P hook_bool_void_true
47378
47379 #undef TARGET_REGISTER_PRIORITY
47380 #define TARGET_REGISTER_PRIORITY ix86_register_priority
47381
47382 #undef TARGET_REGISTER_USAGE_LEVELING_P
47383 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
47384
47385 #undef TARGET_LEGITIMATE_CONSTANT_P
47386 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
47387
47388 #undef TARGET_FRAME_POINTER_REQUIRED
47389 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
47390
47391 #undef TARGET_CAN_ELIMINATE
47392 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
47393
47394 #undef TARGET_EXTRA_LIVE_ON_ENTRY
47395 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
47396
47397 #undef TARGET_ASM_CODE_END
47398 #define TARGET_ASM_CODE_END ix86_code_end
47399
47400 #undef TARGET_CONDITIONAL_REGISTER_USAGE
47401 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
47402
47403 #if TARGET_MACHO
47404 #undef TARGET_INIT_LIBFUNCS
47405 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
47406 #endif
47407
47408 #undef TARGET_LOOP_UNROLL_ADJUST
47409 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
47410
47411 #undef TARGET_SPILL_CLASS
47412 #define TARGET_SPILL_CLASS ix86_spill_class
47413
47414 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
47415 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
47416 ix86_simd_clone_compute_vecsize_and_simdlen
47417
47418 #undef TARGET_SIMD_CLONE_ADJUST
47419 #define TARGET_SIMD_CLONE_ADJUST \
47420 ix86_simd_clone_adjust
47421
47422 #undef TARGET_SIMD_CLONE_USABLE
47423 #define TARGET_SIMD_CLONE_USABLE \
47424 ix86_simd_clone_usable
47425
47426 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
47427 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
47428 ix86_float_exceptions_rounding_supported_p
47429
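/* Mode-switching hooks used by the optimize_mode_switching pass (on i386
   this drives, for example, vzeroupper insertion and x87 rounding-mode
   changes).  */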
47430 #undef TARGET_MODE_EMIT
47431 #define TARGET_MODE_EMIT ix86_emit_mode_set
47432
47433 #undef TARGET_MODE_NEEDED
47434 #define TARGET_MODE_NEEDED ix86_mode_needed
47435
47436 #undef TARGET_MODE_AFTER
47437 #define TARGET_MODE_AFTER ix86_mode_after
47438
47439 #undef TARGET_MODE_ENTRY
47440 #define TARGET_MODE_ENTRY ix86_mode_entry
47441
47442 #undef TARGET_MODE_EXIT
47443 #define TARGET_MODE_EXIT ix86_mode_exit
47444
47445 #undef TARGET_MODE_PRIORITY
47446 #define TARGET_MODE_PRIORITY ix86_mode_priority
47447
47448 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
47449 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
47450
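/* TARGET_INITIALIZER (provided by target-def.h, generated from target.def)
   expands to an aggregate initializer that maps each TARGET_* macro above
   to the corresponding field of the target hook structure, so generic code
   can dispatch through targetm; for example,
   targetm.legitimize_address (x, oldx, mode) ends up calling
   ix86_legitimize_address.  */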
47451 struct gcc_target targetm = TARGET_INITIALIZER;
47452 \f
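/* gt-i386.h is generated by gengtype and provides the garbage-collector
   root tables for the GTY-marked static data in this file.  */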
47453 #include "gt-i386.h"